blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp Source File#
blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp
Go to the documentation of this file.
168 static constexpr auto async_vmcnt_encoding = 3952 + async_vmcnt % 16 + async_vmcnt / 16 * 16384;
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_static_buffer(Number< N >)
Definition static_buffer.hpp:186
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static __device__ auto GetWaveIdx()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:118
float AccType
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:36
ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, A_K1, B_K1, A_K1, B_K1, MRepeat, NRepeat, MPerXDL, NPerXDL, xdlops_gemm.KPerXdlops,(packed_size_v< ComputeTypeA > > 1||packed_size_v< ComputeTypeB > > 1)> HotLoopInstList
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:88
ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_M_N &c_grid_desc_m_n)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:344
static constexpr index_t AMmaKStride
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:68
static constexpr index_t MWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:49
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:220
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:269
static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_n3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:382
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:297
decltype(CalculateAThreadOriginDataIndex()) Tuple5
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:184
static constexpr auto I0
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:41
static constexpr auto I1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
static constexpr index_t KRepeat
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:79
static __device__ auto CalculateCThreadOriginDataIndex(Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:154
static constexpr index_t MXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:84
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:283
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_G_M_N &c_grid_desc_g_m_n)
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:361
static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_m3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:381
static constexpr index_t KThreadChunk
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:74
static constexpr index_t KXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:86
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:234
__host__ __device__ constexpr auto & GetCThreadBuffer()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:116
BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35
static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
__host__ __device__ BlockwiseGemmXdlops_mx_pipeline_base(Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex())
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:204
static constexpr index_t BMmaKStride
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:69
static constexpr index_t NXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:85
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:327
static constexpr index_t A_K1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:55
static constexpr index_t NWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:50
static constexpr index_t WaveSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:51
static constexpr index_t BPackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:39
static constexpr auto scale_pack_size_a
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:182
static constexpr auto I1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
static constexpr auto scale_pack_size_b
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:183
static constexpr index_t MXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:84
static constexpr index_t MWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:49
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_m3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:381
static constexpr auto I0
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:41
static constexpr index_t KThreadChunk
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:74
__device__ void Run(const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_bufs, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, const AScaleGridDesc &a_scale_grid_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const BScaleGridDesc &b_scale_grid_desc, BScaleThreadTransfer &b_scale_thread_copy, const BScaleGridBuffer &b_scale_grid_buf, index_t num_loop) const
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:261
typename Base::ComputeTypeB ComputeTypeB
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:157
AThreadCopy a_thread_copy_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:423
BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Base
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:102
static constexpr auto b_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:392
static constexpr auto c_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:396
static constexpr index_t GlobalBufferNum
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:161
static constexpr index_t KXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:86
static constexpr auto b_scale_thread_desc
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:878
static constexpr auto b_scale_thread_vec_size
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:189
e8m0_bexp_t mx_scale_t
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:181
static constexpr index_t NXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:85
static constexpr index_t PrefillStages
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:160
static __device__ constexpr auto HotLoopScheduler()
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:201
static constexpr auto async_vmcnt
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:166
static constexpr index_t KRepeat
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:79
static constexpr auto ScalesPerXdlopsRun
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:174
static __host__ constexpr TailNumber BlockLoopTailNum(index_t num_loop)
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:196
static constexpr auto async_vmcnt_encoding
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:168
static constexpr auto ScalesPerKBlockSize
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:170
typename Base::ComputeTypeA ComputeTypeA
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:156
static constexpr index_t PrefetchStages
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:159
static constexpr auto a_scale_thread_vec_size
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:188
static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
static constexpr auto a_thread_desc_
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:388
static constexpr index_t HotloopLocalBufSwitch
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:162
static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
static constexpr auto num_buffer_load_b_scale
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:165
typename Base::Tuple5 Tuple5
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:155
static constexpr auto ScalesPerXdlopsRunPerThread
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:178
static constexpr index_t BPackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:39
static constexpr auto a_scale_thread_desc
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:871
typename Base::AccType AccType
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:154
static constexpr auto num_buffer_load_a_scale
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:164
static __host__ constexpr bool BlockHasHotloop(index_t num_loop)
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:191
static constexpr index_t NWaves
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:50
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_mx_moe_v1.hpp:38
static constexpr index_t A_LDS_Read_Inst_Num
Definition blockwise_gemm_pipeline_xdlops.hpp:49
static constexpr index_t A_Buffer_Load_Inst_Num
Definition blockwise_gemm_pipeline_xdlops.hpp:39
static constexpr index_t B_Buffer_Load_Inst_Num
Definition blockwise_gemm_pipeline_xdlops.hpp:41
Unsigned representation of a conventional biased Float32 exponent.
Definition utility/e8m0.hpp:26
Definition functional2.hpp:33
Definition dtype_vector.hpp:10