ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
Protected Attributes |
Static Protected Attributes |
List of all members
ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
#include <blockwise_gemm_pipeline_xdlops_v1_mx.hpp>
Inheritance diagram for ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >:
Public Types | |
| using | Base |
| using | AccType = typename Base::AccType |
| using | Tuple5 = typename Base::Tuple5 |
| using | ComputeTypeA = typename Base::ComputeTypeA |
| using | ComputeTypeB = typename Base::ComputeTypeB |
| using | mx_scale_t = e8m0_bexp_t |
| Public Types inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| using | ComputeTypeA |
| using | ComputeTypeB |
| using | AccType |
| using | ThisThreadBlock |
| using | HotLoopInstList |
| using | Tuple5 |
Public Member Functions | |
| template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename AScaleGridBuffer, typename AScaleGridDesc, typename AScaleThreadTransfer, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadTransfer> | |
| __device__ void | Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, const AScaleGridDesc &a_scale_grid_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const BScaleGridDesc &b_scale_grid_desc, BScaleThreadTransfer &b_scale_thread_copy, const BScaleGridBuffer &b_scale_grid_buf, index_t num_loop) const |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| Public Member Functions inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_mx_pipeline_base (Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex()) |
| Constructor for BlockwiseGemmXdlops_mx_pipeline_base. | |
Static Public Member Functions | |
| static __host__ constexpr bool | BlockHasHotloop (index_t num_loop) |
| static __host__ constexpr TailNumber | BlockLoopTailNum (index_t num_loop) |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| static __device__ auto | GetWaveIdx () |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| Static Public Member Functions inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | GetCThreadDesc () |
Static Public Attributes | |
| static constexpr index_t | PrefetchStages = 1 |
| static constexpr index_t | PrefillStages = 1 |
| static constexpr index_t | GlobalBufferNum = 1 |
| static constexpr auto | ScalesPerKBlockSize |
| static constexpr auto | AScalesPerXdlopsRun |
| static constexpr auto | BScalesPerXdlopsRun |
| static constexpr auto | ScalesPerXdlopsRunPerThreadA |
| static constexpr auto | ScalesPerXdlopsRunPerThreadB |
| static constexpr auto | scale_pack_size_a = sizeof(AScaleDataType) / sizeof(mx_scale_t) |
| static constexpr auto | scale_pack_size_b = sizeof(BScaleDataType) / sizeof(mx_scale_t) |
| static constexpr auto | a_scale_thread_vec_size = KXdlPack * MXdlPack / scale_pack_size_a |
| static constexpr auto | b_scale_thread_vec_size = KXdlPack * NXdlPack / scale_pack_size_b |
| static constexpr auto | a_scale_thread_desc |
| static constexpr auto | b_scale_thread_desc |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr index_t | KRepeat |
| static constexpr index_t | MWaves |
| static constexpr index_t | NWaves |
| static constexpr index_t | WaveSize |
| static constexpr auto | xdlops_gemm |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_m3_k |
| static constexpr BMmaTileDesc | b_block_desc_n0_n1_n2_n3_k |
| static constexpr index_t | AMmaKStride |
| static constexpr index_t | APackedSize |
| static constexpr index_t | BMmaKStride |
| static constexpr index_t | BPackedSize |
| static constexpr index_t | KThreadChunk |
| static constexpr index_t | KXdlPack |
| static constexpr index_t | MXdlPack |
| static constexpr index_t | NXdlPack |
| Static Public Attributes inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| static constexpr index_t | APackedSize |
| static constexpr index_t | BPackedSize |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr auto | I2 |
| static constexpr auto | I3 |
| static constexpr index_t | MWaves |
| static constexpr index_t | NWaves |
| static constexpr index_t | WaveSize |
| static constexpr index_t | A_K0 |
| static constexpr index_t | B_K0 |
| static constexpr index_t | A_K1 |
| static constexpr index_t | B_K1 |
| static constexpr auto | xdlops_gemm |
| static constexpr index_t | AMmaKStride |
| static constexpr index_t | BMmaKStride |
| static constexpr index_t | KThreadChunk |
| static constexpr index_t | KPerThread |
| static constexpr index_t | KRepeat |
| static constexpr index_t | KPerInnerLoop |
| static constexpr index_t | MXdlPack |
| static constexpr index_t | NXdlPack |
| static constexpr index_t | KXdlPack |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_m3_k |
| static constexpr BMmaTileDesc | b_block_desc_n0_n1_n2_n3_k |
Static Protected Attributes | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
| Static Protected Attributes inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
Additional Inherited Members | |
| Public Attributes inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
| Protected Types inherited from ck::BlockwiseGemmXdlops_mx_pipeline_base< ThreadBlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > | |
| using | AThreadCopy |
| using | BThreadCopy |
Member Typedef Documentation
◆ AccType
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AccType = typename Base::AccType |
◆ Base
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base |
Initial value:
BlockwiseGemmXdlops_mx_pipeline_base<ThreadBlockSize,
ADataType,
BDataType,
ATileDesc,
BTileDesc,
AMmaTileDesc,
BMmaTileDesc,
ABlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector,
MPerBlock,
NPerBlock,
KPerBlock,
MPerXDL,
NPerXDL,
MRepeat,
NRepeat,
KPack>
__host__ __device__ BlockwiseGemmXdlops_mx_pipeline_base(Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex())
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:204
◆ ComputeTypeA
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeA = typename Base::ComputeTypeA |
◆ ComputeTypeB
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeTypeB = typename Base::ComputeTypeB |
◆ mx_scale_t
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::mx_scale_t = e8m0_bexp_t |
◆ Tuple5
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_v1_mx< BlockGemmPipelineScheduler::Intrawave, ThreadBlockSize, ScaleBlockSize, ADataType, AScaleDataType, BDataType, BScaleDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Tuple5 = typename Base::Tuple5 |
Member Function Documentation
◆ BlockHasHotloop()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ BlockLoopTailNum()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ CalculateCThreadOriginDataIndex()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetCThreadBuffer()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline constexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ GetWaveIdx()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inline static constexpr |
◆ Run()
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename AScaleGridBuffer, typename AScaleGridDesc, typename AScaleThreadTransfer, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadTransfer>
|
inline |
Member Data Documentation
◆ a_block_desc_m0_m1_m2_m3_k
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ a_scale_thread_desc
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
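Initial value: (the initializer expression is missing from this extract; the entries below are the definitions of symbols it references)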
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr index_t KXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:86
static constexpr index_t KRepeat
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:79
◆ a_scale_thread_vec_size
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ a_thread_copy_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
◆ a_thread_desc_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr protected |
◆ AMmaKStride
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ APackedSize
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ AScalesPerXdlopsRun
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
Initial value:
= (APackedSize * KPack * xdlops_gemm.K0PerXdlops) / ScaleBlockSize
static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
◆ b_block_desc_n0_n1_n2_n3_k
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ b_scale_thread_desc
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ b_scale_thread_vec_size
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ b_thread_copy_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
◆ b_thread_desc_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr protected |
◆ BMmaKStride
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ BPackedSize
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ BScalesPerXdlopsRun
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
Initial value:
= (BPackedSize * KPack * xdlops_gemm.K0PerXdlops) / ScaleBlockSize
static constexpr index_t BPackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:39
◆ c_thread_desc_
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr protected |
◆ GlobalBufferNum
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ I0
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ I1
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ KRepeat
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ KThreadChunk
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ KXdlPack
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ MWaves
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ MXdlPack
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ NWaves
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ NXdlPack
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ PrefetchStages
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ PrefillStages
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ scale_pack_size_a
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ scale_pack_size_b
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ ScalesPerKBlockSize
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
Initial value:
= KPerBlock / ScaleBlockSize
◆ ScalesPerXdlopsRunPerThreadA
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
Initial value:
= AScalesPerXdlopsRun / xdlops_gemm.mfma_instr.num_input_blks
static constexpr auto AScalesPerXdlopsRun
Definition blockwise_gemm_pipeline_xdlops_v1_mx.hpp:165
◆ ScalesPerXdlopsRunPerThreadB
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
Initial value:
= BScalesPerXdlopsRun / xdlops_gemm.mfma_instr.num_input_blks
static constexpr auto BScalesPerXdlopsRun
Definition blockwise_gemm_pipeline_xdlops_v1_mx.hpp:167
◆ WaveSize
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
◆ xdlops_gemm
template<index_t ThreadBlockSize, index_t ScaleBlockSize, typename ADataType, typename AScaleDataType, typename BDataType, typename BScaleDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
static constexpr |
The documentation for this struct was generated from the following file: