BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference
Public Types |
Public Member Functions |
Static Public Member Functions |
Public Attributes |
Static Public Attributes |
Protected Types |
Protected Attributes |
Static Protected Attributes |
List of all members
ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference
#include <blockwise_gemm_mx_pipeline_xdlops_base.hpp>
Public Types | |
| using | ComputeTypeA = ADataType |
| using | ComputeTypeB = BDataType |
| using | AccType = float |
| using | ThisThreadBlock = ThisThreadBlock<BlockSize> |
| using | HotLoopInstList |
| using | Tuple5 = decltype(CalculateAThreadOriginDataIndex()) |
Public Member Functions | |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_mx_pipeline_base (Tuple5 a_origin=CalculateAThreadOriginDataIndex(), Tuple5 b_origin=CalculateBThreadOriginDataIndex()) |
| Constructor for BlockwiseGemmXdlops_mx_pipeline_base. | |
Static Public Member Functions | |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i> | |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| template<typename CGridDesc_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| template<typename CGridDesc_G_M_N> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | GetCThreadDesc () |
Public Attributes | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
Static Public Attributes | |
| static constexpr index_t | APackedSize = packed_size_v<ComputeTypeA> |
| static constexpr index_t | BPackedSize = packed_size_v<ComputeTypeB> |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr index_t | MWaves = MPerBlock / (MRepeat * MPerXDL) |
| static constexpr index_t | NWaves = NPerBlock / (NRepeat * NPerXDL) |
| static constexpr index_t | WaveSize = BlockSize / MWaves / NWaves |
| static constexpr index_t | A_K0 = ATileDesc{}.GetLength(I0) |
| static constexpr index_t | B_K0 = BTileDesc{}.GetLength(I0) |
| static constexpr index_t | A_K1 = ATileDesc{}.GetLength(I2) |
| static constexpr index_t | B_K1 |
| static constexpr auto | xdlops_gemm |
| static constexpr index_t | AMmaKStride = KPack |
| static constexpr index_t | BMmaKStride = KPack |
| static constexpr index_t | KThreadChunk = (APackedSize == 1) ? 16 : 32 / APackedSize |
| static constexpr index_t | KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops |
| static constexpr index_t | KRepeat = KPerThread / KPack |
| static constexpr index_t | KPerInnerLoop = KPack |
| static constexpr index_t | MXdlPack = 2 |
| static constexpr index_t | NXdlPack = 2 |
| static constexpr index_t | KXdlPack = 2 |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_m3_k |
| static constexpr BMmaTileDesc | b_block_desc_n0_n1_n2_n3_k |
Protected Types | |
| using | AThreadCopy |
| using | BThreadCopy |
Protected Attributes | |
| AThreadCopy | a_thread_copy_ |
| BThreadCopy | b_thread_copy_ |
Static Protected Attributes | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
Member Typedef Documentation
◆ AccType
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::AccType = float |
◆ AThreadCopy
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<ADataType,
decltype(a_block_desc_m0_m1_m2_m3_k),
decltype(a_thread_desc_),
4,
A_K1,
A_K1>
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_m3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:381
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260
◆ BThreadCopy
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<BDataType,
decltype(b_block_desc_n0_n1_n2_n3_k),
decltype(b_thread_desc_),
4,
B_K1,
B_K1>
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_n3_k
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:382
BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35
◆ ComputeTypeA
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::ComputeTypeA = ADataType |
◆ ComputeTypeB
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::ComputeTypeB = BDataType |
◆ HotLoopInstList
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::HotLoopInstList |
Initial value:
BlockSize,
MPerBlock,
NPerBlock,
KPerBlock,
ABlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector,
A_K1,
B_K1,
A_K1,
B_K1,
MRepeat,
NRepeat,
MPerXDL,
NPerXDL,
xdlops_gemm.KPerXdlops,
(packed_size_v<ComputeTypeA> > 1 || packed_size_v<ComputeTypeB> > 1)>
static constexpr auto xdlops_gemm
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:60
static constexpr index_t B_K1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:57
static constexpr index_t A_K1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:55
Definition blockwise_gemm_pipeline_xdlops.hpp:34
◆ ThisThreadBlock
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
◆ Tuple5
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| using ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::Tuple5 = decltype(CalculateAThreadOriginDataIndex()) |
Constructor & Destructor Documentation
◆ BlockwiseGemmXdlops_mx_pipeline_base()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline |
Constructor for BlockwiseGemmXdlops_mx_pipeline_base.
This constructor initializes the thread copy objects for matrices A and B. It also performs several compile-time checks to ensure the correctness of the matrix tile descriptors.
- Parameters
- a_origin: The origin data index for matrix A.
- b_origin: The origin data index for matrix B.
- Note
- The constructor includes static assertions to ensure that:
  - The matrix tile descriptors for A and B are known at compile time.
  - The number of threads in the thread block matches the product of MWaves, NWaves, and WaveSize.
  - The dimensions of the block are divisible by the product of the corresponding XDL and repeat dimensions.
Member Function Documentation
◆ CalculateAThreadOriginDataIndex()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static |
◆ CalculateBThreadOriginDataIndex()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static |
◆ CalculateCThreadOriginDataIndex()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCThreadBuffer()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline constexpr |
◆ GetCThreadDesc()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_M3_M4_M5_N3()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static constexpr |
◆ GetWaveIdx()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
inline static |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<typename CGridDesc_G_M_N>
|
inline static constexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<typename CGridDesc_M_N>
|
inline static constexpr |
Member Data Documentation
◆ a_block_desc_m0_m1_m2_m3_k
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ A_K0
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ A_K1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ a_thread_copy_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
protected |
◆ a_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr protected |
Initial value:
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I1
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:42
◆ AMmaKStride
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ APackedSize
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ b_block_desc_n0_n1_n2_n3_k
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ B_K0
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ B_K1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
Initial value:
=
BTileDesc{}.GetLength(Number < BTileDesc{}.GetNumOfDimension() == 4 ? 3 : 2 > {})
◆ b_thread_copy_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
protected |
◆ b_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr protected |
Initial value:
◆ BMmaKStride
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ BPackedSize
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ c_thread_buf_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
| StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, AccType, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true> ck::BlockwiseGemmXdlops_mx_pipeline_base< BlockSize, ADataType, BDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::c_thread_buf_ |
◆ c_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr protected |
Initial value:
=
xdlops_gemm.GetRegSizePerXdlops()))
static constexpr index_t NXdlPack
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:85
◆ I0
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ I1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ I2
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ I3
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ KPerInnerLoop
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ KPerThread
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ KRepeat
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ KThreadChunk
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ KXdlPack
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ MWaves
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ MXdlPack
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ NWaves
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ NXdlPack
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ WaveSize
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
◆ xdlops_gemm
template<index_t BlockSize, typename ADataType, typename BDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
|
static constexpr |
Initial value:
MPerXDL,
NPerXDL,
KPack * APackedSize,
TransposeC,
true>{}
static constexpr index_t APackedSize
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:38
Definition xdlops_gemm.hpp:1821
The documentation for this struct was generated from the following file: