Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference

Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference#

Composable Kernel: ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference
ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC > Struct Template Reference

#include <blockwise_gemm_pipeline_wmmaops_v1.hpp>

Inheritance diagram for ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >:
ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >

Public Types

using Base
Public Types inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
using ThisThreadBlock
using HotLoopInstList
using Tuple6

Public Member Functions

template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename BScaleStruct>
__device__ void Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, BScaleStruct &b_scale_struct, index_t num_loop, index_t num_loop_per_scale) const
__host__ __device__ constexpr auto & GetCThreadBuffer ()
Public Member Functions inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmWmmaops_pipeline_base (Tuple6 a_origin=CalculateAThreadOriginDataIndex(), Tuple6 b_origin=CalculateBThreadOriginDataIndex())
 Constructor for BlockwiseGemmWmmaops_pipeline_base.

Static Public Member Functions

static bool BlockHasHotloop (index_t num_loop)
static TailNumber BlockLoopTailNum (index_t num_loop)
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >)
Static Public Member Functions inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs ()

Static Public Attributes

static constexpr index_t NumKClusters = CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS
static constexpr index_t KRepeatPerCluster = math::max(KRepeat / NumKClusters, 1)
static constexpr index_t PrefetchStages = 1
static constexpr index_t PrefillStages = 1
static constexpr index_t GlobalBufferNum = 1
static constexpr auto I0
static constexpr auto I1
static constexpr index_t A_K1
static constexpr index_t A_KRow
static constexpr index_t B_K1
static constexpr index_t B_KRow
static constexpr index_t KRepeat
static constexpr auto WmmaK
static constexpr auto wmma_gemm
static constexpr AWmmaTileDesc a_block_desc_k0_m0_m1_m2_k1
static constexpr BWmmaTileDesc b_block_desc_k0_n0_n1_n2_k1
Static Public Attributes inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
static constexpr auto I0
static constexpr auto I1
static constexpr auto I2
static constexpr auto I3
static constexpr auto I5
static constexpr index_t WaveSize
static constexpr index_t MWaves
static constexpr index_t NWaves
static constexpr index_t A_KRow
static constexpr index_t B_KRow
static constexpr index_t A_K1
static constexpr index_t B_K1
static constexpr auto wmma_gemm
static constexpr index_t KRepeat
static constexpr auto WmmaK
static constexpr auto MAccVgprs
static constexpr AWmmaTileDesc a_block_desc_k0_m0_m1_m2_k1
static constexpr BWmmaTileDesc b_block_desc_k0_n0_n1_n2_k1

Protected Types

using AThreadCopy
using BThreadCopy
Protected Types inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
using AThreadCopy
using BThreadCopy

Protected Attributes

AThreadCopy a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()}
BThreadCopy b_thread_copy_ {Base::CalculateBThreadOriginDataIndex()}
Protected Attributes inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
AThreadCopy a_thread_copy_
BThreadCopy b_thread_copy_

Static Protected Attributes

static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_
Static Protected Attributes inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Additional Inherited Members

Public Attributes inherited from ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, wmma_gemm.GetRegSizePerWmma(), true > c_thread_buf_

Member Typedef Documentation

◆ AThreadCopy

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
using ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::AThreadCopy
protected
Initial value:
decltype(a_thread_desc_),
Sequence<KPack / A_K1 / A_KRow, 1, 1, 1, 1, A_K1>,
5,
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
static constexpr AWmmaTileDesc a_block_desc_k0_m0_m1_m2_k1
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:336
static constexpr index_t A_KRow
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:53
ADataType ComputeTypeA
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:34
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260

◆ Base

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
using ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::Base
Initial value:
ADataType,
BDataType,
ComputeTypeA,
ComputeTypeB,
AccDataType,
AWmmaTileDesc,
BWmmaTileDesc,
ABlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector,
MPerBlock,
NPerBlock,
KPerBlock,
MPerWmma,
NPerWmma,
MRepeat,
NRepeat,
KPack,
TransposeC>

◆ BThreadCopy

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
using ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BThreadCopy
protected
Initial value:
decltype(b_thread_desc_),
Sequence<KPack / B_K1 / B_KRow, 1, 1, 1, 1, B_K1>,
5,
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr BWmmaTileDesc b_block_desc_k0_n0_n1_n2_k1
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:337
static constexpr index_t B_KRow
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:54
BDataType ComputeTypeB
Definition blockwise_gemm_mx_pipeline_xdlops_base.hpp:35

Member Function Documentation

◆ BlockHasHotloop()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
bool ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BlockHasHotloop ( index_t num_loop)
inlinestatic

◆ BlockLoopTailNum()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
TailNumber ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BlockLoopTailNum ( index_t num_loop)
inlinestatic

◆ CalculateCThreadOriginDataIndex()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
__device__ auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 >  )
inlinestatic

◆ GetCThreadBuffer()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
__host__ __device__ constexpr auto & ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::GetCThreadBuffer ( )
inlineconstexpr

◆ Run()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename BScaleStruct>
__device__ void ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::Run ( const AGridDesc & a_grid_desc,
const ABlockDesc & a_block_desc,
ABlockTransfer & a_blockwise_copy,
const AGridBuffer & a_grid_buf,
ABlockBuffer & a_block_buf,
const ABlockTransferStep & a_block_copy_step,
const BGridDesc & b_grid_desc,
const BBlockDesc & b_block_desc,
BBlockTransfer & b_blockwise_copy,
const BGridBuffer & b_grid_buf,
BBlockBuffer & b_block_buf,
const BBlockTransferStep & b_block_copy_step,
CThreadBuffer & c_thread_buf,
BScaleStruct & b_scale_struct,
index_t num_loop,
index_t num_loop_per_scale ) const
inline

Member Data Documentation

◆ a_block_desc_k0_m0_m1_m2_k1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
AWmmaTileDesc ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::a_block_desc_k0_m0_m1_m2_k1
staticconstexpr

◆ A_K1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::A_K1
staticconstexpr

◆ A_KRow

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::A_KRow
staticconstexpr

◆ a_thread_copy_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
AThreadCopy ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()}
protected

◆ a_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::a_thread_desc_
staticconstexprprotected
Initial value:
=
I1,
I1,
I0,
I0,
I1))
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
Definition blockwise_gemm_pipeline_wmmaops_v1.hpp:37

◆ b_block_desc_k0_n0_n1_n2_k1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
BWmmaTileDesc ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::b_block_desc_k0_n0_n1_n2_k1
staticconstexpr

◆ B_K1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::B_K1
staticconstexpr

◆ B_KRow

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::B_KRow
staticconstexpr

◆ b_thread_copy_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
BThreadCopy ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::b_thread_copy_ {Base::CalculateBThreadOriginDataIndex()}
protected

◆ b_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::b_thread_desc_
staticconstexprprotected

◆ c_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::c_thread_desc_
staticconstexprprotected

◆ GlobalBufferNum

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::GlobalBufferNum = 1
staticconstexpr

◆ I0

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::I0
staticconstexpr

◆ I1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::I1
staticconstexpr

◆ KRepeat

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::KRepeat
staticconstexpr

◆ KRepeatPerCluster

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::KRepeatPerCluster = math::max(KRepeat / NumKClusters, 1)
staticconstexpr

◆ NumKClusters

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::NumKClusters = CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS
staticconstexpr

◆ PrefetchStages

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::PrefetchStages = 1
staticconstexpr

◆ PrefillStages

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
index_t ck::BlockwiseGemmWmmaops_pipeline_v1< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::PrefillStages = 1
staticconstexpr

◆ wmma_gemm

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::wmma_gemm
staticconstexpr

◆ WmmaK

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC>
auto ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::WmmaK
staticconstexpr

The documentation for this struct was generated from the following file: