EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize > Struct Template Reference

EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize > Struct Template Reference

Composable Kernel: ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize > Struct Template Reference
ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize > Struct Template Reference

#include <epilogue_cshuffle_v3_welford_wmma.hpp>

Inheritance diagram for ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >:
ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >

Public Types

using Base
using GemmMeanVarGridDesc_M_N
using GemmCountGridDesc_M_N
Public Types inherited from ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >
using SpaceFillingCurveVgpr
using SpaceFillingCurveVmem

Public Member Functions

__device__ EpilogueWelfordCShuffle (EDataType *p_welford_mean_grid_, EDataType *p_welford_var_grid_, int32_t *p_welford_count_grid_, index_t MRaw_, index_t NRaw_)
template<InMemoryDataOperationEnum EGlobalMemoryDataOperation, typename CThreadBuf, typename DsGridPointer, typename DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock, typename EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock>
__device__ void Run (CThreadBuf &c_thread_buf, DsGridPointer p_ds_grid, EDataType *p_e_grid, void *p_shared, const DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock &ds_grid_desc_mblock_mperblock_nblock_nperblock, const EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock &e_grid_desc_mblock_mperblock_nblock_nperblock, CDEElementwiseOperation &cde_element_op, const index_t &block_m_id, const index_t &block_n_id)

Static Public Member Functions

template<typename DoPads, index_t MPerTile, index_t NPerTile>
__host__ static __device__ auto MakeMeanVarDescriptor_M_N (index_t M, index_t N)
template<typename DoPads, index_t MPerTile, index_t NPerTile>
__host__ static __device__ auto MakeCountDescriptor_M_N (index_t M, index_t N)
template<typename GridDescriptor_M_N>
__host__ static __device__ constexpr auto MakeMeanVarCountGridDescriptor_MBlock_MPerBlock_NBlock (const GridDescriptor_M_N &grid_desc_m_n)
static __device__ constexpr auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat ()
static __device__ constexpr auto GetCShuffleLDSDescriptor ()
static __device__ auto GetVgprToLDSEpilogueDescriptor ()
Static Public Member Functions inherited from ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >
static __device__ constexpr auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat ()
static __device__ constexpr auto GetCShuffleLDSDescriptor ()
static __device__ auto GetVgprToLDSEpilogueDescriptor ()
template<InMemoryDataOperationEnum EGlobalMemoryDataOperation, typename InterDataType, typename CDsDescRefs, typename EGridDesc>
static __device__ auto GetLDSToVmemEpilogueDescriptor (CDsDescRefs &c_ds_desc_refs, EGridDesc &e_grid_desc_mblock_mperblock_nblock_nperblock, CDEElementwiseOperation &cde_element_op, const index_t &block_m_id, const index_t &block_n_id)

Public Attributes

EDataType * p_welford_mean_grid
EDataType * p_welford_var_grid
int32_t * p_welford_count_grid
index_t NRaw
GemmMeanVarGridDesc_M_N gemm_mean_var_grid_desc_m_nblock
GemmCountGridDesc_M_N gemm_count_grid_desc_m_nblock

Static Public Attributes

static constexpr auto I0
static constexpr auto I1
static constexpr auto I2
static constexpr auto I3
static constexpr index_t NumDTensor
Static Public Attributes inherited from ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto I5 = Number<5>{}
static constexpr auto I6 = Number<6>{}
static constexpr index_t NumDTensor = DsDataType::Size()
static constexpr auto EShuffleBlockTransferScalarPerVector

Member Typedef Documentation

◆ Base

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
using ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::Base
Initial value:
EpilogueCShuffleBase<DsDataType,
EDataType,
AccDataType,
CShuffleDataType,
MPerBlock,
NPerBlock,
MPerWmma,
NPerWmma,
MRepeat,
NRepeat,
CShuffleMRepeatPerShuffle,
CShuffleNRepeatPerShuffle,
CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock,
CDEShuffleBlockTransferScalarPerVectors,
CDEElementwiseOperation,
ThisThreadBlock,
BlockwiseGemmPipe>
Definition epilogue_cshuffle_v3_wmma_base.hpp:29
Definition thread_group.hpp:12

◆ GemmCountGridDesc_M_N

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
using ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::GemmCountGridDesc_M_N
Initial value:
decltype(MakeCountDescriptor_M_N<Sequence<true, false>, MPerBlock, 1>(1, 1))
__host__ static __device__ auto MakeCountDescriptor_M_N(index_t M, index_t N)
Definition epilogue_cshuffle_v3_welford_wmma.hpp:88

◆ GemmMeanVarGridDesc_M_N

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
using ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::GemmMeanVarGridDesc_M_N
Initial value:
decltype(MakeMeanVarDescriptor_M_N<Sequence<true, false>, MPerBlock, 1>(1, 1))
__host__ static __device__ auto MakeMeanVarDescriptor_M_N(index_t M, index_t N)
Definition epilogue_cshuffle_v3_welford_wmma.hpp:79

Constructor & Destructor Documentation

◆ EpilogueWelfordCShuffle()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
__device__ ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::EpilogueWelfordCShuffle ( EDataType * p_welford_mean_grid_,
EDataType * p_welford_var_grid_,
int32_t * p_welford_count_grid_,
index_t MRaw_,
index_t NRaw_ )
inline

Member Function Documentation

◆ GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
__device__ constexpr auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat ( )
inline static constexpr

◆ GetCShuffleLDSDescriptor()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
__device__ constexpr auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::GetCShuffleLDSDescriptor ( )
inline static constexpr

◆ GetVgprToLDSEpilogueDescriptor()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
__device__ auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::GetVgprToLDSEpilogueDescriptor ( )
inline static

◆ MakeCountDescriptor_M_N()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
template<typename DoPads, index_t MPerTile, index_t NPerTile>
__host__ static __device__ auto ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::MakeCountDescriptor_M_N ( index_t M,
index_t N )
inline static

◆ MakeMeanVarCountGridDescriptor_MBlock_MPerBlock_NBlock()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
template<typename GridDescriptor_M_N>
__host__ static __device__ constexpr auto ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::MakeMeanVarCountGridDescriptor_MBlock_MPerBlock_NBlock ( const GridDescriptor_M_N & grid_desc_m_n)
inline static constexpr

◆ MakeMeanVarDescriptor_M_N()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
template<typename DoPads, index_t MPerTile, index_t NPerTile>
__host__ static __device__ auto ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::MakeMeanVarDescriptor_M_N ( index_t M,
index_t N )
inline static

◆ Run()

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
template<InMemoryDataOperationEnum EGlobalMemoryDataOperation, typename CThreadBuf, typename DsGridPointer, typename DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock, typename EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock>
__device__ void ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::Run ( CThreadBuf & c_thread_buf,
DsGridPointer p_ds_grid,
EDataType * p_e_grid,
void * p_shared,
const DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock & ds_grid_desc_mblock_mperblock_nblock_nperblock,
const EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock & e_grid_desc_mblock_mperblock_nblock_nperblock,
CDEElementwiseOperation & cde_element_op,
const index_t & block_m_id,
const index_t & block_n_id )
inline

Member Data Documentation

◆ gemm_count_grid_desc_m_nblock

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
GemmCountGridDesc_M_N ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::gemm_count_grid_desc_m_nblock

◆ gemm_mean_var_grid_desc_m_nblock

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
GemmMeanVarGridDesc_M_N ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::gemm_mean_var_grid_desc_m_nblock

◆ I0

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::I0
static constexpr

◆ I1

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::I1
static constexpr

◆ I2

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::I2
static constexpr

◆ I3

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
auto ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::I3
static constexpr

◆ NRaw

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
index_t ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::NRaw

◆ NumDTensor

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
index_t ck::EpilogueCShuffleBase< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe >::NumDTensor
static constexpr

◆ p_welford_count_grid

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
int32_t* ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::p_welford_count_grid

◆ p_welford_mean_grid

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
EDataType* ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::p_welford_mean_grid

◆ p_welford_var_grid

template<typename DsDataType, typename EDataType, typename AccDataType, typename CShuffleDataType, index_t MPerBlock, index_t NPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, typename CDEElementwiseOperation, typename ThisThreadBlock, typename BlockwiseGemmPipe, index_t BlockSize>
EDataType* ck::EpilogueWelfordCShuffle< DsDataType, EDataType, AccDataType, CShuffleDataType, MPerBlock, NPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, CDEElementwiseOperation, ThisThreadBlock, BlockwiseGemmPipe, BlockSize >::p_welford_var_grid

The documentation for this struct was generated from the following file: