BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference

BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference

#include <blockwise_gemm_xdlops_skip_b_lds.hpp>

Public Member Functions

__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1 ()
__device__ void MoveABlockSliceWindow ()
__device__ void ResetABlockStartWindow ()
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_thread_buf, CThreadBuffer &c_thread_buf) const

Static Public Member Functions

static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K ()

Public Attributes

StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t KPerBlock = K0PerBlock * KPack
static constexpr index_t A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr index_t A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr auto xdlops_gemm = XdlopsGemm<FloatAB, MPerXDL, NPerXDL, KPack>{}
static constexpr index_t KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops
static constexpr index_t K0PerThread = K0PerBlock / xdlops_gemm.K0PerXdlops
static constexpr index_t MWaves = MPerBlock / (MRepeat * MPerXDL)
static constexpr index_t NWaves = NPerBlock / (NRepeat * NPerXDL)
static constexpr index_t WaveSize = BlockSize / MWaves / NWaves
static constexpr auto a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()

Constructor & Destructor Documentation

◆ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ __device__ ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1 ( )
inline

Member Function Documentation

◆ CalculateAThreadOriginDataIndex()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateAThreadOriginDataIndex ( )
inline static

◆ CalculateBThreadOriginDataIndex()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateBThreadOriginDataIndex ( )
inline static

◆ CalculateCThreadOriginDataIndex()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inline static

◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadBuffer()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ __device__ constexpr auto & ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadBuffer ( )
inline constexpr

◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inline static constexpr

◆ GetWaveIdx()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GetWaveIdx ( )
inline static

◆ MakeABlockDescriptor_M0_M1_M2_K()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeABlockDescriptor_M0_M1_M2_K ( )
inline static constexpr

◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_G_M_N & c_grid_desc_g_m_n)
inline static constexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inline static constexpr

◆ MoveABlockSliceWindow()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MoveABlockSliceWindow ( )
inline

◆ ResetABlockStartWindow()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ResetABlockStartWindow ( )
inline

◆ Run()

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Run ( const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_thread_buf,
CThreadBuffer & c_thread_buf ) const
inline

Member Data Documentation

◆ a_block_desc_m0_m1_m2_k

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_block_desc_m0_m1_m2_k = MakeABlockDescriptor_M0_M1_M2_K()
static constexpr

◆ A_K0

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::A_K0 = AK0MK1BlockDesc{}.GetLength(I0)
static constexpr

◆ A_K1

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::A_K1 = AK0MK1BlockDesc{}.GetLength(I2)
static constexpr

◆ c_thread_buf_

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
StaticBufferTupleOfVector<AddressSpaceEnum::Vgpr, FloatAcc, MRepeat * NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true> ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::c_thread_buf_

◆ I0

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::I3 = Number<3>{}
static constexpr

◆ K0PerThread

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::K0PerThread = K0PerBlock / xdlops_gemm.K0PerXdlops
static constexpr

◆ KPerBlock

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KPerBlock = K0PerBlock * KPack
static constexpr

◆ KPerThread

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops
static constexpr

◆ MWaves

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::MWaves = MPerBlock / (MRepeat * MPerXDL)
static constexpr

◆ NWaves

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NWaves = NPerBlock / (NRepeat * NPerXDL)
static constexpr

◆ WaveSize

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::WaveSize = BlockSize / MWaves / NWaves
static constexpr

◆ xdlops_gemm

template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0K0BN0N1N2N3K1BlockDesc, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1r1< BlockSize, FloatAB, FloatAcc, AK0MK1BlockDesc, BK0K0BN0N1N2N3K1BlockDesc, MPerBlock, NPerBlock, K0PerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::xdlops_gemm = XdlopsGemm<FloatAB, MPerXDL, NPerXDL, KPack>{}
static constexpr

The documentation for this struct was generated from the following file: