BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ > Struct Template Reference

BlockToCTileMap_GemmStreamK&lt; MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ &gt; Struct Template Reference#

Composable Kernel: ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ > Struct Template Reference
ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ > Struct Template Reference

#include <block_to_ctile_map.hpp>

Public Member Functions

 BlockToCTileMap_GemmStreamK (uint32_t m, uint32_t n, uint32_t k, uint32_t num_cu, uint32_t occupancy, uint32_t sk_blocks=0xffffffff)
__host__ __device__ uint32_t get_sk_total_iters () const
__host__ __device__ uint32_t get_sk_tiles () const
__host__ __device__ dim3 get_grid_dims () const
__device__ uint32_t get_block_idx () const
__device__ void get_block_itr (uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const
__device__ uint32_t get_current_iter_length (uint32_t iter_start, uint32_t iter_end, uint32_t total_iter_length) const
__device__ uint32_t get_tile_idx (uint32_t iter) const
__device__ void get_tile_idx_with_offset (uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const
__device__ auto tile_to_spatial (uint32_t tile_idx, uint32_t m, uint32_t n) const
__host__ __device__ uint32_t get_workspace_size_for_acc (uint32_t acc_element_bytes) const
__host__ __device__ uint32_t get_workspace_size_for_semaphore () const
__host__ __device__ uint32_t get_workspace_size (uint32_t acc_element_bytes) const
__host__ __device__ uint32_t get_tile_intersections (uint32_t tiles_, const MDiv &eqav_tiles_) const
__host__ __device__ uint32_t get_tiles_cover_sk_block (uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const
__host__ __device__ uint32_t get_total_acc_buffers () const
__device__ uint32_t get_acc_buffer_offset_from_tile (uint32_t tile_idx_) const
__device__ uint32_t get_acc_buffer_offset_from_block (uint32_t block_idx_) const

Public Attributes

uint32_t sk_num_blocks
uint32_t sk_num_big_blocks
uint32_t dp_start_block_idx
uint32_t reduction_start_block_idx
uint32_t k_iters_per_big_block
MDiv2 n_tiles
MDiv k_iters_per_tile
MDiv eqav_tiles_big
MDiv eqav_tiles_little

Static Public Attributes

static constexpr uint32_t min_k_iters_per_sk_block = 2
static constexpr uint32_t MPerBlock = MPerBlock_
static constexpr uint32_t NPerBlock = NPerBlock_
static constexpr uint32_t KPerBlock = KPerBlock_
static constexpr StreamKReductionStrategy ReductionStrategy = ReductionStrategy_
static constexpr uint32_t tile_swizzle_sub_m = TileSwizzleSubM_

Constructor & Destructor Documentation

◆ BlockToCTileMap_GemmStreamK()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::BlockToCTileMap_GemmStreamK ( uint32_t m,
uint32_t n,
uint32_t k,
uint32_t num_cu,
uint32_t occupancy,
uint32_t sk_blocks = 0xffffffff )
inline

Member Function Documentation

◆ get_acc_buffer_offset_from_block()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_acc_buffer_offset_from_block ( uint32_t block_idx_) const
inline

◆ get_acc_buffer_offset_from_tile()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_acc_buffer_offset_from_tile ( uint32_t tile_idx_) const
inline

◆ get_block_idx()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_block_idx ( ) const
inline

◆ get_block_itr()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ void ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_block_itr ( uint32_t block_idx,
uint32_t & iter_start,
uint32_t & iter_end ) const
inline

◆ get_current_iter_length()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_current_iter_length ( uint32_t iter_start,
uint32_t iter_end,
uint32_t total_iter_length ) const
inline

◆ get_grid_dims()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ dim3 ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_grid_dims ( ) const
inline

◆ get_sk_tiles()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_sk_tiles ( ) const
inline

◆ get_sk_total_iters()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_sk_total_iters ( ) const
inline

◆ get_tile_idx()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_tile_idx ( uint32_t iter) const
inline

◆ get_tile_idx_with_offset()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ void ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_tile_idx_with_offset ( uint32_t iter,
uint32_t & tile_idx,
uint32_t & iter_offset ) const
inline

◆ get_tile_intersections()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_tile_intersections ( uint32_t tiles_,
const MDiv & eqav_tiles_ ) const
inline

◆ get_tiles_cover_sk_block()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_tiles_cover_sk_block ( uint32_t num_sk_blocks_,
uint32_t iters_per_sk_block_ ) const
inline

◆ get_total_acc_buffers()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_total_acc_buffers ( ) const
inline

◆ get_workspace_size()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_workspace_size ( uint32_t acc_element_bytes) const
inline

◆ get_workspace_size_for_acc()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_workspace_size_for_acc ( uint32_t acc_element_bytes) const
inline

◆ get_workspace_size_for_semaphore()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__host__ __device__ uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::get_workspace_size_for_semaphore ( ) const
inline

◆ tile_to_spatial()

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
__device__ auto ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::tile_to_spatial ( uint32_t tile_idx,
uint32_t m,
uint32_t n ) const
inline

Member Data Documentation

◆ dp_start_block_idx

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::dp_start_block_idx

◆ eqav_tiles_big

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::eqav_tiles_big

◆ eqav_tiles_little

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::eqav_tiles_little

◆ k_iters_per_big_block

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::k_iters_per_big_block

◆ k_iters_per_tile

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::k_iters_per_tile

◆ KPerBlock

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::KPerBlock = KPerBlock_
staticconstexpr

◆ min_k_iters_per_sk_block

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::min_k_iters_per_sk_block = 2
staticconstexpr

◆ MPerBlock

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::MPerBlock = MPerBlock_
staticconstexpr

◆ n_tiles

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
MDiv2 ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::n_tiles

◆ NPerBlock

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::NPerBlock = NPerBlock_
staticconstexpr

◆ reduction_start_block_idx

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::reduction_start_block_idx

◆ ReductionStrategy

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
StreamKReductionStrategy ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::ReductionStrategy = ReductionStrategy_
staticconstexpr

◆ sk_num_big_blocks

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::sk_num_big_blocks

◆ sk_num_blocks

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::sk_num_blocks

◆ tile_swizzle_sub_m

template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::tile_swizzle_sub_m = TileSwizzleSubM_
staticconstexpr

The documentation for this struct was generated from the following file: