BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ > Struct Template Reference#
ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ > Struct Template Reference
#include <block_to_ctile_map.hpp>
Public Member Functions | |
| BlockToCTileMap_GemmStreamK (uint32_t m, uint32_t n, uint32_t k, uint32_t num_cu, uint32_t occupancy, uint32_t sk_blocks=0xffffffff) | |
| __host__ __device__ uint32_t | get_sk_total_iters () const |
| __host__ __device__ uint32_t | get_sk_tiles () const |
| __host__ __device__ dim3 | get_grid_dims () const |
| __device__ uint32_t | get_block_idx () const |
| __device__ void | get_block_itr (uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const |
| __device__ uint32_t | get_current_iter_length (uint32_t iter_start, uint32_t iter_end, uint32_t total_iter_length) const |
| __device__ uint32_t | get_tile_idx (uint32_t iter) const |
| __device__ void | get_tile_idx_with_offset (uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const |
| __device__ auto | tile_to_spatial (uint32_t tile_idx, uint32_t m, uint32_t n) const |
| __host__ __device__ uint32_t | get_workspace_size_for_acc (uint32_t acc_element_bytes) const |
| __host__ __device__ uint32_t | get_workspace_size_for_semaphore () const |
| __host__ __device__ uint32_t | get_workspace_size (uint32_t acc_element_bytes) const |
| __host__ __device__ uint32_t | get_tile_intersections (uint32_t tiles_, const MDiv &eqav_tiles_) const |
| __host__ __device__ uint32_t | get_tiles_cover_sk_block (uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const |
| __host__ __device__ uint32_t | get_total_acc_buffers () const |
| __device__ uint32_t | get_acc_buffer_offset_from_tile (uint32_t tile_idx_) const |
| __device__ uint32_t | get_acc_buffer_offset_from_block (uint32_t block_idx_) const |
Static Public Attributes | |
| static constexpr uint32_t | min_k_iters_per_sk_block = 2 |
| static constexpr uint32_t | MPerBlock = MPerBlock_ |
| static constexpr uint32_t | NPerBlock = NPerBlock_ |
| static constexpr uint32_t | KPerBlock = KPerBlock_ |
| static constexpr StreamKReductionStrategy | ReductionStrategy = ReductionStrategy_ |
| static constexpr uint32_t | tile_swizzle_sub_m = TileSwizzleSubM_ |
Constructor & Destructor Documentation
◆ BlockToCTileMap_GemmStreamK()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
Member Function Documentation
◆ get_acc_buffer_offset_from_block()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_acc_buffer_offset_from_tile()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_block_idx()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_block_itr()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_current_iter_length()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_grid_dims()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_sk_tiles()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_sk_total_iters()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_tile_idx()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_tile_idx_with_offset()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_tile_intersections()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_tiles_cover_sk_block()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_total_acc_buffers()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_workspace_size()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_workspace_size_for_acc()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ get_workspace_size_for_semaphore()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
◆ tile_to_spatial()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
inline |
Member Data Documentation
◆ dp_start_block_idx
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::dp_start_block_idx |
◆ eqav_tiles_big
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::eqav_tiles_big |
◆ eqav_tiles_little
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::eqav_tiles_little |
◆ k_iters_per_big_block
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::k_iters_per_big_block |
◆ k_iters_per_tile
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| MDiv ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::k_iters_per_tile |
◆ KPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
◆ min_k_iters_per_sk_block
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
◆ MPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
◆ n_tiles
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| MDiv2 ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::n_tiles |
◆ NPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
◆ reduction_start_block_idx
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::reduction_start_block_idx |
◆ ReductionStrategy
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
◆ sk_num_big_blocks
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::sk_num_big_blocks |
◆ sk_num_blocks
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
| uint32_t ck::BlockToCTileMap_GemmStreamK< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_ >::sk_num_blocks |
◆ tile_swizzle_sub_m
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8>
|
staticconstexpr |
The documentation for this struct was generated from the following file: