Flatmm_32x512x128_1x4x1_16x16x32_BF16 Struct Reference
ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_BF16 Struct Reference
#include <flatmm_32x512x128_1x4x1_16x16x32.hpp>
Inheritance diagram for ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_BF16:
Public Types | |
| using | ADataType = bf16_t |
| using | BDataType = bf16_t |
Public Member Functions | |
| template<typename ARes, typename ACoords, typename BRes, typename BCoords, bool Is2B = false> | |
| CK_TILE_DEVICE auto | operator() (const ARes &res_a, const ACoords &cached_coords_a, const BRes &res_b, const BCoords &cached_coords_b, CK_TILE_LDS_ADDR void *smem, index_t k, index_t tile_offset_a, index_t tile_offset_b, bool_constant< Is2B >={}) |
Additional Inherited Members | |
| Static Public Member Functions inherited from ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base | |
| static CK_TILE_DEVICE constexpr auto | MakeCBlockDist () |
| static CK_TILE_DEVICE constexpr auto | MakeCBlockTile () |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeLdsStoreDesc_A () |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeLdsLoadDesc_A () |
| static constexpr auto | GetGemm_AWarpEnc () |
| static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t | GetSmemSize () |
| Static Public Attributes inherited from ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base | |
| static constexpr index_t | Block_M = 32 |
| static constexpr index_t | Block_N = 512 |
| static constexpr index_t | Block_K = 128 |
| static constexpr index_t | WarpPerBlock_M = 1 |
| static constexpr index_t | WarpPerBlock_N = 4 |
| static constexpr index_t | WarpPerBlock_K = 1 |
| static constexpr index_t | NumWarps = 4 |
| static constexpr index_t | Warp_M = 16 |
| static constexpr index_t | Warp_N = 16 |
| static constexpr index_t | Warp_K = 32 |
| static constexpr index_t | BlockSize = 256 |
| static constexpr index_t | SubKPacks = 2 |
| static constexpr index_t | Block_W = Warp_N * Warp_K |
| static constexpr index_t | Block_Nr = Block_N / Warp_N |
| static constexpr index_t | Block_Kr = Block_K / Warp_K |
| static constexpr index_t | Repeat_M = Block_M / (Warp_M * WarpPerBlock_M) |
| static constexpr index_t | Repeat_N = Block_N / (Warp_N * WarpPerBlock_N) |
| static constexpr index_t | Repeat_K = Block_K / (Warp_K * WarpPerBlock_K) |
Member Typedef Documentation
◆ ADataType
◆ BDataType
Member Function Documentation
◆ operator()()
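template<typename ARes, typename ACoords, typename BRes, typename BCoords, bool Is2B = false>
CK_TILE_DEVICE auto ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_BF16::operator() (const ARes &res_a, const ACoords &cached_coords_a, const BRes &res_b, const BCoords &cached_coords_b, CK_TILE_LDS_ADDR void *smem, index_t k, index_t tile_offset_a, index_t tile_offset_b, bool_constant< Is2B >={}) [inline]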
The documentation for this struct was generated from the following file: flatmm_32x512x128_1x4x1_16x16x32.hpp