UniversalGemmBasePolicy< Derived > Struct Template Reference
ck_tile::UniversalGemmBasePolicy< Derived > Struct Template Reference
#include <gemm_universal_pipeline_ag_bg_cr_policy.hpp>
Static Public Member Functions | |
| static constexpr auto | getATileAccessPattern () |
| static constexpr auto | getBTileAccessPattern () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeALdsBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | MakeBLdsBlockDescriptor () |
| Create LDS block descriptor for B tensor. | |
| template<typename Problem, typename DataType, index_t MNPerBlock, index_t XPerTile, bool IsWave32Host> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetGlobalVectorLoadSize () |
| Get the maximum global memory vector load size. | |
| template<typename Problem, bool IsWave32Host = false> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetVectorSizeA () |
| template<typename Problem, bool IsWave32Host = false> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetVectorSizeB () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetVectorSizeC () |
| Get the vector store size for C tensor. | |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | IsTransposeC () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeADramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeBDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledARegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledBRegTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemPackA () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemPackB () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr index_t | GetSmemSizeA () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr index_t | GetSmemSizeB () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr index_t | GetSmemSize () |
Static Public Attributes | |
| template<typename Problem> | |
| static constexpr bool | is_a_load_tr = false |
| template<typename Problem> | |
| static constexpr bool | is_b_load_tr = false |
| static constexpr auto | I0 = number<0>{} |
| static constexpr auto | I1 = number<1>{} |
| static constexpr auto | I2 = number<2>{} |
| static constexpr auto | DefaultATileAccessPattern = tile_distribution_pattern::thread_raked |
| static constexpr auto | DefaultBTileAccessPattern = tile_distribution_pattern::thread_raked |
Member Function Documentation
◆ getATileAccessPattern()
template<typename Derived>
|
inline static constexpr |
◆ getBTileAccessPattern()
template<typename Derived>
|
inline static constexpr |
◆ GetGlobalVectorLoadSize()
template<typename Derived>
template<typename Problem, typename DataType, index_t MNPerBlock, index_t XPerTile, bool IsWave32Host>
|
inline static constexpr |
Get the maximum global memory vector load size.
- Template Parameters
-
Problem: The UniversalGemmPipelineProblem object.
DataType: The tensor data type we're considering.
MNPerBlock: The MPerBlock or NPerBlock value, depending on the tensor (A/B).
XPerTile: The contiguous tile dimension size.
- Returns
- Maximum DRAM vector load size.
◆ GetSmemPackA()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ GetSmemPackB()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ GetSmemSize()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ GetSmemSizeA()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ GetSmemSizeB()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ GetVectorSizeA()
template<typename Derived>
template<typename Problem, bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeB()
template<typename Derived>
template<typename Problem, bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeC()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
Get the vector store size for C tensor.
- Template Parameters
-
Problem - Gemm pipeline problem class.
- Note
- The vector store size for the output C tensor depends on multiple factors, such as its data layout and warp GEMM C transposition. In general, it is the number of consecutive elements in the contiguous C dimension held by a single thread.
- Returns
- The vector store size for C tensor.
◆ IsTransposeC()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ MakeADramTileDistribution()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ MakeALdsBlockDescriptor()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ MakeBDramTileDistribution()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ MakeBLdsBlockDescriptor()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
Create LDS block descriptor for B tensor.
- Template Parameters
-
Problem - Gemm pipeline problem.
- Returns
- B tensor LDS block descriptor.
◆ MakeShuffledARegTileDistribution()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
◆ MakeShuffledBRegTileDistribution()
template<typename Derived>
template<typename Problem>
|
inline static constexpr |
Member Data Documentation
◆ DefaultATileAccessPattern
template<typename Derived>
|
static constexpr |
◆ DefaultBTileAccessPattern
template<typename Derived>
|
static constexpr |
◆ I0
template<typename Derived>
|
static constexpr |
◆ I1
template<typename Derived>
|
static constexpr |
◆ I2
template<typename Derived>
|
static constexpr |
◆ is_a_load_tr
template<typename Derived>
template<typename Problem>
|
static constexpr |
◆ is_b_load_tr
template<typename Derived>
template<typename Problem>
|
static constexpr |
The documentation for this struct was generated from the following file: