DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched > Struct Template Reference
#include <device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| template<index_t NXdlPerWave_> | |
| using | GridwiseGemmBase |
| using | GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)> |
| using | GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32> |
| using | DefaultBlock2CTileMap = typename GridwiseGemm64::DefaultBlock2CTileMap |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op, ck::index_t KBatch=1) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &karg) |
| static auto | MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op, index_t KBatch) |
| static auto | MakeInvoker () |
Static Public Attributes | |
| static GET_NXDL_PER_WAVE_IMPL constexpr auto | NXdlPerWave64 = GetNXdlPerWave<true>() |
| static constexpr auto | NXdlPerWave32 = GetNXdlPerWave<false>() |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
Member Typedef Documentation
◆ DefaultBlock2CTileMap
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
| using ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::DefaultBlock2CTileMap = typename GridwiseGemm64::DefaultBlock2CTileMap |
◆ GridwiseGemm32
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
| using ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32> |
◆ GridwiseGemm64
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
| using ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)> |
◆ GridwiseGemmBase
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
template<index_t NXdlPerWave_>
| using ck::tensor_operation::device::DeviceGemmXdlSplitKCShuffle_LdsDirectLoad< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferScalarPerVector, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferScalarPerVector, BBlockLdsAddExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXDL, ComputeType, PipelineVer, LoopSched >::GridwiseGemmBase |
Member Function Documentation
◆ GetTypeString()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlinestatic |
◆ IsSupportedArgument() [2/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlineoverridevirtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlinestaticconstexpr |
◆ MakeArgument()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlinestatic |
◆ MakeArgumentPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlineoverridevirtual |
◆ MakeInvoker()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlinestatic |
◆ MakeInvokerPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
inlineoverridevirtual |
Member Data Documentation
◆ I0
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
◆ I1
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
◆ I2
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
◆ I3
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
◆ NXdlPerWave32
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
◆ NXdlPerWave64
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferScalarPerVector, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferScalarPerVector, bool BBlockLdsAddExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CBlockTransferScalarPerVector_NWaveNPerXDL, typename ComputeType = CDataType, PipelineVersion PipelineVer = PipelineVersion::v4, LoopScheduler LoopSched = make_default_loop_scheduler()>
|
staticconstexpr |
The documentation for this struct was generated from the following file: