GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
The Grouped Convolution Backward Weight kernel template. More...
#include <grouped_convolution_backward_weight_kernel.hpp>
Classes | |
| struct | SplitKBatchOffset |
Public Types | |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | GemmPipeline = remove_cvref_t<GemmPipeline_> |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | GemmALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
| using | GemmBLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
| using | GemmCLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
| using | InLayout = remove_cvref_t<typename GroupedConvTraitsType_::InLayout> |
| using | WeiLayout = remove_cvref_t<typename GroupedConvTraitsType_::WeiLayout> |
| using | OutLayout = remove_cvref_t<typename GroupedConvTraitsType_::OutLayout> |
| using | DsLayout = remove_cvref_t<typename GroupedConvTraitsType_::DsLayout> |
| using | GemmDsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
| using | OutDataType = remove_cvref_t<typename GemmPipeline::ADataType> |
| using | InDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
| using | DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
| using | WeiDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
| using | GroupedConvBwdWeightKernelArgsSpecialized |
Public Member Functions | |
| CK_TILE_DEVICE void | operator() (GroupedConvBwdWeightKernelArgsSpecialized kargs) const |
Static Public Member Functions | |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST constexpr auto | GridSize (const GroupedConvBwdWeightKernelArgsSpecialized &kargs) |
| static CK_TILE_HOST constexpr auto | BlockSize () |
| static CK_TILE_HOST constexpr GroupedConvBwdWeightKernelArgsSpecialized | MakeKernelArgs (const GroupedConvBwdWeightHostArgs &hostArgs) |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
| static CK_TILE_HOST auto | Preprocess (const GroupedConvBwdWeightKernelArgsSpecialized &kargs, const stream_config &s) |
| static CK_TILE_HOST bool | IsSupportedArgument (const GroupedConvBwdWeightKernelArgsSpecialized &kargs) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set> | |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const OutDataType *a_ptr, const InDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, WeiDataType *c_ptr, const GroupedConvBwdWeightKernelArgsSpecialized &kargs) |
| template<typename TensorView> | |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views, const index_t k_batch) |
| template<typename PadView> | |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n, const index_t i_k) |
| Create views to the data that each workgroup will process. | |
| static CK_TILE_DEVICE void | RunGemm (const OutDataType *a_ptr, const InDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, WeiDataType *c_ptr, void *smem_ptr_0, const GroupedConvBwdWeightKernelArgsSpecialized &kargs, const index_t num_loop, const index_t block_idx_m, const index_t block_idx_n, const index_t block_idx_k) |
| Runs a single GEMM problem cooperatively by the whole workgroup. | |
| static CK_TILE_DEVICE void | RunGemm2LDS (const OutDataType *a_ptr, const InDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, WeiDataType *c_ptr, void *__restrict__ smem_ptr_0, void *__restrict__ smem_ptr_1, const GroupedConvBwdWeightKernelArgsSpecialized &kargs, const index_t num_loop, const index_t block_idx_m, const index_t block_idx_n, const index_t block_idx_k) |
| Runs a single GEMM problem cooperatively by the whole workgroup. | |
Static Public Attributes | |
| static constexpr index_t | NDimSpatial = GroupedConvTraitsType_::NDimSpatial_ |
| static constexpr ConvolutionSpecialization | ConvSpecialization |
| static constexpr index_t | NumDTensor = GroupedConvTraitsType_::NumDTensor |
| static constexpr index_t | kBlockSize = GemmPipeline::BlockSize |
| static constexpr bool | IsSplitKSupported = true |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | I3 = number<3>() |
Detailed Description
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
struct ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
struct ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
The Grouped Convolution Backward Weight kernel template.
Overview
This class provides the grouped convolution backward weight kernel template. By semantically dividing the Implicit GEMM algorithm into the following parts, we achieve a flexible, versatile and robust kernel implementation.
- Prolog - The start of the GEMM kernel implementation in the operator() function call operator, which determines the work scope of each workgroup.
- GemmPipeline - The core part, the "heart" of the matrix multiplication algorithm. This is the place where each workgroup loads data from global memory and carries out dot products.
- Epilogue - The "final" part of the matrix multiplication implementation, responsible for storing results to global memory. This is also the place where any additional operator fusion may take place.
Additionally, both GemmPipeline and EpiloguePipeline are parameterized with a so-called Policy, which determines all internal details of those functional parts. You can think of it as both the gemm and epilogue pipelines providing the control-flow logic, which is controlled by policies. Moreover, the policy is responsible for the definition of all necessary data layouts and the threads' work distribution.
- Template Parameters
-
GroupedConvTraitsType_: The type of class providing traits for grouped convolution.
TilePartitioner_: The type of class providing mapping of workgroup index into the output data tile to be calculated. It determines the workgroup to data relationship (or in other words - which data would be processed and calculated by which workgroup).
GemmPipeline_: The type of class which provides the core part of matrix multiplication. This class should provide implementation of data loading from global memory and performing block-wise matrix multiplication. You can think of it as the work done from a single workgroup's point of view.
EpiloguePipeline_: The type of class providing the final part of matrix multiplication implementation. It is responsible for storing results calculated by GemmPipeline to the output C tensor in global memory.
Member Typedef Documentation
◆ DsDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
◆ DsLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsLayout = remove_cvref_t<typename GroupedConvTraitsType_::DsLayout> |
◆ EpiloguePipeline
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ GemmALayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
◆ GemmBLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmBLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
◆ GemmCLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmCLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
◆ GemmDsLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmDsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
◆ GemmPipeline
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_> |
◆ GroupedConvBwdWeightKernelArgsSpecialized
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GroupedConvBwdWeightKernelArgsSpecialized |
Initial value:
The Grouped Convolution kernel device arguments.
Definition grouped_convolution_backward_weight_kernel.hpp:22
◆ InDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::InDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
◆ InLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::InLayout = remove_cvref_t<typename GroupedConvTraitsType_::InLayout> |
◆ OutDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::OutDataType = remove_cvref_t<typename GemmPipeline::ADataType> |
◆ OutLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::OutLayout = remove_cvref_t<typename GroupedConvTraitsType_::OutLayout> |
◆ TilePartitioner
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
◆ WeiDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::WeiDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ WeiLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionBackwardWeightKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::WeiLayout = remove_cvref_t<typename GroupedConvTraitsType_::WeiLayout> |
Member Function Documentation
◆ BlockSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ GetName()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticnodiscard |
◆ GetSmemSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ GridSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ IsSupportedArgument()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ MakeGemmPadViews()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename TensorView>
|
inlinestatic |
◆ MakeGemmTensorViews()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
|
inlinestatic |
◆ MakeGemmTileWindows()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename PadView>
|
inlinestatic |
Create views to the data that each workgroup will process.
- Parameters
-
views: padded views of A, B, D and C tensors
i_m: block m-index
i_n: block n-index
i_k: block k-index
- Returns
- tuple of tile windows for A, B, D and C tensors
◆ MakeKernelArgs()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ operator()()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline |
◆ Preprocess()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ RunGemm()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
Runs a single GEMM problem cooperatively by the whole workgroup.
- Parameters
-
a_ptr: input A pointer
b_ptr: input B pointer
c_ptr: output C pointer
smem_ptr_0: The start memory pointer of the shared memory block.
kargs: Grouped Convolution Backward Weight kernel arguments
block_idx_m: The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n: The GEMM's output N dimension tile index processed by this workgroup.
◆ RunGemm2LDS()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
Runs a single GEMM problem cooperatively by the whole workgroup.
- Note
- RunGemm2LDS runs with two shared memory buffers, using the ping-pong buffer mechanism.
- Parameters
-
a_ptr: input A pointer
b_ptr: input B pointer
c_ptr: output C pointer
smem_ptr_0: The starting pointer of the 1st shared memory block.
smem_ptr_1: The starting pointer of the 2nd shared memory block.
kargs: Grouped Convolution Backward Weight kernel arguments
block_idx_m: The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n: The GEMM's output N dimension tile index processed by this workgroup.
Member Data Documentation
◆ ConvSpecialization
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
Initial value:
=
GroupedConvTraitsType_::ConvSpecialization
◆ I0
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I1
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I2
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I3
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ IsSplitKSupported
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ kBlockSize
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ NDimSpatial
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ NumDTensor
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
The documentation for this struct was generated from the following file: