DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
#include <device_multiple_reduce_multiblock.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple()) |
| using | InGridDesc_M_K |
| using | OutGridDesc_M_Tuple = decltype(GenerateOutGrid1dDescTuple()) |
| using | OutGridDesc_M_Tuple_2 = decltype(GenerateOutGrid1dDescTuple_2()) |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const std::array< index_t, NumInputDim > inLengths, const std::array< index_t, NumInputDim > inStrides, const std::array< index_t, NumOutputDim > outLengths, const std::array< std::array< index_t, NumOutputDim >, NumReduction > outStridesArray, const std::array< int, NumReduceDim > reduceDims, const std::array< double, NumReduction > alphas, const std::array< double, NumReduction > betas, const void *in_dev, const std::array< void *, NumReduction > out_dev_buffers, const InElementwiseOperationTuple in_elementwise_op_tuple, const AccElementwiseOperationTuple acc_elementwise_op_tuple) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static constexpr bool | CheckDataTypeTuple () |
| static auto | GenerateOutDataTypePointerTuple () |
| static auto | MakeSrc2dDescriptor (const std::array< index_t, NumInputDim > &inLengths, const std::array< index_t, NumInputDim > &inStrides, int blkGroupSize, int numBlockTileIteration) |
| static auto | MakeDst1dDescriptor (const std::array< index_t, NumOutputDim > &outLengths, const std::array< index_t, NumOutputDim > &outStrides) |
| static auto | GenerateOutGrid1dDescTuple () |
| static auto | MakeDst1dDescriptorForBufferSet (const std::array< index_t, NumOutputDim > &outLengths, const std::array< index_t, NumOutputDim > &outStrides) |
| static auto | GenerateOutGrid1dDescTuple_2 () |
Static Public Attributes | |
| static constexpr index_t | NumInvariantDim = Rank - NumReduceDim |
| static constexpr index_t | NumInputDim = Rank |
| static constexpr index_t | NumOutputDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim |
| static constexpr bool | reduceAllDim = (NumInvariantDim == 0) |
| static constexpr bool | use_multiblock |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
| Static Public Attributes inherited from ck::tensor_operation::device::DeviceMultipleReduce< Rank, NumReduceDim, NumReduction, InElementwiseOperationTuple, AccElementwiseOperationTuple > | |
| static constexpr index_t | NumInputDim = Rank |
| static constexpr index_t | NumOutputDim = (Rank - NumReduceDim > 1) ? Rank - NumReduceDim : 1 |
Member Typedef Documentation
◆ InGridDesc_M_K
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::InGridDesc_M_K |
Initial value:
decltype(MakeSrc2dDescriptor(
std::array<index_t, NumInputDim>{}, std::array<index_t, NumInputDim>{}, 1, 1))
Referenced function: static auto MakeSrc2dDescriptor(const std::array< index_t, NumInputDim > &inLengths, const std::array< index_t, NumInputDim > &inStrides, int blkGroupSize, int numBlockTileIteration)
Definition: device_multiple_reduce_multiblock.hpp:117
◆ OutDataTypePointerTuple
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple()) |
◆ OutGridDesc_M_Tuple
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::OutGridDesc_M_Tuple = decltype(GenerateOutGrid1dDescTuple()) |
◆ OutGridDesc_M_Tuple_2
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
| using ck::tensor_operation::device::DeviceMultipleReduceMultiBlock< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::OutGridDesc_M_Tuple_2 = decltype(GenerateOutGrid1dDescTuple_2()) |
Member Function Documentation
◆ CheckDataTypeTuple()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static constexpr |
◆ GenerateOutDataTypePointerTuple()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
◆ GenerateOutGrid1dDescTuple()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
◆ GenerateOutGrid1dDescTuple_2()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
◆ GetTypeString()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ MakeArgumentPointer()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline override virtual |
◆ MakeDst1dDescriptor()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
◆ MakeDst1dDescriptorForBufferSet()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
◆ MakeInvokerPointer()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline override virtual |
◆ MakeSrc2dDescriptor()
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
inline static |
Member Data Documentation
◆ K_BlockTileSize
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ M_BlockTileSize
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ NumInputDim
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ NumInvariantDim
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ NumOutputDim
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ reduceAllDim
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
◆ use_multiblock
template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
|
static constexpr |
Initial value:
= (OutMemoryDataOperation == InMemoryDataOperationEnum::AtomicAdd)
The documentation for this struct was generated from the following file: