DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

#include <device_multiple_reduce_threadwise.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >:
ck::tensor_operation::device::DeviceMultipleReduce< Rank, NumReduceDim, NumReduction, InElementwiseOperationTuple, AccElementwiseOperationTuple > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())
using InGridDesc_M_K
using OutGridDesc_M_Tuple = decltype(GenerateOutGrid1dDescTuple())

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::array< index_t, NumInputDim > inLengths, const std::array< index_t, NumInputDim > inStrides, const std::array< index_t, NumOutputDim > outLengths, const std::array< std::array< index_t, NumOutputDim >, NumReduction > outStridesArray, const std::array< int, NumReduceDim > reduceDims, const std::array< double, NumReduction > alphas, const std::array< double, NumReduction > betas, const void *in_dev, const std::array< void *, NumReduction > out_dev_buffers, const InElementwiseOperationTuple in_elementwise_op_tuple, const AccElementwiseOperationTuple acc_elementwise_op_tuple) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static auto GenerateOutDataTypePointerTuple ()
static auto MakeSrc2dDescriptor (const std::array< index_t, NumInputDim > &inLengths, const std::array< index_t, NumInputDim > &inStrides)
static auto MakeDst1dDescriptor (const std::array< index_t, NumOutputDim > &outLengths, const std::array< index_t, NumOutputDim > &outStrides)
static auto GenerateOutGrid1dDescTuple ()

Static Public Attributes

static constexpr index_t NumInvariantDim = Rank - NumReduceDim
static constexpr index_t NumInputDim = Rank
static constexpr index_t NumOutputDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr bool reduceAllDim = (NumInvariantDim == 0)
static constexpr index_t M_BlockTileSize = BlockSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = 1 * KThreadSliceSize
Static Public Attributes inherited from ck::tensor_operation::device::DeviceMultipleReduce< Rank, NumReduceDim, NumReduction, InElementwiseOperationTuple, AccElementwiseOperationTuple >
static constexpr index_t NumInputDim = Rank
static constexpr index_t NumOutputDim = (Rank - NumReduceDim > 1) ? Rank - NumReduceDim : 1

Member Typedef Documentation

◆ InGridDesc_M_K

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::InGridDesc_M_K
Initial value:
decltype(MakeSrc2dDescriptor(std::array<index_t, NumInputDim>{},
std::array<index_t, NumInputDim>{}))
static auto MakeSrc2dDescriptor(const std::array< index_t, NumInputDim > &inLengths, const std::array< index_t, NumInputDim > &inStrides)
Definition device_multiple_reduce_threadwise.hpp:85

◆ OutDataTypePointerTuple

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::OutDataTypePointerTuple = decltype(GenerateOutDataTypePointerTuple())

◆ OutGridDesc_M_Tuple

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::OutGridDesc_M_Tuple = decltype(GenerateOutGrid1dDescTuple())

Member Function Documentation

◆ GenerateOutDataTypePointerTuple()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::GenerateOutDataTypePointerTuple ( )
inline static

◆ GenerateOutGrid1dDescTuple()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::GenerateOutGrid1dDescTuple ( )
inline static

◆ GetTypeString()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
std::string ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::GetTypeString ( ) const
inline override virtual

◆ IsSupportedArgument()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
bool ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ MakeArgumentPointer()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::MakeArgumentPointer ( const std::array< index_t, NumInputDim > inLengths,
const std::array< index_t, NumInputDim > inStrides,
const std::array< index_t, NumOutputDim > outLengths,
const std::array< std::array< index_t, NumOutputDim >, NumReduction > outStridesArray,
const std::array< int, NumReduceDim > reduceDims,
const std::array< double, NumReduction > alphas,
const std::array< double, NumReduction > betas,
const void * in_dev,
const std::array< void *, NumReduction > out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
const AccElementwiseOperationTuple acc_elementwise_op_tuple )
inline override virtual

◆ MakeDst1dDescriptor()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::MakeDst1dDescriptor ( const std::array< index_t, NumOutputDim > & outLengths,
const std::array< index_t, NumOutputDim > & outStrides )
inline static

◆ MakeInvokerPointer()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::MakeInvokerPointer ( )
inline override virtual

◆ MakeSrc2dDescriptor()

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::MakeSrc2dDescriptor ( const std::array< index_t, NumInputDim > & inLengths,
const std::array< index_t, NumInputDim > & inStrides )
inline static

Member Data Documentation

◆ K_BlockTileSize

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::K_BlockTileSize = 1 * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::M_BlockTileSize = BlockSize * MThreadSliceSize
static constexpr

◆ NumInputDim

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::NumInputDim = Rank
static constexpr

◆ NumInvariantDim

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::NumInvariantDim = Rank - NumReduceDim
static constexpr

◆ NumOutputDim

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::NumOutputDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr

◆ reduceAllDim

template<index_t NumReduction, typename InDataType, typename AccDataType, typename OutDataTypeTuple, index_t Rank, index_t NumReduceDim, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, bool PropagateNan, index_t BlockSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
bool ck::tensor_operation::device::DeviceMultipleReduceThreadWise< NumReduction, InDataType, AccDataType, OutDataTypeTuple, Rank, NumReduceDim, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, PropagateNan, BlockSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::reduceAllDim = (NumInvariantDim == 0)
static constexpr

The documentation for this struct was generated from the following file: