ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun > Struct Template Reference

ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun > Struct Template Reference

Composable Kernel: ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun > Struct Template Reference
ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun > Struct Template Reference

#include <thread_group_tensor_slice_transfer_v6r3.hpp>

Public Types

using Index = MultiIndex<nDim>

Public Member Functions

__device__ constexpr ThreadGroupTensorSliceTransfer_v6r3 (const Src0Desc &src0_desc, const Index &src0_block_slice_origin, const Src1Desc &src1_desc, const Index &src1_block_slice_origin, const Src2Desc &src2_desc, const Index &src2_block_slice_origin, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const ElementwiseOperation &element_op)
template<typename Src0Buffer, typename Src1Buffer, typename Src2Buffer, typename DstBuffer>
__device__ void Run (const Src0Desc &src0_desc, const Src0Buffer &src0_buf, const Src1Desc &src1_desc, const Src1Buffer &src1_buf, const Src2Desc &src2_desc, const Src2Buffer &src2_buf, const DstDesc &dst_desc, DstBuffer &dst_buf)
__device__ void MoveSrc0SliceWindow (const Src0Desc &src0_desc, const Index &step)
__device__ void MoveSrc1SliceWindow (const Src1Desc &src1_desc, const Index &step)
__device__ void MoveSrc2SliceWindow (const Src2Desc &src2_desc, const Index &step)
__device__ void MoveDstSliceWindow (const DstDesc &dst_desc, const Index &step)

Static Public Attributes

static constexpr index_t nDim = remove_reference_t<Src0Desc>::GetNumOfDimension()
static constexpr auto thread_slice_lengths = SliceLengths{} / ThreadClusterLengths{}

Member Typedef Documentation

◆ Index

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
using ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Index = MultiIndex<nDim>

Constructor & Destructor Documentation

◆ ThreadGroupTensorSliceTransfer_v6r3()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
__device__ constexpr ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::ThreadGroupTensorSliceTransfer_v6r3 ( const Src0Desc & src0_desc,
const Index & src0_block_slice_origin,
const Src1Desc & src1_desc,
const Index & src1_block_slice_origin,
const Src2Desc & src2_desc,
const Index & src2_block_slice_origin,
const DstDesc & dst_desc,
const Index & dst_block_slice_origin,
const ElementwiseOperation & element_op )
inline constexpr

Member Function Documentation

◆ MoveDstSliceWindow()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
__device__ void ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveDstSliceWindow ( const DstDesc & dst_desc,
const Index & step )
inline

◆ MoveSrc0SliceWindow()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
__device__ void ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveSrc0SliceWindow ( const Src0Desc & src0_desc,
const Index & step )
inline

◆ MoveSrc1SliceWindow()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
__device__ void ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveSrc1SliceWindow ( const Src1Desc & src1_desc,
const Index & step )
inline

◆ MoveSrc2SliceWindow()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
__device__ void ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveSrc2SliceWindow ( const Src2Desc & src2_desc,
const Index & step )
inline

◆ Run()

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
template<typename Src0Buffer, typename Src1Buffer, typename Src2Buffer, typename DstBuffer>
__device__ void ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Run ( const Src0Desc & src0_desc,
const Src0Buffer & src0_buf,
const Src1Desc & src1_desc,
const Src1Buffer & src1_buf,
const Src2Desc & src2_desc,
const Src2Buffer & src2_buf,
const DstDesc & dst_desc,
DstBuffer & dst_buf )
inline

Member Data Documentation

◆ nDim

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
index_t ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::nDim = remove_reference_t<Src0Desc>::GetNumOfDimension()
static constexpr

◆ thread_slice_lengths

template<typename ThreadGroup, typename ElementwiseOperation, InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename Src2Data, typename DstData, typename Src0Desc, typename Src1Desc, typename Src2Desc, typename DstDesc, typename DimAccessOrder, index_t VectorDim, index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferSrc2ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
auto ck::ThreadGroupTensorSliceTransfer_v6r3< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::thread_slice_lengths = SliceLengths{} / ThreadClusterLengths{}
static constexpr

The documentation for this struct was generated from the following file: