ThreadGroupTensorSliceTransfer_v7r3< ThreadGroup, SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags, NumThreadScratch, InterDatas > Struct Template Reference
ck::ThreadGroupTensorSliceTransfer_v7r3< ThreadGroup, SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags, NumThreadScratch, InterDatas > Struct Template Reference
#include <thread_group_tensor_slice_transfer_v7r3.hpp>
Public Types | |
| using | Index = MultiIndex<nDim> |
| template<typename T> | |
| using | is_tuple = decltype(std::declval<T&>().IsTuple()) |
Public Member Functions | |
| __device__ constexpr | ThreadGroupTensorSliceTransfer_v7r3 (const SrcDescs &src_descs, const StaticallyIndexedArray< Index, nSrc > &src_block_slice_origins, const DstDescs &dst_descs, const StaticallyIndexedArray< Index, nDst > &dst_block_slice_origins, const ElementwiseOperation &element_op) |
| template<typename SrcBuffers, index_t ThreadScratchId = 0> | |
| __device__ void | RunRead (const SrcDescs &src_descs, const SrcBuffers &src_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename DstBuffers, index_t ThreadScratchId = 0> | |
| __device__ void | RunWrite (const DstDescs &dst_descs, DstBuffers dst_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename DstBuffers, typename DstVgprDescs, typename DstVgprBuffers, index_t ThreadScratchId = 0> | |
| __device__ void | RunWriteAndStoreVgpr (const DstDescs &dst_descs, DstBuffers dst_bufs, const DstVgprDescs &dst_vgpr_desc, DstVgprBuffers dst_vgpr_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename SrcBuffers, typename DstBuffers> | |
| __device__ void | Run (const SrcDescs &src_descs, const SrcBuffers &src_bufs, const DstDescs &dst_descs, DstBuffers dst_bufs) |
| template<index_t ISrc> | |
| __device__ void | MoveSrcSliceWindow (const SrcDescs &src_descs, Number< ISrc > iSrc, const Index &step) |
| __device__ void | MoveSrcSliceWindow (const SrcDescs &src_descs, const Index &step) |
| template<index_t IDst> | |
| __device__ void | MoveDstSliceWindow (const DstDescs &dst_descs, Number< IDst > iDst, const Index &step) |
| __device__ void | MoveDstSliceWindow (const DstDescs &dst_descs, const Index &step) |
Static Public Attributes | |
| static constexpr index_t | nDim |
| static constexpr index_t | nSrc = remove_cvref_t<SrcDescs>::Size() |
| static constexpr index_t | nDst = remove_cvref_t<DstDescs>::Size() |
| static constexpr auto | thread_slice_lengths = SliceLengths{} / ThreadClusterLengths{} |
Member Typedef Documentation
◆ Index
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
| using ck::ThreadGroupTensorSliceTransfer_v7r3< ThreadGroup, SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags, NumThreadScratch, InterDatas >::Index = MultiIndex<nDim> |
◆ is_tuple
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<typename T>
| using ck::ThreadGroupTensorSliceTransfer_v7r3< ThreadGroup, SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcScalarPerVectors, DstScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags, NumThreadScratch, InterDatas >::is_tuple = decltype(std::declval<T&>().IsTuple()) |
Constructor & Destructor Documentation
◆ ThreadGroupTensorSliceTransfer_v7r3()
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
inline constexpr |
Member Function Documentation
◆ MoveDstSliceWindow() [1/2]
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
inline |
◆ MoveDstSliceWindow() [2/2]
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<index_t IDst>
|
inline |
◆ MoveSrcSliceWindow() [1/2]
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
inline |
◆ MoveSrcSliceWindow() [2/2]
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<index_t ISrc>
|
inline |
◆ Run()
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<typename SrcBuffers, typename DstBuffers>
|
inline |
◆ RunRead()
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<typename SrcBuffers, index_t ThreadScratchId = 0>
|
inline |
◆ RunWrite()
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<typename DstBuffers, index_t ThreadScratchId = 0>
|
inline |
◆ RunWriteAndStoreVgpr()
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
template<typename DstBuffers, typename DstVgprDescs, typename DstVgprBuffers, index_t ThreadScratchId = 0>
|
inline |
Member Data Documentation
◆ nDim
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
static constexpr |
Initial value:
= remove_cvref_t<tuple_element_t<0, SrcDescs>>::GetNumOfDimension()
◆ nDst
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
static constexpr |
◆ nSrc
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
static constexpr |
◆ thread_slice_lengths
template<typename ThreadGroup, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename ElementwiseOperation, typename DstInMemOps, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcScalarPerVectors, index_t DstScalarPerVector, typename ThreadTransferSrcResetCoordinateAfterRunFlags, typename ThreadTransferDstResetCoordinateAfterRunFlags, index_t NumThreadScratch = 1, typename InterDatas = DstDatas>
|
static constexpr |
The documentation for this struct was generated from the following file: