ThreadGroupTensorSliceTransfer_v4r2< ThreadGroup, ElementwiseOperation, DstInMemOps, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDatas, DstDatas, SrcDescs, DstDescs, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcsScalarPerVector, DstsScalarPerVector, SrcsScalarStrideInVector, DstsScalarStrideInVector, ThreadTransferSrcsResetCoordinateAfterRun, ThreadTransferDstsResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference
ck::ThreadGroupTensorSliceTransfer_v4r2< ThreadGroup, ElementwiseOperation, DstInMemOps, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDatas, DstDatas, SrcDescs, DstDescs, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcsScalarPerVector, DstsScalarPerVector, SrcsScalarStrideInVector, DstsScalarStrideInVector, ThreadTransferSrcsResetCoordinateAfterRun, ThreadTransferDstsResetCoordinateAfterRun, NumThreadScratch > Struct Template Reference
Blockwise data transfer. More...
#include <thread_group_tensor_slice_transfer_v4r2.hpp>
Public Types | |
| using | Index = MultiIndex<nDim> |
Public Member Functions | |
| __device__ constexpr | ThreadGroupTensorSliceTransfer_v4r2 (const SrcDescs &src_descs, const StaticallyIndexedArray< Index, nSrc > &src_block_slice_origins, const DstDescs &dst_descs, const StaticallyIndexedArray< Index, nDst > &dst_block_slice_origins, const ElementwiseOperation &element_op) |
| template<typename SrcBuffers, index_t ThreadScratchId = 0> | |
| __device__ void | RunRead (const SrcDescs &src_descs, const SrcBuffers &src_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename DstBuffers, index_t ThreadScratchId = 0> | |
| __device__ void | RunWrite (const DstDescs &dst_descs, DstBuffers &dst_bufs, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{}) |
| template<typename SrcBuffer, typename DstBuffer, index_t ThreadScratchId> | |
| __device__ void | Run (const SrcDescs &src_descs, const SrcBuffer &src_bufs, const DstDescs &dst_descs, DstBuffer &dst_bufs, Number< ThreadScratchId > thread_scratch_id) |
| __device__ void | MoveSrcSliceWindow (const SrcDescs &src_descs, const Index &step) |
| __device__ void | MoveDstSliceWindow (const DstDescs &dst_descs, const Index &step) |
Static Public Attributes | |
| static constexpr index_t | nDim |
| static constexpr index_t | nSrc = SrcDescs::Size() |
| static constexpr index_t | nDst = DstDescs::Size() |
| static constexpr auto | thread_slice_lengths = BlockSliceLengths{} / ThreadClusterLengths{} |
Detailed Description
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
struct ck::ThreadGroupTensorSliceTransfer_v4r2< ThreadGroup, ElementwiseOperation, DstInMemOps, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDatas, DstDatas, SrcDescs, DstDescs, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcsScalarPerVector, DstsScalarPerVector, SrcsScalarStrideInVector, DstsScalarStrideInVector, ThreadTransferSrcsResetCoordinateAfterRun, ThreadTransferDstsResetCoordinateAfterRun, NumThreadScratch >
struct ck::ThreadGroupTensorSliceTransfer_v4r2< ThreadGroup, ElementwiseOperation, DstInMemOps, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDatas, DstDatas, SrcDescs, DstDescs, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcsScalarPerVector, DstsScalarPerVector, SrcsScalarStrideInVector, DstsScalarStrideInVector, ThreadTransferSrcsResetCoordinateAfterRun, ThreadTransferDstsResetCoordinateAfterRun, NumThreadScratch >
Blockwise data transfer.
This version does the following things to avoid scratch memory issues:
- Uses StaticallyIndexedArray instead of a C array for the thread buffer
- ThreadwiseTensorSliceTransfer_v3 does not keep a reference to the tensor descriptor
- ThreadwiseTensorSliceTransfer_v3::Run() does not construct a new tensor coordinate
Member Typedef Documentation
◆ Index
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
| using ck::ThreadGroupTensorSliceTransfer_v4r2< ThreadGroup, ElementwiseOperation, DstInMemOps, BlockSliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, SrcDatas, DstDatas, SrcDescs, DstDescs, SrcDimAccessOrder, DstDimAccessOrder, SrcVectorDim, DstVectorDim, SrcsScalarPerVector, DstsScalarPerVector, SrcsScalarStrideInVector, DstsScalarStrideInVector, ThreadTransferSrcsResetCoordinateAfterRun, ThreadTransferDstsResetCoordinateAfterRun, NumThreadScratch >::Index = MultiIndex<nDim> |
Constructor & Destructor Documentation
◆ ThreadGroupTensorSliceTransfer_v4r2()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
inline constexpr |
Member Function Documentation
◆ MoveDstSliceWindow()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
inline |
◆ MoveSrcSliceWindow()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
inline |
◆ Run()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename SrcBuffer, typename DstBuffer, index_t ThreadScratchId>
|
inline |
◆ RunRead()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename SrcBuffers, index_t ThreadScratchId = 0>
|
inline |
◆ RunWrite()
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
template<typename DstBuffers, index_t ThreadScratchId = 0>
|
inline |
Member Data Documentation
◆ nDim
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
static constexpr |
Initial value:
=
remove_reference_t<tuple_element_t<0, SrcDescs>>::GetNumOfDimension()
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
◆ nDst
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
static constexpr |
◆ nSrc
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
static constexpr |
◆ thread_slice_lengths
template<typename ThreadGroup, typename ElementwiseOperation, typename DstInMemOps, typename BlockSliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename SrcDatas, typename DstDatas, typename SrcDescs, typename DstDescs, typename SrcDimAccessOrder, typename DstDimAccessOrder, index_t SrcVectorDim, index_t DstVectorDim, typename SrcsScalarPerVector, typename DstsScalarPerVector, typename SrcsScalarStrideInVector, typename DstsScalarStrideInVector, typename ThreadTransferSrcsResetCoordinateAfterRun, typename ThreadTransferDstsResetCoordinateAfterRun, index_t NumThreadScratch = 1>
|
static constexpr |
The documentation for this struct was generated from the following file: