blockwise_gemm_dl_v2r3.hpp Source File
blockwise_gemm_dl_v2r3.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_static_buffer(Number< N >)
Definition static_buffer.hpp:186
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength &low_length)
Definition multi_index_transform_helper.hpp:12
__host__ __device__ constexpr auto chain_tensor_adaptors(const TensorAdaptor0 &adaptor0, const TensorAdaptor1 &adaptor1)
Definition tensor_description/tensor_adaptor.hpp:245
__host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition tensor_description/tensor_adaptor.hpp:425
__host__ __device__ constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:55
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
__host__ __device__ constexpr auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:90
static constexpr auto I0
Definition blockwise_gemm_dl_v2r3.hpp:52
static constexpr index_t BK0
Definition blockwise_gemm_dl_v2r3.hpp:57
static constexpr auto I2
Definition blockwise_gemm_dl_v2r3.hpp:54
MultiIndex< 4 > CIndex
Definition blockwise_gemm_dl_v2r3.hpp:50
static constexpr index_t BM0
Definition blockwise_gemm_dl_v2r3.hpp:74
__device__ BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2()
Definition blockwise_gemm_dl_v2r3.hpp:153
static constexpr index_t BM101
Definition blockwise_gemm_dl_v2r3.hpp:65
static constexpr index_t BM
Definition blockwise_gemm_dl_v2r3.hpp:59
static constexpr index_t BK1
Definition blockwise_gemm_dl_v2r3.hpp:58
static constexpr index_t BN101
Definition blockwise_gemm_dl_v2r3.hpp:66
static __device__ CIndex CalculateCThreadOriginOnBlock_BM0_BM1_BN0_BN1(index_t thread_id)
Definition blockwise_gemm_dl_v2r3.hpp:184
static constexpr index_t BM1
Definition blockwise_gemm_dl_v2r3.hpp:71
__host__ static __device__ constexpr auto MakeBBlockDescriptor_BK0_BN0_BN1_BK1(const BBlockDesc_BK0_BN_BK1 &b_block_desc_bk0_bn_bk1)
Definition blockwise_gemm_dl_v2r3.hpp:92
__device__ void Run(const CThreadDesc_BM0_BM11_BN0_BN11 &, const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const
Definition blockwise_gemm_dl_v2r3.hpp:212
static constexpr index_t BN1
Definition blockwise_gemm_dl_v2r3.hpp:72
static constexpr index_t BN0
Definition blockwise_gemm_dl_v2r3.hpp:75
static constexpr auto I1
Definition blockwise_gemm_dl_v2r3.hpp:53
static constexpr auto a_block_desc_bk0_bm0_bm1_bk1_
Definition blockwise_gemm_dl_v2r3.hpp:146
__host__ static __device__ constexpr auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM_BN()
Definition blockwise_gemm_dl_v2r3.hpp:106
MultiIndex< 3 > AIndex
Definition blockwise_gemm_dl_v2r3.hpp:48
__host__ static __device__ constexpr auto GetCThreadTensorLengths_BM0_BM1_BN0_BN1()
Definition blockwise_gemm_dl_v2r3.hpp:141
__host__ static __device__ constexpr auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM0_BM1_BN0_BN1()
Definition blockwise_gemm_dl_v2r3.hpp:123
static constexpr index_t BN11
Definition blockwise_gemm_dl_v2r3.hpp:69
static constexpr index_t BN100
Definition blockwise_gemm_dl_v2r3.hpp:63
static constexpr index_t BM11
Definition blockwise_gemm_dl_v2r3.hpp:68
static constexpr index_t BN
Definition blockwise_gemm_dl_v2r3.hpp:60
__host__ static __device__ constexpr auto MakeABlockDescriptor_BK0_BM0_BM1_BK1(const ABlockDesc_BK0_BM_BK1 &a_block_desc_bk0_bm_bk1)
Definition blockwise_gemm_dl_v2r3.hpp:78
MultiIndex< 3 > BIndex
Definition blockwise_gemm_dl_v2r3.hpp:49
static constexpr auto b_block_desc_bk0_bn0_bn1_bk1_
Definition blockwise_gemm_dl_v2r3.hpp:149
static constexpr auto I3
Definition blockwise_gemm_dl_v2r3.hpp:55
static constexpr index_t BM100
Definition blockwise_gemm_dl_v2r3.hpp:62
Definition utility/sequence.hpp:43
Definition threadwise_contraction_dl.hpp:130
Definition functional2.hpp:33