tensor_descriptor.hpp Source File

tensor_descriptor.hpp Source File#

Composable Kernel: tensor_descriptor.hpp Source File
tensor_description/tensor_descriptor.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
9
10namespace ck {
11
12template <index_t NDimHidden, typename VisibleDimensionIds>
13struct TensorCoordinate;
14
15template <index_t NTransform, index_t NDimVisible, typename UpdateLowerIndexHack>
17
18// Transforms: Tuple<transforms...>
19// LowerDimensionIdss : Tuple<Sequence<...>, ...>
20// UpperDimensionIdss : Tuple<Sequence<...>, ...>
21// VisibleDimensionIds : Sequence<...>
22template <typename Transforms,
23 typename LowerDimensionIdss,
24 typename UpperDimensionIdss,
25 typename VisibleDimensionIds,
26 typename ElementSpaceSize>
28{
29 // TODO make these private
30 __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }
31
32 __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
33 {
34 return VisibleDimensionIds::Size();
35 }
36
37 __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
38 {
39 constexpr auto all_low_dim_ids = unpack(
40 [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});
41
42 constexpr auto all_up_dim_ids = unpack(
43 [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});
44
45 constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
46
47 using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
50
51 return unique_sort_all_dim_ids::Size();
52 }
53
54 __host__ __device__ static constexpr auto InitializeElementSize(const Transforms& transforms)
55 {
56 const auto lengths = generate_tuple(
57 [&](auto idim_visible) {
58 constexpr auto tmp = GetTransformAndItsUpperDimension(idim_visible);
59
60 constexpr index_t itran = tmp[Number<0>{}];
61 constexpr index_t idim_up = tmp[Number<1>{}];
62 constexpr bool found = tmp[Number<2>{}];
63
64 static_assert(found == true,
65 "wrong! not found matching transformation and upper-dimension");
66
67 const auto length =
68 transforms[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];
69
70 return length;
71 },
73
74 // TODO: make container_reduce support tuple of Number and index_t
75 return container_reduce(lengths, math::multiplies{}, Number<1>{});
76 }
77
78 template <index_t IDim>
79 __host__ __device__ static constexpr auto GetTransformAndItsUpperDimension(Number<IDim>)
80 {
81 constexpr auto idim_visible = Number<IDim>{};
82
83 constexpr index_t idim_hidden = VisibleDimensionIds::At(idim_visible);
84
85 index_t itran_found = 0;
86 index_t idim_up_found = 0;
87 bool found = false;
88
89 static_for<0, ntransform_, 1>{}([&](auto itran) {
90 constexpr auto up_dim_ids = UpperDimensionIdss{}[itran];
91
92 static_for<0, up_dim_ids.Size(), 1>{}([&](auto idim_up) {
93 if constexpr(up_dim_ids[idim_up] == idim_hidden)
94 {
95 itran_found = itran;
96 idim_up_found = idim_up;
97 found = true;
98 }
99 });
100 });
101
102 return make_tuple(itran_found, idim_up_found, found);
103 }
104
108
112
113 // may be index_t or Number<>
114 using ElementSize = remove_cv_t<decltype(InitializeElementSize(Transforms{}))>;
115
116 public:
117#if 0 // workaround compiler complaint about constexpr
118 __host__ __device__ constexpr TensorDescriptor() = default;
119#else
120 __host__ __device__ constexpr TensorDescriptor()
122 {
123 }
124#endif
125
126 __host__ __device__ constexpr TensorDescriptor(const Transforms& transforms,
127 ElementSpaceSize element_space_size)
128 : transforms_{transforms},
130 element_space_size_{element_space_size}
131
132 {
133 static_assert(Transforms::Size() == ntransform_ &&
134 LowerDimensionIdss::Size() == ntransform_ &&
135 UpperDimensionIdss::Size() == ntransform_,
136 "wrong! inconsistent # of transformations");
137
138 // TODO check dependency of dimensions is valid
139 }
140
141 __host__ __device__ static constexpr index_t GetNumOfDimension()
142 {
144 }
145
146 template <index_t IDim>
147 __host__ __device__ constexpr auto GetLength(Number<IDim>) const
148 {
149 static_assert(IDim >= 0 && IDim < ndim_visible_, "wrong! out of range");
150
151 constexpr auto tmp = GetTransformAndItsUpperDimension(Number<IDim>{});
152
153 constexpr index_t itran = tmp[Number<0>{}];
154 constexpr index_t idim_up = tmp[Number<1>{}];
155 constexpr bool found = tmp[Number<2>{}];
156
157 static_assert(found == true,
158 "wrong! not found matching transformation and upper-dimension");
159
160 return transforms_[Number<itran>{}].GetUpperLengths()[Number<idim_up>{}];
161 }
162
163 __host__ __device__ constexpr auto GetLengths() const
164 {
165 // FIXME: use Tuple of reference instead
166 return generate_sequence_v2([&](auto I) { return GetLength(I); }, Number<ndim_visible_>{});
167 }
168
169 __host__ __device__ constexpr auto GetElementSize() const { return element_size_; }
170
171 __host__ __device__ constexpr auto GetElementSpaceSize() const { return element_space_size_; }
172
173 template <typename Idx>
174 __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
175 {
176 static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");
177
178 return make_tensor_coordinate(*this, idx).GetOffset();
179 }
180
181 // TODO make these private
182 __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }
183
184 __host__ __device__ static constexpr auto GetLowerDimensionIdss()
185 {
186 return LowerDimensionIdss{};
187 }
188
189 __host__ __device__ static constexpr auto GetUpperDimensionIdss()
190 {
191 return UpperDimensionIdss{};
192 }
193
194 __host__ __device__ static constexpr auto GetVisibleDimensionIds()
195 {
196 return VisibleDimensionIds{};
197 }
198
199 __host__ __device__ static constexpr bool IsKnownAtCompileTime()
200 {
201 bool is_known = true;
202
203 static_for<0, Transforms::Size(), 1>{}([&](auto i) {
204 is_known &= remove_cvref_t<decltype(Transforms{}[i])>::IsKnownAtCompileTime();
205 });
206
209 }
210
211 __host__ __device__ void Print() const
212 {
213 printf("{");
214 printf("TensorDescriptor, ");
215 static_for<0, ntransform_, 1>{}([&](auto i) {
216 printf("transforms: ");
217 transforms_[i].Print();
218 printf("LowerDimensionIds:");
219 LowerDimensionIdss{}.At(i).Print();
220 printf("UpperDimensionIds:");
221 UpperDimensionIdss{}.At(i).Print();
222 });
223 printf("}");
224
225 VisibleDimensionIds::Print();
226 }
227
228 // TODO make these private
229 Transforms transforms_;
231 ElementSpaceSize element_space_size_;
232};
233
234template <index_t NDimHidden, typename VisibleDimensionIds>
236{
237 // TODO make these private
238 static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();
239
242
243 public:
244 __host__ __device__ constexpr TensorCoordinate() = default;
245
246 __host__ __device__ constexpr TensorCoordinate(const HiddenIndex& idx_hidden)
247 : idx_hidden_{idx_hidden}
248 {
249 }
250
251 __host__ __device__ constexpr auto GetIndex() const { return GetVisibleIndex(); }
252
253 __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
254
255 // TODO make these private
256 __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }
257
258 __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
259
260 __host__ __device__ constexpr auto GetVisibleIndex() const
261 {
262 return get_container_subset(idx_hidden_, VisibleDimensionIds{});
263 }
264
265 // TODO make these private
267};
268
269template <index_t NTransform, index_t NDimVisible, typename UpdateLowerIndexHack>
271{
272 // TODO make these private
274
275 public:
276 __host__ __device__ constexpr TensorCoordinateStep() = default;
277
278 __host__ __device__ constexpr TensorCoordinateStep(const VisibleIndex& idx_diff_visible,
279 const MultiIndex<NTransform>& do_transforms)
280 : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
281 {
282 }
283
284 __host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); }
285
286 // TODO make these private
287 __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
288 {
289 return idx_diff_visible_;
290 }
291
294
295 // HACK: control UpdateLowerIndex()
296 static constexpr UpdateLowerIndexHack update_lower_index_hack_;
297};
298
299// TODO: How to fix this? This is a struct instead of a lambda because a lambda
300// doesn't have a constructor, and it is defined outside the scope where it is used
301// (transform_tensor_descriptor) because a template cannot be defined inside a function
302// template
303template <typename NewTransforms>
305{
306 template <typename I>
307 __host__ __device__ constexpr auto operator()(I) const
308 {
309 using Tran = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;
310 return Number<Tran::GetNumOfUpperDimension()>{};
311 }
312};
313
314template <typename OldTensorDescriptor,
315 typename NewTransforms,
316 typename NewLowerDimensionOldVisibleIdss,
317 typename NewUpperDimensionNewVisibleIdss>
318__host__ __device__ constexpr auto
319transform_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
320 const NewTransforms& new_transforms,
321 NewLowerDimensionOldVisibleIdss,
322 NewUpperDimensionNewVisibleIdss)
323{
324 // sanity check
325 {
326 static_assert(NewTransforms::Size() == NewLowerDimensionOldVisibleIdss::Size() &&
327 NewTransforms::Size() == NewUpperDimensionNewVisibleIdss::Size(),
328 "wrong! inconsitent number of transform");
329
330 constexpr auto all_old_top_ids = unpack([](auto... xs) { return merge_sequences(xs...); },
331 NewLowerDimensionOldVisibleIdss{});
332
333 constexpr auto all_new_top_ids = unpack([](auto... xs) { return merge_sequences(xs...); },
334 NewUpperDimensionNewVisibleIdss{});
335
336 static_assert(is_valid_sequence_map<decltype(all_old_top_ids)>::value &&
337 is_valid_sequence_map<decltype(all_new_top_ids)>::value,
338 "wrong!");
339 }
340
341 // lower dimension's hidden idss
342 // convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
343 // sequences)
344 constexpr auto low_dim_hidden_idss = transform_tuples(
345 // convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
346 [](auto low_dim_visible_ids) constexpr {
347 return transform_sequences(
348 // convert lower dimension visible id to hidden id
349 [](auto low_dim_visible_id) constexpr {
350 return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
351 },
352 low_dim_visible_ids);
353 },
354 NewLowerDimensionOldVisibleIdss{});
355
356 constexpr index_t num_new_transform = NewTransforms::Size();
357
358 // upper dimension's hidden idss
359 constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();
360
361 constexpr auto up_dim_numbers =
363
364 constexpr auto up_dim_numbers_scan = merge_sequences(
366
367 constexpr auto up_dim_hidden_idss = generate_tuple(
368 [old_hidden_dim_number, up_dim_numbers_scan](auto i) constexpr {
369 return
370 typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
371 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
372 1>::type{};
373 },
375
376 // new visible dimension's hidden ids
377 constexpr auto unordered_new_visible_dim_hidden_ids =
378 unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
379
380 constexpr auto new_visible_dim_unordered2ordered =
381 unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
382 NewUpperDimensionNewVisibleIdss{});
383
384 constexpr auto new_visible_dim_hidden_ids =
385 unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
386
387 // put everything together
388 const auto all_transforms = container_concat(old_tensor_desc.GetTransforms(), new_transforms);
389
390 constexpr auto all_low_dim_hidden_idss =
391 container_concat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);
392
393 constexpr auto all_up_dim_hidden_idss =
394 container_concat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);
395
396 const auto element_space_size = old_tensor_desc.GetElementSpaceSize();
397
398 return TensorDescriptor<remove_cv_t<decltype(all_transforms)>,
399 remove_cv_t<decltype(all_low_dim_hidden_idss)>,
400 remove_cv_t<decltype(all_up_dim_hidden_idss)>,
401 remove_cv_t<decltype(new_visible_dim_hidden_ids)>,
402 remove_cv_t<decltype(element_space_size)>>{all_transforms,
403 element_space_size};
404}
405
406template <typename TensorDesc, typename VisibleIndex>
407__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc& tensor_desc,
408 const VisibleIndex& idx_visible)
409{
410 static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
411 "wrong! # of dimension inconsistent");
412
413 constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
414 constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
415 constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();
416
417 MultiIndex<ndim_hidden> idx_hidden;
418
419 // initialize visible index
420 set_container_subset(idx_hidden, visible_dim_ids, idx_visible);
421
422 // calculate hidden index
423 static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
424 auto itran = itran_p1 - Number<1>{};
425 const auto& tran = tensor_desc.GetTransforms().At(itran);
426 constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
427 constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
428
429 const auto idx_up = get_container_subset(idx_hidden, dims_up);
430
431 MultiIndex<dims_low.Size()> idx_low;
432
433 tran.CalculateLowerIndex(idx_low, idx_up);
434
435 set_container_subset(idx_hidden, dims_low, idx_low);
436 });
437
438 return TensorCoordinate<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
439}
440
441// UpdateLowerIndexHack: Sequence<...>
442// HACK: control UpdateLowerIndex
443template <typename TensorDesc, typename VisibleIndex, typename UpdateLowerIndexHack>
444__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc&,
445 const VisibleIndex& idx_diff_visible,
446 UpdateLowerIndexHack)
447{
448 static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
449 "wrong! # of dimension inconsistent");
450
451 constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
452 constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
453 constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
454 constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();
455
456 static_assert(UpdateLowerIndexHack::Size() == ntransform, "wrong!");
457
458 // use index_t for boolean type
459 auto do_transforms = make_zero_multi_index<ntransform>();
460 auto is_non_zero_diff = make_zero_multi_index<ndim_hidden>();
461
462 // decide do_transform by checking for non-zero index diff components
463 MultiIndex<VisibleIndex::Size()> non_zero_diff_pick_visible;
464
466 [&](auto i) { non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0); });
467
468 set_container_subset(is_non_zero_diff, visible_dim_ids, non_zero_diff_pick_visible);
469
470 static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
471 constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
472 constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
473
474 const auto non_zero_diff_pick_up = get_container_subset(is_non_zero_diff, dims_up);
475
476 MultiIndex<dims_low.Size()> non_zero_diff_pick_low;
477
478 // if any of upper index diff components is non-zero, then
479 // 1) Need to do this transform
480 // 2) all components of lower index diff are assumed to be non-zero and need to be
481 // computed
482 const bool idx_diff_up_has_non_zero = container_reduce(
483 non_zero_diff_pick_up, [](auto a, auto b) constexpr { return a or b; }, false);
484
485 do_transforms(itran) = idx_diff_up_has_non_zero;
486
487 static_for<0, dims_low.Size(), 1>{}(
488 [&](auto i) { non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero; });
489
490 set_container_subset(is_non_zero_diff, dims_low, non_zero_diff_pick_low);
491 });
492
494 do_transforms};
495}
496
497template <typename TensorDesc, typename VisibleIndex>
498__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc&,
499 const VisibleIndex& idx_diff_visible)
500{
501 constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
502
504 TensorDesc{}, idx_diff_visible, typename uniform_sequence_gen<ntransform, 0>::type{});
505}
506
507template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
508__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc& tensor_desc,
509 TensorCoord& coord,
510 const TensorCoordStep& coord_step)
511{
512 constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
513 constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
514
515 // this is what needs to be calculated
516 auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();
517
518 // initialize visible index diff
520 idx_diff_hidden, TensorDesc::GetVisibleDimensionIds(), coord_step.GetVisibleIndexDiff());
521
522 // this is what needs to be updated
523 auto& idx_hidden = coord.GetHiddenIndex();
524
525 // update visible index
526 auto idx_hidden_pick_visible =
527 get_container_subset(idx_hidden, TensorDesc::GetVisibleDimensionIds());
528
529 idx_hidden_pick_visible += coord_step.GetIndexDiff();
530
531 set_container_subset(idx_hidden, TensorDesc::GetVisibleDimensionIds(), idx_hidden_pick_visible);
532
533 // update rest of hidden index
534 static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
535 if(coord_step.do_transforms_[itran])
536 {
537 const auto& tran = tensor_desc.GetTransforms().At(itran);
538 constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
539 constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
540
541 const auto idx_up_new = get_container_subset(idx_hidden, dims_up);
542 auto idx_low = get_container_subset(idx_hidden, dims_low);
543 const auto idx_diff_up = get_container_subset(idx_diff_hidden, dims_up);
544
545 MultiIndex<dims_low.Size()> idx_diff_low;
546
547 // HACK: control UpdateLowerIndex for Merge using hack
548 constexpr index_t Hack = decltype(coord_step.update_lower_index_hack_)::At(itran);
549
550 tran.UpdateLowerIndex(idx_diff_low, idx_diff_up, idx_low, idx_up_new, Number<Hack>{});
551
552 set_container_subset(idx_diff_hidden, dims_low, idx_diff_low);
553 set_container_subset(idx_hidden, dims_low, idx_low);
554 }
555 });
556}
557
558template <typename TensorDesc, typename TensorCoord>
559__host__ __device__ constexpr bool
561 const TensorCoord& coord)
562{
563 bool valid = true;
564
565 constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
566
567 const auto& idx_hidden = coord.GetHiddenIndex();
568
569 static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
570 const auto tran = tensor_desc.GetTransforms().At(itran);
571
572 // check validity, only if current transformation does not always have a valid mapping
573 if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
574 {
575 const auto idx_up =
576 get_container_subset(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));
577
578 // Comment: using valid = valid && .. will result in weird control flow in ISA
579 valid &= tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
580 }
581 });
582
583 return valid;
584}
585
586template <typename TensorDesc, typename TensorCoord>
587__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
588 const TensorCoord& coord)
589{
590 // check visible index
591 const auto& idx_visible = coord.GetVisibleIndex();
592
593 bool is_visible_index_valid = true;
594
595 static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
596 [&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
597 is_visible_index_valid =
598 is_visible_index_valid &&
599 (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
600 });
601
602 // check other hidden index
603 return is_visible_index_valid &&
605}
606
607template <typename TensorDesc>
609 TensorDesc{}, MultiIndex<remove_cvref_t<TensorDesc>::GetNumOfDimension()>{}));
610
611template <typename TensorDesc>
613 TensorDesc{}, MultiIndex<remove_cvref_t<TensorDesc>::GetNumOfDimension()>{}));
614
615} // namespace ck
Definition ck.hpp:268
__host__ __device__ constexpr auto container_concat(const X &x, const Ys &... ys)
Definition utility/container_helper.hpp:320
int32_t index_t
Definition ck.hpp:299
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
decltype(make_tensor_coordinate_step( TensorDesc{}, MultiIndex< remove_cvref_t< TensorDesc >::GetNumOfDimension()>{})) TensorCoordinateStep_t
Definition tensor_description/tensor_descriptor.hpp:612
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition type.hpp:297
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:560
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto unpack(F &&f, X &&x)
Definition functional4.hpp:46
__host__ __device__ constexpr auto transform_sequences(F f, Sequence< Xs... >)
Definition utility/sequence.hpp:774
__host__ __device__ constexpr auto transform_tuples(F f, const X &x)
Definition tuple_helper.hpp:98
__host__ __device__ constexpr auto generate_sequence_v2(F &&f, Number< N >)
Definition sequence_helper.hpp:25
decltype(make_tensor_coordinate( TensorDesc{}, MultiIndex< remove_cvref_t< TensorDesc >::GetNumOfDimension()>{})) TensorCoordinate_t
Definition tensor_description/tensor_descriptor.hpp:608
__host__ __device__ constexpr auto get_container_subset(const Array< T, N > &arr, Sequence< Is... >)
Definition utility/container_helper.hpp:346
__host__ __device__ constexpr auto make_zero_multi_index()
Definition array_multi_index.hpp:21
__host__ __device__ constexpr auto container_reduce(const Container &x, Reduce reduce, Init init, Number< IBegin >=Number< 0 >{}, Number< IEnd >=Number< Container::Size()>{}, Number< IStep >=Number< 1 >{})
Definition utility/container_helper.hpp:111
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto generate_sequence(F, Number< N >)
Definition sequence_helper.hpp:18
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
typename remove_cv< T >::type remove_cv_t
Definition type.hpp:295
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:587
__host__ __device__ constexpr void set_container_subset(Array< T, N > &y, Sequence< Is... > picks, const Array< T, sizeof...(Is)> &x)
Definition utility/container_helper.hpp:363
__host__ __device__ constexpr auto merge_sequences(Seqs...)
Definition utility/sequence.hpp:768
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
__host__ __device__ constexpr auto inclusive_scan_sequence(Seq, Reduce, Number< Init >)
Definition utility/sequence.hpp:812
Array< index_t, N > MultiIndex
Definition array_multi_index.hpp:12
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1517
Definition utility/sequence.hpp:43
Definition tensor_description/tensor_descriptor.hpp:236
__host__ __device__ auto & GetHiddenIndex()
Definition tensor_description/tensor_descriptor.hpp:258
__host__ __device__ constexpr TensorCoordinate()=default
__host__ __device__ constexpr const auto & GetHiddenIndex() const
Definition tensor_description/tensor_descriptor.hpp:256
__host__ __device__ constexpr auto GetIndex() const
Definition tensor_description/tensor_descriptor.hpp:251
__host__ __device__ constexpr index_t GetOffset() const
Definition tensor_description/tensor_descriptor.hpp:253
__host__ __device__ constexpr auto GetVisibleIndex() const
Definition tensor_description/tensor_descriptor.hpp:260
MultiIndex< NDimHidden > HiddenIndex
Definition tensor_description/tensor_descriptor.hpp:240
__host__ __device__ constexpr TensorCoordinate(const HiddenIndex &idx_hidden)
Definition tensor_description/tensor_descriptor.hpp:246
MultiIndex< ndim_visible_ > VisibleIndex
Definition tensor_description/tensor_descriptor.hpp:241
HiddenIndex idx_hidden_
Definition tensor_description/tensor_descriptor.hpp:266
static constexpr index_t ndim_visible_
Definition tensor_description/tensor_descriptor.hpp:238
Definition tensor_description/tensor_descriptor.hpp:271
MultiIndex< NDimVisible > VisibleIndex
Definition tensor_description/tensor_descriptor.hpp:273
__host__ __device__ constexpr const auto & GetVisibleIndexDiff() const
Definition tensor_description/tensor_descriptor.hpp:287
MultiIndex< NTransform > do_transforms_
Definition tensor_description/tensor_descriptor.hpp:293
static constexpr UpdateLowerIndexHack update_lower_index_hack_
Definition tensor_description/tensor_descriptor.hpp:296
VisibleIndex idx_diff_visible_
Definition tensor_description/tensor_descriptor.hpp:292
__host__ __device__ constexpr TensorCoordinateStep(const VisibleIndex &idx_diff_visible, const MultiIndex< NTransform > &do_transforms)
Definition tensor_description/tensor_descriptor.hpp:278
__host__ __device__ constexpr TensorCoordinateStep()=default
__host__ __device__ constexpr const auto & GetIndexDiff() const
Definition tensor_description/tensor_descriptor.hpp:284
Definition tensor_description/tensor_descriptor.hpp:28
__host__ static __device__ constexpr auto InitializeElementSize(const Transforms &transforms)
Definition tensor_description/tensor_descriptor.hpp:54
__host__ __device__ constexpr auto GetLengths() const
Definition tensor_description/tensor_descriptor.hpp:163
remove_cv_t< decltype(InitializeElementSize(Transforms{}))> ElementSize
Definition tensor_description/tensor_descriptor.hpp:114
static constexpr index_t ndim_hidden_
Definition tensor_description/tensor_descriptor.hpp:107
static constexpr index_t ndim_visible_
Definition tensor_description/tensor_descriptor.hpp:106
__host__ __device__ constexpr TensorDescriptor()
Definition tensor_description/tensor_descriptor.hpp:120
ElementSize element_size_
Definition tensor_description/tensor_descriptor.hpp:230
TensorCoordinate< ndim_hidden_, VisibleDimensionIds > Coordinate
Definition tensor_description/tensor_descriptor.hpp:111
__host__ __device__ void Print() const
Definition tensor_description/tensor_descriptor.hpp:211
__host__ static __device__ constexpr index_t GetNumOfHiddenDimension()
Definition tensor_description/tensor_descriptor.hpp:37
__host__ static __device__ constexpr auto GetUpperDimensionIdss()
Definition tensor_description/tensor_descriptor.hpp:189
MultiIndex< ndim_hidden_ > HiddenIndex
Definition tensor_description/tensor_descriptor.hpp:110
ElementSpaceSize element_space_size_
Definition tensor_description/tensor_descriptor.hpp:231
MultiIndex< ndim_visible_ > VisibleIndex
Definition tensor_description/tensor_descriptor.hpp:109
__host__ static __device__ constexpr bool IsKnownAtCompileTime()
Definition tensor_description/tensor_descriptor.hpp:199
__host__ static __device__ constexpr index_t GetNumOfDimension()
Definition tensor_description/tensor_descriptor.hpp:141
__host__ static __device__ constexpr index_t GetNumOfVisibleDimension()
Definition tensor_description/tensor_descriptor.hpp:32
__host__ __device__ constexpr auto GetElementSpaceSize() const
Definition tensor_description/tensor_descriptor.hpp:171
__host__ static __device__ constexpr auto GetVisibleDimensionIds()
Definition tensor_description/tensor_descriptor.hpp:194
static constexpr index_t ntransform_
Definition tensor_description/tensor_descriptor.hpp:105
__host__ __device__ constexpr auto GetLength(Number< IDim >) const
Definition tensor_description/tensor_descriptor.hpp:147
__host__ __device__ constexpr index_t CalculateOffset(const Idx &idx) const
Definition tensor_description/tensor_descriptor.hpp:174
__host__ static __device__ constexpr auto GetLowerDimensionIdss()
Definition tensor_description/tensor_descriptor.hpp:184
__host__ static __device__ constexpr auto GetTransformAndItsUpperDimension(Number< IDim >)
Definition tensor_description/tensor_descriptor.hpp:79
__host__ static __device__ constexpr index_t GetNumOfTransform()
Definition tensor_description/tensor_descriptor.hpp:30
__host__ __device__ constexpr const auto & GetTransforms() const
Definition tensor_description/tensor_descriptor.hpp:182
__host__ __device__ constexpr auto GetElementSize() const
Definition tensor_description/tensor_descriptor.hpp:169
Transforms transforms_
Definition tensor_description/tensor_descriptor.hpp:229
__host__ __device__ constexpr TensorDescriptor(const Transforms &transforms, ElementSpaceSize element_space_size)
Definition tensor_description/tensor_descriptor.hpp:126
Definition utility/sequence.hpp:256
Definition is_known_at_compile_time.hpp:14
Definition utility/sequence.hpp:618
Definition tensor_description/tensor_descriptor.hpp:305
__host__ __device__ constexpr auto operator()(I) const
Definition tensor_description/tensor_descriptor.hpp:307
Definition utility/math.hpp:211
Definition utility/math.hpp:217
Definition utility/math.hpp:34
Definition utility/math.hpp:23
Definition utility/sequence.hpp:543
Definition functional2.hpp:33
typename sequence_gen< NSize, F >::type type
Definition utility/sequence.hpp:295