26template <
typename Range>
27std::ostream&
LogRange(std::ostream& os, Range&& range, std::string delim)
41template <
typename T,
typename Range>
42std::ostream&
LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
53 if constexpr(std::is_same_v<RangeType, ck::f8_t> || std::is_same_v<RangeType, ck::bf8_t> ||
54 std::is_same_v<RangeType, ck::bhalf_t>)
56 os << ck::type_convert<float>(v);
58 else if constexpr(std::is_same_v<RangeType, ck::pk_i4_t> ||
59 std::is_same_v<RangeType, ck::f4x2_pk_t>)
63 os << vector_of_floats.template AsType<float>()[
ck::Number<0>{}] << delim
64 << vector_of_floats.template AsType<float>()[
ck::Number<1>{}];
68 os << static_cast<T>(v);
74template <
typename F,
typename T, std::size_t... Is>
77 return f(std::get<Is>(args)...);
80template <
typename F,
typename T>
83 constexpr std::size_t N = std::tuple_size<T>{};
88template <
typename F,
typename T, std::size_t... Is>
91 return F(std::get<Is>(args)...);
94template <
typename F,
typename T>
97 constexpr std::size_t N = std::tuple_size<T>{};
185 template <
typename Layout>
187 std::vector<std::size_t> strides,
189 : mLens(
std::move(lens)), mStrides(
std::move(strides))
195 std::cout <<
"Original Lens: [";
196 LogRange(std::cout, mLens,
", ") <<
"] and Strides: [";
197 LogRange(std::cout, mStrides,
", ") <<
"]" << std::endl;
198 std::cout <<
"Layout: " <<
layout <<
" --> " << new_layout << std::endl;
213 template <
typename F,
typename OrigLayout>
221 default: f(orig);
break;
225 template <
typename Layout>
228 if constexpr(!std::is_same_v<Layout, DefaultLayout>)
240 const auto rank = mLens.size();
261 if(mStrides.size() == 2)
281 template <
typename Layout>
284 if constexpr(std::is_same_v<Layout, ck::tensor_layout::BypassLayoutVerification>)
288 auto strides_int = AsInt(mStrides);
291 if(mStrides.empty() || std::all_of(strides_int.begin(), strides_int.end(), [](
int stride) {
296 if constexpr(!(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
297 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>))
299 std::cerr <<
"Only RowMajor and ColumnMajor layouts are supported for empty "
301 <<
layout <<
". Will calculate strides as RowMajor." << std::endl;
305 mStrides.resize(mLens.size(), 0);
310 std::partial_sum(mLens.rbegin(),
312 mStrides.rbegin() + 1,
313 std::multiplies<std::size_t>());
315 if constexpr(std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
318 if(mStrides.size() >= 2)
319 std::swap(mStrides[mStrides.size() - 1], mStrides[mStrides.size() - 2]);
326 else if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
327 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
329 auto rank = mStrides.size();
330 if(mLens.size() >= 2 &&
rank >= 2)
332 const auto inner_idx =
333 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
334 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
335 if(mStrides[inner_idx] <= 0)
337 mStrides[inner_idx] = 1;
339 if(mStrides[outer_idx] <= 0)
341 mStrides[outer_idx] = mLens[inner_idx] * mStrides[inner_idx];
347 template <
typename Layout>
350 if constexpr(std::is_same_v<ck::tensor_layout::BypassLayoutVerification, Layout>)
357 throw std::runtime_error(
358 "HostTensorDescriptor::ValidateStrides: empty tensor dimensions is not allowed.");
361 const int rank = mLens.size();
367 if constexpr(std::is_same_v<ck::tensor_layout::BaseTensorLayout, Layout>)
371 throw std::runtime_error(
"HostTensorDescriptor::ValidateStrides: Abstract tensor "
372 "layout BaseTensorLayout can't be verified. Pls "
373 "pass specific tensor layout to HostTensorDescriptor (or "
374 "ck::tensor_layout::BypassLayoutVerification)");
378 if constexpr(std::is_base_of_v<ck::tensor_layout::gemm::BaseGemmLayout, Layout>)
380 if(mLens.size() != mStrides.size())
382 std::ostringstream oss;
383 oss <<
"HostTensorDescriptor::ValidateStrides: mismatch between tensor rank and "
386 throw std::runtime_error(oss.str());
391 auto strides_int = AsInt(mStrides);
393 strides_int.begin(), strides_int.end(), [](
int stride) { return stride <= 0; }))
395 std::ostringstream oss;
396 oss <<
"Stride values must be positive or all-zeros (auto-derived from tensor "
397 "dimensions). Instead got ";
399 strides_int.begin(), strides_int.end(), std::ostream_iterator<int>(oss,
" "));
400 throw std::runtime_error(oss.str());
403 if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
404 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
408 const auto inner_idx =
409 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
410 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
412 if(mStrides[outer_idx] < mLens[inner_idx] * mStrides[inner_idx])
414 std::ostringstream oss;
415 oss <<
"Invalid strides for " <<
layout <<
": " << *
this;
416 throw std::runtime_error(oss.str());
420 for(
int i = 1; i <
rank - 2; ++i)
422 if(mStrides[i - 1] < mStrides[i] * mLens[i])
424 std::ostringstream oss;
425 oss <<
"Invalid strides for higher dimensions in " <<
layout <<
": "
427 throw std::runtime_error(oss.str());
433 std::ostringstream oss;
434 oss <<
"Error: Unsupported GEMM layout: " <<
layout;
435 throw std::runtime_error(oss.str());
444 std::cerr <<
"Warning: Tensor layout verification for ck::tensor_layout::convolution "
445 "layouts is not supported yet. Skipping..."
451 std::ostringstream oss;
452 oss <<
"Error: Tensor layout verification for " <<
layout <<
" is not supported yet.";
453 throw std::runtime_error(oss.str());
457 template <
typename X,
459 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
460 std::is_convertible_v<Layout, BaseTensorLayout>>>
465 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
469 typename = std::enable_if_t<std::is_convertible_v<Layout, BaseTensorLayout>>>
475 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
478 template <
typename Lengths,
480 typename = std::enable_if_t<
481 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> ||
482 std::is_convertible_v<ck::ranges::range_value_t<Lengths>,
ck::long_index_t>) &&
483 std::is_convertible_v<Layout, BaseTensorLayout>>>
488 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
491 template <
typename X,
493 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
494 std::is_convertible_v<Y, std::size_t>>,
497 const std::initializer_list<Y>& strides,
500 std::vector<
std::size_t>(strides.begin(), strides.end()),
504 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
508 template <
typename Layout = DefaultLayout>
510 const std::initializer_list<ck::long_index_t>& strides,
513 std::vector<
std::size_t>(strides.begin(), strides.end()),
517 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
521 template <
typename Str
ides,
typename Layout = DefaultLayout>
523 const Strides& strides,
526 std::vector<
std::size_t>(strides.begin(), strides.end()),
530 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
533 template <
typename Lengths,
536 typename = std::enable_if_t<
537 ((std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
538 std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>) ||
539 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>,
ck::long_index_t> &&
540 std::is_convertible_v<ck::ranges::range_value_t<Strides>,
ck::long_index_t>)) &&
541 std::is_convertible_v<Layout, BaseTensorLayout>>>
543 const Strides& strides,
546 std::vector<
std::size_t>(strides.begin(), strides.end()),
550 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
560 template <
typename... Is>
564 std::initializer_list<std::size_t> iss{
static_cast<std::size_t
>(is)...};
565 return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
570 return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
577 std::vector<std::size_t> mLens;
578 std::vector<std::size_t> mStrides;
579 static constexpr bool dbg =
false;
587 std::vector<int> AsInt(
const std::vector<size_t>& vec)
const
589 std::vector<int> strides_int(vec.size());
590 std::transform(vec.begin(), vec.end(), strides_int.begin(), [](std::size_t stride) {
591 return static_cast<int>(stride);
597template <
typename New2Old,
typename NewLayout = HostTensorDescriptor::BaseTensorLayout>
600 const New2Old& new2old,
601 const NewLayout& new_layout = NewLayout())
603 std::vector<std::size_t> new_lengths(
a.GetNumOfDimension());
604 std::vector<std::size_t> new_strides(
a.GetNumOfDimension());
606 for(std::size_t i = 0; i <
a.GetNumOfDimension(); i++)
608 new_lengths[i] =
a.GetLengths()[new2old[i]];
609 new_strides[i] =
a.GetStrides()[new2old[i]];
617 template <
typename... Xs>
632template <
typename F,
typename... Xs>
636 static constexpr std::size_t
NDIM =
sizeof...(Xs);
637 std::array<std::size_t, NDIM>
mLens;
644 std::partial_sum(
mLens.rbegin(),
647 std::multiplies<std::size_t>());
653 std::array<std::size_t, NDIM> indices;
655 for(std::size_t idim = 0; idim <
NDIM; ++idim)
658 i -= indices[idim] *
mStrides[idim];
666 std::size_t work_per_thread = (
mN1d + num_thread - 1) / num_thread;
668 std::vector<joinable_thread> threads(num_thread);
670 for(std::size_t it = 0; it < num_thread; ++it)
672 std::size_t iw_begin = it * work_per_thread;
673 std::size_t iw_end = std::min((it + 1) * work_per_thread,
mN1d);
675 auto f = [=, *
this] {
676 for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
686template <
typename F,
typename... Xs>
698 template <
typename X>
703 template <
typename X,
typename Y>
704 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
709 template <
typename Lengths>
714 template <
typename Lengths,
typename Str
ides>
715 Tensor(
const Lengths& lens,
const Strides& strides)
720 template <
typename X,
typename... Rest, std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
721 Tensor(std::initializer_list<X> lens, Rest&&... rest)
726 template <
typename X,
729 std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
730 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides, Rest&&... rest)
735 template <
typename Lengths,
typename... Rest, std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
736 Tensor(
const Lengths& lens, Rest&&... rest)
741 template <
typename Lengths,
744 std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
745 Tensor(
const Lengths& lens,
const Strides& strides, Rest&&... rest)
752 template <
typename OutT>
758 mData, ret.
mData.begin(), [](
auto value) { return ck::type_convert<OutT>(value); });
772 template <
typename FromT>
776 void savetxt(std::string file_name, std::string dtype =
"float")
778 std::ofstream file(file_name);
782 for(
auto& itm :
mData)
785 file << ck::type_convert<float>(itm) << std::endl;
786 else if(dtype ==
"int")
787 file << ck::type_convert<int>(itm) << std::endl;
791 file << ck::type_convert<float>(itm) << std::endl;
799 throw std::runtime_error(std::string(
"unable to open file:") + file_name);
818 return mDesc.GetElementSpaceSize();
826 template <
typename F>
835 for(
size_t i = 0; i <
mDesc.GetLengths()[
rank]; i++)
842 template <
typename F>
845 std::vector<size_t> idx(
mDesc.GetNumOfDimension(), 0);
849 template <
typename F>
858 for(
size_t i = 0; i <
mDesc.GetLengths()[
rank]; i++)
865 template <
typename F>
868 std::vector<size_t> idx(
mDesc.GetNumOfDimension(), 0);
872 template <
typename G>
875 switch(
mDesc.GetNumOfDimension())
878 auto f = [&](
auto i) { (*this)(i) = g(i); };
883 auto f = [&](
auto i0,
auto i1) { (*this)(i0, i1) = g(i0, i1); };
888 auto f = [&](
auto i0,
auto i1,
auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
890 f,
mDesc.GetLengths()[0],
mDesc.GetLengths()[1],
mDesc.GetLengths()[2])(num_thread);
894 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3) {
895 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
898 mDesc.GetLengths()[0],
899 mDesc.GetLengths()[1],
900 mDesc.GetLengths()[2],
901 mDesc.GetLengths()[3])(num_thread);
905 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4) {
906 (*this)(i0, i1, i2, i3, i4) = g(i0, i1, i2, i3, i4);
909 mDesc.GetLengths()[0],
910 mDesc.GetLengths()[1],
911 mDesc.GetLengths()[2],
912 mDesc.GetLengths()[3],
913 mDesc.GetLengths()[4])(num_thread);
917 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4,
auto i5) {
918 (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
921 mDesc.GetLengths()[0],
922 mDesc.GetLengths()[1],
923 mDesc.GetLengths()[2],
924 mDesc.GetLengths()[3],
925 mDesc.GetLengths()[4],
926 mDesc.GetLengths()[5])(num_thread);
930 auto f = [&](
auto i0,
942 (*this)(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11) =
943 g(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11);
946 mDesc.GetLengths()[0],
947 mDesc.GetLengths()[1],
948 mDesc.GetLengths()[2],
949 mDesc.GetLengths()[3],
950 mDesc.GetLengths()[4],
951 mDesc.GetLengths()[5],
952 mDesc.GetLengths()[6],
953 mDesc.GetLengths()[7],
954 mDesc.GetLengths()[8],
955 mDesc.GetLengths()[9],
956 mDesc.GetLengths()[10],
957 mDesc.GetLengths()[11])(num_thread);
960 default:
throw std::runtime_error(
"unspported dimension");
966 template <
typename Distribution = std::uniform_real_distribution<
float>,
967 typename Mapping = ck::
identity,
968 typename Generator = std::minstd_rand>
971 const Generator g = Generator(0),
972 std::size_t num_thread = -1)
976 if(num_thread == -1ULL)
980 constexpr std::size_t BLOCK_BYTES = 64;
981 constexpr std::size_t BLOCK_SIZE = BLOCK_BYTES /
sizeof(T);
986 std::vector<std::thread> threads;
987 threads.reserve(num_thread - 1);
988 const auto dst =
const_cast<T*
>(this->
mData.data());
990 for(
int it = num_thread - 1; it >= 0; --it)
992 std::size_t ib_begin = it * blocks_per_thread;
993 std::size_t ib_end =
min(ib_begin + blocks_per_thread, num_blocks);
1072 static_assert(
false,
"Unsupported packed size for T");
1075 std::size_t ib = ib_begin;
1076 for(; ib < ib_end - 1; ++ib)
1077 ck::static_for<0, BLOCK_SIZE, 1>{}([&](
auto iw_) {
1078 constexpr size_t iw = iw_.value;
1079 dst[ib * BLOCK_SIZE + iw] = t_fn();
1081 for(std::size_t iw = 0; iw < BLOCK_SIZE; ++iw)
1082 if(ib * BLOCK_SIZE + iw < element_space_size)
1083 dst[ib * BLOCK_SIZE + iw] = t_fn();
1087 threads.emplace_back(std::move(job));
1091 for(
auto& t : threads)
1095 template <
typename... Is>
1101 template <
typename... Is>
1104 return mData[
mDesc.GetOffsetFromMultiIndex(is...) /
1108 template <
typename... Is>
1111 return mData[
mDesc.GetOffsetFromMultiIndex(is...) /
1131 typename Data::const_iterator
begin()
const {
return mData.begin(); }
1133 typename Data::const_iterator
end()
const {
return mData.end(); }
1135 typename Data::const_pointer
data()
const {
return mData.data(); }
1137 typename Data::size_type
size()
const {
return mData.size(); }
1139 template <
typename U = T>
1142 constexpr std::size_t FromSize =
sizeof(T);
1143 constexpr std::size_t ToSize =
sizeof(U);
1145 using Element = std::add_const_t<std::remove_reference_t<U>>;
1149 template <
typename U = T>
1152 constexpr std::size_t FromSize =
sizeof(T);
1153 constexpr std::size_t ToSize =
sizeof(U);
1155 using Element = std::remove_reference_t<U>;
Definition utility/span.hpp:14
__host__ __device__ constexpr auto rank(const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
Definition layout_utils.hpp:310
auto call_f_unpack_args_impl(F f, T args, std::index_sequence< Is... >)
Definition library/utility/host_tensor.hpp:75
std::ostream & LogRangeAsType(std::ostream &os, Range &&range, std::string delim)
Definition library/utility/host_tensor.hpp:42
auto construct_f_unpack_args_impl(T args, std::index_sequence< Is... >)
Definition library/utility/host_tensor.hpp:89
std::ostream & LogRange(std::ostream &os, Range &&range, std::string delim)
Definition library/utility/host_tensor.hpp:27
HostTensorDescriptor transpose_host_tensor_descriptor_given_new2old(const HostTensorDescriptor &a, const New2Old &new2old, const NewLayout &new_layout=NewLayout())
Definition library/utility/host_tensor.hpp:599
auto call_f_unpack_args(F f, T args)
Definition library/utility/host_tensor.hpp:81
auto construct_f_unpack_args(F, T args)
Definition library/utility/host_tensor.hpp:95
auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition library/utility/host_tensor.hpp:687
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Definition utility/math.hpp:72
__host__ __device__ constexpr T min(T x)
Definition utility/math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition algorithm.hpp:36
typename vector_type< float, 16 >::type float16_t
Definition dtype_vector.hpp:2148
unsigned int get_available_cpu_cores()
Definition thread.hpp:11
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition type.hpp:297
integral_constant< index_t, N > Number
Definition number.hpp:12
typename vector_type< float, 2 >::type float2_t
Definition dtype_vector.hpp:2145
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
typename vector_type< float, 32 >::type float32_t
Definition dtype_vector.hpp:2149
constexpr bool is_same_v
Definition type.hpp:283
constexpr bool is_packed_type_v
Definition data_type.hpp:414
int64_t long_index_t
Definition ck.hpp:300
constexpr index_t packed_size_v
Definition data_type.hpp:411
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1517
A descriptor class for host tensors that manages tensor dimensions, strides, and layout.
Definition library/utility/host_tensor.hpp:171
HostTensorDescriptor()
Definition library/utility/host_tensor.hpp:208
void DispatchChosenLayout(ChosenLayout tag, const OrigLayout &orig, F &&f) const
Definition library/utility/host_tensor.hpp:214
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const std::initializer_list< ck::long_index_t > &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:509
const std::vector< std::size_t > & GetStrides() const
HostTensorDescriptor(const std::initializer_list< X > &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:461
std::size_t GetElementSize() const
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:470
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition library/utility/host_tensor.hpp:561
void ValidateStrides(const Layout &layout) const
Definition library/utility/host_tensor.hpp:348
HostTensorDescriptor(std::vector< std::size_t > lens, std::vector< std::size_t > strides, const Layout &layout=DefaultLayout())
Definition library/utility/host_tensor.hpp:186
void CalculateStrides(const Layout &layout)
Definition library/utility/host_tensor.hpp:282
friend std::ostream & operator<<(std::ostream &os, const HostTensorDescriptor &desc)
HostTensorDescriptor(const std::initializer_list< X > &lens, const std::initializer_list< Y > &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:496
HostTensorDescriptor(const std::initializer_list< std::size_t > &lens, const Strides &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:522
ChosenLayout HandleDefaultLayout(const Layout &)
Definition library/utility/host_tensor.hpp:226
HostTensorDescriptor(const Lengths &lens, const Strides &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:542
const std::vector< std::size_t > & GetLengths() const
std::size_t GetNumOfDimension() const
friend std::ostream & operator<<(std::ostream &os, ChosenLayout tag)
HostTensorDescriptor(const Lengths &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:484
std::size_t GetElementSpaceSize() const
BaseTensorLayout DefaultLayout
Definition library/utility/host_tensor.hpp:173
ck::tensor_layout::BaseTensorLayout BaseTensorLayout
Definition library/utility/host_tensor.hpp:172
ChosenLayout
Definition library/utility/host_tensor.hpp:178
@ Original
Definition library/utility/host_tensor.hpp:179
@ RowMajor
Definition library/utility/host_tensor.hpp:180
@ ColumnMajor
Definition library/utility/host_tensor.hpp:181
std::size_t GetOffsetFromMultiIndex(const std::vector< std::size_t > &iss) const
Definition library/utility/host_tensor.hpp:568
Layout wrapper that performs the tensor descriptor logic.
Definition layout.hpp:24
Definition library/utility/host_tensor.hpp:634
F mF
Definition library/utility/host_tensor.hpp:635
std::size_t mN1d
Definition library/utility/host_tensor.hpp:639
ParallelTensorFunctor(F f, Xs... xs)
Definition library/utility/host_tensor.hpp:641
std::array< std::size_t, NDIM > mLens
Definition library/utility/host_tensor.hpp:637
std::array< std::size_t, NDIM > mStrides
Definition library/utility/host_tensor.hpp:638
void operator()(std::size_t num_thread=1) const
Definition library/utility/host_tensor.hpp:664
std::array< std::size_t, NDIM > GetNdIndices(std::size_t i) const
Definition library/utility/host_tensor.hpp:651
static constexpr std::size_t NDIM
Definition library/utility/host_tensor.hpp:636
auto AsSpan() const
Definition library/utility/host_tensor.hpp:1140
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides, Rest &&... rest)
Definition library/utility/host_tensor.hpp:730
Tensor(const Lengths &lens, Rest &&... rest)
Definition library/utility/host_tensor.hpp:736
Tensor(const Lengths &lens, const Strides &strides)
Definition library/utility/host_tensor.hpp:715
std::size_t GetNumOfDimension() const
Definition library/utility/host_tensor.hpp:806
void ForEach(const F &&f) const
Definition library/utility/host_tensor.hpp:866
decltype(auto) GetLengths() const
Definition library/utility/host_tensor.hpp:802
Data::const_iterator end() const
Definition library/utility/host_tensor.hpp:1133
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition library/utility/host_tensor.hpp:1096
void ForEach(F &&f)
Definition library/utility/host_tensor.hpp:843
Data::pointer data()
Definition library/utility/host_tensor.hpp:1129
Tensor & operator=(const Tensor &)=default
void ForEach_impl(F &&f, std::vector< size_t > &idx, size_t rank)
Definition library/utility/host_tensor.hpp:827
std::size_t GetElementSpaceSizeInBytes() const
Definition library/utility/host_tensor.hpp:822
void ForEach_impl(const F &&f, std::vector< size_t > &idx, size_t rank) const
Definition library/utility/host_tensor.hpp:850
std::vector< T > Data
Definition library/utility/host_tensor.hpp:696
Data mData
Definition library/utility/host_tensor.hpp:1160
Data::iterator end()
Definition library/utility/host_tensor.hpp:1127
void GenerateTensorDistr(Distribution dis={0.f, 1.f}, Mapping fn={}, const Generator g=Generator(0), std::size_t num_thread=-1)
Definition library/utility/host_tensor.hpp:969
std::size_t GetElementSize() const
Definition library/utility/host_tensor.hpp:808
const T & operator()(const std::vector< std::size_t > &idx) const
Definition library/utility/host_tensor.hpp:1120
Tensor(const Lengths &lens, const Strides &strides, Rest &&... rest)
Definition library/utility/host_tensor.hpp:745
void SetZero()
Definition library/utility/host_tensor.hpp:824
Tensor(const Lengths &lens)
Definition library/utility/host_tensor.hpp:710
void savetxt(std::string file_name, std::string dtype="float")
Definition library/utility/host_tensor.hpp:776
Tensor(Tensor &&)=default
Data::const_pointer data() const
Definition library/utility/host_tensor.hpp:1135
auto AsSpan()
Definition library/utility/host_tensor.hpp:1150
Data::iterator begin()
Definition library/utility/host_tensor.hpp:1125
HostTensorDescriptor Descriptor
Definition library/utility/host_tensor.hpp:695
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides)
Definition library/utility/host_tensor.hpp:704
Tensor(const Tensor &)=default
const T & operator()(Is... is) const
Definition library/utility/host_tensor.hpp:1109
Tensor(const Descriptor &desc)
Definition library/utility/host_tensor.hpp:750
Descriptor mDesc
Definition library/utility/host_tensor.hpp:1159
Tensor & operator=(Tensor &&)=default
Data::const_iterator begin() const
Definition library/utility/host_tensor.hpp:1131
T & operator()(const std::vector< std::size_t > &idx)
Definition library/utility/host_tensor.hpp:1115
std::size_t GetElementSpaceSize() const
Definition library/utility/host_tensor.hpp:810
Tensor(const Tensor< FromT > &other)
Definition library/utility/host_tensor.hpp:773
T & operator()(Is... is)
Definition library/utility/host_tensor.hpp:1102
Data::size_type size() const
Definition library/utility/host_tensor.hpp:1137
Tensor< OutT > CopyAsType() const
Definition library/utility/host_tensor.hpp:753
void GenerateTensorValue(G g, std::size_t num_thread=1)
Definition library/utility/host_tensor.hpp:873
decltype(auto) GetStrides() const
Definition library/utility/host_tensor.hpp:804
Tensor(std::initializer_list< X > lens)
Definition library/utility/host_tensor.hpp:699
Tensor(std::initializer_list< X > lens, Rest &&... rest)
Definition library/utility/host_tensor.hpp:721
Definition tensor_operation/gpu/device/tensor_layout.hpp:10
Definition tensor_operation/gpu/device/tensor_layout.hpp:45
Definition tensor_operation/gpu/device/tensor_layout.hpp:31
Definition tensor_operation/gpu/device/tensor_layout.hpp:26
Definition dtype_vector.hpp:10
Definition library/utility/host_tensor.hpp:616
joinable_thread(joinable_thread &&)=default
joinable_thread(Xs &&... xs)
Definition library/utility/host_tensor.hpp:618
~joinable_thread()
Definition library/utility/host_tensor.hpp:625
joinable_thread & operator=(joinable_thread &&)=default
__host__ __device__ constexpr const auto & layout(const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Get Tensor Layout.
Definition tensor_utils.hpp:162