load_tile_transpose.hpp Source File
load_tile_transpose.hpp
Go to the documentation of this file.
235 static constexpr auto quad_output_ps_to_rhss_major0 = QuadOutputEncoding::ps_to_rhss_major_[I0];
236 static constexpr auto quad_output_ps_to_rhss_minor0 = QuadOutputEncoding::ps_to_rhss_minor_[I0];
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition tile_distribution_encoding.hpp:457
Definition load_tile_transpose.hpp:25
constexpr bool is_sequence_suffix_v
Definition load_tile_transpose.hpp:45
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr auto generate_tuple_for(F &&f, sequence< ids... >)
Definition tile/core/container/tuple.hpp:423
CK_TILE_HOST_DEVICE constexpr auto tuple_reverse(const tuple< Ts... > &t)
Definition tile/core/container/tuple.hpp:583
TransposeTileDistributionTraits< TileDistributionEncoding_, DataType_, Policy, false > OutputTileDistributionTraits
Definition load_tile_transpose.hpp:338
TransposeTileDistributionTraits< TileDistributionEncoding_, DataType_, Policy, true > InputTileDistributionTraits
Definition load_tile_transpose.hpp:343
CK_TILE_HOST_DEVICE constexpr auto generate_sequence_v2(F &&f, number< N >)
Definition tile/core/container/sequence.hpp:1045
CK_TILE_HOST_DEVICE constexpr auto transform_tuples(F f, const X &x)
Definition tile/core/container/tuple.hpp:505
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE auto load_tile_transpose(const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window)
Transpose-loads a tile from a tensor and returns the resulting tensor with a new (transposed) tile distribution.
Definition load_tile_transpose.hpp:403
CK_TILE_HOST_DEVICE constexpr auto generate_tuple(F &&f, number< N >)
Definition tile/core/container/tuple.hpp:429
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_HOST_DEVICE constexpr index_t reduce_on_sequence(Seq, Reduce f, number< Init >)
Definition tile/core/container/sequence.hpp:982
CK_TILE_HOST_DEVICE constexpr auto InputTileDistributionEncoding()
Definition load_tile_transpose.hpp:351
Definition load_tile_transpose.hpp:55
tile_distribution_encoding< sequence<>, tuple< sequence< 4 >, sequence< LaneGroupSize/16, 4, 4 > >, tuple< sequence< 2, 1, 2 > >, tuple< sequence< 0, 0, 1 > >, sequence< 2 >, sequence< 2 > > InputEncoding
Definition load_tile_transpose.hpp:58
tile_distribution_encoding< sequence<>, tuple< sequence< LaneGroupSize >, sequence< 4 > >, tuple< sequence< 1 > >, tuple< sequence< 0 > >, sequence< 2 >, sequence< 0 > > OutputEncoding
Definition load_tile_transpose.hpp:66
Definition load_tile_transpose.hpp:77
tile_distribution_encoding< sequence<>, tuple< sequence< 8 >, sequence< LaneGroupSize/16, 2, 8 > >, tuple< sequence< 2, 1, 2 > >, tuple< sequence< 0, 0, 1 > >, sequence< 2 >, sequence< 2 > > InputEncoding
Definition load_tile_transpose.hpp:80
tile_distribution_encoding< sequence<>, tuple< sequence< LaneGroupSize >, sequence< 8 > >, tuple< sequence< 1 > >, tuple< sequence< 0 > >, sequence< 2 >, sequence< 0 > > OutputEncoding
Definition load_tile_transpose.hpp:88
Definition load_tile_transpose.hpp:178
static constexpr index_t LaneGroupSize
Definition load_tile_transpose.hpp:183
static constexpr bool value
Definition load_tile_transpose.hpp:179
Definition load_tile_transpose.hpp:118
static constexpr bool ys_mapping_valid
Definition load_tile_transpose.hpp:169
static constexpr auto I1
Definition load_tile_transpose.hpp:123
static constexpr bool suffix_valid_dim1
Definition load_tile_transpose.hpp:131
static constexpr bool value
Definition load_tile_transpose.hpp:172
static constexpr auto quad_ys_major
Definition load_tile_transpose.hpp:162
static constexpr auto quad_ys_minor
Definition load_tile_transpose.hpp:163
static constexpr auto quad_ps_minor0
Definition load_tile_transpose.hpp:139
static constexpr auto quad_hs
Definition load_tile_transpose.hpp:125
static constexpr auto input_ys_minor
Definition load_tile_transpose.hpp:161
static constexpr auto input_ps_major_last
Definition load_tile_transpose.hpp:141
static constexpr auto input_ps_minor_last
Definition load_tile_transpose.hpp:143
static constexpr auto I0
Definition load_tile_transpose.hpp:122
static constexpr auto shifted_quad_ps_minor0
Definition load_tile_transpose.hpp:148
static constexpr bool ps_mapping_valid
Definition load_tile_transpose.hpp:154
static constexpr auto input_ps_minor
Definition load_tile_transpose.hpp:136
static constexpr auto input_hs
Definition load_tile_transpose.hpp:124
static constexpr auto input_ps_major
Definition load_tile_transpose.hpp:135
ck_tile::sequence< input_hs[I0].size() - quad_hs[I0].size(), input_hs[I1].size() - quad_hs[I1].size()> psys_offset
Definition load_tile_transpose.hpp:146
static constexpr bool dims_valid
Definition load_tile_transpose.hpp:127
static constexpr bool suffix_valid_dim0
Definition load_tile_transpose.hpp:129
std::conditional_t< ReverseDirection, QuadOutputEncoding< LaneGroupSize >, QuadInputEncoding< LaneGroupSize > > QuadEncoding
Definition load_tile_transpose.hpp:119
static constexpr auto quad_ps_major0
Definition load_tile_transpose.hpp:138
static constexpr auto input_ys_major
Definition load_tile_transpose.hpp:160
Definition load_tile_transpose.hpp:52
static constexpr auto group_func
Definition load_tile_transpose.hpp:112
static constexpr auto transpose_dims
Definition load_tile_transpose.hpp:109
std::conditional_t< sizeof(DataType)==2, typename Quad16< LaneGroupSize >::InputEncoding, typename Quad8< LaneGroupSize >::InputEncoding > QuadInputEncoding
Definition load_tile_transpose.hpp:99
std::conditional_t< sizeof(DataType)==2, typename Quad16< LaneGroupSize >::OutputEncoding, typename Quad8< LaneGroupSize >::OutputEncoding > QuadOutputEncoding
Definition load_tile_transpose.hpp:104
Definition load_tile_transpose.hpp:192
static constexpr bool distr_encoding_valid
Definition load_tile_transpose.hpp:197
typename Policy::template ValidationTraits< InDstrEncode > Validator
Definition load_tile_transpose.hpp:195
typename remove_cvref_t< TileDistribution_ >::DstrEncode InDstrEncode
Definition load_tile_transpose.hpp:193
Definition load_tile_transpose.hpp:207
static constexpr auto quad_output_ps_to_rhss_minor0
Definition load_tile_transpose.hpp:236
static constexpr auto input_ys_to_rhs_major
Definition load_tile_transpose.hpp:229
static constexpr auto quad_input_ps_to_rhss_major0
Definition load_tile_transpose.hpp:233
static constexpr auto outer_input_ys_to_rhs_major
Definition load_tile_transpose.hpp:317
static constexpr auto quad_output_ps_to_rhss_major0
Definition load_tile_transpose.hpp:235
static constexpr index_t dim1
Definition load_tile_transpose.hpp:241
static constexpr auto dst_ps_to_rhss_major
Definition load_tile_transpose.hpp:268
static constexpr index_t LaneGroupSize
Definition load_tile_transpose.hpp:210
static constexpr auto quad_idx_offset
Definition load_tile_transpose.hpp:288
remove_cvref_t< TileDistributionEncoding_ > InDstrEncode
Definition load_tile_transpose.hpp:208
std::conditional_t< ReverseDirection, typename Policy::template QuadInputEncoding< LaneGroupSize >, typename Policy::template QuadOutputEncoding< LaneGroupSize > > QuadOutputEncoding
Definition load_tile_transpose.hpp:219
static constexpr auto dst_ys_to_rhs_minor
Definition load_tile_transpose.hpp:323
static constexpr auto dst_out_hs_lengthss
Definition load_tile_transpose.hpp:258
static constexpr auto quad_output_ys_to_rhs_minor
Definition load_tile_transpose.hpp:238
static constexpr auto I0
Definition load_tile_transpose.hpp:232
static constexpr auto quad_input_ps_to_rhss_minor0
Definition load_tile_transpose.hpp:234
static constexpr auto quad_input_hs_lengthss
Definition load_tile_transpose.hpp:224
static constexpr auto swap_one_and_two
Definition load_tile_transpose.hpp:243
static constexpr auto outer_hs_lengthss
Definition load_tile_transpose.hpp:250
static constexpr auto reversed_outer_hs_lengthss
Definition load_tile_transpose.hpp:257
static constexpr auto dst_ps_to_rhss_minor
Definition load_tile_transpose.hpp:297
static constexpr index_t dim0
Definition load_tile_transpose.hpp:240
static constexpr auto input_ys_to_rhs_minor
Definition load_tile_transpose.hpp:230
static constexpr auto quad_output_ps_minor_offset
Definition load_tile_transpose.hpp:292
static constexpr auto input_ps_to_rhss_major
Definition load_tile_transpose.hpp:227
static constexpr auto dst_ys_to_rhs_major
Definition load_tile_transpose.hpp:320
tile_distribution_encoding< typename InDstrEncode::RsLengths, remove_cvref_t< decltype(dst_out_hs_lengthss)>, remove_cvref_t< decltype(dst_ps_to_rhss_major)>, remove_cvref_t< decltype(dst_ps_to_rhss_minor)>, remove_cvref_t< decltype(dst_ys_to_rhs_major)>, remove_cvref_t< decltype(dst_ys_to_rhs_minor)> > TransposedDstrEncode
Definition load_tile_transpose.hpp:326
static constexpr auto quad_output_ys_to_rhs_major
Definition load_tile_transpose.hpp:237
static constexpr auto quad_output_hs_lengthss
Definition load_tile_transpose.hpp:225
std::conditional_t< ReverseDirection, typename Policy::template QuadOutputEncoding< LaneGroupSize >, typename Policy::template QuadInputEncoding< LaneGroupSize > > QuadInputEncoding
Definition load_tile_transpose.hpp:215
static constexpr auto input_ps_to_rhss_minor
Definition load_tile_transpose.hpp:228
static constexpr auto quad_output_ys_minor_offset
Definition load_tile_transpose.hpp:294
static constexpr auto input_hs_lengthss
Definition load_tile_transpose.hpp:209
Definition tile/core/container/sequence.hpp:287
typename std::conditional< kHasContent, type0, type1 >::type type
Definition tile/core/container/sequence.hpp:302
A fixed-size array container similar to std::array with additional utilities.
Definition tile/core/container/array.hpp:43
Definition tile/core/numeric/math.hpp:98
Definition tile/core/container/sequence.hpp:352
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43
Definition tile_distribution_encoding.hpp:26
This class provides tile (windowed) view and access to the device memory.
Definition tile_window.hpp:46
Definition tile/core/container/tuple.hpp:192
static constexpr bool value
Definition load_tile_transpose.hpp:41
Definition load_tile_transpose.hpp:28
typename arithmetic_sequence_gen< start_pos, Sequence::size(), 1 >::type extract_indices
Definition load_tile_transpose.hpp:32
static constexpr bool value
Definition load_tile_transpose.hpp:34
static constexpr bool size_check
Definition load_tile_transpose.hpp:29
static constexpr index_t start_pos
Definition load_tile_transpose.hpp:31