batched_transpose_pipeline.hpp Source File

batched_transpose_pipeline.hpp Source File

Composable Kernel: batched_transpose_pipeline.hpp Source File
batched_transpose_pipeline.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include "ck_tile/core.hpp"
8
9namespace ck_tile {
10
11template <typename Problem_, typename Policy_ = BatchedTransposePolicy>
13{
14 // TODO: this kernel only supports warp per row
17
// Runs the transpose pipeline for one tile: load a tile through
// `input_window`, transpose it with transpose_tile2d, and store the result
// through `out_window`.
//
// input_window: source window; re-wrapped below with the distribution from
//               Policy::MakeInputDistribution<Problem>().
// out_window:   destination window; re-wrapped below with the distribution
//               from Policy::MakeOutputDistribution<Problem>().
//
// No return statement is visible in the body, so the `auto` return type
// appears to deduce to void.
18 template <typename InputWindow, typename OutputWindow>
19 CK_TILE_DEVICE auto operator()(const InputWindow& input_window, OutputWindow& out_window)
20 {
// Build a window over the input using the policy's input distribution.
21 auto inp_win =
22 make_tile_window(input_window, Policy::template MakeInputDistribution<Problem>());
23
// Read the input tile through that window.
24 auto input_tile = load_tile(inp_win);
25
// NOTE(review): listing line 26 is missing from this extract. The line below
// is the tail of the statement that defines `output_tile`; it passes the
// policy's output distribution as the final argument. Presumably the callee
// is make_static_distributed_tensor -- confirm against the real header.
27 Policy::template MakeOutputDistribution<Problem>());
28
// Transpose: first argument is the destination tile, second is the source.
29 transpose_tile2d(output_tile, input_tile);
30
// Build a window over the output using the same output distribution that
// was passed when creating `output_tile`.
31 auto out_win =
32 make_tile_window(out_window, Policy::template MakeOutputDistribution<Problem>());
33
// Write the transposed tile out through the output window.
34 store_tile(out_win, output_tile);
35 }
36};
37} // namespace ck_tile
#define CK_TILE_DEVICE
Definition config.hpp:41
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE void transpose_tile2d(OutTensor &out, const InTensor &in)
Definition transpose_tile.hpp:195
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
Definition batched_transpose_pipeline.hpp:13
CK_TILE_DEVICE auto operator()(const InputWindow &input_window, OutputWindow &out_window)
Definition batched_transpose_pipeline.hpp:19
ck_tile::remove_cvref_t< Problem_ > Problem
Definition batched_transpose_pipeline.hpp:15
ck_tile::remove_cvref_t< Policy_ > Policy
Definition batched_transpose_pipeline.hpp:16