block_universal_gemm_as_bs_cr.hpp Source File
block_universal_gemm_as_bs_cr.hpp
Go to the documentation of this file.
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition tile_distribution_encoding.hpp:457
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
TransposeTileDistributionTraits< TileDistributionEncoding_, DataType_, Policy, true > InputTileDistributionTraits
Definition load_tile_transpose.hpp:343
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE auto load_tile_transpose(const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window)
Transpose-loads a tile from a tensor and returns the resulting tensor with a new (transposed) tile distribution.
Definition load_tile_transpose.hpp:403
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
array< index_t, N > multi_index
Definition tile/core/container/multi_index.hpp:17
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
@ Interwave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:17
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
Definition block_gemm_asmem_bsmem_creg_v1_default_policy.hpp:15
CK_TILE_DEVICE void LocalPrefetch(const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose >={}, bool_constant< BLoadTranspose >={})
Definition block_universal_gemm_as_bs_cr.hpp:298
ALdsTile a_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:291
decltype(make_static_distributed_tensor< ComputeDataType >(BLdsTileDistr)) BLdsTile
Definition block_universal_gemm_as_bs_cr.hpp:289
BLdsTile b_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:292
static constexpr auto ALdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:283
decltype(make_static_distributed_tensor< ComputeDataType >(ALdsTileDistr)) ALdsTile
Definition block_universal_gemm_as_bs_cr.hpp:288
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, const ASmemBlockWindow &, const BSmemBlockWindow &, bool_constant< ALoadTranspose >={}, bool_constant< BLoadTranspose >={})
Definition block_universal_gemm_as_bs_cr.hpp:335
static constexpr auto BLdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:285
static constexpr index_t KPerThread
Definition block_universal_gemm_as_bs_cr.hpp:387
static constexpr index_t NumMacClusters
Definition block_universal_gemm_as_bs_cr.hpp:388
static constexpr index_t KRepeat
Definition block_universal_gemm_as_bs_cr.hpp:391
static constexpr index_t KPerInnerLoop
Definition block_universal_gemm_as_bs_cr.hpp:389
decltype(make_static_distributed_tensor< ComputeDataType >(ALdsTileDistr)) ALdsTile
Definition block_universal_gemm_as_bs_cr.hpp:399
decltype(make_static_distributed_tensor< ComputeDataType >(BLdsTileDistr)) BLdsTile
Definition block_universal_gemm_as_bs_cr.hpp:400
static constexpr index_t KInnerLoopIter
Definition block_universal_gemm_as_bs_cr.hpp:392
CK_TILE_DEVICE void LocalPrefetch(const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose >={}, bool_constant< BLoadTranspose >={})
Definition block_universal_gemm_as_bs_cr.hpp:410
ALdsTile a_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:402
static constexpr auto BLdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:396
static constexpr auto ALdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:394
BLdsTile b_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:403
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose > a_load_tr={}, bool_constant< BLoadTranspose > b_load_tr={})
Definition block_universal_gemm_as_bs_cr.hpp:486
BLdsTile b_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:203
decltype(make_static_distributed_tensor< ComputeDataType >(ALdsTileDistr)) ALdsTile
Definition block_universal_gemm_as_bs_cr.hpp:199
decltype(make_static_distributed_tensor< ComputeDataType >(BLdsTileDistr)) BLdsTile
Definition block_universal_gemm_as_bs_cr.hpp:200
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose >={}, bool_constant< BLoadTranspose >={})
Definition block_universal_gemm_as_bs_cr.hpp:211
static constexpr auto BLdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:196
static constexpr auto ALdsTileDistr
Definition block_universal_gemm_as_bs_cr.hpp:194
ALdsTile a_warp_tile_
Definition block_universal_gemm_as_bs_cr.hpp:202
Definition block_universal_gemm_as_bs_cr.hpp:188
Definition block_universal_gemm_as_bs_cr.hpp:21
remove_cvref_t< typename Traits::ComputeDataType > ComputeDataType
Definition block_universal_gemm_as_bs_cr.hpp:94
static constexpr index_t NIterPerWarp
Definition block_universal_gemm_as_bs_cr.hpp:102
static constexpr auto b_warp_y_index_zeros
Definition block_universal_gemm_as_bs_cr.hpp:125
typename WarpGemm::AWarpTensor AWarpTensor
Definition block_universal_gemm_as_bs_cr.hpp:113
remove_cvref_t< typename Traits::WarpGemm > WarpGemm
Definition block_universal_gemm_as_bs_cr.hpp:98
static constexpr auto a_warp_y_index_zeros
Definition block_universal_gemm_as_bs_cr.hpp:124
static CK_TILE_DEVICE constexpr auto MakeBBlockDistributionEncode()
Definition block_universal_gemm_as_bs_cr.hpp:161
static constexpr auto b_warp_y_lengths
Definition block_universal_gemm_as_bs_cr.hpp:119
typename WarpGemm::AWarpDstr AWarpDstr
Definition block_universal_gemm_as_bs_cr.hpp:109
typename WarpGemm::CWarpTensor CWarpTensor
Definition block_universal_gemm_as_bs_cr.hpp:115
GemmTraits_< Problem_, Policy_ > Traits
Definition block_universal_gemm_as_bs_cr.hpp:90
static constexpr index_t MWarp
Definition block_universal_gemm_as_bs_cr.hpp:104
remove_cvref_t< typename Traits::ADataType > ADataType
Definition block_universal_gemm_as_bs_cr.hpp:92
static constexpr index_t APackedSize
Definition block_universal_gemm_as_bs_cr.hpp:128
static constexpr index_t NWarp
Definition block_universal_gemm_as_bs_cr.hpp:105
typename WarpGemm::BWarpDstr BWarpDstr
Definition block_universal_gemm_as_bs_cr.hpp:110
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose > a_load_tr={}, bool_constant< BLoadTranspose > b_load_tr={})
Definition block_universal_gemm_as_bs_cr.hpp:617
static constexpr index_t MIterPerWarp
Definition block_universal_gemm_as_bs_cr.hpp:101
static CK_TILE_DEVICE constexpr auto MakeCBlockTile()
Definition block_universal_gemm_as_bs_cr.hpp:581
typename WarpGemm::CWarpDstr CWarpDstr
Definition block_universal_gemm_as_bs_cr.hpp:111
CK_TILE_DEVICE void LocalPrefetch(const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose > a_load_tr={}, bool_constant< BLoadTranspose > b_load_tr={})
Definition block_universal_gemm_as_bs_cr.hpp:603
static constexpr auto c_warp_y_index_zeros
Definition block_universal_gemm_as_bs_cr.hpp:126
CK_TILE_DEVICE auto operator()(const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window, bool_constant< ALoadTranspose > a_load_tr={}, bool_constant< BLoadTranspose > b_load_tr={})
Definition block_universal_gemm_as_bs_cr.hpp:631
remove_cvref_t< typename Traits::CDataType > CDataType
Definition block_universal_gemm_as_bs_cr.hpp:95
static constexpr auto c_warp_y_lengths
Definition block_universal_gemm_as_bs_cr.hpp:121
static constexpr auto a_warp_y_lengths
Definition block_universal_gemm_as_bs_cr.hpp:117
typename WarpGemm::BWarpTensor BWarpTensor
Definition block_universal_gemm_as_bs_cr.hpp:114
static constexpr auto Scheduler
Definition block_universal_gemm_as_bs_cr.hpp:107
static constexpr index_t BPackedSize
Definition block_universal_gemm_as_bs_cr.hpp:130
remove_cvref_t< InterleavedPKTypeLoader< ComputeDataType, UnaryOpSize_ > > Loader
Definition block_universal_gemm_as_bs_cr.hpp:97
static CK_TILE_DEVICE constexpr auto MakeABlockDistributionEncode()
Definition block_universal_gemm_as_bs_cr.hpp:136
remove_cvref_t< typename Traits::BDataType > BDataType
Definition block_universal_gemm_as_bs_cr.hpp:93
static constexpr index_t KIterPerWarp
Definition block_universal_gemm_as_bs_cr.hpp:100
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192