block_universal_gemm_ar_flatbr_bquant_cr.hpp Source File
block_universal_gemm_ar_flatbr_bquant_cr.hpp
Go to the documentation of this file.
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition tile_distribution_encoding.hpp:457
CK_TILE_DEVICE float amd_assembly_fp8_to_fp32(uint32_t src)
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:258
CK_TILE_DEVICE float amd_assembly_bf8_to_fp32(uint32_t src)
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:265
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:18
static constexpr index_t NWarp
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:49
remove_cvref_t< typename Problem::BQDataType > BQDataType
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:23
static constexpr index_t KIterPerWarp
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:59
static constexpr index_t QScalesPerWarpGemmRow
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:68
static constexpr index_t MWarp
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:48
static constexpr index_t m_preload
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:74
static CK_TILE_DEVICE constexpr auto MakeCBlockTile()
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:101
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:39
remove_cvref_t< typename Problem::QuantGroupSize > QuantGroupSize
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:27
remove_cvref_t< Problem_ > Problem
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:19
static constexpr auto idxK
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:37
remove_cvref_t< BlockPolicy_ > BlockPolicy
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:20
remove_cvref_t< typename Problem::CDataType > CDataType
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:24
remove_cvref_t< typename Problem::ADataType > ADataType
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:21
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:40
static constexpr auto I1
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:33
static constexpr index_t KPerBlock
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:52
static constexpr index_t MIterPerWarp
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:56
static CK_TILE_DEVICE float cvt_scale_to_fp32(T &scale)
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:79
static constexpr auto I0
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:32
static constexpr index_t QScalesPerBlockRow
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:66
remove_cvref_t< typename Problem::ComputeDataType > ComputeDataType
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:25
static constexpr auto I2
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:34
remove_cvref_t< typename Problem::BDataType > BDataType
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:22
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, ABlockTensor &a_warp_tensor, BFlatBlockTensor &b_warp_tensor, BQBlockTensor &bq_block_tensor, ABlockWindow &a_warp_windows) const
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:126
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:46
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:38
static constexpr index_t MPerBlock
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:51
static constexpr index_t kBlockSize
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:54
static constexpr index_t KPerBlockBQ
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:64
static constexpr auto config
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:42
static constexpr index_t NIterPerWarp
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:57
static constexpr index_t KIterPerQScale
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:71
static constexpr auto idxM
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:35
static constexpr index_t DsReadPreload
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:72
static constexpr auto idxN
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:36
static constexpr auto warp_size
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:44
static constexpr auto MIter_2nd_last
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:61
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition block_universal_gemm_ar_flatbr_bquant_cr.hpp:26
Definition tile/core/utility/functional.hpp:43
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192