Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference

Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference#

Composable Kernel: ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference

#include <blockwise_gemm_pipeline_xdlops_v2_b_scale.hpp>

Inheritance diagram for ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >:
ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >

Public Types

using Base
using ComputeDataTypeBuf = typename Base::ComputeDataTypeBuf
Public Types inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
using ThisThreadBlock = ThisThreadBlock<BlockSize>
using ComputeDataTypeBuf
using HotLoopInstList
using Tuple4 = decltype(CalculateAThreadOriginDataIndex())

Public Member Functions

template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadDesc, typename BScaleThreadTransfer, typename BScaleThreadTransferStep>
__device__ void Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, CThreadBuffer &c_thread_buf, const BScaleGridDesc &b_scale_grid_desc, const BScaleThreadDesc &b_scale_thread_desc, BScaleThreadTransfer &b_scale_thread_copy, const BScaleGridBuffer &b_scale_grid_buf, const BScaleThreadTransferStep &b_scale_thread_copy_step, index_t num_loop, index_t num_loop_per_scale) const
__host__ __device__ constexpr auto & GetCThreadBuffer ()
Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmXdlops_pipeline_base (Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex())
 Constructor for BlockwiseGemmXdlops_pipeline_base.

Static Public Member Functions

__host__ static __device__ constexpr bool BlockHasHotloop (index_t num_loop)
__host__ static __device__ constexpr TailNumber BlockLoopTailNum (index_t num_loop)
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ()
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
Static Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateAThreadOriginDataIndex6D ()
static __device__ auto CalculateBThreadOriginDataIndex ()
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
static __device__ auto CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto GetCThreadDesc ()

Static Public Attributes

static constexpr index_t NumMacClusters = CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS
static constexpr index_t KPerInnerLoop = math::max(KPerThread / NumMacClusters, KPack)
static constexpr index_t KRepeat = KPerThread / KPerInnerLoop
static constexpr index_t WgpPerCU
static constexpr index_t FullMemBandPrefetchStages
static constexpr index_t PrefetchStages
static constexpr index_t PrefillStages = 1
static constexpr index_t GlobalBufferNum = PrefetchStages
static constexpr index_t A_K1
static constexpr index_t B_K1
static constexpr auto I0
static constexpr auto I1
static constexpr index_t KPerThread
static constexpr auto xdlops_gemm
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_k
static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_k
static constexpr index_t WaveSize
Static Public Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t MWaves = MPerBlock / (MRepeat * MPerXDL)
static constexpr index_t NWaves = NPerBlock / (NRepeat * NPerXDL)
static constexpr index_t WaveSize = BlockSize / MWaves / NWaves
static constexpr index_t A_K0 = ATileDesc{}.GetLength(I0)
static constexpr index_t B_K0 = BTileDesc{}.GetLength(I0)
static constexpr index_t A_K1 = ATileDesc{}.GetLength(I2)
static constexpr index_t B_K1
static constexpr auto xdlops_gemm
static constexpr index_t AMmaKStride = KPack
static constexpr index_t BMmaKStride = KPack
static constexpr index_t KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops
static constexpr index_t KRepeat = KPerThread / KPack
static constexpr index_t KPerInnerLoop = KPack
static constexpr index_t KGroup
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_k
static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_k

Protected Types

using AThreadCopy
using BThreadCopy
Protected Types inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
using AThreadCopy
using BThreadCopy

Protected Attributes

AThreadCopy a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()}
BThreadCopy b_thread_copy_ {Base::CalculateBThreadOriginDataIndex()}
Protected Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
AThreadCopy a_thread_copy_
BThreadCopy b_thread_copy_

Static Protected Attributes

static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_
Static Protected Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Additional Inherited Members

Public Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_

Member Typedef Documentation

◆ AThreadCopy

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::AThreadCopy
protected
Initial value:
decltype(a_thread_desc_),
3,
static constexpr auto a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_dpp.hpp:254
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
conditional_t< std::is_same< ComputeDataType, ck::tf32_t >::value, float, ComputeDataType > ComputeDataTypeBuf
Definition blockwise_gemm_pipeline_xdlops_base.hpp:57
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260

◆ Base

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base
Initial value:
ADataType,
BDataType,
ComputeDataType,
AccDataType,
ATileDesc,
BTileDesc,
AMmaTileDesc,
BMmaTileDesc,
ABlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector,
MPerBlock,
NPerBlock,
KPerBlock,
MPerXDL,
NPerXDL,
MRepeat,
NRepeat,
KPack>
__host__ __device__ BlockwiseGemmXdlops_pipeline_base(Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex())
Constructor for BlockwiseGemmXdlops_pipeline_base.
Definition blockwise_gemm_pipeline_xdlops_base.hpp:222

◆ BThreadCopy

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BThreadCopy
protected
Initial value:
decltype(b_thread_desc_),
3,
static constexpr index_t B_K1
Definition blockwise_gemm_dpp.hpp:53
static constexpr auto b_thread_desc_
Definition blockwise_gemm_dpp.hpp:316
static constexpr auto b_block_desc_n0_n1_n2_k
Definition blockwise_gemm_dpp.hpp:255

◆ ComputeDataTypeBuf

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
using ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::ComputeDataTypeBuf = typename Base::ComputeDataTypeBuf

Member Function Documentation

◆ BlockHasHotloop()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr bool ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockHasHotloop ( index_t num_loop)
inlinestaticconstexpr

◆ BlockLoopTailNum()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr TailNumber ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::BlockLoopTailNum ( index_t num_loop)
inlinestaticconstexpr

◆ CalculateCThreadOriginDataIndex()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::CalculateCThreadOriginDataIndex ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inlinestatic

◆ CalculateCThreadOriginDataIndex8D()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<index_t m0, index_t n0, index_t xdlops_i, index_t blk_i>
__device__ auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::CalculateCThreadOriginDataIndex8D ( Number< m0 > ,
Number< n0 > ,
Number< xdlops_i > ,
Number< blk_i >  )
inlinestatic

◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inlinestaticconstexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inlinestaticconstexpr

◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ( )
inlinestaticconstexpr

◆ GetCThreadBuffer()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ __device__ constexpr auto & ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCThreadBuffer ( )
inlineconstexpr

◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inlinestaticconstexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( )
inlinestaticconstexpr

◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 ( )
inlinestaticconstexpr

◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_G_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_G_M_N & c_grid_desc_g_m_n)
inlinestaticconstexpr

◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename CGridDesc_M_N>
__host__ static __device__ constexpr auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ( const CGridDesc_M_N & c_grid_desc_m_n)
inlinestaticconstexpr

◆ Run()

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<bool HasMainLoop, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CThreadBuffer, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadDesc, typename BScaleThreadTransfer, typename BScaleThreadTransferStep>
__device__ void ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Run ( const AGridDesc & a_grid_desc,
const ABlockDesc & a_block_desc,
ABlockTransfer & a_blockwise_copy,
const AGridBuffer & a_grid_buf,
ABlockBuffer & a_block_buf,
const ABlockTransferStep & a_block_copy_step,
const BGridDesc & b_grid_desc,
const BBlockDesc & b_block_desc,
BBlockTransfer & b_blockwise_copy,
const BGridBuffer & b_grid_buf,
BBlockBuffer & b_block_buf,
const BBlockTransferStep & b_block_copy_step,
CThreadBuffer & c_thread_buf,
const BScaleGridDesc & b_scale_grid_desc,
const BScaleThreadDesc & b_scale_thread_desc,
BScaleThreadTransfer & b_scale_thread_copy,
const BScaleGridBuffer & b_scale_grid_buf,
const BScaleThreadTransferStep & b_scale_thread_copy_step,
index_t num_loop,
index_t num_loop_per_scale ) const
inline

Member Data Documentation

◆ a_block_desc_m0_m1_m2_k

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
AMmaTileDesc ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::a_block_desc_m0_m1_m2_k
staticconstexpr

◆ A_K1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::A_K1
staticconstexpr

◆ a_thread_copy_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
AThreadCopy ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_thread_copy_ {Base::CalculateAThreadOriginDataIndex()}
protected

◆ a_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::a_thread_desc_
staticconstexprprotected
Initial value:
I1))
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
Definition blockwise_gemm_pipeline_xdlops_v2_b_scale.hpp:37

◆ b_block_desc_n0_n1_n2_k

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
BMmaTileDesc ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::b_block_desc_n0_n1_n2_k
staticconstexpr

◆ B_K1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::B_K1
staticconstexpr

◆ b_thread_copy_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
BThreadCopy ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_thread_copy_ {Base::CalculateBThreadOriginDataIndex()}
protected

◆ b_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::b_thread_desc_
staticconstexprprotected

◆ c_thread_desc_

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::c_thread_desc_
staticconstexprprotected

◆ FullMemBandPrefetchStages

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::FullMemBandPrefetchStages
staticconstexpr
Initial value:
32768 / WgpPerCU,
(MPerBlock * sizeof(ADataType) + NPerBlock * sizeof(BDataType)) * KPerBlock)
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Definition utility/math.hpp:72

◆ GlobalBufferNum

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::GlobalBufferNum = PrefetchStages
staticconstexpr

◆ I0

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::I0
staticconstexpr

◆ I1

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::I1
staticconstexpr

◆ KPerInnerLoop

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KPerInnerLoop = math::max(KPerThread / NumMacClusters, KPack)
staticconstexpr

◆ KPerThread

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::KPerThread
staticconstexpr

◆ KRepeat

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::KRepeat = KPerThread / KPerInnerLoop
staticconstexpr

◆ NumMacClusters

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::NumMacClusters = CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS
staticconstexpr

◆ PrefetchStages

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::PrefetchStages
staticconstexpr

◆ PrefillStages

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::PrefillStages = 1
staticconstexpr

◆ WaveSize

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::WaveSize
staticconstexpr

◆ WgpPerCU

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
index_t ck::BlockwiseGemmXdlops_pipeline_v2_b_scale< BlockGemmPipelineScheduler::Interwave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::WgpPerCU
staticconstexpr

◆ xdlops_gemm

template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
auto ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, TransposeC >::xdlops_gemm
staticconstexpr

The documentation for this struct was generated from the following file: