BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference
ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference
#include <blockwise_gemm_pipeline_xdlops.hpp>
Static Public Member Functions | |
| static constexpr auto | Print () |
| static constexpr auto | Print () |
Static Public Attributes | |
| static constexpr index_t | WaveNumM = MPerBlock / (MRepeat * MPerXDL) |
| static constexpr index_t | WaveNumN = NPerBlock / (NRepeat * NPerXDL) |
| static constexpr index_t | WaveSize = BlockSize / (WaveNumM * WaveNumN) |
| static constexpr index_t | A_Buffer_Load_Inst_Num |
| static constexpr index_t | B_Buffer_Load_Inst_Num |
| static constexpr index_t | A_LDS_Write_Inst_Num |
| static constexpr index_t | B_LDS_Write_Inst_Num |
| static constexpr index_t | A_LDS_Read_Inst_Num |
| static constexpr index_t | B_LDS_Read_Inst_Num |
| static constexpr index_t | C_MFMA_Inst_Num |
| static constexpr index_t | A_LDS_Read_Width = ALDSReadWidth |
| static constexpr index_t | B_LDS_Read_Width = BLDSReadWidth |
| static constexpr index_t | C_MFMA_SpeedUp = IsF4F6 ? 2 : 1 |
| static constexpr index_t | C_MFMA_Inst_Cycle |
Member Function Documentation
◆ Print() [1/2]
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr auto Print()
inline static constexpr
◆ Print() [2/2]
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr auto Print()
inline static constexpr
Member Data Documentation
◆ A_Buffer_Load_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t A_Buffer_Load_Inst_Num
static constexpr
Initial value:
= MPerBlock * KPerBlock / (BlockSize * ABufferLoadWidth)
◆ A_LDS_Read_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t A_LDS_Read_Inst_Num
static constexpr
Initial value:
= WaveNumN * MPerBlock * KPerBlock / (BlockSize * ALDSReadWidth)
static constexpr index_t WaveNumN
Definition blockwise_gemm_pipeline_wmmaops.hpp:29
◆ A_LDS_Read_Width
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t A_LDS_Read_Width = ALDSReadWidth
static constexpr
◆ A_LDS_Write_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t A_LDS_Write_Inst_Num
static constexpr
Initial value:
= MPerBlock * KPerBlock / (BlockSize * ALDSWriteWidth)
◆ B_Buffer_Load_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t B_Buffer_Load_Inst_Num
static constexpr
Initial value:
= NPerBlock * KPerBlock / (BlockSize * BBufferLoadWidth)
◆ B_LDS_Read_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t B_LDS_Read_Inst_Num
static constexpr
Initial value:
= WaveNumM * MPerBlock * KPerBlock / (BlockSize * BLDSReadWidth)
static constexpr index_t WaveNumM
Definition blockwise_gemm_pipeline_wmmaops.hpp:28
◆ B_LDS_Read_Width
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t B_LDS_Read_Width = BLDSReadWidth
static constexpr
◆ B_LDS_Write_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t B_LDS_Write_Inst_Num
static constexpr
Initial value:
= NPerBlock * KPerBlock / (BlockSize * BLDSWriteWidth)
◆ C_MFMA_Inst_Cycle
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
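constexpr index_t C_MFMA_Inst_Cycle
static constexpr
Initial value:
= []() {
    if constexpr(NPerXDL == 16)
    {
    }
    else if constexpr(NPerXDL == 32)
    {
    }
}()
Note: the statements inside both branches are missing from this generated listing; consult the definition in the header for the values actually returned.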
static constexpr index_t C_MFMA_SpeedUp
Definition blkgemmpipe_scheduler.hpp:103
◆ C_MFMA_Inst_Num
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t C_MFMA_Inst_Num
static constexpr
Initial value:
= MPerBlock * NPerBlock * KPerBlock / (BlockSize / WaveSize) / (MPerXDL * NPerXDL * KPerXDL)
static constexpr index_t WaveSize
Definition blockwise_gemm_pipeline_xdlops.hpp:37
◆ C_MFMA_SpeedUp
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t C_MFMA_SpeedUp = IsF4F6 ? 2 : 1
static constexpr
◆ WaveNumM
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t WaveNumM = MPerBlock / (MRepeat * MPerXDL)
static constexpr
◆ WaveNumN
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t WaveNumN = NPerBlock / (NRepeat * NPerXDL)
static constexpr
◆ WaveSize
template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t WaveSize = BlockSize / (WaveNumM * WaveNumN)
static constexpr
The documentation for this struct was generated from the following files: