BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference
ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc > Struct Template Reference
#include <blockwise_gemm_pipeline_wmmaops_base.hpp>
Public Member Functions | |
| __device__ | BScale (GridDesc b_scale_grid_desc_, ThreadCopy b_scale_thread_copy_, GridBuffer b_scale_grid_buf_) |
| template<index_t NBuffer> | |
| __device__ void | GlobalLoad (bool cond) |
Public Attributes | |
| ThreadCopy | b_scale_thread_copy |
| GridDesc | b_scale_grid_desc |
| GridBuffer | b_scale_grid_buf |
| StaticallyIndexedArray< ThreadStaticBuffer, Number< NumberOfBuffers >{}> | b_scale_thread_bufs |
Static Public Attributes | |
| static constexpr index_t | num_scale_k_block = BScaleThreadDesc{}.GetLength(Number<1>{}) |
| static constexpr index_t | num_scale_krepeat = KRepeat / num_scale_k_block |
| static constexpr auto | b_scale_thread_desc = BScaleThreadDesc{} |
| static constexpr auto | b_scale_thread_copy_step |
Constructor & Destructor Documentation
◆ BScale()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
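| __device__ BScale (GridDesc b_scale_grid_desc_, ThreadCopy b_scale_thread_copy_, GridBuffer b_scale_grid_buf_) |
inline |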
Member Function Documentation
◆ GlobalLoad()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
template<index_t NBuffer>
| __device__ void GlobalLoad (bool cond) |
inline |
Member Data Documentation
◆ b_scale_grid_buf
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| GridBuffer ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_grid_buf |
◆ b_scale_grid_desc
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| GridDesc ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_grid_desc |
◆ b_scale_thread_bufs
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| StaticallyIndexedArray<ThreadStaticBuffer, Number<NumberOfBuffers>{}> ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_bufs |
◆ b_scale_thread_copy
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| ThreadCopy ck::BlockwiseGemmWmmaops_pipeline_base< BlockSize, ADataType, BDataType, ComputeTypeA, ComputeTypeB, AccDataType, AWmmaTileDesc, BWmmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, MRepeat, NRepeat, KPack, TransposeC >::BScale< ScaleSliceSizeN, ScaleSliceSizeK, NWaves, ScaleBlockK, NumberOfBuffers, GridDesc, ThreadCopy, GridBuffer, ThreadStaticBuffer, BScaleThreadDesc >::b_scale_thread_copy |
◆ b_scale_thread_copy_step
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| constexpr auto b_scale_thread_copy_step |
static constexpr |
Initial value:
= make_tuple(make_multi_index(NWaves * NPerWmma, 0),
             make_multi_index(-NPerBlock, 0),
             make_multi_index(-NPerBlock, (KPerBlock + ScaleBlockK - 1) / ScaleBlockK))
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr index_t NWaves
Definition blockwise_gemm_pipeline_wmmaops_base.hpp:47
◆ b_scale_thread_desc
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| constexpr auto b_scale_thread_desc = BScaleThreadDesc{} |
static constexpr |
◆ num_scale_k_block
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| constexpr index_t num_scale_k_block = BScaleThreadDesc{}.GetLength(Number<1>{}) |
static constexpr |
◆ num_scale_krepeat
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeTypeA, typename ComputeTypeB, typename AccDataType, typename AWmmaTileDesc, typename BWmmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, index_t KPack, bool TransposeC = false>
template<index_t ScaleSliceSizeN, index_t ScaleSliceSizeK, index_t NWaves, index_t ScaleBlockK, index_t NumberOfBuffers, typename GridDesc, typename ThreadCopy, typename GridBuffer, typename ThreadStaticBuffer, typename BScaleThreadDesc>
| constexpr index_t num_scale_krepeat = KRepeat / num_scale_k_block |
static constexpr |
The documentation for this struct was generated from the following file: