DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector > Struct Template Reference
#include <device_gemm_xdl_skip_b_lds.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >:
Classes | |
| struct | Argument |
| struct | Invoker |
Public Types | |
| using | AGridDesc_K0_M_K1 = decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) |
| using | BGridDesc_K0_N_K1 = decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) |
| using | CGridDesc_M_N = decltype(MakeCGridDescriptor_M_N(1, 1, 1)) |
| template<index_t NXdlPerWave_> | |
| using | GridwiseGemmBase |
| using | GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)> |
| using | GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32> |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static auto | MakeAGridDescriptor_K0_M_K1 (index_t M, index_t K, index_t StrideA) |
| static auto | MakeBGridDescriptor_K0_N_K1 (index_t K, index_t N, index_t StrideB) |
| static auto | MakeCGridDescriptor_M_N (index_t M, index_t N, index_t StrideC) |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &arg) |
| static auto | MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) |
| static auto | MakeInvoker () |
Static Public Attributes | |
| static GET_NXDL_PER_WAVE_IMPL constexpr auto | NXdlPerWave64 = GetNXdlPerWave<true>() |
| static constexpr auto | NXdlPerWave32 = GetNXdlPerWave<false>() |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | K1Number = Number<K1>{} |
Member Typedef Documentation
◆ AGridDesc_K0_M_K1
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::AGridDesc_K0_M_K1 = decltype(MakeAGridDescriptor_K0_M_K1(1, 1, 1)) |
◆ BGridDesc_K0_N_K1
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::BGridDesc_K0_N_K1 = decltype(MakeBGridDescriptor_K0_N_K1(1, 1, 1)) |
◆ CGridDesc_M_N
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::CGridDesc_M_N = decltype(MakeCGridDescriptor_M_N(1, 1, 1)) |
◆ GridwiseGemm32
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GridwiseGemm32 = GridwiseGemmBase<NXdlPerWave32> |
◆ GridwiseGemm64
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GridwiseGemm64 = GridwiseGemmBase<math::max(NXdlPerWave64, 1)> |
◆ GridwiseGemmBase
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
template<index_t NXdlPerWave_>
| using ck::tensor_operation::device::DeviceGemmXdlSkipBLds< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, K0PerBlock, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferSrcScalarPerVector, BBlockBufferSize, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector >::GridwiseGemmBase |
Member Function Documentation
◆ GetTypeString()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ IsSupportedArgument() [2/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static constexpr |
◆ MakeAGridDescriptor_K0_M_K1()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ MakeArgument()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ MakeArgumentPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline override virtual |
◆ MakeBGridDescriptor_K0_N_K1()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ MakeCGridDescriptor_M_N()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ MakeInvoker()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline static |
◆ MakeInvokerPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
inline override virtual |
Member Data Documentation
◆ I0
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
◆ I1
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
◆ I2
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
◆ K1Number
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
◆ NXdlPerWave32
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
◆ NXdlPerWave64
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t K0PerBlock, ck::index_t K1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockBufferSize, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector>
|
static constexpr |
The documentation for this struct was generated from the following file: