device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp Source File
device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp
Go to the documentation of this file.
28 #include "ck_tile/builder/reflect/instance_traits_device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp"
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition tensor_operation/gpu/device/tensor_layout.hpp:42
Definition convolution_backward_data_specialization.hpp:8
constexpr bool is_NSpatialGC_GKSpatial_NSpatialGK()
Definition device_grouped_conv_utils.hpp:119
GemmSpecialization
Definition gemm_specialization.hpp:11
decltype(std::declval< T & >().IsTuple()) is_tuple
Definition device_grouped_conv_fwd_multiple_abd.hpp:23
ConvolutionForwardSpecialization
Definition convolution_forward_specialization.hpp:15
@ Filter1x1Stride1Pad0
Definition convolution_forward_specialization.hpp:18
@ Filter3x3
Definition convolution_forward_specialization.hpp:20
@ Filter1x1Pad0
Definition convolution_forward_specialization.hpp:17
std::string getConvForwardSpecializationString(const ConvolutionForwardSpecialization &s)
Definition convolution_forward_specialization.hpp:24
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
typename tuple_element< I, TTuple >::type tuple_element_t
Definition utility/tuple.hpp:208
__device__ uint32_t amd_wave_read_first_lane(uint32_t value)
Definition amd_wave_read_first_lane.hpp:100
__host__ __device__ void array_convert(std::array< Y, NumElems > &y, const std::array< X, NumElems > &x)
Definition utility/type_convert.hpp:2466
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
constexpr LoopScheduler make_default_loop_scheduler()
Definition loop_scheduler.hpp:20
Definition ck/stream_config.hpp:10
Definition utility/array.hpp:14
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:78
__host__ static __device__ constexpr auto MakeDefaultBGridDescriptor_BK0_N_BK1(const BGridDesc_N_K &b_grid_desc_n_k)
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:207
__host__ static __device__ constexpr auto MakeDefaultAGridDescriptor_AK0_M_AK1(const AGridDesc_M_K &a_grid_desc_m_k)
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:190
__host__ static __device__ constexpr auto MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const EGridDesc_M_N &e_grid_desc_m_n)
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:224
__host__ static __device__ constexpr auto MakeDefaultBlock2ETileMap(const EGridDesc_M_N &e_grid_desc_m_n)
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:257
__host__ static __device__ constexpr auto MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const DsGridDesc_M_N &ds_grid_desc_m_n)
Definition gridwise_gemm_multiple_d_xdl_cshuffle.hpp:245
Definition multi_index_transform.hpp:196
Definition multi_index_transform.hpp:284
Definition utility/integral_constant.hpp:20
Definition functional2.hpp:33
Definition tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp:25
__host__ auto SplitConvProblem(const ADataType *a_grid_ptr_base, DsPointer &ds_grid_ptr_base, CDataType *c_grid_ptr_base) const
Definition tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp:393
Definition device_base.hpp:197
BaseArgument()=default
BaseInvoker()=default
virtual std::string GetInstanceString() const
Definition device_base.hpp:230
Grouped Convolution Forward.
Definition device_grouped_conv_fwd_multiple_abd.hpp:73
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:469
bool is_split_valid_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:683
index_t gemms_count_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:680
index_t valid_gemms_count_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:681
std::array< long_index_t, NDimSpatial+3 > b_g_k_c_xs_lengths_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:697
BElementwiseOperation b_element_op_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:691
ComputePtrOffsetOfStridedBatch< I1, I1, NumDTensor > compute_ptr_offset_of_groups_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:686
std::array< long_index_t, NDimSpatial+3 > e_g_n_k_wos_lengths_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:701
std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > ds_g_n_k_wos_lengths_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:699
void init_gemm_args(const ADataType *a_ptr, const BDataType *b_ptr, DsPointer ds_ptr, EDataType *e_ptr, const AGridDesc_M_K &a_grid_desc_m_k, const BGridDesc_N_K &b_grid_desc_n_k, const DsGridDesc_M_N_ &ds_grid_desc_m_n, const EGridDescriptor_M_N_ &e_grid_desc_m_n, const Block2ETileMap &block_2_etile_map, index_t BlockStart, index_t BlockEnd)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:471
std::array< long_index_t, NDimSpatial > conv_filter_dilations_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:704
index_t num_group_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:674
std::array< long_index_t, NDimSpatial+3 > e_g_n_k_wos_strides_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:702
Array< GemmArgs, MaxGemmsNum > gemm_desc_kernel_args_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:677
CDEElementwiseOperation cde_element_op_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:692
Argument(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_lengths, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_dilations, const std::array< long_index_t, NDimSpatial > &input_left_pads, const std::array< long_index_t, NDimSpatial > &input_right_pads, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CDEElementwiseOperation &cde_element_op)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:507
AElementwiseOperation a_element_op_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:690
ComputePtrOffsetOfStridedBatch< I1, I1, NumDTensor > compute_ptr_offset_of_n_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:687
std::array< long_index_t, NDimSpatial > input_right_pads_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:706
std::array< long_index_t, NDimSpatial+3 > a_g_n_c_wis_strides_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:696
std::array< long_index_t, NDimSpatial+3 > a_g_n_c_wis_lengths_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:695
void Print() const
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:659
std::array< long_index_t, NDimSpatial+3 > b_g_k_c_xs_strides_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:698
index_t conv_N_per_block_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:675
std::array< long_index_t, NDimSpatial > input_left_pads_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:705
std::array< long_index_t, NDimSpatial > conv_filter_strides_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:703
index_t grid_size_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:679
std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > ds_g_n_k_wos_strides_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:700
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:448
DsPointer ds_ptr_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:452
BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:457
ck::index_t BlockEnd_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:464
AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:456
const ADataType * a_ptr_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:450
const BDataType * b_ptr_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:451
Block2ETileMap block_2_etile_map_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:463
DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock ds_grid_desc_mblock_mperblock_nblock_nperblock_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:459
EDataType * e_ptr_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:453
EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock e_grid_desc_mblock_mperblock_nblock_nperblock_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:460
ck::index_t BlockStart_
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:464
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:711
float RunImp(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:715
DeviceOp::Argument Argument
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:713
INVOKER_RUN_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:771
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:206
static auto CastDsPointers(const std::array< const void *, NumDTensor > &p_ds)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:293
static auto MakeArgument(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_lengths, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_dilations, const std::array< long_index_t, NDimSpatial > &input_left_pads, const std::array< long_index_t, NDimSpatial > &input_right_pads, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CDEElementwiseOperation &cde_element_op)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1033
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1195
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::GetTypeString
std::string GetTypeString() const override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1200
remove_cvref_t< decltype(GridwiseGemm64::MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock( EGridDesc_M_N{}))> EGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:439
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::BGridDesc_N_K
remove_cvref_t< decltype(MakeBGridDescriptor_N_K< BLayout >(dummy_conv_to_gemm_transformer))> BGridDesc_N_K
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:308
GridwiseGemmMultipleD_xdl_cshuffle< GridwiseGemmTemplateParameters > GridwiseGemmBase
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:425
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:426
static constexpr auto I2
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:220
static auto MakeBGridDescriptor_N_K(const ConvToGemmFwdTransformerIndexT &conv_to_gemm_transformer)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:257
static constexpr index_t MaxGemmsNum
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:213
static bool IsSupportedArgument(const Argument &arg)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:778
static constexpr auto I0
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:218
decltype(CastDsPointers(std::array< const void *, NumDTensor >{})) DsPointer
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:303
static constexpr bool DoElementwiseBeforeCShuffle
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:214
remove_cvref_t< decltype(GridwiseGemm64::MakeDefaultBlock2ETileMap(EGridDesc_M_N{}))> Block2ETileMap
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:444
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:956
remove_cvref_t< decltype(MakeDsGridDescriptor_M_N(dummy_conv_to_gemm_transformer))> DsGridDesc_M_N
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:310
static auto MakeDsGridDescriptor_M_N(const ConvToGemmFwdTransformerIndexT &conv_to_gemm_transformer)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:282
static constexpr ConvToGemmFwdTransformerIndexT dummy_conv_to_gemm_transformer
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:305
static constexpr index_t NumDTensor
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:212
static auto MakeAGridDescriptor_M_K(const ConvToGemmFwdTransformerIndexT &conv_to_gemm_transformer)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:244
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::AGridDesc_M_K
remove_cvref_t< decltype(MakeAGridDescriptor_M_K< ALayout >(dummy_conv_to_gemm_transformer))> AGridDesc_M_K
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:306
static auto MakeArgument(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CDEElementwiseOperation &cde_element_op)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:961
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::EGridDesc_M_N
remove_cvref_t< decltype(MakeEGridDescriptor_M_N< ELayout >(dummy_conv_to_gemm_transformer))> EGridDesc_M_N
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:312
DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor DeviceOp
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:207
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_lengths, const std::array< std::array< index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_strides, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CDEElementwiseOperation &cde_element_op) override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1078
static auto MakeEGridDescriptor_M_N(const ConvToGemmFwdTransformerIndexT &conv_to_gemm_transformer)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:270
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::NXdlPerWave32
static constexpr auto NXdlPerWave32
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:210
remove_cvref_t< decltype(GridwiseGemm64::MakeDefaultAGridDescriptor_AK0_M_AK1( AGridDesc_M_K{}))> AGridDesc_AK0_M_AK1
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:430
TransformConvFwdToGemm< NDimSpatial, ConvForwardSpecialization, true, ADataType, EDataType, I1, index_t > ConvToGemmFwdTransformerIndexT
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:223
static auto GenerateConvToGemmTransforms(ConvToGemmFwdTransformerLongIndexT conv_to_gemm_transformer_base, const ADataType *a_grid_ptr_base, DsPointer ds_grid_ptr_base, EDataType *c_grid_ptr_base)
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:316
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::matrix_padder
static constexpr auto matrix_padder
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:239
TransformConvFwdToGemm< NDimSpatial, ConvForwardSpecialization, true, ADataType, EDataType, I1, long_index_t > ConvToGemmFwdTransformerLongIndexT
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:231
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:427
static constexpr auto I3
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:221
static auto MakeInvoker()
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1076
static constexpr auto I1
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:219
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor::NXdlPerWave64
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:209
remove_cvref_t< decltype(GridwiseGemm64::MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock( DsGridDesc_M_N{}))> DsGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:436
remove_cvref_t< decltype(GridwiseGemm64::MakeDefaultBGridDescriptor_BK0_N_BK1( BGridDesc_N_K{}))> BGridDesc_BK0_N_BK1
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:433
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, const std::array< const void *, NumDTensor > &p_ds, void *p_e, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_lengths, const std::array< long_index_t, NDimSpatial+3 > &a_g_n_c_wis_strides, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_lengths, const std::array< long_index_t, NDimSpatial+3 > &b_g_k_c_xs_strides, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_lengths, const std::array< std::array< long_index_t, NDimSpatial+3 >, NumDTensor > &ds_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_lengths, const std::array< long_index_t, NDimSpatial+3 > &e_g_n_k_wos_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_strides, const std::array< long_index_t, NDimSpatial > &conv_filter_dilations, const std::array< long_index_t, NDimSpatial > &input_left_pads, const std::array< long_index_t, NDimSpatial > &input_right_pads, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CDEElementwiseOperation &cde_element_op) override
Definition device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp:1151
Definition matrix_padder.hpp:180