detail Namespace Reference

detail Namespace Reference

Composable Kernel: ck_tile::detail Namespace Reference
ck_tile::detail Namespace Reference

Classes

struct  pick_sequence_elements_by_mask_impl
struct  pick_sequence_elements_by_mask_impl< WorkSeq, sequence<>, sequence<> >
struct  modify_sequence_elements_by_ids_impl
struct  modify_sequence_elements_by_ids_impl< WorkSeq, sequence<>, sequence<> >
struct  sorted_sequence_histogram
struct  sorted_sequence_histogram< h_idx, sequence< x, xs... >, sequence< r, rs... > >
struct  sorted_sequence_histogram< h_idx, sequence< x >, sequence< r, rs... > >
struct  fp16x2_repr
struct  bf16x2_repr
struct  fp32x2_repr
struct  is_similiar_distributed_tensor
struct  is_similiar_distributed_tensor< static_distributed_tensor< TypeX, DistX >, static_distributed_tensor< TypeY, DistY > >
struct  tile_distribution_detail
struct  swallow
struct  static_for_impl
struct  static_for_impl< sequence< Is... > >
struct  applier
struct  static_ford_impl
struct  static_ford_impl< sequence<>, Orders >
struct  unpack_impl
struct  unpack_impl< sequence< Is... > >
struct  unpack2_impl
struct  unpack2_impl< sequence< Is... >, sequence< Js... > >
struct  static_uford_impl
struct  static_uford_impl< sequence<>, sequence<>, Orders >
struct  static_uford_one_shot_impl
struct  static_uford_one_shot_impl< sequence<>, sequence<>, Orders >
struct  ignore_t
struct  detector
struct  detector< Default, std::void_t< Op< Args... > >, Op, Args... >
struct  tuple_element_or_default_dispatch
struct  tuple_element_or_default_dispatch< true, Idx, Tuple, DefaultType >
struct  log2
struct  log2< 4 >
struct  log2< 8 >
struct  log2< 16 >
struct  log2< 32 >
struct  log2< 64 >
struct  log2< 128 >
struct  get_aq_layout_or
struct  get_aq_layout_or< T, Default, std::void_t< typename T::AQLayout > >
struct  get_bq_layout_or
struct  get_bq_layout_or< T, Default, std::void_t< typename T::BQLayout > >
struct  get_aq_data_type_or
struct  get_aq_data_type_or< T, Default, std::void_t< typename T::AQDataType > >
struct  get_bq_data_type_or
struct  get_bq_data_type_or< T, Default, std::void_t< typename T::BQDataType > >
struct  is_quantpreshuffle_enabled
struct  is_quantpreshuffle_enabled< T, std::void_t< decltype(T::PreshuffleQuant)> >
struct  is_preshuffleB_enabled
struct  is_preshuffleB_enabled< T, std::void_t< decltype(T::PreshuffleB)> >

Typedefs

template<int32_t Size>
using make_applier = __make_integer_seq<applier, index_t, Size>

Functions

template<typename F, typename X, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto transform_tuples_impl (F f, const X &x, sequence< Is... >)
template<typename F, typename X, typename Y, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto transform_tuples_impl (F f, const X &x, const Y &y, sequence< Is... >)
template<typename F, typename X, typename Y, typename Z, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto transform_tuples_impl (F f, const X &x, const Y &y, const Z &z, sequence< Is... >)
template<typename F, typename Tuple, index_t... Is>
constexpr decltype(auto) apply_impl (F &&f, Tuple &&t, sequence< Is... >)
template<typename F, typename X, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto embed_tuples_impl (F f, const X &x, sequence< Is... >)
CK_TILE_HOST_DEVICE constexpr _Float16 lane0 (const fp16x2_t &v)
CK_TILE_HOST_DEVICE constexpr _Float16 lane1 (const fp16x2_t &v)
CK_TILE_HOST_DEVICE constexpr bfloat16_t lane0 (const bf16x2_t &v)
CK_TILE_HOST_DEVICE constexpr bfloat16_t lane1 (const bf16x2_t &v)
CK_TILE_HOST_DEVICE constexpr float lane0 (const fp32x2_t &v)
CK_TILE_HOST_DEVICE constexpr float lane1 (const fp32x2_t &v)
template<typename OutTensor, typename InTensor>
CK_TILE_DEVICE void shuffle_tile_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor)
template<typename Lengths, typename Strides, index_t I, typename AccOld>
CK_TILE_HOST_DEVICE constexpr auto calculate_element_space_size_impl (const Lengths &lengths, const Strides &strides, number< I > i, AccOld acc_old)
template<typename Distribution>
CK_TILE_HOST_DEVICE auto get_partition_index (Distribution)
template<index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto make_tile_distributed_span (sequence< Is... >)
template<index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto make_tile_distributed_index (sequence< Is... >)
template<index_t NDimMax>
CK_TILE_HOST_DEVICE constexpr auto make_sequential_index (index_t ibegin, index_t iend)
template<typename StaticTileDistributionEncoding_>
CK_TILE_HOST_DEVICE constexpr auto make_adaptor_encoding_for_tile_distribution (StaticTileDistributionEncoding_)
template<typename Distribution, index_t... XSliceBegins, index_t... XSliceEnds>
CK_TILE_HOST_DEVICE constexpr auto slice_distribution_from_x (Distribution, sequence< XSliceBegins... > x_slice_begins, sequence< XSliceEnds... > x_slice_ends)
template<typename OuterDstr, typename InnerDstr>
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding (OuterDstr, InnerDstr)
template<typename InDstr, index_t... InReduceDimXs>
CK_TILE_HOST_DEVICE constexpr auto make_reduce_tile_distribution_encoding_impl (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)
template<typename InDstr, index_t... InReduceDimXs>
CK_TILE_HOST_DEVICE constexpr auto make_reduce_tile_distribution_encoding (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)
template<typename OutTensor, typename InTensor>
CK_TILE_DEVICE void transpose_tile2d_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor)
CK_TILE_DEVICE float fma_impl_vsv (float a, float b, float c)
CK_TILE_DEVICE float add_impl_vv (float lhs, float rhs)
CK_TILE_DEVICE float mul_impl_vv (float lhs, float rhs)
CK_TILE_DEVICE fp16x2_t cvt_pk_fp16_f32 (float a, float b)
CK_TILE_DEVICE bf16x2_t cvt_pk_bf16_f32 (float a, float b)
CK_TILE_DEVICE fp32x2_t pk_mul_f32 (fp32x2_t lhs, fp32x2_t rhs)

Variables

template<typename X, typename Y>
constexpr bool is_similiar_distributed_tensor_v
constexpr index_t philox_per_tile = 64

Typedef Documentation

◆ make_applier

template<int32_t Size>
using ck_tile::detail::make_applier = __make_integer_seq<applier, index_t, Size>

Function Documentation

◆ add_impl_vv()

CK_TILE_DEVICE float ck_tile::detail::add_impl_vv ( float lhs,
float rhs )

◆ apply_impl()

template<typename F, typename Tuple, index_t... Is>
decltype(auto) ck_tile::detail::apply_impl ( F && f,
Tuple && t,
sequence< Is... >  )
constexpr

◆ calculate_element_space_size_impl()

template<typename Lengths, typename Strides, index_t I, typename AccOld>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::calculate_element_space_size_impl ( const Lengths & lengths,
const Strides & strides,
number< I > i,
AccOld acc_old )
constexpr

◆ cvt_pk_bf16_f32()

CK_TILE_DEVICE bf16x2_t ck_tile::detail::cvt_pk_bf16_f32 ( float a,
float b )

◆ cvt_pk_fp16_f32()

CK_TILE_DEVICE fp16x2_t ck_tile::detail::cvt_pk_fp16_f32 ( float a,
float b )

◆ embed_tuples_impl()

template<typename F, typename X, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::embed_tuples_impl ( F f,
const X & x,
sequence< Is... >  )
constexpr

◆ fma_impl_vsv()

CK_TILE_DEVICE float ck_tile::detail::fma_impl_vsv ( float a,
float b,
float c )

◆ get_partition_index()

template<typename Distribution>
CK_TILE_HOST_DEVICE auto ck_tile::detail::get_partition_index ( Distribution )

◆ lane0() [1/3]

CK_TILE_HOST_DEVICE constexpr bfloat16_t ck_tile::detail::lane0 ( const bf16x2_t & v)
constexpr

◆ lane0() [2/3]

CK_TILE_HOST_DEVICE constexpr _Float16 ck_tile::detail::lane0 ( const fp16x2_t & v)
constexpr

◆ lane0() [3/3]

CK_TILE_HOST_DEVICE constexpr float ck_tile::detail::lane0 ( const fp32x2_t & v)
constexpr

◆ lane1() [1/3]

CK_TILE_HOST_DEVICE constexpr bfloat16_t ck_tile::detail::lane1 ( const bf16x2_t & v)
constexpr

◆ lane1() [2/3]

CK_TILE_HOST_DEVICE constexpr _Float16 ck_tile::detail::lane1 ( const fp16x2_t & v)
constexpr

◆ lane1() [3/3]

CK_TILE_HOST_DEVICE constexpr float ck_tile::detail::lane1 ( const fp32x2_t & v)
constexpr

◆ make_adaptor_encoding_for_tile_distribution()

template<typename StaticTileDistributionEncoding_>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_adaptor_encoding_for_tile_distribution ( StaticTileDistributionEncoding_ )
constexpr

◆ make_embed_tile_distribution_encoding()

template<typename OuterDstr, typename InnerDstr>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_embed_tile_distribution_encoding ( OuterDstr ,
InnerDstr  )
constexpr

◆ make_reduce_tile_distribution_encoding()

template<typename InDstr, index_t... InReduceDimXs>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_reduce_tile_distribution_encoding ( InDstr ,
sequence< InReduceDimXs... > reduce_dim_xs_in )
constexpr

◆ make_reduce_tile_distribution_encoding_impl()

template<typename InDstr, index_t... InReduceDimXs>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_reduce_tile_distribution_encoding_impl ( InDstr ,
sequence< InReduceDimXs... > reduce_dim_xs_in )
constexpr

◆ make_sequential_index()

template<index_t NDimMax>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_sequential_index ( index_t ibegin,
index_t iend )
constexpr

◆ make_tile_distributed_index()

template<index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_tile_distributed_index ( sequence< Is... > )
constexpr

◆ make_tile_distributed_span()

template<index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_tile_distributed_span ( sequence< Is... > )
constexpr

◆ mul_impl_vv()

CK_TILE_DEVICE float ck_tile::detail::mul_impl_vv ( float lhs,
float rhs )

◆ pk_mul_f32()

CK_TILE_DEVICE fp32x2_t ck_tile::detail::pk_mul_f32 ( fp32x2_t lhs,
fp32x2_t rhs )

◆ shuffle_tile_impl_in_thread()

template<typename OutTensor, typename InTensor>
CK_TILE_DEVICE void ck_tile::detail::shuffle_tile_impl_in_thread ( OutTensor & out_tensor,
const InTensor & in_tensor )

◆ slice_distribution_from_x()

template<typename Distribution, index_t... XSliceBegins, index_t... XSliceEnds>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::slice_distribution_from_x ( Distribution ,
sequence< XSliceBegins... > x_slice_begins,
sequence< XSliceEnds... > x_slice_ends )
constexpr

◆ transform_tuples_impl() [1/3]

template<typename F, typename X, typename Y, typename Z, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl ( F f,
const X & x,
const Y & y,
const Z & z,
sequence< Is... >  )
constexpr

◆ transform_tuples_impl() [2/3]

template<typename F, typename X, typename Y, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl ( F f,
const X & x,
const Y & y,
sequence< Is... >  )
constexpr

◆ transform_tuples_impl() [3/3]

template<typename F, typename X, index_t... Is>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl ( F f,
const X & x,
sequence< Is... >  )
constexpr

◆ transpose_tile2d_impl_in_thread()

template<typename OutTensor, typename InTensor>
CK_TILE_DEVICE void ck_tile::detail::transpose_tile2d_impl_in_thread ( OutTensor & out_tensor,
const InTensor & in_tensor )

Variable Documentation

◆ is_similiar_distributed_tensor_v

template<typename X, typename Y>
bool ck_tile::detail::is_similiar_distributed_tensor_v
inline constexpr
Initial value:
= is_similiar_distributed_tensor<X, Y>::value
static constexpr bool value
Definition static_distributed_tensor.hpp:216

◆ philox_per_tile

index_t ck_tile::detail::philox_per_tile = 64
constexpr