BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference
Public Types |
Public Member Functions |
Public Attributes |
Static Public Attributes |
List of all members
ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN > Struct Template Reference
Blockwise softmax. More...
#include <blockwise_softmax.hpp>
Public Types | |
| using | ThreadSliceDesc_M |
| using | ThreadwiseMaxReduce |
| using | ThreadwiseSumReduce |
| using | ThreadClusterLengths_M_K = decltype(ThreadClusterDesc_M_K{}.GetLengths()) |
| using | BlockwiseMaxReduce |
| using | BlockwiseSumReduce |
| using | BufferType = StaticBuffer<AddressSpaceEnum::Vgpr, AccDataType, MRepeat, true> |
Public Member Functions | |
| template<typename CThreadBuffer, typename WorkspaceBuffer> | |
| __host__ __device__ void | Run (CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf) |
Public Attributes | |
| BufferType | max_value_buf |
| BufferType | sum_value_buf |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr index_t | MRepeat = ThreadSliceDesc_M_K{}.GetLength(I0) |
| static constexpr index_t | KRepeat = ThreadSliceDesc_M_K{}.GetLength(I1) |
Detailed Description
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
struct ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >
struct ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >
Blockwise softmax.
Template Parameters
- BlockSize: Block size
- AccDataType: Accumulator data type
- ThreadMap_M_K: Thread id to m_k
- ThreadClusterDesc_M_K: Threadwise cluster descriptor
- ThreadSliceDesc_M_K: Threadwise slices descriptor
- IgnoreNaN: Flag to ignore NaN, false by default
Member Typedef Documentation
◆ BlockwiseMaxReduce
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BlockwiseMaxReduce |
Initial value:
PartitionedBlockwiseReduction_v2<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadMap_M_K,
reduce::Max,
false>
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
Definition reduction_functions_blockwise.hpp:101
Definition reduction_operator.hpp:163
◆ BlockwiseSumReduce
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BlockwiseSumReduce |
Initial value:
PartitionedBlockwiseReduction_v2<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadMap_M_K,
reduce::Add,
false>
Definition reduction_operator.hpp:37
◆ BufferType
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::BufferType = StaticBuffer<AddressSpaceEnum::Vgpr, AccDataType, MRepeat, true> |
◆ ThreadClusterLengths_M_K
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadClusterLengths_M_K = decltype(ThreadClusterDesc_M_K{}.GetLengths()) |
◆ ThreadSliceDesc_M
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadSliceDesc_M |
Initial value:
decltype(make_naive_tensor_descriptor_packed(
make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0))))
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ ThreadwiseMaxReduce
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadwiseMaxReduce |
Initial value:
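typename conditional<
IgnoreNaN,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
ThreadSliceDesc_M,
reduce::Max,
false,
detail::AccumulateWithNanIgnore<reduce::Max, AccDataType>>,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
ThreadSliceDesc_M,
reduce::Max,
false>>::type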
decltype(make_naive_tensor_descriptor_packed( make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0)))) ThreadSliceDesc_M
Definition blockwise_softmax.hpp:38
Definition reduction_functions_threadwise.hpp:23
Definition utility/functional.hpp:100
Definition reduction_functions_accumulate.hpp:17
◆ ThreadwiseSumReduce
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| using ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::ThreadwiseSumReduce |
Initial value:
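typename conditional<
IgnoreNaN,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
ThreadSliceDesc_M,
reduce::Add,
false,
detail::AccumulateWithNanIgnore<reduce::Add, AccDataType>>,
ThreadwiseReduction<AccDataType,
ThreadSliceDesc_M_K,
ThreadSliceDesc_M,
reduce::Add,
false>>::type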
Member Function Documentation
◆ Run()
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
template<typename CThreadBuffer, typename WorkspaceBuffer>
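| __host__ __device__ void ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::Run (CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf) |
inline |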
Member Data Documentation
◆ I0
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
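| constexpr auto ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::I0 = Number<0>{} |
static constexpr |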
◆ I1
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
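| constexpr auto ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::I1 = Number<1>{} |
static constexpr |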
◆ KRepeat
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
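| constexpr index_t ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::KRepeat = ThreadSliceDesc_M_K{}.GetLength(I1) |
static constexpr |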
◆ max_value_buf
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| BufferType ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::max_value_buf |
◆ MRepeat
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
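| constexpr index_t ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::MRepeat = ThreadSliceDesc_M_K{}.GetLength(I0) |
static constexpr |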
◆ sum_value_buf
template<index_t BlockSize, typename AccDataType, typename ThreadMap_M_K, typename ThreadClusterDesc_M_K, typename ThreadSliceDesc_M_K, bool IgnoreNaN = false>
| BufferType ck::BlockwiseSoftmax< BlockSize, AccDataType, ThreadMap_M_K, ThreadClusterDesc_M_K, ThreadSliceDesc_M_K, IgnoreNaN >::sum_value_buf |
The documentation for this struct was generated from the following file: