BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop > Struct Template Reference

BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop > Struct Template Reference
ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop > Struct Template Reference

#include <blockwise_gemm_dlops_v3.hpp>

Public Types

using AIndex = MultiIndex<3>
using BIndex = MultiIndex<3>
using CIndex = MultiIndex<4>

Public Member Functions

__device__ BlockwiseGemmDlops_km_kn_m0m1n0n1_v3 ()
template<typename ABlockBuffer, typename BThreadBuffer, typename CThreadBuffer>
__device__ void Run (const ABlockBuffer &a_block_buf, const BThreadBuffer &b_thread_buf, CThreadBuffer &c_thread_buf) const
template<typename ABlockSliceMoveStepIdx>
__device__ void MoveABlockSliceWindow (const ABlockSliceMoveStepIdx &a_block_slice_move_step_idx)

Static Public Member Functions

static __device__ constexpr auto GetCThreadDesc_K_N_Ho_WoLengths ()
static __device__ CIndex GetBeginOfCThreadDesc_K_N_Ho_Wo (index_t thread_id)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr auto I4 = Number<4>{}
static constexpr auto E1 = ABlockDesc_E1_K1_E2{}.GetLength(I0)
static constexpr auto KPerBlock = ABlockDesc_E1_K1_E2{}.GetLength(I1)
static constexpr auto E2 = ABlockDesc_E1_K1_E2{}.GetLength(I2)
static constexpr auto HoPerBlock = BBlockDesc_E1_N_Ho_Wo_E2{}.GetLength(I2)
static constexpr auto WoPerBlock = BBlockDesc_E1_N_Ho_Wo_E2{}.GetLength(I3)
static constexpr auto KPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I0)
static constexpr auto HoPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I2)
static constexpr auto WoPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I3)
static constexpr auto a_thread_mtx_
static constexpr auto b_thread_mtx_
static constexpr auto c_thread_mtx_

Member Typedef Documentation

◆ AIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
using ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::AIndex = MultiIndex<3>

◆ BIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
using ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::BIndex = MultiIndex<3>

◆ CIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
using ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::CIndex = MultiIndex<4>

Constructor & Destructor Documentation

◆ BlockwiseGemmDlops_km_kn_m0m1n0n1_v3()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
__device__ ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3 ( )
inline

Member Function Documentation

◆ GetBeginOfCThreadDesc_K_N_Ho_Wo()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
__device__ CIndex ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::GetBeginOfCThreadDesc_K_N_Ho_Wo ( index_t thread_id)
inline static

◆ GetCThreadDesc_K_N_Ho_WoLengths()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
__device__ constexpr auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::GetCThreadDesc_K_N_Ho_WoLengths ( )
inline static constexpr

◆ MoveABlockSliceWindow()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
template<typename ABlockSliceMoveStepIdx>
__device__ void ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::MoveABlockSliceWindow ( const ABlockSliceMoveStepIdx & a_block_slice_move_step_idx)
inline

◆ Run()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
template<typename ABlockBuffer, typename BThreadBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::Run ( const ABlockBuffer & a_block_buf,
const BThreadBuffer & b_thread_buf,
CThreadBuffer & c_thread_buf ) const
inline

Member Data Documentation

◆ a_thread_mtx_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::a_thread_mtx_
static constexpr
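Initial value: (the initializer expression was not captured in this extract; the entries that follow appear to be cross-reference tooltips for symbols it uses)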
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ b_thread_mtx_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::b_thread_mtx_
static constexpr

◆ c_thread_mtx_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::c_thread_mtx_
static constexpr

◆ E1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::E1 = ABlockDesc_E1_K1_E2{}.GetLength(I0)
static constexpr

◆ E2

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::E2 = ABlockDesc_E1_K1_E2{}.GetLength(I2)
static constexpr

◆ HoPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::HoPerBlock = BBlockDesc_E1_N_Ho_Wo_E2{}.GetLength(I2)
static constexpr

◆ HoPerThread

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::HoPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I2)
static constexpr

◆ I0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::I3 = Number<3>{}
static constexpr

◆ I4

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::I4 = Number<4>{}
static constexpr

◆ KPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::KPerBlock = ABlockDesc_E1_K1_E2{}.GetLength(I1)
static constexpr

◆ KPerThread

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::KPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I0)
static constexpr

◆ WoPerBlock

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::WoPerBlock = BBlockDesc_E1_N_Ho_Wo_E2{}.GetLength(I3)
static constexpr

◆ WoPerThread

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_E1_K1_E2, typename BBlockDesc_E1_N_Ho_Wo_E2, typename CThreadDesc_K_N_Ho_Wo, index_t EPerThreadLoop, index_t KPerThreadLoop>
auto ck::BlockwiseGemmDlops_km_kn_m0m1n0n1_v3< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_E1_K1_E2, BBlockDesc_E1_N_Ho_Wo_E2, CThreadDesc_K_N_Ho_Wo, EPerThreadLoop, KPerThreadLoop >::WoPerThread = CThreadDesc_K_N_Ho_Wo{}.GetLength(I3)
static constexpr

The documentation for this struct was generated from the following file: