intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB > Struct Template Reference

intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB > Struct Template Reference

Composable Kernel: ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB > Struct Template Reference
ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB > Struct Template Reference

#include <amd_xdlops.hpp>

Static Public Member Functions

template<class FloatC>
static __device__ void Run (const f8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf8x32_t &reg_a, const int32_t &scale_a, const bf8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf8x32_t &reg_a, const int32_t &scale_a, const f8x32_t &reg_b, const int32_t &scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f6x32_t &reg_a, const int32_t scale_a, const f6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const bf6x32_t &reg_a, const int32_t scale_a, const bf6x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)
template<class FloatC>
static __device__ void Run (const f4x32_t &reg_a, const int32_t scale_a, const f4x32_t &reg_b, const int32_t scale_b, FloatC &reg_c)

Member Function Documentation

◆ Run() [1/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const bf6x32_t & reg_a,
const int32_t scale_a,
const bf6x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [2/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const bf8x32_t & reg_a,
const int32_t & scale_a,
const bf8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

◆ Run() [3/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const bf8x32_t & reg_a,
const int32_t & scale_a,
const f8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

◆ Run() [4/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const f4x32_t & reg_a,
const int32_t scale_a,
const f4x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [5/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const f6x32_t & reg_a,
const int32_t scale_a,
const f6x32_t & reg_b,
const int32_t scale_b,
FloatC & reg_c )
inline static

◆ Run() [6/6]

template<index_t OpselA, index_t OpselB>
template<class FloatC>
__device__ void ck::intrin_mfma_scale_f32_32x32x64f8f6f4< 32, 32, OpselA, OpselB >::Run ( const f8x32_t & reg_a,
const int32_t & scale_a,
const f8x32_t & reg_b,
const int32_t & scale_b,
FloatC & reg_c )
inline static

The documentation for this struct was generated from the following file: