F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference
#include <mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp>
Inheritance diagram for ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >:
Public Types | |
| using | Underlying = FlatmmPipelineAGmemBGmemCRegV1<Problem, PipelinePolicy> |
| using | ADataType = remove_cvref_t<typename Problem::ADataType> |
| using | BDataType = remove_cvref_t<typename Problem::QuantType> |
| using | CDataType = remove_cvref_t<typename Problem::CDataType> |
| using | BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
| using | ComputeType = ADataType |
| using | ALayout = remove_cvref_t<typename Problem::ALayout> |
| using | BLayout = remove_cvref_t<typename Problem::BLayout> |
| using | CLayout = remove_cvref_t<typename Problem::CLayout> |
| using | BlockFlatmm |
| using | WG = remove_cvref_t<decltype(config.template at<0>())> |
| using | BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile> |
| using | BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps> |
| using | WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile> |
| Public Types inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, F16xMXF4FlatmmPipelineAgBgCrPolicy > | |
| using | ADataType |
| using | BDataType |
| using | CDataType |
| using | BlockGemmShape |
| using | ALayout |
| using | BLayout |
| using | CLayout |
| using | BlockFlatmm |
| using | WG |
| using | BlockTile |
| using | BlockWarps |
| using | WarpTile |
Public Member Functions | |
| template<typename ADramBlockWindowTmp, typename AElementFunction, typename BFlatBlockWindowTmp, typename DequantBFlatWindow> | |
| CK_TILE_HOST_DEVICE auto | operator() (ADramBlockWindowTmp a_copy_dram_window_, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, const index_t k_padded_zeros, void *p_smem_ping, void *p_smem_pong) const |
| template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename DequantBFlatWindow> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, const index_t k_padded_zeros, void *p_smem_ping, void *p_smem_pong) const |
| template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename DequantBFlatWindow> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, void *p_smem_ping, void *p_smem_pong) const |
| Public Member Functions inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, F16xMXF4FlatmmPipelineAgBgCrPolicy > | |
| CK_TILE_HOST_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const |
Static Public Attributes | |
| static constexpr auto | config |
| static constexpr index_t | DsWritePreIssue = 3 |
| static constexpr index_t | DsReadPreload = 2 |
| static constexpr index_t | BlockSize = Problem::kBlockSize |
| static constexpr index_t | WaveSize = get_warp_size() |
| static constexpr index_t | kMPerBlock = BlockGemmShape::kM |
| static constexpr index_t | kNPerBlock = BlockGemmShape::kN |
| static constexpr index_t | kKPerBlock = BlockGemmShape::kK |
| static constexpr index_t | flatKPerWarp = Problem::flatKPerWarp |
| static constexpr index_t | flatNPerWarp = Problem::flatNPerWarp |
| static constexpr bool | kPadM = Problem::kPadM |
| static constexpr bool | kPadN = Problem::kPadN |
| static constexpr bool | kPadK = Problem::kPadK |
| static constexpr index_t | kLdsAlignmentInBytes = 16 |
| static constexpr index_t | NumWaveGroups = Problem::NumWaveGroups |
| static constexpr bool | UsePersistentKernel = Problem::Traits::UsePersistentKernel |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | idxM = I0 |
| static constexpr auto | idxN = I1 |
| static constexpr auto | idxK = I2 |
| static constexpr index_t | MWarp = config.template at<1>() |
| static constexpr index_t | NWarp = config.template at<2>() |
| static constexpr index_t | MIterPerWarp = kMPerBlock / (MWarp * WG::kM) |
| static constexpr index_t | NIterPerWarp = kNPerBlock / (NWarp * WG::kN) |
| static constexpr index_t | KIterPerWarp = kKPerBlock / WG::kK |
| static constexpr index_t | KFlatPerBlockPerIter = flatKPerWarp |
| static constexpr index_t | NFlatPerBlockPerIter = flatNPerWarp |
| static constexpr index_t | MPerBlockPerIter = kMPerBlock / MIterPerWarp |
| static constexpr index_t | KPerBlockPerIter = kKPerBlock / KIterPerWarp |
| static constexpr int | MXFP4PackedSize = 2 |
| static constexpr index_t | AK1 = Problem::VectorLoadSize / sizeof(ADataType) |
| static constexpr index_t | BK1 = Problem::VectorLoadSize / sizeof(BDataType) * MXFP4PackedSize |
| static constexpr index_t | m_preload |
| static constexpr int | ContinuousKPerThread = Problem::ContinuousKPerThread |
| static constexpr int | ContinuousScaleNPerThread = Problem::ContinuousScaleNPerThread |
| static constexpr int | ContinuousScaleKPerThread = Problem::ContinuousScaleKPerThread |
| static constexpr int | ScaleKFlatPerWarp |
| static constexpr int | XDLK_PerThread |
| static constexpr int | XDL_PerWeightK = 4 |
| static constexpr int | XDL_PerScaleK = XDL_PerWeightK * ContinuousScaleKPerThread |
| static constexpr int | XDL_PerScaleN = ContinuousScaleNPerThread |
| static constexpr int | MXFP4KPerWarp = KIterPerWarp / XDL_PerWeightK |
| static constexpr int | ScaleKPerWarp = KIterPerWarp / XDL_PerScaleK |
| static constexpr int | ScaleNPerWarp = NIterPerWarp / XDL_PerScaleN |
| static constexpr int | MXFP4K_PerScaleK = MXFP4KPerWarp / ScaleKPerWarp |
| static constexpr bool | HasHotLoop = Problem::HasHotLoop |
| static constexpr auto | TailNum = Problem::TailNum |
| static constexpr index_t | mfma_per_wg = 1 |
| static constexpr index_t | dsread_per_wg |
| static constexpr index_t | dsread_num_perK = dsread_per_wg * MIterPerWarp |
| static constexpr index_t | dswrite_num_perK = dsread_num_perK / (MWarp * NWarp) |
| static constexpr index_t | dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp |
| static constexpr index_t | Aload_num_perK = dswrite_num_perK |
| static constexpr index_t | Aload_rep = dswrite_rep |
| static constexpr index_t | Bload_num_perK = kNPerBlock * WG::kK / NWarp / BK1 / WaveSize |
| static constexpr index_t | ScaleBload_K1 = ContinuousScaleNPerThread * ContinuousScaleKPerThread |
| static constexpr index_t | ScaleBload_num |
| static constexpr index_t | Bload_total_num |
| static constexpr index_t | KPerScaleLoad = KIterPerWarp / ScaleBload_num |
| static constexpr index_t | HalfMIter = (MIterPerWarp + 1) / 2 |
| static constexpr index_t | Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter |
| static constexpr index_t | mfma_perM_perK = NIterPerWarp * mfma_per_wg |
| static constexpr index_t | dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp |
| static constexpr index_t | dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp |
| static constexpr bool | DoubleSmemBuffer = false |
| Static Public Attributes inherited from ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, F16xMXF4FlatmmPipelineAgBgCrPolicy > | |
| static constexpr auto | config |
| static constexpr index_t | DsWritePreIssue |
| static constexpr index_t | DsReadPreload |
| static constexpr index_t | BlockSize |
| static constexpr index_t | WaveSize |
| static constexpr index_t | kMPerBlock |
| static constexpr index_t | kNPerBlock |
| static constexpr index_t | kKPerBlock |
| static constexpr index_t | flatKPerWarp |
| static constexpr index_t | flatNPerWarp |
| static constexpr bool | kPadM |
| static constexpr bool | kPadN |
| static constexpr bool | kPadK |
| static constexpr index_t | kLdsAlignmentInBytes |
| static constexpr index_t | NumWaveGroups |
| static constexpr bool | UsePersistentKernel |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr auto | I2 |
| static constexpr auto | idxM |
| static constexpr auto | idxN |
| static constexpr auto | idxK |
| static constexpr index_t | MWarp |
| static constexpr index_t | NWarp |
| static constexpr index_t | MIterPerWarp |
| static constexpr index_t | NIterPerWarp |
| static constexpr index_t | KIterPerWarp |
| static constexpr index_t | KFlatPerBlockPerIter |
| static constexpr index_t | NFlatPerBlockPerIter |
| static constexpr index_t | MPerBlockPerIter |
| static constexpr index_t | KPerBlockPerIter |
| static constexpr index_t | K1 |
| static constexpr index_t | m_preload |
| static constexpr bool | HasHotLoop |
| static constexpr auto | TailNum |
| static constexpr index_t | mfma_per_wg |
| static constexpr index_t | dsread_per_wg |
| static constexpr index_t | dsread_num_perK |
| static constexpr index_t | dswrite_num_perK |
| static constexpr index_t | dswrite_rep |
| static constexpr index_t | Aload_num_perK |
| static constexpr index_t | Aload_rep |
| static constexpr index_t | Bload_num_perK |
| static constexpr index_t | HalfMIter |
| static constexpr index_t | Bload_rep |
| static constexpr index_t | mfma_perM_perK |
| static constexpr index_t | dswrite_mIter |
| static constexpr index_t | dswrite_kIter |
| static constexpr bool | DoubleSmemBuffer |
Member Typedef Documentation
◆ ADataType
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ADataType = remove_cvref_t<typename Problem::ADataType> |
◆ ALayout
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ALayout = remove_cvref_t<typename Problem::ALayout> |
◆ BDataType
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BDataType = remove_cvref_t<typename Problem::QuantType> |
◆ BLayout
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BLayout = remove_cvref_t<typename Problem::BLayout> |
◆ BlockFlatmm
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockFlatmm |
Initial value: (expression missing from this extract; the symbol linked from it is listed below)
- remove_cv_t< std::remove_reference_t< T > > remove_cvref_t (definition: type_traits.hpp:21)
◆ BlockGemmShape
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
◆ BlockTile
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile> |
◆ BlockWarps
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps> |
◆ CDataType
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CDataType = remove_cvref_t<typename Problem::CDataType> |
◆ CLayout
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CLayout = remove_cvref_t<typename Problem::CLayout> |
◆ ComputeType
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ComputeType = ADataType |
◆ Underlying
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Underlying = FlatmmPipelineAGmemBGmemCRegV1<Problem, PipelinePolicy> |
◆ WarpTile
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile> |
◆ WG
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
| using ck_tile::F16xMXF4FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WG = remove_cvref_t<decltype(config.template at<0>())> |
Member Function Documentation
◆ GetADramTileDistribution()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetVectorSizeA()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetVectorSizeB()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetVectorSizeC()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ HotLoopScheduler()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ Last2ndHotLoopScheduler()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ LastHotLoopScheduler()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ operator()() [1/3]
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename AElementFunction, typename BFlatBlockWindowTmp, typename DequantBFlatWindow>
|
inline |
◆ operator()() [2/3]
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename DequantBFlatWindow>
|
inline |
◆ operator()() [3/3]
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename DequantBFlatWindow>
|
inline |
◆ SchedulerPerM()
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
inline static constexpr |
Member Data Documentation
◆ AK1
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Aload_num_perK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Aload_rep
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ BK1
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Bload_num_perK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Bload_rep
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Bload_total_num
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- static constexpr index_t KIterPerWarp (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:113)
- static constexpr index_t ScaleBload_num (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:170)
- static constexpr index_t Bload_num_perK (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:168)
◆ BlockSize
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ config
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value:
= BlockFlatmm::BlockPolicy::template GetWarpGemmMWarpNWarp<Problem>()
◆ ContinuousKPerThread
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ ContinuousScaleKPerThread
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ ContinuousScaleNPerThread
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ DoubleSmemBuffer
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dsread_num_perK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dsread_per_wg
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- remove_cvref_t< typename Problem::ADataType > ADataType (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:48)
- static constexpr index_t WaveSize (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69)
◆ DsReadPreload
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dswrite_kIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dswrite_mIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dswrite_num_perK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ dswrite_rep
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ DsWritePreIssue
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ flatKPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ flatNPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ HalfMIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ HasHotLoop
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ I0
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ I1
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ I2
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ idxK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ idxM
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ idxN
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ KFlatPerBlockPerIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ KIterPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kKPerBlock
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kLdsAlignmentInBytes
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kMPerBlock
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kNPerBlock
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadM
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadN
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ KPerBlockPerIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ KPerScaleLoad
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ m_preload
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- static constexpr index_t MIterPerWarp (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103)
- static constexpr index_t KIterPerWarp (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105)
- static constexpr index_t DsReadPreload (definition: flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66)
◆ mfma_per_wg
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ mfma_perM_perK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MIterPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MPerBlockPerIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MXFP4K_PerScaleK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MXFP4KPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MXFP4PackedSize
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NFlatPerBlockPerIter
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NIterPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NumWaveGroups
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ ScaleBload_K1
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ ScaleBload_num
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- static constexpr index_t kNPerBlock (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:80)
- static constexpr index_t WaveSize (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:77)
- static constexpr index_t ScaleBload_K1 (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:169)
- static constexpr index_t NWarp (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:109)
- static constexpr index_t kKPerBlock (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:81)
◆ ScaleKFlatPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- static constexpr int ContinuousScaleNPerThread (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:129)
- static constexpr int ContinuousScaleKPerThread (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:130)
◆ ScaleKPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ ScaleNPerWarp
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ TailNum
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ UsePersistentKernel
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ WaveSize
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ XDL_PerScaleK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ XDL_PerScaleN
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ XDL_PerWeightK
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
◆ XDLK_PerThread
template<typename Problem, typename PipelinePolicy = F16xMXF4FlatmmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value: (expression missing from this extract; the symbols linked from it are listed below)
- static constexpr auto I1 (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:99)
- static constexpr auto I2 (definition: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:100)
The documentation for this struct was generated from the following file: mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp