BlockFmhaV3PipelineDefaultPolicy Struct Reference

BlockFmhaV3PipelineDefaultPolicy Struct Reference

Composable Kernel: ck_tile::BlockFmhaV3PipelineDefaultPolicy Struct Reference
ck_tile::BlockFmhaV3PipelineDefaultPolicy Struct Reference

#include <block_fmha_fwd_v3_pipeline_default_policy.hpp>

Static Public Member Functions

template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentQ ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto GetAlignmentK ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto GetAlignmentV ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentO ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetSmemKPackK ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetSmemVPackK ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeKDramTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeVDramTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeQRegTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeKRegTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakePRegTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeVRegTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto GetQKBlockGemm ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto GetPVBlockGemm ()
template<typename Problem, ck_tile::index_t IBuf = 0>
static CK_TILE_DEVICE constexpr auto MakeKLdsStoreBlockDescriptor (ck_tile::number< IBuf > = ck_tile::number< 0 >{})
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeKLdsLoadBlockDescriptor ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto GetSingleSmemElementSpaceSize ()
template<typename Problem, ck_tile::index_t IBuf = 0>
static CK_TILE_DEVICE constexpr auto MakeVLdsStoreBlockDescriptor (ck_tile::number< IBuf > = ck_tile::number< 0 >{})
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeVLdsLoadBlockDescriptor ()
template<typename Problem>
static CK_TILE_DEVICE constexpr ck_tile::index_t GetSmemSizeKV ()
template<typename Problem>
static CK_TILE_DEVICE constexpr ck_tile::index_t GetSmemSize ()

Static Public Attributes

static constexpr ck_tile::index_t NumWarpPerGroup = 4
static constexpr ck_tile::index_t NumThreadPerWarpGroup
static constexpr ck_tile::index_t kKLdsPadInBytes = 4 * 4
static constexpr ck_tile::index_t kVLdsPadInBytes = 4 * 16

Member Function Documentation

◆ GetAlignmentK()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetAlignmentK ( )
inline static constexpr

◆ GetAlignmentO()

template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetAlignmentO ( )
inline static constexpr

◆ GetAlignmentQ()

template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetAlignmentQ ( )
inline static constexpr

◆ GetAlignmentV()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetAlignmentV ( )
inline static constexpr

◆ GetPVBlockGemm()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetPVBlockGemm ( )
inline static constexpr

NOTICE: in order to use load_tile_transpose() later for V tiles, we have to pass WGAttrNumAccessEnum::Double instead of WGAttrNumAccessEnum::Single

◆ GetQKBlockGemm()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetQKBlockGemm ( )
inline static constexpr

NOTICE: in order to use load_tile_transpose() later for V tile, we cannot use WarpGemmMfmaF16F16F32M32N32K16SwizzleBTransposedCDistribution here

NOTICE: in order to use load_tile_transpose() later for V tile, we cannot use WarpGemmMfmaBf16Bf16F32M32N32K16SwizzleBTransposedCDistribution here

◆ GetSingleSmemElementSpaceSize()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetSingleSmemElementSpaceSize ( )
inline static constexpr

◆ GetSmemKPackK()

template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetSmemKPackK ( )
inline static constexpr

◆ GetSmemSize()

template<typename Problem>
CK_TILE_DEVICE constexpr ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetSmemSize ( )
inline static constexpr

◆ GetSmemSizeKV()

template<typename Problem>
CK_TILE_DEVICE constexpr ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetSmemSizeKV ( )
inline static constexpr

TODO: override GetSingleSmemElementSpaceSize() to align with MakeKLdsBlockDescriptor() & MakeVLdsBlockDescriptor()

◆ GetSmemVPackK()

template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::GetSmemVPackK ( )
inline static constexpr

◆ MakeKDramTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeKDramTileDistribution ( )
inline static constexpr

◆ MakeKLdsLoadBlockDescriptor()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeKLdsLoadBlockDescriptor ( )
inline static constexpr

◆ MakeKLdsStoreBlockDescriptor()

template<typename Problem, ck_tile::index_t IBuf = 0>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeKLdsStoreBlockDescriptor ( ck_tile::number< IBuf > = ck_tile::number<0>{})
inline static constexpr

◆ MakeKRegTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeKRegTileDistribution ( )
inline static constexpr

◆ MakePRegTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakePRegTileDistribution ( )
inline static constexpr

◆ MakeQRegTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeQRegTileDistribution ( )
inline static constexpr

◆ MakeVDramTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeVDramTileDistribution ( )
inline static constexpr

◆ MakeVLdsLoadBlockDescriptor()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeVLdsLoadBlockDescriptor ( )
inline static constexpr

FIXME: rename kNPerBlock & kKPerBlock, since kN1 is the contiguous dimension

◆ MakeVLdsStoreBlockDescriptor()

template<typename Problem, ck_tile::index_t IBuf = 0>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeVLdsStoreBlockDescriptor ( ck_tile::number< IBuf > = ck_tile::number<0>{})
inline static constexpr

FIXME: rename kNPerBlock & kKPerBlock, since kN1 is the contiguous dimension

◆ MakeVRegTileDistribution()

template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaV3PipelineDefaultPolicy::MakeVRegTileDistribution ( )
inline static constexpr

Member Data Documentation

◆ kKLdsPadInBytes

ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::kKLdsPadInBytes = 4 * 4
static constexpr

◆ kVLdsPadInBytes

ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::kVLdsPadInBytes = 4 * 16
static constexpr

◆ NumThreadPerWarpGroup

ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::NumThreadPerWarpGroup
static constexpr
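Initial value:
= NumWarpPerGroup * ck_tile::get_warp_size()
(The expression above is reconstructed from the two symbols this initializer references; the extracted page kept only their tooltips. Confirm against block_fmha_fwd_v3_pipeline_default_policy.hpp.)
Referenced symbols:
CK_TILE_HOST_DEVICE constexpr index_t get_warp_size() (Definition: arch.hpp:63)
static constexpr ck_tile::index_t NumWarpPerGroup (Definition: block_fmha_fwd_v3_pipeline_default_policy.hpp:16)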

◆ NumWarpPerGroup

ck_tile::index_t ck_tile::BlockFmhaV3PipelineDefaultPolicy::NumWarpPerGroup = 4
static constexpr

The documentation for this struct was generated from the following file: