BlockFmhaBwdPipelineDefaultPolicy Struct Reference
ck_tile::BlockFmhaBwdPipelineDefaultPolicy Struct Reference
#include <block_fmha_bwd_pipeline_default_policy.hpp>
Classes | |
| struct | HotLoopScheduler |
Static Public Member Functions | |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetQKBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_DEVICE constexpr auto | GetPTOGradTBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetOGradVBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSGradTQTBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSGradKTBlockGemm () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentQ () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentK () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentV () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentO () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentOGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentBias () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentKGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentVGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetTransposedAlignmentQ () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetTransposedAlignmentK () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetTransposedAlignmentOGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetTransposedAlignmentBias () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentPostQGradAcc () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetAlignmentPostQGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeKDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeVDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeQDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeOGradDramTileDistribution () |
| template<typename Problem, typename BlockGemm> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeLSEDDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeBiasTileDistribution () |
| template<typename DataType, index_t MPerBlock, index_t KPerBlock> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePreXDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePreODramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePreOGradDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePostQGradAccDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePostQGradDramTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackQ () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackQT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackK () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackKT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackV () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackBias () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackBiasT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackOGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackOGradT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemKPackSGrad () |
| template<index_t KIter, index_t MNPerBlock, index_t KPerSubBlock, index_t KPack> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeXLdsBlockDescriptor () |
| template<index_t MNPerBlock, index_t KPerBlock, index_t KPack> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeXLdsBlockDescriptor () |
| template<typename Problem, index_t MNPerBlock, index_t KPerBlock, index_t KPack, index_t KPackT> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeXTLdsBlockDescriptor () |
| template<typename Problem, index_t MNIter, index_t MNPerSubBlock, index_t KPerBlock, index_t KPack, index_t KPackT> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeXTLdsBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeKLdsWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeKRegBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeVLdsWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeVRegBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledKRegWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledKLdsWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeKTLdsReadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeKTRegBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeQLdsBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeQRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledQRegWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledQLdsWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeQTLdsReadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeQTRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeSGradTRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeLSEDLdsWriteBlockDescriptor () |
| template<typename Problem, typename BlockGemm> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeLSEDLdsReadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeOGradLdsBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeOGradRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledOGradRegWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledOGradLdsWriteBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeOGradTLdsReadBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeOGradTRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakePTRegSliceBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeSGradLdsBlockDescriptor () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeSGradRegSliceBlockDescriptor () |
| template<typename Problem, typename PTOutTensor, typename PInTensor> | |
| static CK_TILE_DEVICE constexpr void | PTFromGemm0CToGemm1A (PTOutTensor &pt_out, const PInTensor &p_in) |
| template<typename Problem, typename SGradTOutTensor, typename SGradInTensor> | |
| static CK_TILE_DEVICE constexpr void | SGradTFromGemm2CToGemm3A (SGradTOutTensor &dst_out, const SGradInTensor &ds_in) |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeShuffledBiasTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeBiasLdsBlockDescriptor () |
| template<typename BlockGemm> | |
| static CK_TILE_HOST_DEVICE constexpr auto | MakeBiasSTileDistribution () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeQ () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeQT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeK () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeKT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeLSE () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeD () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeV () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeOGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeOGradT () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeSGrad () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSizeBias () |
| template<typename Problem> | |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
Static Public Attributes | |
| template<index_t ndim> | |
| static constexpr auto | swap_last2 |
Member Function Documentation
◆ GetAlignmentBias()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentK()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentKGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentO()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentOGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentPostQGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentPostQGradAcc()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentQ()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentV()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetAlignmentVGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetOGradVBlockGemm()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetPTOGradTBlockGemm()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetQKBlockGemm()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSGradKTBlockGemm()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSGradTQTBlockGemm()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackBias()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackBiasT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackK()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackKT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackOGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackOGradT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackQ()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackQT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackSGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemKPackV()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSize()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeBias()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeD()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeK()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeKT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeLSE()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeOGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeOGradT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeQ()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeQT()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeSGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetSmemSizeV()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetTransposedAlignmentBias()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetTransposedAlignmentK()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetTransposedAlignmentOGrad()
template<typename Problem>
|
inlinestaticconstexpr |
◆ GetTransposedAlignmentQ()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeBiasLdsBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeBiasSTileDistribution()
template<typename BlockGemm>
|
inlinestaticconstexpr |
◆ MakeBiasTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeKDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeKLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeKRegBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeKTLdsReadBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeKTRegBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeLSEDDramTileDistribution()
template<typename Problem, typename BlockGemm>
|
inlinestaticconstexpr |
◆ MakeLSEDLdsReadBlockDescriptor()
template<typename Problem, typename BlockGemm>
|
inlinestaticconstexpr |
◆ MakeLSEDLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeOGradDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeOGradLdsBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeOGradRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeOGradTLdsReadBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeOGradTRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakePostQGradAccDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakePostQGradDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakePreODramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakePreOGradDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakePreXDramTileDistribution()
template<typename DataType, index_t MPerBlock, index_t KPerBlock>
|
inlinestaticconstexpr |
◆ MakePTRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeQDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeQLdsBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeQRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeQTLdsReadBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeQTRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeSGradLdsBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeSGradRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeSGradTRegSliceBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledBiasTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledKLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledKRegWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledOGradLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledOGradRegWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledQLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeShuffledQRegWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeVDramTileDistribution()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeVLdsWriteBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeVRegBlockDescriptor()
template<typename Problem>
|
inlinestaticconstexpr |
◆ MakeXLdsBlockDescriptor() [1/2]
|
inlinestaticconstexpr |
◆ MakeXLdsBlockDescriptor() [2/2]
|
inlinestaticconstexpr |
◆ MakeXTLdsBlockDescriptor() [1/2]
template<typename Problem, index_t MNIter, index_t MNPerSubBlock, index_t KPerBlock, index_t KPack, index_t KPackT>
|
inlinestaticconstexpr |
◆ MakeXTLdsBlockDescriptor() [2/2]
template<typename Problem, index_t MNPerBlock, index_t KPerBlock, index_t KPack, index_t KPackT>
|
inlinestaticconstexpr |
◆ PTFromGemm0CToGemm1A()
template<typename Problem, typename PTOutTensor, typename PInTensor>
|
inlinestaticconstexpr |
◆ SGradTFromGemm2CToGemm3A()
template<typename Problem, typename SGradTOutTensor, typename SGradInTensor>
|
inlinestaticconstexpr |
Member Data Documentation
◆ swap_last2
template<index_t ndim>
|
staticconstexpr |
Initial value:
= generate_sequence_v2(
    [](auto i) {
        return number<i == ndim - 2 ? ndim - 1 : i == ndim - 1 ? ndim - 2 : i>{};
    },
    number<ndim>{})
CK_TILE_HOST_DEVICE constexpr auto generate_sequence_v2(F &&f, number< N >)
Definition tile/core/container/sequence.hpp:1045
The documentation for this struct was generated from the following file: