block_universal_gemm_as_aquant_bs_cr.hpp Source File
block_universal_gemm_as_aquant_bs_cr.hpp
Go to the documentation of this file.
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition tile_distribution_encoding.hpp:457
CK_TILE_DEVICE float amd_assembly_fp8_to_fp32(uint32_t src)
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:258
CK_TILE_DEVICE float amd_assembly_bf8_to_fp32(uint32_t src)
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:265
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
@ Interwave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:17
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
Definition block_universal_gemm_as_aquant_bs_cr.hpp:56
static constexpr auto b_warp_y_index_zeros
Definition block_universal_gemm_as_aquant_bs_cr.hpp:190
static constexpr auto c_warp_y_index_zeros
Definition block_universal_gemm_as_aquant_bs_cr.hpp:191
typename WarpGemm::BWarpDstr BWarpDstr
Definition block_universal_gemm_as_aquant_bs_cr.hpp:173
CK_TILE_DEVICE void LocalPrefetch(const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window)
Definition block_universal_gemm_as_aquant_bs_cr.hpp:563
static constexpr auto a_warp_y_index_zeros
Definition block_universal_gemm_as_aquant_bs_cr.hpp:189
typename WarpGemm::BWarpTensor BWarpTensor
Definition block_universal_gemm_as_aquant_bs_cr.hpp:177
static constexpr auto a_warp_y_lengths
Definition block_universal_gemm_as_aquant_bs_cr.hpp:182
remove_cvref_t< typename Traits::BDataType > BDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:154
static constexpr index_t APackedSize
Definition block_universal_gemm_as_aquant_bs_cr.hpp:193
static constexpr index_t MWarp
Definition block_universal_gemm_as_aquant_bs_cr.hpp:167
remove_cvref_t< InterleavedPKTypeLoader< ComputeDataType, UnaryOpSize_ > > Loader
Definition block_universal_gemm_as_aquant_bs_cr.hpp:160
remove_cvref_t< typename Traits::AQDataType > AQDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:153
BlockGemmAQuantBase< Problem_ > Base
Definition block_universal_gemm_as_aquant_bs_cr.hpp:158
static constexpr auto Scheduler
Definition block_universal_gemm_as_aquant_bs_cr.hpp:170
typename WarpGemm::AWarpTensor AWarpTensor
Definition block_universal_gemm_as_aquant_bs_cr.hpp:176
typename WarpGemm::CWarpTensor CWarpTensor
Definition block_universal_gemm_as_aquant_bs_cr.hpp:178
remove_cvref_t< typename Traits::ADataType > ADataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:152
typename WarpGemm::CWarpDstr CWarpDstr
Definition block_universal_gemm_as_aquant_bs_cr.hpp:174
static CK_TILE_DEVICE constexpr auto MakeBBlockDistributionEncode()
Definition block_universal_gemm_as_aquant_bs_cr.hpp:228
static constexpr index_t BPackedSize
Definition block_universal_gemm_as_aquant_bs_cr.hpp:195
remove_cvref_t< typename Traits::CDataType > CDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:156
static constexpr auto b_warp_y_lengths
Definition block_universal_gemm_as_aquant_bs_cr.hpp:184
static constexpr index_t NIterPerWarp
Definition block_universal_gemm_as_aquant_bs_cr.hpp:165
static constexpr index_t KIterPerWarp
Definition block_universal_gemm_as_aquant_bs_cr.hpp:163
static constexpr index_t NWarp
Definition block_universal_gemm_as_aquant_bs_cr.hpp:168
remove_cvref_t< typename Traits::ComputeDataType > ComputeDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:155
static CK_TILE_DEVICE constexpr auto MakeCBlockTile()
Definition block_universal_gemm_as_aquant_bs_cr.hpp:544
number< 1 > I1
Definition block_universal_gemm_as_aquant_bs_cr.hpp:199
remove_cvref_t< typename Traits::WarpGemm > WarpGemm
Definition block_universal_gemm_as_aquant_bs_cr.hpp:161
static constexpr index_t MIterPerWarp
Definition block_universal_gemm_as_aquant_bs_cr.hpp:164
static constexpr auto c_warp_y_lengths
Definition block_universal_gemm_as_aquant_bs_cr.hpp:186
CK_TILE_DEVICE void operator()(CBlockTensor &c_block_tensor, AQBlockTensor &aq_block_tensor, const ASmemBlockWindow &a_block_window, const BSmemBlockWindow &b_block_window)
Definition block_universal_gemm_as_aquant_bs_cr.hpp:574
typename WarpGemm::AWarpDstr AWarpDstr
Definition block_universal_gemm_as_aquant_bs_cr.hpp:172
static CK_TILE_DEVICE constexpr auto MakeABlockDistributionEncode()
Definition block_universal_gemm_as_aquant_bs_cr.hpp:201
GemmTraits_< Problem_, Policy_ > Traits
Definition block_universal_gemm_as_aquant_bs_cr.hpp:150
number< 0 > I0
Definition block_universal_gemm_as_aquant_bs_cr.hpp:198
Definition block_universal_gemm_as_aquant_bs_cr.hpp:17
remove_cvref_t< typename Problem::ComputeDataType > ComputeDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:19
static CK_TILE_DEVICE float cvt_scale_to_fp32(T scale)
Definition block_universal_gemm_as_aquant_bs_cr.hpp:22
remove_cvref_t< typename Problem::AQDataType > AQDataType
Definition block_universal_gemm_as_aquant_bs_cr.hpp:18
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/container/sequence.hpp:49
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192