device_batched_gemm_multiple_d_xdl_cshuffle_v3.hpp File Reference
device_batched_gemm_multiple_d_xdl_cshuffle_v3.hpp File Reference
#include <iostream>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/flush_cache.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | ck |
| namespace | ck::tensor_operation |
| namespace | ck::tensor_operation::device |
Functions | |
| template<typename GridwiseGemm, typename BatchedGemmArg, bool HasMainKBlockLoop, InMemoryDataOperationEnum CGlobalMemoryDataOperation, index_t MinimumOccupancy = 1, TailNumber TailNum = TailNumber::Full> | |
| __global__ void | ck::kernel_batched_gemm_xdl_cshuffle_v3_multi_d (BatchedGemmArg karg) |
| template<typename GridwiseGemm, typename BatchedGemmArg, bool HasMainKBlockLoop, InMemoryDataOperationEnum CGlobalMemoryDataOperation, index_t MinimumOccupancy = 1, TailNumber TailNum = TailNumber::Full> | |
| __global__ void | ck::kernel_batched_gemm_xdl_cshuffle_v3_multi_d_2lds (BatchedGemmArg karg) |