thread_group_tensor_slice_transfer_global.hpp Source File
thread_group_tensor_slice_transfer_global.hpp
Go to the documentation of this file.
Definition ck.hpp:268
decltype(ck::declval< T & >().is_pack8_invocable) is_pack8_invocable_t
Definition is_detected.hpp:43
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
__host__ __device__ constexpr auto container_push_back(const Array< TData, NSize > &a, const TData &x)
Definition utility/container_helper.hpp:18
decltype(ck::declval< T & >().is_pack4_invocable) is_pack4_invocable_t
Definition is_detected.hpp:40
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:560
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition is_detected.hpp:34
__host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence< Is... >)
Definition utility/container_helper.hpp:380
__host__ __device__ constexpr auto generate_sequence_v2(F &&f, Number< N >)
Definition sequence_helper.hpp:25
__host__ __device__ constexpr auto container_reorder_given_old2new(const Array< TData, NSize > &old_array, Sequence< IRs... > old2new)
Definition utility/container_helper.hpp:54
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
decltype(ck::declval< T & >().is_pack2_invocable) is_pack2_invocable_t
Definition is_detected.hpp:37
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
__host__ __device__ constexpr auto container_reorder_given_new2old(const Array< TData, NSize > &old_array, Sequence< IRs... >)
Definition utility/container_helper.hpp:43
typename vector_type_maker< T, N >::type vector_type_maker_t
Definition dtype_vector.hpp:54
decltype(make_tensor_coordinate(DstDesc{}, Index{})) DstCoord
Definition thread_group_tensor_slice_transfer_global.hpp:38
__device__ ThreadGroupTransferGlobal(const SrcDesc &src_desc, const DstDesc &dst_desc, const Index &src_block_slice_origin, const Index &dst_block_slice_origin, const ElementwiseOperation &element_op)
Definition thread_group_tensor_slice_transfer_global.hpp:40
static constexpr auto I6
Definition thread_group_tensor_slice_transfer_global.hpp:33
static constexpr auto I1
Definition thread_group_tensor_slice_transfer_global.hpp:28
static constexpr index_t nDim
Definition thread_group_tensor_slice_transfer_global.hpp:35
__device__ void RunRead(const SrcDesc &src_desc, const GridBufferType &grid_buf)
Definition thread_group_tensor_slice_transfer_global.hpp:52
static constexpr auto I2
Definition thread_group_tensor_slice_transfer_global.hpp:29
static constexpr auto I0
Definition thread_group_tensor_slice_transfer_global.hpp:27
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &step)
Definition thread_group_tensor_slice_transfer_global.hpp:376
static constexpr auto I4
Definition thread_group_tensor_slice_transfer_global.hpp:31
static constexpr auto I3
Definition thread_group_tensor_slice_transfer_global.hpp:30
static constexpr auto I5
Definition thread_group_tensor_slice_transfer_global.hpp:32
MultiIndex< nDim > Index
Definition thread_group_tensor_slice_transfer_global.hpp:36
__device__ void RunWrite(const DstDesc &dst_desc, BlockBufferType &dst_buf)
Definition thread_group_tensor_slice_transfer_global.hpp:240
decltype(make_tensor_coordinate(SrcDesc{}, Index{})) SrcCoord
Definition thread_group_tensor_slice_transfer_global.hpp:37
Definition functional2.hpp:33
Definition functional3.hpp:97
Definition dtype_vector.hpp:30
Definition dtype_vector.hpp:10