1#include <muda/type_traits/type_label.h>
2#include <muda/launch/memory.h>
5#include <muda/buffer/buffer_2d_view.h>
6#include <muda/buffer/buffer_3d_view.h>
7#include <muda/buffer/buffer_info_accessor.h>
9namespace muda::details::buffer
// Single-object copy construction on the device: the launch is configured as
// ParallelFor(1, 1, 0, stream) and the device lambda placement-news one T at
// dst.data(), copy-constructed from *src.data().
// NOTE(review): the template header, the dst/src parameter declarations, the
// call that receives the lambda, and the braces are not visible in this
// excerpt — confirm against the full file.
13MUDA_INLINE MUDA_HOST
void kernel_copy_construct(cudaStream_t stream,
17 ParallelFor(1, 1, 0, stream)
// dst and src are captured by value; the index argument i is not used in the body.
19 [dst, src] __device__(
int i)
mutable
20 {
new(dst.data()) T(*src.data()); });
// Copy construction through a device lambda, launched with
// ParallelFor(grid_dim, block_dim, 0, stream). For each index i the lambda
// placement-news a T at dst.data(i), copy-constructed from *src.data(i).
// NOTE(review): the remaining parameters, the element count passed to the
// launch, and the braces are not visible in this excerpt — confirm against the
// full file.
24MUDA_INLINE MUDA_HOST
void kernel_copy_construct_non_trivial(
int grid_dim,
30 ParallelFor(grid_dim, block_dim, 0, stream)
// dst and src are captured by value into the device lambda.
32 [dst, src] __device__(
int i)
mutable
33 {
new(dst.data(i)) T(*src.data(i)); });
// Dispatcher for copy construction, selected at compile time on
// muda::is_trivially_copy_constructible_v<T>.
// NOTE(review): the parameter list after grid_dim, the braces and the `else`
// keyword are not visible in this excerpt; the last call is presumably the
// non-trivial branch — confirm against the full file.
38MUDA_INLINE MUDA_HOST
void kernel_copy_construct(
int grid_dim,
44 if constexpr(muda::is_trivially_copy_constructible_v<T>)
// Trivially copy-constructible T: hand the copy to Memory(stream).transfer,
// with dst.size() * sizeof(T) as the size argument.
47 Memory(stream).transfer(dst.data(), src.data(), dst.size() *
sizeof(T));
// Otherwise forward all arguments to the per-element copy-constructing helper.
51 kernel_copy_construct_non_trivial(grid_dim, block_dim, stream, dst, src);
// 2D-view overload (src is a CBuffer2DView<T>&): launches
// ParallelFor(grid_dim, block_dim, 0, stream).apply over dst.total_size()
// indices; for each index i the lambda placement-news a T at dst.data(i),
// copy-constructed from *src.data(i).
// NOTE(review): the template header, the block_dim/stream/dst parameter
// declarations and the braces are not visible in this excerpt — confirm
// against the full file.
56MUDA_INLINE MUDA_HOST
void kernel_copy_construct_non_trivial(
int grid_dim,
60 CBuffer2DView<T>& src)
62 ParallelFor(grid_dim, block_dim, 0, stream)
// The iteration count comes from the destination view, not the source.
63 .apply(dst.total_size(),
// dst and src are captured by value into the device lambda.
64 [dst, src] __device__(
int i)
mutable
65 {
new(dst.data(i)) T(*src.data(i)); });
// Dispatcher for copy construction, selected at compile time on
// muda::is_trivially_copy_constructible_v<T>. The trivial path fills a
// cudaMemcpy3DParms and passes it to Memory(stream).transfer.
// NOTE(review): the parameter list after grid_dim, the braces, the `else`
// keyword and the left-hand sides of the two cuda_pitched_ptr assignments are
// not visible in this excerpt — confirm against the full file.
70MUDA_INLINE MUDA_HOST
void kernel_copy_construct(
int grid_dim,
76 if constexpr(muda::is_trivially_copy_constructible_v<T>)
// Start from an all-zero parameter struct so unset fields are zero.
79 cudaMemcpy3DParms parms = {0};
// Pitched pointer obtained from src; presumably assigned to parms.srcPtr
// (the assignment target is on a line missing from this excerpt).
81 details::buffer::BufferInfoAccessor::template cuda_pitched_ptr(src);
82 parms.srcPos = src.offset().template cuda_pos<T>();
// Pitched pointer obtained from dst; presumably assigned to parms.dstPtr
// (the assignment target is on a line missing from this excerpt).
84 details::buffer::BufferInfoAccessor::template cuda_pitched_ptr(dst);
// The copied extent is taken from the destination view.
85 parms.extent = dst.extent().template cuda_extent<T>();
86 parms.dstPos = dst.offset().template cuda_pos<T>();
88 Memory(stream).transfer(parms);
// Otherwise forward all arguments to the per-element copy-constructing helper.
93 kernel_copy_construct_non_trivial(grid_dim, block_dim, stream, dst, src);
// 3D-view overload (dst is a Buffer3DView<T>&, src is a CBuffer3DView<T>&):
// launches ParallelFor(grid_dim, block_dim, 0, stream).apply over
// dst.total_size() indices; for each index i the lambda placement-news a T at
// dst.data(i), copy-constructed from *src.data(i).
// NOTE(review): the template header, the block_dim/stream parameter
// declarations and the braces are not visible in this excerpt — confirm
// against the full file.
98MUDA_INLINE MUDA_HOST
void kernel_copy_construct_non_trivial(
int grid_dim,
101 Buffer3DView<T>& dst,
102 CBuffer3DView<T>& src)
104 ParallelFor(grid_dim, block_dim, 0, stream)
// The iteration count comes from the destination view, not the source.
105 .apply(dst.total_size(),
// dst and src are captured by value into the device lambda.
106 [dst, src] __device__(
int i)
mutable
107 {
new(dst.data(i)) T(*src.data(i)); });
// Dispatcher for copy construction from a CBuffer3DView<T> (taken by value),
// selected at compile time on muda::is_trivially_copy_constructible_v<T>.
// The trivial path fills a cudaMemcpy3DParms and passes it to
// Memory(stream).transfer.
// NOTE(review): the block_dim/stream/dst parameter declarations, the braces,
// the `else` keyword and the left-hand sides of the two cuda_pitched_ptr
// assignments are not visible in this excerpt — confirm against the full file.
112MUDA_INLINE MUDA_HOST
void kernel_copy_construct(
int grid_dim,
116 CBuffer3DView<T> src)
118 if constexpr(muda::is_trivially_copy_constructible_v<T>)
// Start from an all-zero parameter struct so unset fields are zero.
121 cudaMemcpy3DParms parms = {0};
// Pitched pointer obtained from src; presumably assigned to parms.srcPtr
// (the assignment target is on a line missing from this excerpt).
123 details::buffer::BufferInfoAccessor::template cuda_pitched_ptr(src);
124 parms.srcPos = src.offset().template cuda_pos<T>();
// Pitched pointer obtained from dst; presumably assigned to parms.dstPtr
// (the assignment target is on a line missing from this excerpt).
126 details::buffer::BufferInfoAccessor::template cuda_pitched_ptr(dst);
// The copied extent is taken from the destination view.
127 parms.extent = dst.extent().template cuda_extent<T>();
128 parms.dstPos = dst.offset().template cuda_pos<T>();
130 Memory(stream).transfer(parms);
// Otherwise forward all arguments to the per-element copy-constructing helper.
135 kernel_copy_construct_non_trivial(grid_dim, block_dim, stream, dst, src);
A view interface for any array-like linear memory, which can be constructed from DeviceBuffer/DeviceVe...
A frequently used parallel for loop; the DynamicBlockDim and GridStrideLoop strategies are provided,...