MUDA
Loading...
Searching...
No Matches
kernel_assign.inl
1#include <muda/type_traits/type_label.h>
4#include <muda/buffer/buffer_2d_view.h>
5#include <muda/buffer/buffer_3d_view.h>
6
7namespace muda::details::buffer
8{
// Assign 0D: copy the single value referenced by `src` into `dst`.
//
// Builds ParallelFor(1, 1, 0, stream) and applies the copy lambda with a
// count of 1. The lambda ignores its index argument and writes *src.data()
// to *dst.data().
// NOTE(review): the third ParallelFor argument (0) is presumably the dynamic
// shared-memory size -- confirm against ParallelFor's constructor.
template <typename T>
MUDA_INLINE MUDA_HOST void kernel_assign(cudaStream_t stream, VarView<T> dst, CVarView<T> src)
{
    ParallelFor(1, 1, 0, stream)
        .apply(1,
               [dst, src] __device__(int /*unused*/) mutable
               {
                   auto to   = dst.data();
                   auto from = src.data();
                   *to       = *from;
               });
}
18
// Assign 1D: element-wise copy from `src` into `dst`.
//
// Builds ParallelFor(grid_dim, block_dim, 0, stream) and applies the copy
// lambda over dst.size() indices; index `idx` writes *src.data(idx) to
// *dst.data(idx).
// The element count is taken from `dst` only; `src` is not size-checked here,
// so the caller must make sure it covers at least dst.size() elements.
template <typename T>
MUDA_INLINE MUDA_HOST void kernel_assign(int            grid_dim,
                                         int            block_dim,
                                         cudaStream_t   stream,
                                         BufferView<T>  dst,
                                         CBufferView<T> src)
{
    const auto element_count = dst.size();

    ParallelFor(grid_dim, block_dim, 0, stream)
        .apply(element_count,
               [dst, src] __device__(int idx) mutable
               {
                   auto to   = dst.data(idx);
                   auto from = src.data(idx);
                   *to       = *from;
               });
}
32
// Assign 2D: element-wise copy from `src` into `dst`.
//
// Builds ParallelFor(grid_dim, block_dim, 0, stream) and applies the copy
// lambda over dst.total_size() indices; each index is passed unchanged to
// data(idx) on both views.
// NOTE(review): data(idx) presumably maps the flat index onto the view's 2D
// layout -- confirm against Buffer2DView.
// The element count is taken from `dst` only; `src` is not size-checked here.
template <typename T>
MUDA_INLINE MUDA_HOST void kernel_assign(int              grid_dim,
                                         int              block_dim,
                                         cudaStream_t     stream,
                                         Buffer2DView<T>  dst,
                                         CBuffer2DView<T> src)
{
    const auto element_count = dst.total_size();

    ParallelFor(grid_dim, block_dim, 0, stream)
        .apply(element_count,
               [dst, src] __device__(int idx) mutable
               {
                   auto to   = dst.data(idx);
                   auto from = src.data(idx);
                   *to       = *from;
               });
}
46
// Assign 3D: element-wise copy from `src` into `dst`.
//
// Builds ParallelFor(grid_dim, block_dim, 0, stream) and applies the copy
// lambda over dst.total_size() indices; each index is passed unchanged to
// data(idx) on both views.
// NOTE(review): data(idx) presumably maps the flat index onto the view's 3D
// layout -- confirm against Buffer3DView.
// The element count is taken from `dst` only; `src` is not size-checked here.
template <typename T>
MUDA_INLINE MUDA_HOST void kernel_assign(int              grid_dim,
                                         int              block_dim,
                                         cudaStream_t     stream,
                                         Buffer3DView<T>  dst,
                                         CBuffer3DView<T> src)
{
    const auto element_count = dst.total_size();

    ParallelFor(grid_dim, block_dim, 0, stream)
        .apply(element_count,
               [dst, src] __device__(int idx) mutable
               {
                   auto to   = dst.data(idx);
                   auto from = src.data(idx);
                   *to       = *from;
               });
}
60} // namespace muda::details::buffer
A view interface for any array-like linear memory, which can be constructed from DeviceBuffer/DeviceVe...
A frequently used parallel for loop; DynamicBlockDim and GridStrideLoop strategies are provided,...