MUDA
Loading...
Searching...
No Matches
buffer_launch.inl
1#include <muda/buffer/var_view.h>
3#include <muda/buffer/buffer_2d_view.h>
4#include <muda/buffer/buffer_3d_view.h>
5
6#include <muda/buffer/graph_var_view.h>
7#include <muda/buffer/graph_buffer_view.h>
8#include <muda/buffer/graph_buffer_2d_view.h>
9#include <muda/buffer/graph_buffer_3d_view.h>
10
11#include <muda/buffer/agent.h>
12#include <muda/buffer/reshape_nd/nd_reshaper.h>
13
14namespace muda
15{
16/**********************************************************************************************
17*
18* Buffer API
19* 0D DeviceVar
20* 1D DeviceBuffer
21* 2D DeviceBuffer2D
22* 3D DeviceBuffer3D
23*
24**********************************************************************************************/
25template <typename T>
26MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer<T>& buffer, size_t new_size)
27{
28 return resize(
29 buffer,
30 new_size,
31 [&](BufferView<T> view) // construct
32 {
33 if constexpr(std::is_trivially_constructible_v<T>)
34 {
35 Memory(m_stream).set(view.data(), view.size() * sizeof(T), 0);
36 }
37 else
38 {
39 static_assert(std::is_constructible_v<T>,
40 "The type T must be constructible, which means T must have a 0-arg constructor");
41
42 details::buffer::kernel_construct(m_grid_dim, m_block_dim, m_stream, view);
43 }
44 });
45}
46
47template <typename T>
48MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer2D<T>& buffer, Extent2D extent)
49{
50 return resize(buffer,
51 extent,
52 [&](Buffer2DView<T> view) // construct
53 {
54 // cudaMemset2D has no offset, so we can't use it here
55
56 //if constexpr(std::is_trivially_constructible_v<T>)
57 //{
58 // Extent2D extent = view.extent();
59 // Memory(m_stream).set(view.data(),
60 // view.pitch_bytes(),
61 // extent.width() * sizeof(T),
62 // extent.height(),
63 // 0);
64 //}
65 //else
66 //{
67 // static_assert(std::is_constructible_v<T>,
68 // "The type T must be constructible, which means T must have a 0-arg constructor");
69
70 // details::buffer::kernel_construct(m_grid_dim, m_block_dim, m_stream, view);
71 //}
72 details::buffer::kernel_construct(m_grid_dim, m_block_dim, m_stream, view);
73 });
74}
75
76template <typename T>
77MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer3D<T>& buffer, Extent3D extent)
78{
79 return resize(buffer,
80 extent,
81 [&](Buffer3DView<T> view) // construct
82 {
83 // cudaMemset3D has no offset, so we can't use it here
84
85 //if constexpr(std::is_trivially_constructible_v<T>)
86 //{
87 // Extent3D extent = view.extent();
88 // cudaPitchedPtr pitched_ptr = view.cuda_pitched_ptr();
89 // Memory(m_stream).set(pitched_ptr, extent.cuda_extent<T>(), 0);
90 //}
91 //else
92 //{
93 // static_assert(std::is_constructible_v<T>,
94 // "The type T must be constructible, which means T must have a 0-arg constructor");
95 // details::buffer::kernel_construct(m_grid_dim, m_block_dim, m_stream, view);
96 //
97 //}
98 details::buffer::kernel_construct(m_grid_dim, m_block_dim, m_stream, view);
99 });
100}
101
102template <typename T>
103MUDA_HOST BufferLaunch& BufferLaunch::reserve(DeviceBuffer<T>& buffer, size_t capacity)
104{
105 NDReshaper::reserve(m_grid_dim, m_block_dim, m_stream, buffer, capacity);
106 return *this;
107}
108
109template <typename T>
110MUDA_HOST BufferLaunch& BufferLaunch::reserve(DeviceBuffer2D<T>& buffer, Extent2D capacity)
111{
112 NDReshaper::reserve(m_grid_dim, m_block_dim, m_stream, buffer, capacity);
113 return *this;
114}
115
116template <typename T>
117MUDA_HOST BufferLaunch& BufferLaunch::reserve(DeviceBuffer3D<T>& buffer, Extent3D capacity)
118{
119 NDReshaper::reserve(m_grid_dim, m_block_dim, m_stream, buffer, capacity);
120 return *this;
121}
122
123template <typename T>
124MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer<T>& buffer, size_t new_size, const T& val)
125{
126 return resize(buffer, new_size, [&](BufferView<T> view) { fill(view, val); });
127}
128
129template <typename T>
130MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer2D<T>& buffer,
131 Extent2D extent,
132 const T& val)
133{
134 return resize(buffer, extent, [&](Buffer2DView<T> view) { fill(view, val); });
135}
136
137template <typename T>
138MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer3D<T>& buffer,
139 Extent3D extent,
140 const T& val)
141{
142 return resize(buffer, extent, [&](Buffer3DView<T> view) { fill(view, val); });
143}
144
145template <typename T>
146MUDA_HOST BufferLaunch& BufferLaunch::clear(DeviceBuffer<T>& buffer)
147{
148 resize(buffer, 0);
149 return *this;
150}
151
152template <typename T>
153MUDA_HOST BufferLaunch& BufferLaunch::clear(DeviceBuffer2D<T>& buffer)
154{
155 resize(buffer, Extent2D::Zero());
156 return *this;
157}
158
159template <typename T>
160MUDA_HOST BufferLaunch& BufferLaunch::clear(DeviceBuffer3D<T>& buffer)
161{
162 resize(buffer, Extent3D::Zero());
163 return *this;
164}
165
166template <typename T>
167MUDA_HOST BufferLaunch& BufferLaunch::alloc(DeviceBuffer<T>& buffer, size_t n)
168{
169 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
170 "cannot alloc a buffer in a compute graph");
171 MUDA_ASSERT(!buffer.m_data, "The buffer is already allocated");
172 resize(buffer, n);
173 return *this;
174}
175
176template <typename T>
177MUDA_HOST BufferLaunch& BufferLaunch::alloc(DeviceBuffer2D<T>& buffer, Extent2D extent)
178{
179 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
180 "cannot alloc a buffer in a compute graph");
181 MUDA_ASSERT(!buffer.m_data, "The buffer is already allocated");
182 resize(buffer, extent);
183 return *this;
184}
185
186template <typename T>
187MUDA_HOST BufferLaunch& BufferLaunch::alloc(DeviceBuffer3D<T>& buffer, Extent3D extent)
188{
189 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
190 "cannot alloc a buffer in a compute graph");
191 MUDA_ASSERT(!buffer.m_data, "The buffer is already allocated");
192 resize(buffer, extent);
193 return *this;
194}
195
196template <typename T>
197MUDA_HOST BufferLaunch& BufferLaunch::free(DeviceBuffer<T>& buffer)
198{
199 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
200 "cannot free a buffer in a compute graph");
201 MUDA_ASSERT(buffer.m_data, "The buffer is not allocated");
202
203 auto& m_data = buffer.m_data;
204 auto& m_size = buffer.m_size;
205 auto& m_capacity = buffer.m_capacity;
206
207 Memory(m_stream).free(m_data);
208 m_data = nullptr;
209 m_size = 0;
210 m_capacity = 0;
211 return *this;
212}
213
214template <typename T>
215MUDA_HOST BufferLaunch& BufferLaunch::free(DeviceBuffer2D<T>& buffer)
216{
217 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
218 "cannot free a buffer in a compute graph");
219 MUDA_ASSERT(buffer.m_data, "The buffer is not allocated");
220
221 auto& m_data = buffer.m_data;
222 auto& m_pitch_bytes = buffer.m_pitch_bytes;
223 auto& m_extent = buffer.m_extent;
224 auto& m_capacity = buffer.m_capacity;
225
226 Memory(m_stream).free(m_data);
227 m_data = nullptr;
228 m_pitch_bytes = 0;
229 m_extent = Extent2D::Zero();
230 m_capacity = Extent2D::Zero();
231 return *this;
232}
233
234template <typename T>
235MUDA_HOST BufferLaunch& BufferLaunch::free(DeviceBuffer3D<T>& buffer)
236{
237 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
238 "cannot free a buffer in a compute graph");
239 MUDA_ASSERT(buffer.m_data, "The buffer is not allocated");
240
241 auto& m_data = buffer.m_data;
242 auto& m_pitch_bytes = buffer.m_pitch_bytes;
243 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
244 auto& m_extent = buffer.m_extent;
245 auto& m_capacity = buffer.m_capacity;
246
247 Memory(m_stream).free(m_data);
248 m_data = nullptr;
249 m_pitch_bytes = 0;
250 m_pitch_bytes_area = 0;
251 m_extent = Extent3D::Zero();
252 m_capacity = Extent3D::Zero();
253 return *this;
254}
255
256template <typename T>
257MUDA_HOST BufferLaunch& BufferLaunch::shrink_to_fit(DeviceBuffer<T>& buffer)
258{
259 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
260 "cannot shrink a buffer in a compute graph");
261 NDReshaper::shrink_to_fit(m_grid_dim, m_block_dim, m_stream, buffer);
262 return *this;
263}
264
265template <typename T>
266MUDA_HOST BufferLaunch& BufferLaunch::shrink_to_fit(DeviceBuffer2D<T>& buffer)
267{
268 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
269 "cannot shrink a buffer in a compute graph");
270 NDReshaper::shrink_to_fit(m_grid_dim, m_block_dim, m_stream, buffer);
271 return *this;
272}
273
274template <typename T>
275MUDA_HOST BufferLaunch& BufferLaunch::shrink_to_fit(DeviceBuffer3D<T>& buffer)
276{
277 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
278 "cannot shrink a buffer in a compute graph");
279 NDReshaper::shrink_to_fit(m_grid_dim, m_block_dim, m_stream, buffer);
280 return *this;
281}
282
283
284/**********************************************************************************************
285*
286* BufferView Copy: Device <- Device
287*
288**********************************************************************************************/
289template <typename T>
290MUDA_HOST BufferLaunch& BufferLaunch::copy(VarView<T> dst, CVarView<T> src)
291{
292 details::buffer::kernel_assign(m_stream, dst, src);
293 return *this;
294}
295
296template <typename T>
297MUDA_HOST BufferLaunch& BufferLaunch::copy(BufferView<T> dst, CBufferView<T> src)
298{
299 MUDA_ASSERT(dst.size() == src.size(), "BufferView should have the same size");
300 details::buffer::kernel_assign(m_grid_dim, m_block_dim, m_stream, dst, src);
301 return *this;
302}
303
304template <typename T>
305MUDA_HOST BufferLaunch& BufferLaunch::copy(Buffer2DView<T> dst, CBuffer2DView<T> src)
306{
307 MUDA_ASSERT(dst.extent() == src.extent(), "BufferView should have the same size");
308 details::buffer::kernel_assign(m_grid_dim, m_block_dim, m_stream, dst, src);
309 return *this;
310}
311
312template <typename T>
313MUDA_HOST BufferLaunch& BufferLaunch::copy(Buffer3DView<T> dst, CBuffer3DView<T> src)
314{
315 MUDA_ASSERT(dst.extent() == src.extent(), "BufferView should have the same size");
316 details::buffer::kernel_assign(m_grid_dim, m_block_dim, m_stream, dst, src);
317 return *this;
318}
319
320template <typename T>
321MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<VarView<T>>& dst,
322 const ComputeGraphVar<VarView<T>>& src)
323{
324 return copy(dst.eval(), src.ceval());
325}
326
327template <typename T>
328MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<BufferView<T>>& dst,
329 const ComputeGraphVar<BufferView<T>>& src)
330{
331 return copy(dst.eval(), src.ceval());
332}
333
334template <typename T>
335MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<Buffer2DView<T>>& dst,
336 const ComputeGraphVar<Buffer2DView<T>>& src)
337{
338 return copy(dst.eval(), src.ceval());
339}
340
341template <typename T>
342MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<Buffer3DView<T>>& dst,
343 const ComputeGraphVar<Buffer3DView<T>>& src)
344{
345 return copy(dst.eval(), src.ceval());
346}
347
348
349/**********************************************************************************************
350*
351* BufferView Copy: Host <- Device
352*
353**********************************************************************************************/
354template <typename T>
355MUDA_HOST BufferLaunch& BufferLaunch::copy(T* dst, CVarView<T> src)
356{
357 Memory(m_stream).download(dst, src.data(), sizeof(T));
358 return *this;
359}
360
361template <typename T>
362MUDA_HOST BufferLaunch& BufferLaunch::copy(T* dst, CBufferView<T> src)
363{
364 Memory(m_stream).download(dst, src.data(), src.size() * sizeof(T));
365 return *this;
366}
367
368
369template <typename T>
370MUDA_HOST BufferLaunch& BufferLaunch::copy(T* dst, CBuffer2DView<T> src)
371{
372 cudaMemcpy3DParms parms = {0};
373
374 parms.srcPtr = src.cuda_pitched_ptr();
375 parms.srcPos = src.offset().template cuda_pos<T>();
376 parms.dstPtr = make_cudaPitchedPtr(
377 dst, parms.srcPtr.xsize, parms.srcPtr.xsize, parms.srcPtr.ysize);
378 parms.extent = src.extent().template cuda_extent<T>();
379 parms.dstPos = make_cudaPos(0, 0, 0);
380
381 Memory(m_stream).download(parms);
382 return *this;
383}
384
385template <typename T>
386MUDA_HOST BufferLaunch& BufferLaunch::copy(T* dst, CBuffer3DView<T> src)
387{
388 cudaMemcpy3DParms parms = {0};
389
390 parms.srcPtr = src.cuda_pitched_ptr();
391 parms.srcPos = src.offset().template cuda_pos<T>();
392 parms.dstPtr = make_cudaPitchedPtr(
393 dst, parms.srcPtr.xsize, parms.srcPtr.xsize, parms.srcPtr.ysize);
394 parms.extent = src.extent().template cuda_extent<T>();
395 parms.dstPos = make_cudaPos(0, 0, 0);
396
397 Memory(m_stream).download(parms);
398 return *this;
399}
400
401template <typename T>
402MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<T*>& dst,
403 const ComputeGraphVar<VarView<T>>& src)
404{
405 return copy(dst.eval(), src.ceval());
406}
407
408template <typename T>
409MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<T*>& dst,
410 const ComputeGraphVar<BufferView<T>>& src)
411{
412 return copy(dst.eval(), src.ceval());
413}
414
415template <typename T>
416MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<T*>& dst,
417 const ComputeGraphVar<Buffer2DView<T>>& src)
418{
419 return copy(dst.eval(), src.ceval());
420}
421
422template <typename T>
423MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<T*>& dst,
424 const ComputeGraphVar<Buffer3DView<T>>& src)
425{
426 return copy(dst.eval(), src.ceval());
427}
428
429/**********************************************************************************************
430*
431* BufferView Copy: Device <- Host
432*
433**********************************************************************************************/
434template <typename T>
435MUDA_HOST BufferLaunch& BufferLaunch::copy(VarView<T> dst, const T* src)
436{
437 Memory(m_stream).upload(dst.data(), src, sizeof(T));
438 return *this;
439}
440
441template <typename T>
442MUDA_HOST BufferLaunch& BufferLaunch::copy(BufferView<T> dst, const T* src)
443{
444 Memory(m_stream).upload(dst.data(), src, dst.size() * sizeof(T));
445 return *this;
446}
447
448template <typename T>
449MUDA_HOST BufferLaunch& BufferLaunch::copy(Buffer2DView<T> dst, const T* src)
450{
451 cudaMemcpy3DParms parms = {0};
452
453 parms.extent = dst.extent().template cuda_extent<T>();
454 parms.dstPos = dst.offset().template cuda_pos<T>();
455 parms.dstPtr = dst.cuda_pitched_ptr();
456
457 parms.srcPtr = make_cudaPitchedPtr(const_cast<T*>(src),
458 parms.dstPtr.xsize,
459 parms.dstPtr.xsize,
460 parms.dstPtr.ysize);
461 parms.srcPos = make_cudaPos(0, 0, 0);
462
463 Memory(m_stream).upload(parms);
464
465 return *this;
466}
467
468template <typename T>
469MUDA_HOST BufferLaunch& BufferLaunch::copy(Buffer3DView<T> dst, const T* src)
470{
471 cudaMemcpy3DParms parms = {0};
472
473 parms.extent = dst.extent().template cuda_extent<T>();
474 parms.dstPos = dst.offset().template cuda_pos<T>();
475 parms.dstPtr = dst.cuda_pitched_ptr();
476
477 parms.srcPtr = make_cudaPitchedPtr(const_cast<T*>(src),
478 parms.dstPtr.xsize,
479 parms.dstPtr.xsize,
480 parms.dstPtr.ysize);
481 parms.srcPos = make_cudaPos(0, 0, 0);
482
483 Memory(m_stream).upload(parms);
484
485 return *this;
486}
487
488template <typename T>
489MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<VarView<T>>& dst,
490 const ComputeGraphVar<T*>& src)
491{
492 return copy(dst.eval(), src.ceval());
493}
494
495template <typename T>
496MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<BufferView<T>>& dst,
497 const ComputeGraphVar<T*>& src)
498{
499 return copy(dst.eval(), src.ceval());
500}
501
502template <typename T>
503MUDA_HOST BufferLaunch& BufferLaunch::copy(ComputeGraphVar<Buffer2DView<T>>& dst,
504 const ComputeGraphVar<T*>& src)
505{
506 return copy(dst.eval(), src.ceval());
507}
508
509template <typename T>
510MUDA_HOST BufferLaunch& copy(ComputeGraphVar<Buffer3DView<T>>& dst,
511 const ComputeGraphVar<T*>& src)
512{
513 return copy(dst.eval(), src.ceval());
514}
515
516/**********************************************************************************************
517*
518* BufferView Scatter: Device <- Host
519*
520**********************************************************************************************/
521template <typename T>
522MUDA_HOST BufferLaunch& BufferLaunch::fill(VarView<T> view, const T& val)
523{
524 details::buffer::kernel_fill(m_stream, view, val);
525 return *this;
526}
527
528template <typename T>
529MUDA_HOST BufferLaunch& BufferLaunch::fill(BufferView<T> buffer, const T& val)
530{
531 details::buffer::kernel_fill(m_grid_dim, m_block_dim, m_stream, buffer, val);
532 return *this;
533}
534
535template <typename T>
536MUDA_HOST BufferLaunch& BufferLaunch::fill(Buffer2DView<T> buffer, const T& val)
537{
538 details::buffer::kernel_fill(m_grid_dim, m_block_dim, m_stream, buffer, val);
539 return *this;
540}
541
542template <typename T>
543MUDA_HOST BufferLaunch& BufferLaunch::fill(Buffer3DView<T> buffer, const T& val)
544{
545 details::buffer::kernel_fill(m_grid_dim, m_block_dim, m_stream, buffer, val);
546 return *this;
547}
548
549template <typename T>
550MUDA_HOST BufferLaunch& BufferLaunch::fill(ComputeGraphVar<VarView<T>>& buffer,
551 const ComputeGraphVar<T>& val)
552{
553 return fill(buffer.eval(), val.ceval());
554}
555
556template <typename T>
557MUDA_HOST BufferLaunch& BufferLaunch::fill(ComputeGraphVar<BufferView<T>>& buffer,
558 const ComputeGraphVar<T>& val)
559{
560 return fill(buffer.eval(), val.ceval());
561}
562
563template <typename T>
564MUDA_HOST BufferLaunch& BufferLaunch::fill(ComputeGraphVar<Buffer2DView<T>>& buffer,
565 const ComputeGraphVar<T>& val)
566{
567 return fill(buffer.eval(), val.ceval());
568}
569
570template <typename T>
571MUDA_HOST BufferLaunch& BufferLaunch::fill(ComputeGraphVar<Buffer3DView<T>>& buffer,
572 const ComputeGraphVar<T>& val)
573{
574 return fill(buffer.eval(), val.ceval());
575}
576
577/**********************************************************************************************
578*
579* Internal BufferView Resize
580*
581**********************************************************************************************/
582template <typename T, typename FConstruct>
583MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer<T>& buffer, size_t new_size, FConstruct&& fct)
584{
585 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
586 "cannot resize a buffer in a compute graph");
587 NDReshaper::resize(
588 m_grid_dim, m_block_dim, m_stream, buffer, new_size, std::forward<FConstruct>(fct));
589 return *this;
590}
591
592template <typename T, typename FConstruct>
593MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer2D<T>& buffer,
594 Extent2D new_extent,
595 FConstruct&& fct)
596{
597 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
598 "cannot resize a buffer in a compute graph");
599 NDReshaper::resize(
600 m_grid_dim, m_block_dim, m_stream, buffer, new_extent, std::forward<FConstruct>(fct));
601 return *this;
602}
603//using T = float;
604//using FConstruct = std::function<void(Buffer3DView<T>)>;
605template <typename T, typename FConstruct>
606MUDA_HOST BufferLaunch& BufferLaunch::resize(DeviceBuffer3D<T>& buffer,
607 Extent3D new_extent,
608 FConstruct&& fct)
609{
610 MUDA_ASSERT(ComputeGraphBuilder::is_direct_launching(),
611 "cannot resize a buffer in a compute graph");
612
613 NDReshaper::resize(
614 m_grid_dim, m_block_dim, m_stream, buffer, new_extent, std::forward<FConstruct>(fct));
615 return *this;
616}
617} // namespace muda
A view interface for any array-like linear memory, which can be constructed from DeviceBuffer/DeviceVe...