MUDA
Loading...
Searching...
No Matches
nd_reshaper.inl
1#include <list>
2#include <array>
3#include <bitset>
5#include <muda/buffer/device_buffer_2d.h>
7#include <muda/buffer/reshape_nd/reserve.h>
8#include <muda/buffer/reshape_nd/masked_compare.h>
9#include <muda/buffer/reshape_nd/masked_swap.h>
10
11namespace muda::details::buffer
12{
// One pending copy-construct job: the elements viewed by `dst` are to be
// copy-constructed from the elements viewed by `src`.
// BufferView is the view type of the buffer being reshaped.
template <typename BufferView>
class CopyConstructInfo
{
  public:
    BufferView dst;  // destination region
    BufferView src;  // source region
};
20
// One pending construct job: the elements viewed by `dst` still have to be
// constructed.
template <typename BufferView>
class ConstructInfo
{
  public:
    BufferView dst;  // region to construct
};
27
// One pending destruct job: the elements viewed by `dst` have to be
// destructed.
template <typename BufferView>
class DestructInfo
{
  public:
    BufferView dst;  // region to destruct
};
34
// Fixed-size array shorthand used by the reshape helpers.
template <typename T, size_t N>
using Array = std::array<T, N>;
// N-component index tuple (one size_t per component).
template <size_t N>
using Offset = std::array<size_t, N>;

// Visits all 2^N cells spanned by three breakpoints per component.
//
// offsets[c] holds three breakpoints for component c. For every combination
// of "first interval" / "second interval" per component, the callback is
// invoked once as f(mask, begin, end) where, for each component c:
//   - mask[c] is false when the first interval [offsets[c][0], offsets[c][1]]
//     was picked and true when the second [offsets[c][1], offsets[c][2]] was;
//   - begin[c] / end[c] are the two breakpoints of the picked interval, in
//     the order they are stored (end may be smaller than begin).
// Cells are visited in increasing order of the mask's integer value.
// begin and end are handed over as non-const lvalues, so the callback may
// modify them in place.
template <typename F, size_t N>
void for_all_cell(const Array<Array<size_t, 3>, N>& offsets, F&& f)
{
    // Do the shift in size_t so the loop bound and the index have the same
    // (unsigned) type and the shift is not performed on a plain int.
    constexpr size_t total = size_t{1} << N;

    for(size_t index = 0; index < total; ++index)
    {
        const std::bitset<N> bits{index};
        std::bitset<N>       mask;
        Offset<N>            begin, end;

        for(size_t c = 0; c < N; ++c)  // c : component
        {
            // test() yields a plain bool instead of the bitset proxy type,
            // so the interval index is an honest size_t.
            const size_t i = bits.test(c) ? 1 : 0;
            mask.set(c, i != 0);
            begin[c] = offsets[c][i];
            end[c]   = offsets[c][i + 1];
        }
        f(mask, begin, end);
    }
}
62} // namespace muda::details::buffer
63
64
65namespace muda
66{
67template <typename T, typename FConstruct>
68void NDReshaper::resize(int grid_dim,
69 int block_dim,
70 cudaStream_t stream,
71 DeviceBuffer<T>& buffer,
72 size_t new_size,
73 FConstruct&& fct)
74{
75 using namespace details::buffer;
76
77 auto& m_data = buffer.m_data;
78 auto& m_size = buffer.m_size;
79 auto& m_capacity = buffer.m_capacity;
80
81 if(new_size == m_size)
82 return;
83
84 auto old_size = m_size;
85 BufferView<T> old_buffer = buffer.view();
86 BufferView<T> new_buffer;
87
88 if(new_size < m_size)
89 {
90 // destruct the old memory
91 auto to_destruct = buffer.view(new_size, old_size - new_size);
92 kernel_destruct<T>(grid_dim, block_dim, stream, to_destruct);
93 m_size = new_size;
94 new_buffer = old_buffer;
95 return;
96 }
97
98 if(new_size <= m_capacity)
99 {
100 // construct the new memory
101 BufferView<T> to_construct = BufferView<T>{m_data + old_size, new_size - old_size};
102 //auto to_construct = old_buffer.subview(old_size, new_size - old_size);
103 fct(to_construct);
104 m_size = new_size;
105 new_buffer = old_buffer;
106 return;
107 }
108 else
109 {
110 new_buffer = reserve_1d<T>(stream, new_size);
111
112 if(m_data)
113 {
114 auto to_copy_construct = new_buffer.subview(0, old_size);
115 // copy construct on new memory
116 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
117 }
118
119 // construct the rest new memory
120 {
121 BufferView<T> to_construct = new_buffer.subview(old_size);
122 fct(to_construct);
123 }
124
125 if(m_data)
126 {
127 // destruct the old memory
128 kernel_destruct<T>(grid_dim, block_dim, stream, buffer.view());
129 // free the old memory
130 Memory(stream).free(m_data);
131 }
132
133
134 m_data = new_buffer.origin_data();
135 m_size = new_size;
136 m_capacity = new_size;
137 return;
138 }
139}
140
141template <typename T>
142MUDA_HOST void NDReshaper::shrink_to_fit(int grid_dim,
143 int block_dim,
144 cudaStream_t stream,
145 DeviceBuffer<T>& buffer)
146{
147 using namespace details::buffer;
148 auto& m_data = buffer.m_data;
149 auto& m_size = buffer.m_size;
150 auto& m_capacity = buffer.m_capacity;
151
152 auto old_buffer = buffer.view();
153 BufferView<T> new_buffer;
154
155 if(m_size == m_capacity)
156 return;
157
158
159 if(m_size > 0)
160 {
161 // alloc new buffer
162 new_buffer = reserve_1d<T>(stream, m_size);
163 // copy construct on the new buffer
164 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
165 }
166
167 if(old_buffer.origin_data())
168 {
169 // destruct the old buffer
170 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
171 // free the old buffer
172 Memory(stream).free(m_data);
173 }
174
175 m_data = new_buffer.origin_data();
176 m_capacity = m_size;
177}
178
179template <typename T>
180MUDA_HOST void NDReshaper::reserve(int grid_dim,
181 int block_dim,
182 cudaStream_t stream,
183 DeviceBuffer<T>& buffer,
184 size_t new_capacity)
185{
186 using namespace details::buffer;
187
188 auto& m_data = buffer.m_data;
189 auto& m_size = buffer.m_size;
190 auto& m_capacity = buffer.m_capacity;
191
192 auto old_buffer = buffer.view();
193
194 if(new_capacity <= buffer.capacity())
195 return;
196
197 BufferView<T> new_buffer = reserve_1d<T>(stream, new_capacity);
198 // copy construct
199 auto to_copy_construct = new_buffer.subview(0, old_buffer.size());
200 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
201
202 if(old_buffer.origin_data())
203 {
204 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
205 Memory(stream).free(old_buffer.origin_data());
206 }
207
208 m_data = new_buffer.origin_data();
209 m_capacity = new_buffer.size();
210}
211
212template <typename T, typename FConstruct>
213MUDA_HOST void NDReshaper::resize(int grid_dim,
214 int block_dim,
215 cudaStream_t stream,
216 DeviceBuffer2D<T>& buffer,
217 Extent2D new_extent,
218 FConstruct&& fct)
219{
220 using namespace details::buffer;
221
222 auto& m_data = buffer.m_data;
223 auto& m_pitch_bytes = buffer.m_pitch_bytes;
224 auto& m_extent = buffer.m_extent;
225 auto& m_capacity = buffer.m_capacity;
226
227 if(new_extent == m_extent)
228 return;
229
230 auto old_extent = m_extent;
231
232 std::list<CopyConstructInfo<Buffer2DView<T>>> copy_construct_infos;
233 std::list<ConstructInfo<Buffer2DView<T>>> construct_infos;
234 std::list<DestructInfo<Buffer2DView<T>>> destruct_infos;
235
236 Buffer2DView<T> old_buffer = buffer.view();
237 Buffer2DView<T> new_buffer;
238 if(new_extent <= m_capacity)
239 {
240 // all dimensions are bigger than the new extent
241 m_extent = new_extent;
242 new_buffer = old_buffer;
243 }
244 else
245 {
246 // at least one dimension is smaller than the new extent
247 // so we need to allocate a new buffer (m_capacity)
248 // which is bigger than the new_extent in all dimensions
249 auto new_capacity = max(new_extent, m_capacity);
250 new_buffer = reserve_2d<T>(stream, new_capacity);
251
252 m_data = new_buffer.origin_data();
253 m_pitch_bytes = new_buffer.pitch_bytes();
254 m_extent = new_extent;
255 m_capacity = new_capacity;
256 }
257
258 constexpr size_t N = 2;
259 Array<Array<size_t, 3>, N> offsets;
260 //tex:
261 // $$
262 // \begin{bmatrix}
263 // 0 & w_0 & w_1 \\
264 // 0 & h_0 & h_1
265 // \end{bmatrix}
266 // $$
267 offsets[0] = {0ull, old_extent.width(), new_extent.width()};
268 offsets[1] = {0ull, old_extent.height(), new_extent.height()};
269 bool need_copy = (new_buffer.data(0) != nullptr);
270 for_all_cell(offsets,
271 [&](std::bitset<N> mask, Offset<N>& begin, Offset<N>& end)
272 {
273 bool copy_construct = !mask.any();
274 if(copy_construct)
275 {
276 // all DOF are fixed
277
278 if(new_buffer.origin_data() != old_buffer.origin_data())
279 {
280 // if new_buffer is allocated, we need to copy the old data
281 Offset2D offset_begin{begin[1], begin[0]};
282 Offset2D offset_end{end[1], end[0]};
283 Extent2D extent = as_extent(offset_end - offset_begin);
284
285 CopyConstructInfo<Buffer2DView<T>> info;
286 info.dst = new_buffer.subview(offset_begin, extent);
287 info.src = old_buffer.subview(offset_begin, extent);
288 copy_construct_infos.push_back(std::move(info));
289 }
290 else
291 {
292 // we don't need to copy the old data
293 }
294 return;
295 }
296 else
297 {
298 // some DOF are fixed
299 bool construct = less(mask, begin, end);
300 if(construct)
301 {
302 Offset2D offset_begin{begin[1], begin[0]};
303 Offset2D offset_end{end[1], end[0]};
304 Extent2D extent = as_extent(offset_end - offset_begin);
305 ConstructInfo<Buffer2DView<T>> info;
306 info.dst = new_buffer.subview(offset_begin, extent);
307 construct_infos.emplace_back(std::move(info));
308 return;
309 }
310 bool destruct = less(mask, end, begin);
311 if(destruct)
312 {
313 swap(mask, begin, end);
314 Offset2D offset_begin{begin[1], begin[0]};
315 Offset2D offset_end{end[1], end[0]};
316 Extent2D extent = as_extent(offset_end - offset_begin);
317 DestructInfo<Buffer2DView<T>> info;
318 info.dst = old_buffer.subview(offset_begin, extent);
319
320 destruct_infos.emplace_back(std::move(info));
321 return;
322 }
323 }
324 // else we need to do nothing
325 });
326
327 // destruct
328 for(auto& info : destruct_infos)
329 kernel_destruct<T>(grid_dim, block_dim, stream, info.dst);
330 // construct
331 for(auto& info : construct_infos)
332 fct(info.dst);
333 // copy construct
334 for(auto& info : copy_construct_infos)
335 kernel_copy_construct<T>(grid_dim, block_dim, stream, info.dst, info.src);
336
337
338 // if the new buffer was allocated, deallocate the old one
339 if(new_buffer.origin_data() != old_buffer.origin_data())
340 {
341 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
342 Memory(stream).free(old_buffer.origin_data());
343 }
344 return;
345}
346
347template <typename T>
348MUDA_HOST void NDReshaper::shrink_to_fit(int grid_dim,
349 int block_dim,
350 cudaStream_t stream,
351 DeviceBuffer2D<T>& buffer)
352{
353 using namespace details::buffer;
354 auto& m_data = buffer.m_data;
355 auto& m_pitch_bytes = buffer.m_pitch_bytes;
356 auto& m_extent = buffer.m_extent;
357 auto& m_capacity = buffer.m_capacity;
358
359 auto old_buffer = buffer.view();
360 Buffer2DView<T> new_buffer;
361
362 if(m_extent == m_capacity)
363 return;
364
365
366 if(!(m_extent == Extent2D::Zero()))
367 {
368 // alloc new buffer
369 new_buffer = reserve_2d<T>(stream, m_extent);
370
371 // copy construct on new buffer
372 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
373
374 m_pitch_bytes = new_buffer.pitch_bytes();
375 }
376
377 if(old_buffer.origin_data())
378 {
379 // destruct on old buffer
380 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
381
382 // free old buffer
383 Memory(stream).free(old_buffer.origin_data());
384 }
385
386 m_data = new_buffer.origin_data();
387 m_capacity = m_extent;
388}
389
390template <typename T>
391MUDA_HOST void NDReshaper::reserve(int grid_dim,
392 int block_dim,
393 cudaStream_t stream,
394 DeviceBuffer2D<T>& buffer,
395 Extent2D new_capacity)
396{
397 using namespace details::buffer;
398
399 auto& m_data = buffer.m_data;
400 auto& m_pitch_bytes = buffer.m_pitch_bytes;
401 auto& m_extent = buffer.m_extent;
402 auto& m_capacity = buffer.m_capacity;
403
404 auto old_buffer = buffer.view();
405
406 if(new_capacity <= m_capacity)
407 return;
408
409 new_capacity = max(new_capacity, m_capacity);
410
411 Buffer2DView<T> new_buffer = reserve_2d<T>(stream, new_capacity);
412 // copy construct
413 auto to_copy_construct = new_buffer.subview(Offset2D::Zero(), m_extent);
414 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
415
416 if(old_buffer.origin_data())
417 {
418 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
419 Memory(stream).free(old_buffer.origin_data());
420 }
421
422 m_data = new_buffer.origin_data();
423 m_pitch_bytes = new_buffer.pitch_bytes();
424 m_capacity = new_capacity;
425}
426
427template <typename T, typename FConstruct>
428MUDA_HOST void NDReshaper::resize(int grid_dim,
429 int block_dim,
430 cudaStream_t stream,
431 DeviceBuffer3D<T>& buffer,
432 Extent3D new_extent,
433 FConstruct&& fct)
434{
435 using namespace details::buffer;
436
437 auto& m_data = buffer.m_data;
438 auto& m_pitch_bytes = buffer.m_pitch_bytes;
439 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
440 auto& m_extent = buffer.m_extent;
441 auto& m_capacity = buffer.m_capacity;
442
443 if(new_extent == m_extent)
444 return;
445
446 auto old_extent = m_extent;
447
448 std::list<CopyConstructInfo<Buffer3DView<T>>> copy_construct_infos;
449 std::list<ConstructInfo<Buffer3DView<T>>> construct_infos;
450 std::list<DestructInfo<Buffer3DView<T>>> destruct_infos;
451
452 Buffer3DView<T> old_buffer = buffer.view();
453 Buffer3DView<T> new_buffer;
454
455 if(new_extent <= m_capacity)
456 {
457 // all dimensions are bigger than the new extent
458 m_extent = new_extent;
459 new_buffer = old_buffer;
460 }
461 else
462 {
463 // at least one dimension is smaller than the new extent
464 // so we need to allocate a new buffer (m_capacity)
465 // which is bigger than the new_extent in all dimensions
466 auto new_capacity = max(new_extent, m_capacity);
467 new_buffer = reserve_3d<T>(stream, new_capacity);
468
469 m_data = new_buffer.origin_data();
470 m_pitch_bytes = new_buffer.pitch_bytes();
471 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
472 m_extent = new_extent;
473 m_capacity = new_capacity;
474 }
475
476 constexpr size_t N = 3;
477 Array<Array<size_t, 3>, N> offsets;
478 //tex:
479 // $$
480 // \begin{bmatrix}
481 // 0 & w_0 & w_1 \\
482 // 0 & h_0 & h_1 \\
483 // 0 & d_0 & d_1
484 // \end{bmatrix}
485 // $$
486 offsets[0] = {0ull, old_extent.width(), new_extent.width()};
487 offsets[1] = {0ull, old_extent.height(), new_extent.height()};
488 offsets[2] = {0ull, old_extent.depth(), new_extent.depth()};
489 bool need_copy = (new_buffer.data(0) != nullptr);
490 for_all_cell(
491 offsets,
492 [&](std::bitset<N> mask, Offset<N>& begin, Offset<N>& end)
493 {
494 bool copy_construct = !mask.any();
495 if(copy_construct)
496 {
497 // all DOF are fixed
498 if(new_buffer.origin_data() != old_buffer.origin_data())
499 {
500 // if new_buffer is allocated, we need to copy the old data
501 Offset3D offset_begin{begin[2], begin[1], begin[0]};
502 Offset3D offset_end{end[2], end[1], end[0]};
503 Extent3D extent = as_extent(offset_end - offset_begin);
504
505 CopyConstructInfo<Buffer3DView<T>> info;
506 info.dst = new_buffer.subview(offset_begin, extent);
507 info.src = old_buffer.subview(offset_begin, extent);
508 copy_construct_infos.emplace_back(info);
509 }
510 else
511 {
512 // we don't need to copy the old data
513 }
514 return;
515 }
516 else
517 {
518 // some DOF are fixed
519 bool construct = less(mask, begin, end);
520 if(construct)
521 {
522 Offset3D offset_begin{begin[2], begin[1], begin[0]};
523 Offset3D offset_end{end[2], end[1], end[0]};
524 Extent3D extent = as_extent(offset_end - offset_begin);
525
526 ConstructInfo<Buffer3DView<T>> info;
527 info.dst = new_buffer.subview(offset_begin, extent);
528 construct_infos.emplace_back(std::move(info));
529 return;
530 }
531 bool destruct = less(mask, end, begin);
532 if(destruct)
533 {
534 swap(mask, begin, end);
535 Offset3D offset_begin{begin[2], begin[1], begin[0]};
536 Offset3D offset_end{end[2], end[1], end[0]};
537 Extent3D extent = as_extent(offset_end - offset_begin);
538
539 DestructInfo<Buffer3DView<T>> info;
540 info.dst = old_buffer.subview(offset_begin, extent);
541
542 destruct_infos.emplace_back(std::move(info));
543 return;
544 }
545 }
546 // else we need to do nothing
547 });
548
549 // destruct
550 for(auto& info : destruct_infos)
551 kernel_destruct<T>(grid_dim, block_dim, stream, info.dst);
552 // construct
553 for(auto& info : construct_infos)
554 fct(info.dst);
555 // copy construct
556 for(auto& info : copy_construct_infos)
557 kernel_copy_construct<T>(grid_dim, block_dim, stream, info.dst, info.src);
558
559 // if the new buffer was allocated, deallocate the old one
560 if(new_buffer.origin_data() != old_buffer.origin_data())
561 {
562 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
563 Memory(stream).free(old_buffer.origin_data());
564 }
565
566
567 return;
568}
569
570template <typename T>
571MUDA_HOST void NDReshaper::shrink_to_fit(int grid_dim,
572 int block_dim,
573 cudaStream_t stream,
574 DeviceBuffer3D<T>& buffer)
575{
576 using namespace details::buffer;
577 auto& m_data = buffer.m_data;
578 auto& m_pitch_bytes = buffer.m_pitch_bytes;
579 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
580 auto& m_extent = buffer.m_extent;
581 auto& m_capacity = buffer.m_capacity;
582
583 auto old_buffer = buffer.view();
584 Buffer3DView<T> new_buffer;
585
586 if(m_extent == m_capacity)
587 return;
588
589 if(!(m_extent == Extent3D::Zero()))
590 {
591 // alloc new buffer
592 new_buffer = reserve_3d<T>(stream, m_extent);
593
594 // copy construct on new buffer
595 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
596
597 m_pitch_bytes = new_buffer.pitch_bytes();
598 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
599 }
600
601 if(old_buffer.origin_data())
602 {
603 // destruct on old buffer
604 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
605
606 // free old buffer
607 Memory(stream).free(old_buffer.origin_data());
608 }
609
610 m_data = new_buffer.origin_data();
611 m_capacity = m_extent;
612}
613
614template <typename T>
615MUDA_HOST void NDReshaper::reserve(int grid_dim,
616 int block_dim,
617 cudaStream_t stream,
618 DeviceBuffer3D<T>& buffer,
619 Extent3D new_capacity)
620{
621 using namespace details::buffer;
622
623 auto& m_data = buffer.m_data;
624 auto& m_pitch_bytes = buffer.m_pitch_bytes;
625 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
626 auto& m_extent = buffer.m_extent;
627 auto& m_capacity = buffer.m_capacity;
628
629 auto old_buffer = buffer.view();
630
631 if(new_capacity <= m_capacity)
632 return;
633
634 new_capacity = max(new_capacity, m_capacity);
635
636 Buffer3DView<T> new_buffer = reserve_3d<T>(stream, new_capacity);
637 // copy construct
638 auto to_copy_construct = new_buffer.subview(Offset3D::Zero(), m_extent);
639 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
640
641 if(old_buffer.origin_data())
642 {
643 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
644 Memory(stream).free(old_buffer.origin_data());
645 }
646
647 m_data = new_buffer.origin_data();
648 m_pitch_bytes = new_buffer.pitch_bytes();
649 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
650 m_capacity = new_capacity;
651}
652} // namespace muda
Definition nd_reshaper.inl:23
Definition nd_reshaper.inl:15
Definition nd_reshaper.inl:30
A light-weight wrapper of cuda device memory. Like std::vector, allow user to resize,...
A light-weight wrapper of cuda device memory3D, allows user to resize, reserve, shrink_to_fit,...