5#include <muda/buffer/device_buffer_2d.h>
7#include <muda/buffer/reshape_nd/reserve.h>
8#include <muda/buffer/reshape_nd/masked_compare.h>
9#include <muda/buffer/reshape_nd/masked_swap.h>
11namespace muda::details::buffer
// Template headers for three declarations whose names and bodies are not visible in
// this listing.
// NOTE(review): later code in this file uses CopyConstructInfo<View> (members dst, src),
// ConstructInfo<View> (member dst) and DestructInfo<View> (member dst); presumably these
// headers belong to those three types - confirm against the full file.
13template <
typename BufferView>
21template <
typename BufferView>
28template <
typename BufferView>
// Array<T, N>: shorthand alias for std::array<T, N>.
35template <
typename T,
size_t N>
36using Array = std::array<T, N>;
// Offset: a std::array of N size_t values.
// NOTE(review): the template header that introduces N for this alias is not visible in
// this listing.
38using Offset = std::array<size_t, N>;
// Enumerates the index values 0 .. (1 << N) - 1 and, for each one, fills per-component
// `begin` / `end` offsets from the three-entry rows of `offsets`.
//
// offsets : for every component c, three values; two consecutive entries are picked per
//           iteration.
// f       : callable; callers in this file pass a lambda taking
//           (std::bitset<N>, Offset<N>&, Offset<N>&).
// NOTE(review): the declarations of `begin`, `end`, `i` and the call to `f` are not
// visible in this listing - confirm against the full file.
40template <
typename F,
size_t N>
41void for_all_cell(
const Array<Array<size_t, 3>, N>& offsets, F&& f)
// One index value per combination of N bits.
44 constexpr auto total = 1 << N;
46 for(
size_t index = 0; index < total; ++index)
// The bits of `index` are stored in a bitset of width N.
48 bitset<N> bits{index};
52 for(
size_t c = 0; c < N; ++c)
// begin[c] and end[c] take two consecutive entries of offsets[c].
// NOTE(review): `i` is presumably bit c of `bits` (0 or 1) - confirm.
56 begin[c] = offsets[c][i];
57 end[c] = offsets[c][i + 1];
// Resizes a 1D DeviceBuffer<T> to `new_size`.
// NOTE(review): several parameters (block_dim, stream, new_size, the FConstruct functor),
// all braces, and the else/return lines are missing from this listing; the comments
// below describe only the statements that are visible.
67template <
typename T,
typename FConstruct>
68void NDReshaper::resize(
int grid_dim,
71 DeviceBuffer<T>& buffer,
75 using namespace details::buffer;
// Operate directly on the buffer's internal members through references.
77 auto& m_data = buffer.m_data;
78 auto& m_size = buffer.m_size;
79 auto& m_capacity = buffer.m_capacity;
// Size unchanged; presumably an early return follows (not visible here).
81 if(new_size == m_size)
84 auto old_size = m_size;
85 BufferView<T> old_buffer = buffer.view();
86 BufferView<T> new_buffer;
// The view built from (new_size, old_size - new_size) is handed to kernel_destruct and
// the existing allocation is kept.
// NOTE(review): presumably guarded by a "new_size < old_size" check that is not visible.
91 auto to_destruct = buffer.view(new_size, old_size - new_size);
92 kernel_destruct<T>(grid_dim, block_dim, stream, to_destruct);
94 new_buffer = old_buffer;
// Growing within the current capacity: the view to construct is built from
// m_data + old_size and new_size - old_size, and the existing allocation is reused.
98 if(new_size <= m_capacity)
101 BufferView<T> to_construct = BufferView<T>{m_data + old_size, new_size - old_size};
105 new_buffer = old_buffer;
// Otherwise (presumably the else branch) a new allocation of new_size is obtained.
110 new_buffer = reserve_1d<T>(stream, new_size);
// The old contents are copy-constructed into subview(0, old_size) of the new buffer.
114 auto to_copy_construct = new_buffer.subview(0, old_size);
116 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
// The remainder of the new buffer, starting at old_size, is the part to construct.
121 BufferView<T> to_construct = new_buffer.subview(old_size);
// The old buffer's view is passed to kernel_destruct, then its memory is freed.
128 kernel_destruct<T>(grid_dim, block_dim, stream, buffer.view());
130 Memory(stream).free(m_data);
// Adopt the new allocation; capacity becomes exactly new_size.
134 m_data = new_buffer.origin_data();
136 m_capacity = new_size;
// Reallocates a 1D DeviceBuffer<T> so that its storage matches its current size.
// NOTE(review): the template header, the block_dim/stream parameters, braces and any
// return/guard lines are missing from this listing.
142MUDA_HOST
void NDReshaper::shrink_to_fit(
int grid_dim,
145 DeviceBuffer<T>& buffer)
147 using namespace details::buffer;
148 auto& m_data = buffer.m_data;
149 auto& m_size = buffer.m_size;
150 auto& m_capacity = buffer.m_capacity;
152 auto old_buffer = buffer.view();
153 BufferView<T> new_buffer;
// Size already equals capacity; presumably an early return follows (not visible here).
155 if(m_size == m_capacity)
// Allocate m_size elements and copy-construct the old contents into them.
// NOTE(review): any guard for an empty buffer is not visible here - confirm.
162 new_buffer = reserve_1d<T>(stream, m_size);
164 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free the old memory.
167 if(old_buffer.origin_data())
170 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
172 Memory(stream).free(m_data);
// Adopt the new allocation (the capacity update is not visible in this listing).
175 m_data = new_buffer.origin_data();
// Grows the storage of a 1D DeviceBuffer<T> to `new_capacity`.
// NOTE(review): the template header, the block_dim/stream/new_capacity parameters,
// braces and return lines are missing from this listing.
180MUDA_HOST
void NDReshaper::reserve(
int grid_dim,
183 DeviceBuffer<T>& buffer,
186 using namespace details::buffer;
188 auto& m_data = buffer.m_data;
189 auto& m_size = buffer.m_size;
190 auto& m_capacity = buffer.m_capacity;
192 auto old_buffer = buffer.view();
// Requested capacity does not exceed the current one; presumably an early return
// follows (not visible here).
194 if(new_capacity <= buffer.capacity())
// Allocate the larger storage and copy-construct the old contents into
// subview(0, old_buffer.size()) of it.
197 BufferView<T> new_buffer = reserve_1d<T>(stream, new_capacity);
199 auto to_copy_construct = new_buffer.subview(0, old_buffer.size());
200 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free its memory.
202 if(old_buffer.origin_data())
204 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
205 Memory(stream).free(old_buffer.origin_data());
// Adopt the new allocation; m_size is not modified by any visible line.
208 m_data = new_buffer.origin_data();
209 m_capacity = new_buffer.size();
// Resizes a DeviceBuffer2D<T> to `new_extent`. Work is split into rectangular regions
// that are queued as copy-construct / construct / destruct operations and launched
// afterwards.
// NOTE(review): several parameters (block_dim, stream, new_extent, the FConstruct
// functor), all braces, and the if-bodies/else lines are missing from this listing; the
// comments below describe only the statements that are visible.
212template <
typename T,
typename FConstruct>
213MUDA_HOST
void NDReshaper::resize(
int grid_dim,
216 DeviceBuffer2D<T>& buffer,
220 using namespace details::buffer;
// Operate directly on the buffer's internal members through references.
222 auto& m_data = buffer.m_data;
223 auto& m_pitch_bytes = buffer.m_pitch_bytes;
224 auto& m_extent = buffer.m_extent;
225 auto& m_capacity = buffer.m_capacity;
// Extent unchanged; presumably an early return follows (not visible here).
227 if(new_extent == m_extent)
230 auto old_extent = m_extent;
// Pending operations, collected first and launched after all regions are classified.
232 std::list<CopyConstructInfo<Buffer2DView<T>>> copy_construct_infos;
233 std::list<ConstructInfo<Buffer2DView<T>>> construct_infos;
234 std::list<DestructInfo<Buffer2DView<T>>> destruct_infos;
236 Buffer2DView<T> old_buffer = buffer.view();
237 Buffer2DView<T> new_buffer;
// New extent fits in the current capacity: only the extent changes and the new view
// aliases the old one.
238 if(new_extent <= m_capacity)
241 m_extent = new_extent;
242 new_buffer = old_buffer;
// Otherwise (presumably the else branch): allocate with capacity
// max(new_extent, m_capacity) and update the buffer's pointer, pitch, extent and capacity.
249 auto new_capacity = max(new_extent, m_capacity);
250 new_buffer = reserve_2d<T>(stream, new_capacity);
252 m_data = new_buffer.origin_data();
253 m_pitch_bytes = new_buffer.pitch_bytes();
254 m_extent = new_extent;
255 m_capacity = new_capacity;
// Per-component breakpoints {0, old, new}: component 0 is width, component 1 is height.
258 constexpr size_t N = 2;
259 Array<Array<size_t, 3>, N> offsets;
267 offsets[0] = {0ull, old_extent.width(), new_extent.width()};
268 offsets[1] = {0ull, old_extent.height(), new_extent.height()};
// NOTE(review): need_copy is not read by any line visible in this listing.
269 bool need_copy = (new_buffer.data(0) !=
nullptr);
// Classify every region produced by for_all_cell.
270 for_all_cell(offsets,
271 [&](std::bitset<N> mask, Offset<N>& begin, Offset<N>& end)
// No mask bit set: this region is a copy-construct candidate.
273 bool copy_construct = !mask.any();
// A copy is queued only when the storage was actually reallocated (origin pointers differ).
278 if(new_buffer.origin_data() != old_buffer.origin_data())
// Offset2D is built with component 1 first and component 0 second.
281 Offset2D offset_begin{begin[1], begin[0]};
282 Offset2D offset_end{end[1], end[0]};
283 Extent2D extent = as_extent(offset_end - offset_begin);
285 CopyConstructInfo<Buffer2DView<T>> info;
286 info.dst = new_buffer.subview(offset_begin, extent);
287 info.src = old_buffer.subview(offset_begin, extent);
288 copy_construct_infos.push_back(std::move(info));
// NOTE(review): `less(mask, a, b)` is defined elsewhere (presumably masked_compare.h);
// its exact semantics are not visible here. When it holds for (begin, end) the region
// in the new buffer is queued for construction.
299 bool construct = less(mask, begin, end);
302 Offset2D offset_begin{begin[1], begin[0]};
303 Offset2D offset_end{end[1], end[0]};
304 Extent2D extent = as_extent(offset_end - offset_begin);
305 ConstructInfo<Buffer2DView<T>> info;
306 info.dst = new_buffer.subview(offset_begin, extent);
307 construct_infos.emplace_back(std::move(info));
// Same comparison with the arguments reversed: the region in the old buffer is queued
// for destruction after `swap(mask, begin, end)` (presumably from masked_swap.h) is applied.
310 bool destruct = less(mask, end, begin);
313 swap(mask, begin, end);
314 Offset2D offset_begin{begin[1], begin[0]};
315 Offset2D offset_end{end[1], end[0]};
316 Extent2D extent = as_extent(offset_end - offset_begin);
317 DestructInfo<Buffer2DView<T>> info;
318 info.dst = old_buffer.subview(offset_begin, extent);
320 destruct_infos.emplace_back(std::move(info));
// Launch the queued operations: destruct first, then construct, then copy-construct.
328 for(
auto& info : destruct_infos)
329 kernel_destruct<T>(grid_dim, block_dim, stream, info.dst);
// NOTE(review): the body of this loop (the construct call) is not visible in this listing.
331 for(
auto& info : construct_infos)
334 for(
auto& info : copy_construct_infos)
335 kernel_copy_construct<T>(grid_dim, block_dim, stream, info.dst, info.src);
// Storage was reallocated: pass the old view to kernel_destruct and free its memory.
339 if(new_buffer.origin_data() != old_buffer.origin_data())
341 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
342 Memory(stream).free(old_buffer.origin_data());
// Reallocates a DeviceBuffer2D<T> so that its capacity equals its current extent.
// NOTE(review): the template header, the block_dim/stream parameters, braces and
// return lines are missing from this listing.
348MUDA_HOST
void NDReshaper::shrink_to_fit(
int grid_dim,
351 DeviceBuffer2D<T>& buffer)
353 using namespace details::buffer;
354 auto& m_data = buffer.m_data;
355 auto& m_pitch_bytes = buffer.m_pitch_bytes;
356 auto& m_extent = buffer.m_extent;
357 auto& m_capacity = buffer.m_capacity;
359 auto old_buffer = buffer.view();
360 Buffer2DView<T> new_buffer;
// Extent already equals capacity; presumably an early return follows (not visible here).
362 if(m_extent == m_capacity)
// Non-zero extent: allocate exactly m_extent, copy-construct the old contents and take
// over the new pitch.
366 if(!(m_extent == Extent2D::Zero()))
369 new_buffer = reserve_2d<T>(stream, m_extent);
372 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
374 m_pitch_bytes = new_buffer.pitch_bytes();
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free its memory.
377 if(old_buffer.origin_data())
380 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
383 Memory(stream).free(old_buffer.origin_data());
// Adopt the new view's origin pointer (new_buffer stays default-constructed when the
// extent is zero) and set capacity to the extent.
386 m_data = new_buffer.origin_data();
387 m_capacity = m_extent;
// Grows the storage of a DeviceBuffer2D<T> so that it can hold `new_capacity`.
// NOTE(review): the template header, the block_dim/stream parameters, braces and
// return lines are missing from this listing.
391MUDA_HOST
void NDReshaper::reserve(
int grid_dim,
394 DeviceBuffer2D<T>& buffer,
395 Extent2D new_capacity)
397 using namespace details::buffer;
399 auto& m_data = buffer.m_data;
400 auto& m_pitch_bytes = buffer.m_pitch_bytes;
401 auto& m_extent = buffer.m_extent;
402 auto& m_capacity = buffer.m_capacity;
404 auto old_buffer = buffer.view();
// Requested capacity already fits; presumably an early return follows (not visible here).
406 if(new_capacity <= m_capacity)
// The allocated capacity is max(new_capacity, m_capacity), so the result is never
// smaller than the current capacity.
409 new_capacity = max(new_capacity, m_capacity);
411 Buffer2DView<T> new_buffer = reserve_2d<T>(stream, new_capacity);
// Copy-construct the old contents into the subview of the new buffer that starts at the
// zero offset and has the current extent.
413 auto to_copy_construct = new_buffer.subview(Offset2D::Zero(), m_extent);
414 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free its memory.
416 if(old_buffer.origin_data())
418 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
419 Memory(stream).free(old_buffer.origin_data());
// Adopt the new allocation; m_extent is not modified by any visible line.
422 m_data = new_buffer.origin_data();
423 m_pitch_bytes = new_buffer.pitch_bytes();
424 m_capacity = new_capacity;
// Resizes a DeviceBuffer3D<T> to `new_extent`. Work is split into box-shaped regions
// that are queued as copy-construct / construct / destruct operations and launched
// afterwards.
// NOTE(review): several parameters (block_dim, stream, new_extent, the FConstruct
// functor), all braces, the if-bodies/else lines and the for_all_cell call line are
// missing from this listing; the comments below describe only the visible statements.
427template <
typename T,
typename FConstruct>
428MUDA_HOST
void NDReshaper::resize(
int grid_dim,
431 DeviceBuffer3D<T>& buffer,
435 using namespace details::buffer;
// Operate directly on the buffer's internal members through references.
437 auto& m_data = buffer.m_data;
438 auto& m_pitch_bytes = buffer.m_pitch_bytes;
439 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
440 auto& m_extent = buffer.m_extent;
441 auto& m_capacity = buffer.m_capacity;
// Extent unchanged; presumably an early return follows (not visible here).
443 if(new_extent == m_extent)
446 auto old_extent = m_extent;
// Pending operations, collected first and launched after all regions are classified.
448 std::list<CopyConstructInfo<Buffer3DView<T>>> copy_construct_infos;
449 std::list<ConstructInfo<Buffer3DView<T>>> construct_infos;
450 std::list<DestructInfo<Buffer3DView<T>>> destruct_infos;
452 Buffer3DView<T> old_buffer = buffer.view();
453 Buffer3DView<T> new_buffer;
// New extent fits in the current capacity: only the extent changes and the new view
// aliases the old one.
455 if(new_extent <= m_capacity)
458 m_extent = new_extent;
459 new_buffer = old_buffer;
// Otherwise (presumably the else branch): allocate with capacity
// max(new_extent, m_capacity) and update the buffer's pointer, both pitches, extent and
// capacity.
466 auto new_capacity = max(new_extent, m_capacity);
467 new_buffer = reserve_3d<T>(stream, new_capacity);
469 m_data = new_buffer.origin_data();
470 m_pitch_bytes = new_buffer.pitch_bytes();
471 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
472 m_extent = new_extent;
473 m_capacity = new_capacity;
// Per-component breakpoints {0, old, new}: component 0 is width, 1 is height, 2 is depth.
476 constexpr size_t N = 3;
477 Array<Array<size_t, 3>, N> offsets;
486 offsets[0] = {0ull, old_extent.width(), new_extent.width()};
487 offsets[1] = {0ull, old_extent.height(), new_extent.height()};
488 offsets[2] = {0ull, old_extent.depth(), new_extent.depth()};
// NOTE(review): need_copy is not read by any line visible in this listing.
489 bool need_copy = (new_buffer.data(0) !=
nullptr);
// Region-classification lambda. NOTE(review): presumably passed to for_all_cell, as in
// the 2D overload; that call line is not visible here.
492 [&](std::bitset<N> mask, Offset<N>& begin, Offset<N>& end)
// No mask bit set: this region is a copy-construct candidate.
494 bool copy_construct = !mask.any();
// A copy is queued only when the storage was actually reallocated (origin pointers differ).
498 if(new_buffer.origin_data() != old_buffer.origin_data())
// Offset3D is built with component 2 first, then 1, then 0.
501 Offset3D offset_begin{begin[2], begin[1], begin[0]};
502 Offset3D offset_end{end[2], end[1], end[0]};
503 Extent3D extent = as_extent(offset_end - offset_begin);
505 CopyConstructInfo<Buffer3DView<T>> info;
506 info.dst = new_buffer.subview(offset_begin, extent);
507 info.src = old_buffer.subview(offset_begin, extent);
508 copy_construct_infos.emplace_back(info);
// NOTE(review): `less(mask, a, b)` is defined elsewhere (presumably masked_compare.h);
// its exact semantics are not visible here. When it holds for (begin, end) the region
// in the new buffer is queued for construction.
519 bool construct = less(mask, begin, end);
522 Offset3D offset_begin{begin[2], begin[1], begin[0]};
523 Offset3D offset_end{end[2], end[1], end[0]};
524 Extent3D extent = as_extent(offset_end - offset_begin);
526 ConstructInfo<Buffer3DView<T>> info;
527 info.dst = new_buffer.subview(offset_begin, extent);
528 construct_infos.emplace_back(std::move(info));
// Same comparison with the arguments reversed: the region in the old buffer is queued
// for destruction after `swap(mask, begin, end)` (presumably from masked_swap.h) is applied.
531 bool destruct = less(mask, end, begin);
534 swap(mask, begin, end);
535 Offset3D offset_begin{begin[2], begin[1], begin[0]};
536 Offset3D offset_end{end[2], end[1], end[0]};
537 Extent3D extent = as_extent(offset_end - offset_begin);
539 DestructInfo<Buffer3DView<T>> info;
540 info.dst = old_buffer.subview(offset_begin, extent);
542 destruct_infos.emplace_back(std::move(info));
// Launch the queued operations: destruct first, then construct, then copy-construct.
550 for(
auto& info : destruct_infos)
551 kernel_destruct<T>(grid_dim, block_dim, stream, info.dst);
// NOTE(review): the body of this loop (the construct call) is not visible in this listing.
553 for(
auto& info : construct_infos)
556 for(
auto& info : copy_construct_infos)
557 kernel_copy_construct<T>(grid_dim, block_dim, stream, info.dst, info.src);
// Storage was reallocated: pass the old view to kernel_destruct and free its memory.
560 if(new_buffer.origin_data() != old_buffer.origin_data())
562 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
563 Memory(stream).free(old_buffer.origin_data());
// Reallocates a DeviceBuffer3D<T> so that its capacity equals its current extent.
// NOTE(review): the template header, the block_dim/stream parameters, braces and
// return lines are missing from this listing.
571MUDA_HOST
void NDReshaper::shrink_to_fit(
int grid_dim,
574 DeviceBuffer3D<T>& buffer)
576 using namespace details::buffer;
577 auto& m_data = buffer.m_data;
578 auto& m_pitch_bytes = buffer.m_pitch_bytes;
579 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
580 auto& m_extent = buffer.m_extent;
581 auto& m_capacity = buffer.m_capacity;
583 auto old_buffer = buffer.view();
584 Buffer3DView<T> new_buffer;
// Extent already equals capacity; presumably an early return follows (not visible here).
586 if(m_extent == m_capacity)
// Non-zero extent: allocate exactly m_extent, copy-construct the old contents and take
// over both pitch values of the new allocation.
589 if(!(m_extent == Extent3D::Zero()))
592 new_buffer = reserve_3d<T>(stream, m_extent);
595 kernel_copy_construct<T>(grid_dim, block_dim, stream, new_buffer, old_buffer);
597 m_pitch_bytes = new_buffer.pitch_bytes();
598 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free its memory.
601 if(old_buffer.origin_data())
604 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
607 Memory(stream).free(old_buffer.origin_data());
// Adopt the new view's origin pointer (new_buffer stays default-constructed when the
// extent is zero) and set capacity to the extent.
610 m_data = new_buffer.origin_data();
611 m_capacity = m_extent;
// Grows the storage of a DeviceBuffer3D<T> so that it can hold `new_capacity`.
// NOTE(review): the template header, the block_dim/stream parameters, braces and
// return lines are missing from this listing.
615MUDA_HOST
void NDReshaper::reserve(
int grid_dim,
618 DeviceBuffer3D<T>& buffer,
619 Extent3D new_capacity)
621 using namespace details::buffer;
623 auto& m_data = buffer.m_data;
624 auto& m_pitch_bytes = buffer.m_pitch_bytes;
625 auto& m_pitch_bytes_area = buffer.m_pitch_bytes_area;
626 auto& m_extent = buffer.m_extent;
627 auto& m_capacity = buffer.m_capacity;
629 auto old_buffer = buffer.view();
// Requested capacity already fits; presumably an early return follows (not visible here).
631 if(new_capacity <= m_capacity)
// The allocated capacity is max(new_capacity, m_capacity), so the result is never
// smaller than the current capacity.
634 new_capacity = max(new_capacity, m_capacity);
636 Buffer3DView<T> new_buffer = reserve_3d<T>(stream, new_capacity);
// Copy-construct the old contents into the subview of the new buffer that starts at the
// zero offset and has the current extent.
638 auto to_copy_construct = new_buffer.subview(Offset3D::Zero(), m_extent);
639 kernel_copy_construct<T>(grid_dim, block_dim, stream, to_copy_construct, old_buffer);
// Only when the old view has a non-null origin pointer: pass it to kernel_destruct and
// free its memory.
641 if(old_buffer.origin_data())
643 kernel_destruct<T>(grid_dim, block_dim, stream, old_buffer);
644 Memory(stream).free(old_buffer.origin_data());
// Adopt the new allocation; m_extent is not modified by any visible line.
647 m_data = new_buffer.origin_data();
648 m_pitch_bytes = new_buffer.pitch_bytes();
649 m_pitch_bytes_area = new_buffer.pitch_bytes_area();
650 m_capacity = new_capacity;
Definition nd_reshaper.inl:23
Definition nd_reshaper.inl:15
Definition nd_reshaper.inl:30
A light-weight wrapper of CUDA device memory. Like std::vector, it allows the user to resize,...
A light-weight wrapper of CUDA 3D device memory that allows the user to resize, reserve, shrink_to_fit,...