MUDA
Loading...
Searching...
No Matches
sub_field_interface.inl
1namespace muda
2{
3MUDA_INLINE SubFieldInterface::~SubFieldInterface()
4{
5 if(m_data_buffer)
6 Memory().free(m_data_buffer).wait();
7};
8
9template <typename F>
10void SubFieldInterface::resize_data_buffer(size_t size, F&& func)
11{
12 if(m_data_buffer == nullptr)
13 {
14 Memory().alloc(&m_data_buffer, size).set(m_data_buffer, size, 0).wait();
15 func(nullptr, 0, m_data_buffer, size);
16 m_data_buffer_size = size;
17 }
18 else if(size > m_data_buffer_size)
19 {
20 auto old_ptr = m_data_buffer;
21 auto old_size = m_data_buffer_size;
22
23 std::byte* new_ptr = nullptr;
24 Memory().alloc(&new_ptr, size);
25 func(old_ptr, old_size, new_ptr, size);
26 Memory().free(old_ptr);
27
28 // m_data_buffer should be updated at last
29 // because the old field entry view need to
30 // copy the data to new place.
31 m_data_buffer = new_ptr;
32 m_data_buffer_size = size;
33 }
34 else
35 {
36 func(m_data_buffer, m_data_buffer_size, m_data_buffer, size);
37 }
38 wait_stream(nullptr);
39}
40
41MUDA_INLINE void SubFieldInterface::resize(size_t num_elements)
42{
43 m_new_cores.resize(m_entries.size());
44
45 // let subclass fill the new field entry cores
46 auto buffer_byte_size = require_total_buffer_byte_size(num_elements);
47
48 resize_data_buffer(
49 buffer_byte_size,
50 [&](std::byte* old_ptr, size_t old_size, std::byte* new_ptr, size_t new_size)
51 {
52 for(size_t i = 0; i < m_entries.size(); i++)
53 {
54 auto& e = m_entries[i];
55 auto& c = m_new_cores[i];
56 c = e->m_core; // copy the old core to the new core
57 c.m_buffer = new_ptr; // set new ptr to new core
58 c.m_info.elem_count = num_elements; // set new element count
59 }
60
61 calculate_new_cores(new_ptr, new_size, num_elements, m_new_cores);
62 async_upload_temp_cores();
63
64 for(size_t i = 0; i < m_entries.size(); i++)
65 {
66 auto& e = m_entries[i];
67 auto& c = m_new_cores[i];
68 auto& host_device_c = m_host_device_new_cores[i];
69
70 if(old_ptr)
71 e->async_copy_to_new_place(host_device_c.view()); // copy to new place (with layout)
72
73 e->m_core = c; // update the core
74 }
75
76 aync_upload_cores();
77 }); // sync here.
78
79 m_num_elements = num_elements;
80}
81
82MUDA_INLINE void SubFieldInterface::build()
83{
84 build_impl();
85 // no need to upload, because there is no data at all
86 // we wait until `resize()` to call: aync_upload_cores();
87 // aync_upload_cores();
88 wait_stream(nullptr);
89}
90
91MUDA_INLINE uint32_t SubFieldInterface::round_up(uint32_t x, uint32_t n)
92{
93 MUDA_ASSERT((n & (n - 1)) == 0, "n is not power of 2");
94 return (x + n - 1) & ~(n - 1);
95}
96
97MUDA_INLINE uint32_t SubFieldInterface::align(uint32_t offset,
98 uint32_t size,
99 uint32_t min_alignment,
100 uint32_t max_alignment)
101{
102 auto alignment = std::clamp(size, min_alignment, max_alignment);
103 return round_up(offset, alignment);
104}
105
106MUDA_INLINE void muda::SubFieldInterface::aync_upload_cores()
107{
108 for(auto&& e : m_entries)
109 {
110 *e->m_host_device_core.host_data() = e->m_core;
111 BufferLaunch().copy(e->m_host_device_core.buffer_view(), &e->m_core);
112 }
113}
114MUDA_INLINE void muda::SubFieldInterface::async_upload_temp_cores()
115{
116 m_host_device_new_cores.resize(m_new_cores.size());
117 for(size_t i = 0; i < m_new_cores.size(); ++i)
118 {
119 auto& core = m_new_cores[i];
120 auto& host_device_core = m_host_device_new_cores[i];
121 *host_device_core.host_data() = core;
122 BufferLaunch().copy(host_device_core.buffer_view(), &core);
123 }
124}
125} // namespace muda