3MUDA_INLINE SubFieldInterface::~SubFieldInterface()
6 Memory().free(m_data_buffer).wait();
10void SubFieldInterface::resize_data_buffer(
size_t size, F&& func)
12 if(m_data_buffer ==
nullptr)
14 Memory().alloc(&m_data_buffer, size).set(m_data_buffer, size, 0).wait();
15 func(
nullptr, 0, m_data_buffer, size);
16 m_data_buffer_size = size;
18 else if(size > m_data_buffer_size)
20 auto old_ptr = m_data_buffer;
21 auto old_size = m_data_buffer_size;
23 std::byte* new_ptr =
nullptr;
24 Memory().alloc(&new_ptr, size);
25 func(old_ptr, old_size, new_ptr, size);
26 Memory().free(old_ptr);
31 m_data_buffer = new_ptr;
32 m_data_buffer_size = size;
36 func(m_data_buffer, m_data_buffer_size, m_data_buffer, size);
// Resizes the sub-field so that every entry holds `num_elements` elements.
// NOTE(review): this excerpt is missing lines (braces, the call that receives
// the lambda below, and statements inside both loops); the comments describe
// only the statements that are visible here.
41MUDA_INLINE
void SubFieldInterface::resize(
size_t num_elements)
// One scratch core per entry.
43 m_new_cores.resize(m_entries.size());
// Buffer size required for `num_elements`, as reported by
// require_total_buffer_byte_size().
46 auto buffer_byte_size = require_total_buffer_byte_size(num_elements);
// Callback with the (old_ptr, old_size, new_ptr, new_size) shape that
// resize_data_buffer() invokes; presumably passed to it together with
// `buffer_byte_size` — the call itself is not visible, confirm.
50 [&](std::byte* old_ptr,
size_t old_size, std::byte* new_ptr,
size_t new_size)
// First pass over the entries: record the new element count in each scratch core.
52 for(
size_t i = 0; i < m_entries.size(); i++)
54 auto& e = m_entries[i];
55 auto& c = m_new_cores[i];
58 c.m_info.elem_count = num_elements;
// Fill in the scratch cores for the new storage, then upload them
// (see async_upload_temp_cores()).
61 calculate_new_cores(new_ptr, new_size, num_elements, m_new_cores);
62 async_upload_temp_cores();
// Second pass: ask every entry to copy itself to the place described by its
// uploaded scratch core.
64 for(
size_t i = 0; i < m_entries.size(); i++)
66 auto& e = m_entries[i];
67 auto& c = m_new_cores[i];
68 auto& host_device_c = m_host_device_new_cores[i];
71 e->async_copy_to_new_place(host_device_c.view());
// Remember the new element count.
79 m_num_elements = num_elements;
// Definition header of SubFieldInterface::build().
// NOTE(review): the function body is not present in this excerpt, so its
// behaviour cannot be documented from here.
82MUDA_INLINE
void SubFieldInterface::build()
91MUDA_INLINE uint32_t SubFieldInterface::round_up(uint32_t x, uint32_t n)
93 MUDA_ASSERT((n & (n - 1)) == 0,
"n is not power of 2");
94 return (x + n - 1) & ~(n - 1);
97MUDA_INLINE uint32_t SubFieldInterface::align(uint32_t offset,
99 uint32_t min_alignment,
100 uint32_t max_alignment)
102 auto alignment = std::clamp(size, min_alignment, max_alignment);
103 return round_up(offset, alignment);
106MUDA_INLINE
void muda::SubFieldInterface::aync_upload_cores()
108 for(
auto&& e : m_entries)
110 *e->m_host_device_core.host_data() = e->m_core;
111 BufferLaunch().copy(e->m_host_device_core.buffer_view(), &e->m_core);
114MUDA_INLINE
void muda::SubFieldInterface::async_upload_temp_cores()
116 m_host_device_new_cores.resize(m_new_cores.size());
117 for(
size_t i = 0; i < m_new_cores.size(); ++i)
119 auto& core = m_new_cores[i];
120 auto& host_device_core = m_host_device_new_cores[i];
121 *host_device_core.host_data() = core;
122 BufferLaunch().copy(host_device_core.buffer_view(), &core);