MUDA
Loading...
Searching...
No Matches
soa_sub_field.inl
1namespace muda
2{
3MUDA_INLINE void SubFieldImpl<FieldEntryLayout::SoA>::build_impl()
4{
5 auto min_alignment = build_options().min_alignment;
6 auto max_alignment = build_options().max_alignment;
7 // we use the max alignment as the base array size
8 auto base_array_size = max_alignment;
9 // e.g. base array size = 4
10 // a "Struct" is something like the following, where M/V/S are 3 different entries, has type of matrix/vector/scalar
11 //tex:
12 // $$
13 // \begin{bmatrix}
14 // M_{11} & M_{11} & M_{11} & M_{11}\\
15 // M_{21} & M_{21} & M_{21} & M_{21}\\
16 // M_{12} & M_{12} & M_{12} & M_{12}\\
17 // M_{22} & M_{22} & M_{22} & M_{22}\\
18 // V_x & V_x & V_x & V_x\\
19 // V_y & V_y & V_y & V_y\\
20 // V_z & V_z & V_z & V_z\\
21 // S & S & S & S \\
22 // \end{bmatrix}
23 // $$
24 uint32_t struct_stride = 0; // the stride of the "Struct"=> SoA total size
25 for(auto& e : m_entries) // in an entry, the elem type is the same (e.g. float/int/double...)
26 {
27 // elem type = float/double/int ... or User Type
28 auto elem_byte_size = e->elem_byte_size();
29 // total elem count in innermost array:
30 // scalar=1 vector3 = 3, vector4 = 4, matrix3x3 = 9, matrix4x4 = 16, and so on
31 auto elem_count = e->shape().x * e->shape().y;
32 struct_stride = align(struct_stride, elem_byte_size, min_alignment, max_alignment);
33 auto total_elem_count_in_base_array = e->shape().x * e->shape().y * base_array_size;
34 // now struct_stride is the offset of the entry in the "Struct"
35 e->m_core.m_info.offset_in_base_struct = struct_stride;
36 struct_stride += elem_byte_size * total_elem_count_in_base_array;
37 }
38
39 MUDA_ASSERT(struct_stride % base_array_size == 0,
40 "m_struct_stride should be multiple of base_array_size");
41
42 m_base_struct_stride = struct_stride;
43
44 m_h_copy_map_buffer.reserve(4 * m_entries.size());
45 for(size_t i = 0; i < m_entries.size(); ++i)
46 {
47 auto& e = m_entries[i];
48 e->m_core.m_info.struct_stride = m_struct_stride;
49 auto btye_in_base_array = e->elem_byte_size() * max_alignment; // the size of the entry in the base array
50 auto first_comp_offset_in_base_struct =
51 e->m_core.m_info.offset_in_base_struct; // the offset of the entry in the base struct
52 auto comp_count = e->shape().x * e->shape().y;
53 for(int i = 0; i < comp_count; ++i)
54 {
55 details::SoACopyMap copy_map;
56 copy_map.offset_in_base_struct =
57 first_comp_offset_in_base_struct + i * btye_in_base_array;
58 copy_map.elem_byte_size = e->elem_byte_size();
59 m_h_copy_map_buffer.push_back(copy_map);
60 }
61 }
62 // copy to device
63 m_copy_map_buffer = m_h_copy_map_buffer;
64}
65
66//namespace details
67//{
68// void soa_map_copy(BufferView<SoACopyMap> copy_maps,
69// size_t base_strcut_stride,
70// uint32_t base,
71// uint32_t old_count_of_base,
72// uint32_t new_count_of_base,
73// std::byte* old_ptr,
74// std::byte* new_ptr)
75// {
76// auto rounded_old_count = old_count_of_base * base;
77// Memory().set(new_ptr, new_count_of_base * base_strcut_stride, 0).wait();
78// ParallelFor(LIGHT_WORKLOAD_BLOCK_SIZE)
79// .apply(old_count_of_base * base,
80// [old_ptr,
81// new_ptr,
82// rounded_old_count,
83// old_count_of_base,
84// new_count_of_base,
85// copy_maps = copy_maps.viewer()] __device__(int i) mutable
86// {
87// for(int j = 0; j < copy_maps.dim(); ++j)
88// {
89// auto map = copy_maps(j);
90// auto total_byte = rounded_old_count * map.elem_byte_size; // the total byte
91//
92// auto old_offset_in_struct =
93// map.offset_in_base_struct * old_count_of_base;
94//
95// auto new_offset_in_struct =
96// map.offset_in_base_struct * new_count_of_base;
97//
98// for(int k = 0; k < map.elem_byte_size; ++k)
99// {
100// auto begin = rounded_old_count * k;
101// auto offset = begin + i;
102//
103// auto old_offset = old_offset_in_struct + offset;
104//
105// auto new_offset = new_offset_in_struct + offset;
106//
107// new_ptr[new_offset] = old_ptr[old_offset];
108// }
109// }
110// })
111// .wait();
112// }
113//} // namespace details
114
115MUDA_INLINE size_t SubFieldImpl<FieldEntryLayout::SoA>::require_total_buffer_byte_size(size_t num_elements)
116{
117 auto base = m_build_options.max_alignment;
118 auto old_count_of_base = (m_num_elements + base - 1) / base;
119 auto new_count_of_base = (num_elements + base - 1) / base;
120 auto rounded_new_count = base * new_count_of_base;
121 auto total_bytes = m_base_struct_stride * new_count_of_base;
122 return total_bytes;
123}
124
125MUDA_INLINE void SubFieldImpl<FieldEntryLayout::SoA>::calculate_new_cores(
126 std::byte* byte_buffer, size_t total_bytes, size_t element_count, span<FieldEntryCore> new_cores)
127{
128 auto base = m_build_options.max_alignment;
129 auto old_count_of_base = (m_num_elements + base - 1) / base;
130 auto new_count_of_base = (element_count + base - 1) / base;
131 auto rounded_new_count = base * new_count_of_base;
132
133 for(auto& new_core : new_cores)
134 {
135 new_core.m_info.struct_stride = m_struct_stride;
136 new_core.m_info.offset_in_struct =
137 new_core.m_info.offset_in_base_struct * new_count_of_base;
138 new_core.m_info.elem_count_based_stride =
139 new_core.m_info.elem_byte_size * rounded_new_count;
140 }
141}
142} // namespace muda