92 using real_type = Real;
93 using index_type = std::uint32_t;
94 using object_type = Object;
97 using aabb_getter_type = AABBGetter;
98 using morton_code_calculator_type = MortonCodeCalculator;
103 BVH(
const BVH&) =
default;
105 BVH& operator=(
const BVH&) =
default;
106 BVH& operator=(
BVH&&) =
default;
110 this->m_objects.clear();
111 this->m_aabbs.clear();
112 this->m_nodes.clear();
119 static_cast<uint32_t
>(m_nodes.size()),
120 static_cast<uint32_t
>(m_objects.size()),
121 thrust::raw_pointer_cast(m_nodes.data()),
122 thrust::raw_pointer_cast(m_aabbs.data()),
123 thrust::raw_pointer_cast(m_objects.data())};
129 static_cast<uint32_t
>(m_nodes.size()),
130 static_cast<uint32_t
>(m_objects.size()),
131 thrust::raw_pointer_cast(m_nodes.data()),
132 thrust::raw_pointer_cast(m_aabbs.data()),
133 thrust::raw_pointer_cast(m_objects.data())};
137 void build(cudaStream_t stream =
nullptr)
139 auto policy = thrust::system::cuda::par_nosync.on(stream);
142 if(m_objects.size() == 0u)
149 const uint32_t num_objects = m_objects.size();
150 const uint32_t num_internal_nodes = num_objects - 1;
151 const uint32_t num_nodes = num_objects * 2 - 1;
156 const auto inf = std::numeric_limits<real_type>::infinity();
158 default_aabb.upper.x = -inf;
159 default_aabb.lower.x = inf;
160 default_aabb.upper.y = -inf;
161 default_aabb.lower.y = inf;
162 default_aabb.upper.z = -inf;
163 default_aabb.lower.z = inf;
165 this->m_aabbs.resize(num_nodes, default_aabb);
166 m_morton.resize(num_objects);
167 m_indices.resize(num_objects);
168 m_morton64.resize(num_objects);
170 default_node.parent_idx = 0xFFFFFFFF;
171 default_node.left_idx = 0xFFFFFFFF;
172 default_node.right_idx = 0xFFFFFFFF;
173 default_node.object_idx = 0xFFFFFFFF;
174 m_nodes.resize(num_nodes, default_node);
175 m_flag_container.clear();
176 m_flag_container.resize(num_internal_nodes, 0);
178 thrust::transform(policy,
179 this->m_objects.begin(),
180 this->m_objects.end(),
181 m_aabbs.begin() + num_internal_nodes,
192 const auto aabb_whole = thrust::reduce(
194 m_aabbs.data() + num_internal_nodes,
195 m_aabbs.data() + m_aabbs.size(),
198 {
return merge(lhs, rhs); });
200 thrust::transform(policy,
201 this->m_objects.begin(),
202 this->m_objects.end(),
203 m_aabbs.begin() + num_internal_nodes,
205 morton_code_calculator_type(aabb_whole));
212 thrust::make_counting_iterator<index_type>(0),
213 thrust::make_counting_iterator<index_type>(num_objects),
217 thrust::stable_sort_by_key(
221 thrust::make_zip_iterator(thrust::make_tuple(m_aabbs.begin() + num_internal_nodes,
222 m_indices.begin())));
228 const auto uniqued = thrust::unique_copy(
229 policy, m_morton.begin(), m_morton.end(), m_morton64.begin());
231 const bool morton_code_is_unique = (m_morton64.end() == uniqued);
232 if(!morton_code_is_unique)
234 thrust::transform(policy,
239 [] __device__ __host__(
const uint32_t m,
const uint32_t idx)
241 unsigned long long int m64 = m;
251 thrust::transform(policy,
254 this->m_nodes.begin() + num_internal_nodes,
255 [] __device__ __host__(
const index_type idx)
258 n.parent_idx = 0xFFFFFFFF;
259 n.left_idx = 0xFFFFFFFF;
260 n.right_idx = 0xFFFFFFFF;
268 if(morton_code_is_unique)
270 const uint32_t* node_code = thrust::raw_pointer_cast(m_morton.data());
271 details::construct_internal_nodes(
272 policy, thrust::raw_pointer_cast(m_nodes.data()), node_code, num_objects);
276 const unsigned long long int* node_code =
277 thrust::raw_pointer_cast(m_morton64.data());
278 details::construct_internal_nodes(
279 policy, thrust::raw_pointer_cast(m_nodes.data()), node_code, num_objects);
285 const auto flags = thrust::raw_pointer_cast(m_flag_container.data());
288 thrust::for_each(policy,
289 thrust::make_counting_iterator<index_type>(num_internal_nodes),
290 thrust::make_counting_iterator<index_type>(num_nodes),
291 [nodes = thrust::raw_pointer_cast(m_nodes.data()),
292 aabbs = thrust::raw_pointer_cast(m_aabbs.data()),
293 flags] __device__(index_type idx)
295 uint32_t parent = nodes[idx].parent_idx;
296 while(parent != 0xFFFFFFFF)
298 const int old = atomicCAS(flags + parent, 0, 1);
305 MUDA_KERNEL_ASSERT(old == 1,
"old=%d", old);
309 const auto lidx = nodes[parent].left_idx;
310 const auto ridx = nodes[parent].right_idx;
311 const auto lbox = aabbs[lidx];
312 const auto rbox = aabbs[ridx];
313 aabbs[parent] = merge(lbox, rbox);
316 parent = nodes[parent].parent_idx;
322 const auto& objects()
const noexcept {
return m_objects; }
323 auto& objects()
noexcept {
return m_objects; }
324 const auto& aabbs()
const noexcept {
return m_aabbs; }
325 const auto& nodes()
const noexcept {
return m_nodes; }
327 const auto& host_objects()
const noexcept
332 const auto& host_aabbs()
const noexcept
337 const auto& host_nodes()
const noexcept
353 mutable bool m_host_dirty =
true;
354 mutable thrust::host_vector<object_type> m_h_objects;
355 mutable thrust::host_vector<aabb_type> m_h_aabbs;
356 mutable thrust::host_vector<node_type> m_h_nodes;
358 void download_if_dirty()
const
362 m_h_objects = m_objects;
365 m_host_dirty =
false;