14 size_t m_shared_memory_size;
15 cudaStream_t m_stream;
19 MUDA_GENERIC
Kernel(dim3 grid_dim, dim3 m_block_dim,
size_t shared_memory_size, cudaStream_t stream, F f)
20 : m_grid_dim(grid_dim)
21 , m_block_dim(m_block_dim)
22 , m_shared_memory_size(shared_memory_size)
27 MUDA_KERNEL_ASSERT(stream == details::stream::tail_launch()
28 || stream == details::stream::fire_and_forget(),
29 "Kernel Launch on device with invalid stream! "
30 "Only Stream::TailLaunch{} and Stream::FireAndForget{} are allowed");
39 MUDA_GENERIC
Kernel(dim3 grid_dim, dim3 m_block_dim, F f)
40 :
Kernel{grid_dim, m_block_dim, 0, 0, f}
44 MUDA_GENERIC
Kernel(dim3 grid_dim, dim3 m_block_dim,
size_t shared_memory_size, F f)
45 :
Kernel{grid_dim, m_block_dim, shared_memory_size, 0, f}
49 MUDA_GENERIC
Kernel(dim3 grid_dim, dim3 m_block_dim, cudaStream_t stream, F f)
50 :
Kernel{grid_dim, m_block_dim, 0, stream, f}
54 MUDA_GENERIC
Kernel(cudaStream_t stream, F f)
55 :
Kernel{1, 1, 0, stream, f}
59 template <
typename... Args>
60 MUDA_GENERIC
void operator()(Args&&... args) &&
62 static_assert(std::is_invocable_v<F, Args...>,
"invalid arguments");
63#if MUDA_WITH_DEVICE_STREAM_MODEL
64 m_kernel<<<m_grid_dim, m_block_dim, m_shared_memory_size, m_stream>>>(
65 std::forward<Args>(args)...);
66 checkCudaErrors(cudaGetLastError());
68 cudaStream_t stream =
nullptr;
69 if(m_stream == details::stream::tail_launch())
71 checkCudaErrors(cudaDeviceSynchronize());
73 else if(m_stream == details::stream::fire_and_forget())
81 m_kernel<<<m_grid_dim, m_block_dim, m_shared_memory_size, stream>>>(
82 std::forward<Args>(args)...);
83 checkCudaErrors(cudaGetLastError());