3MUDA_INLINE Graph::Graph()
5 checkCudaErrors(cudaGraphCreate(&m_handle, 0));
8MUDA_INLINE Graph::~Graph()
11 checkCudaErrors(cudaGraphDestroy(m_handle));
15MUDA_INLINE Graph::Graph(Graph&& o)
16 : m_handle(std::move(o.m_handle))
17 , m_cached(std::move(o.m_cached))
22MUDA_INLINE Graph& Graph::operator=(Graph&& o)
26 m_handle = std::move(o.m_handle);
27 m_cached = std::move(o.m_cached);
32MUDA_INLINE
auto Graph::instantiate() -> S<GraphExec>
34 auto ret = std::make_shared<GraphExec>();
35 checkCudaErrors(cudaGraphInstantiate(&ret->m_handle, m_handle,
nullptr,
nullptr, 0));
39MUDA_INLINE
auto Graph::instantiate(Flags<GraphInstantiateFlagBit> flags) -> S<GraphExec>
41 auto ret = std::make_shared<GraphExec>();
42#if MUDA_WITH_DEVICE_STREAM_MODEL
44 cudaGraphInstantiateWithFlags(&ret->m_handle, m_handle,
static_cast<int>(flags)));
46 checkCudaErrors(cudaGraphInstantiateWithFlags(
47 &ret->m_handle, m_handle,
static_cast<int>(flags & GraphInstantiateFlagBit::FreeOnLaunch)));
54auto Graph::add_kernel_node(
const S<KernelNodeParms<T>>& kernelParms,
55 const std::vector<S<GraphNode>>& deps) -> S<KernelNode>
57 auto ret = std::make_shared<KernelNode>();
58 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
59 checkCudaErrors(cudaGraphAddKernelNode(
60 &ret->m_handle, m_handle, nodes.data(), nodes.size(), kernelParms->handle()));
65auto Graph::add_kernel_node(
const S<KernelNodeParms<T>>& kernelParms) -> S<KernelNode>
67 auto ret = std::make_shared<KernelNode>();
68 checkCudaErrors(cudaGraphAddKernelNode(
69 &ret->m_handle, m_handle,
nullptr, 0, kernelParms->handle()));
74auto Graph::add_host_node(
const S<HostNodeParms<T>>& hostParms,
75 const std::vector<S<GraphNode>>& deps) -> S<HostNode>
77 m_cached.push_back(hostParms);
78 auto ret = std::make_shared<HostNode>();
79 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
80 checkCudaErrors(cudaGraphAddHostNode(
81 &ret->m_handle, m_handle, nodes.data(), nodes.size(), hostParms->handle()));
86auto Graph::add_host_node(
const S<HostNodeParms<T>>& hostParms) -> S<HostNode>
88 m_cached.push_back(hostParms);
89 auto ret = std::make_shared<HostNode>();
91 cudaGraphAddHostNode(&ret->m_handle, m_handle,
nullptr, 0, hostParms->handle()));
96MUDA_INLINE
auto Graph::add_memcpy_node(
void* dst,
100 const std::vector<S<GraphNode>>& deps) -> S<MemcpyNode>
102 auto ret = std::make_shared<MemcpyNode>();
103 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
104 checkCudaErrors(cudaGraphAddMemcpyNode1D(
105 &ret->m_handle, m_handle, nodes.data(), nodes.size(), dst, src, size_bytes, kind));
109MUDA_INLINE
auto Graph::add_memcpy_node(
void* dst,
const void* src,
size_t size_bytes, cudaMemcpyKind kind)
112 auto ret = std::make_shared<MemcpyNode>();
113 checkCudaErrors(cudaGraphAddMemcpyNode1D(
114 &ret->m_handle, m_handle,
nullptr, 0, dst, src, size_bytes, kind));
119MUDA_INLINE
auto Graph::add_memcpy_node(
const cudaMemcpy3DParms& parms,
120 const std::vector<S<GraphNode>>& deps) -> S<MemcpyNode>
122 auto ret = std::make_shared<MemcpyNode>();
123 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
124 checkCudaErrors(cudaGraphAddMemcpyNode(
125 &ret->m_handle, m_handle, nodes.data(), nodes.size(), &parms));
129MUDA_INLINE
auto Graph::add_memset_node(
const cudaMemsetParams& parms,
130 const std::vector<S<GraphNode>>& deps) -> S<MemsetNode>
132 auto ret = std::make_shared<MemsetNode>();
133 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
134 checkCudaErrors(cudaGraphAddMemsetNode(
135 &ret->m_handle, m_handle, nodes.data(), nodes.size(), &parms));
139MUDA_INLINE
auto Graph::add_memset_node(
const cudaMemsetParams& parms) -> S<MemsetNode>
141 auto ret = std::make_shared<MemsetNode>();
142 checkCudaErrors(cudaGraphAddMemsetNode(&ret->m_handle, m_handle,
nullptr, 0, &parms));
146MUDA_INLINE
auto Graph::add_memcpy_node(
const cudaMemcpy3DParms& parms) -> S<MemcpyNode>
148 auto ret = std::make_shared<MemcpyNode>();
149 checkCudaErrors(cudaGraphAddMemcpyNode(&ret->m_handle, m_handle,
nullptr, 0, &parms));
153MUDA_INLINE
auto Graph::add_event_record_node(cudaEvent_t e,
154 const std::vector<S<GraphNode>>& deps)
155 -> S<EventRecordNode>
157 auto ret = std::make_shared<EventRecordNode>();
158 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
159 checkCudaErrors(cudaGraphAddEventRecordNode(
160 &ret->m_handle, m_handle, nodes.data(), nodes.size(), e));
164MUDA_INLINE
auto Graph::add_event_record_node(cudaEvent_t e) -> S<EventRecordNode>
166 auto ret = std::make_shared<EventRecordNode>();
167 checkCudaErrors(cudaGraphAddEventRecordNode(&ret->m_handle, m_handle,
nullptr, 0, e));
171MUDA_INLINE
auto Graph::add_event_wait_node(cudaEvent_t e,
172 const std::vector<S<GraphNode>>& deps)
175 auto ret = std::make_shared<EventWaitNode>();
176 std::vector<cudaGraphNode_t> nodes = map_dependencies(deps);
177 checkCudaErrors(cudaGraphAddEventWaitNode(
178 &ret->m_handle, m_handle, nodes.data(), nodes.size(), e));
182MUDA_INLINE
auto Graph::add_event_wait_node(cudaEvent_t e) -> S<EventWaitNode>
184 auto ret = std::make_shared<EventWaitNode>();
185 checkCudaErrors(cudaGraphAddEventWaitNode(&ret->m_handle, m_handle,
nullptr, 0, e));
189MUDA_INLINE
void Graph::add_dependency(S<GraphNode> from, S<GraphNode> to)
192 cudaGraphAddDependencies(m_handle, &(from->m_handle), &(to->m_handle), 1));
195MUDA_INLINE std::vector<cudaGraphNode_t> Graph::map_dependencies(
const std::vector<S<GraphNode>>& deps)
197 std::vector<cudaGraphNode_t> nodes;
198 nodes.reserve(deps.size());
200 nodes.push_back(d->m_handle);