// Wrapper that forwards d_in, d_out and num_items to
// cub::DeviceScan::ExclusiveSum (CUB's exclusive prefix-sum primitive)
// through MUDA_CUB_WRAPPER_IMPL.
//
// Parameters:
//   d_in      - input iterator handed to CUB unchanged.
//   d_out     - output iterator handed to CUB unchanged.
//   num_items - element count handed to CUB (declared as int here).
// Returns: DeviceScan&.
//
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are not
// parameters of this overload; presumably the macro / enclosing class
// supplies them, and presumably the macro also produces the return value
// - TODO confirm against the MUDA_CUB_WRAPPER_IMPL definition.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
27 template <
typename InputIteratorT,
typename OutputIteratorT>
28 DeviceScan& ExclusiveSum(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
30 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::ExclusiveSum(
31 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Wrapper that forwards to cub::DeviceScan::ExclusiveScan (CUB's exclusive
// scan with a caller-supplied operator and initial value) through
// MUDA_CUB_WRAPPER_IMPL.
//
// Arguments passed on to CUB, in order: d_temp_storage, temp_storage_bytes,
// d_in, d_out, scan_op, init_value, num_items, _stream, false.
//
// Parameters visible here:
//   d_out      - output iterator (OutputIteratorT).
//   init_value - initial value handed to CUB (InitValueT).
//
// NOTE(review): d_in, scan_op and num_items are forwarded below but their
// declarations are not shown in this listing; presumably they are
// InputIteratorT / ScanOpT / int parameters - verify.
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are
// presumably supplied by the macro / enclosing class - TODO confirm.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
35 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ScanOpT,
typename InitValueT>
37 OutputIteratorT d_out,
39 InitValueT init_value,
42 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::ExclusiveScan(
43 d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, _stream,
false));
// Wrapper that forwards d_in, d_out and num_items to
// cub::DeviceScan::InclusiveSum (CUB's inclusive prefix-sum primitive)
// through MUDA_CUB_WRAPPER_IMPL.
//
// Parameters:
//   d_in      - input iterator handed to CUB unchanged.
//   d_out     - output iterator handed to CUB unchanged.
//   num_items - element count handed to CUB (declared as int here).
// Returns: DeviceScan&.
//
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are not
// parameters of this overload; presumably the macro / enclosing class
// supplies them, and presumably the macro also produces the return value
// - TODO confirm against the MUDA_CUB_WRAPPER_IMPL definition.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
47 template <
typename InputIteratorT,
typename OutputIteratorT>
48 DeviceScan& InclusiveSum(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
50 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::InclusiveSum(
51 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Wrapper that forwards d_in, d_out, scan_op and num_items to
// cub::DeviceScan::InclusiveScan (CUB's inclusive scan with a
// caller-supplied operator) through MUDA_CUB_WRAPPER_IMPL.
//
// Parameters:
//   d_in      - input iterator handed to CUB unchanged.
//   d_out     - output iterator handed to CUB unchanged.
//   scan_op   - scan operator handed to CUB (ScanOpT, passed by value).
//   num_items - element count handed to CUB (declared as int here).
// Returns: DeviceScan&.
//
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are not
// parameters of this overload; presumably the macro / enclosing class
// supplies them, and presumably the macro also produces the return value
// - TODO confirm against the MUDA_CUB_WRAPPER_IMPL definition.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
54 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ScanOpT>
55 DeviceScan& InclusiveScan(InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op,
int num_items)
57 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::InclusiveScan(
58 d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, num_items, _stream,
false));
// Wrapper that forwards to cub::DeviceScan::ExclusiveSumByKey (CUB's
// exclusive prefix sum over key-delimited segments) through
// MUDA_CUB_WRAPPER_IMPL.
//
// Parameters visible here:
//   d_keys_in    - key input iterator handed to CUB unchanged.
//   d_values_in  - value input iterator handed to CUB unchanged.
//   d_values_out - value output iterator handed to CUB unchanged.
//   equality_op  - key comparison functor; defaults to a
//                  default-constructed EqualityOpT, which itself defaults
//                  to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): num_items is forwarded below but its declaration is not
// shown in this listing; presumably an int parameter placed before
// equality_op - verify.
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are
// presumably supplied by the macro / enclosing class, and the macro
// presumably produces the return value - TODO confirm.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
61 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename EqualityOpT = cub::Equality>
62 DeviceScan& ExclusiveSumByKey(KeysInputIteratorT d_keys_in,
63 ValuesInputIteratorT d_values_in,
64 ValuesOutputIteratorT d_values_out,
66 EqualityOpT equality_op = EqualityOpT())
68 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::ExclusiveSumByKey(
69 d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, _stream,
false));
// Wrapper that forwards to cub::DeviceScan::ExclusiveScanByKey (CUB's
// exclusive scan over key-delimited segments with a caller-supplied
// operator and initial value) through MUDA_CUB_WRAPPER_IMPL.
//
// Parameters visible here:
//   d_keys_in    - key input iterator.
//   d_values_in  - value input iterator.
//   d_values_out - value output iterator.
//   init_value   - initial value (InitValueT).
//   equality_op  - key comparison functor; defaults to a
//                  default-constructed EqualityOpT, which itself defaults
//                  to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): the ScanOpT template parameter implies a scan operator
// argument, and CUB needs an item count, but neither declaration is shown
// in this listing - verify against the full header.
// NOTE(review): only the first argument of the CUB call (d_temp_storage)
// is visible; the remaining arguments cannot be confirmed from here.
72 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename ScanOpT,
typename InitValueT,
typename EqualityOpT = cub::Equality>
73 DeviceScan& ExclusiveScanByKey(KeysInputIteratorT d_keys_in,
74 ValuesInputIteratorT d_values_in,
75 ValuesOutputIteratorT d_values_out,
77 InitValueT init_value,
79 EqualityOpT equality_op = EqualityOpT())
81 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::ExclusiveScanByKey(d_temp_storage,
// Wrapper that forwards to cub::DeviceScan::InclusiveSumByKey (CUB's
// inclusive prefix sum over key-delimited segments) through
// MUDA_CUB_WRAPPER_IMPL.
//
// Parameters visible here:
//   d_keys_in    - key input iterator handed to CUB unchanged.
//   d_values_in  - value input iterator handed to CUB unchanged.
//   d_values_out - value output iterator handed to CUB unchanged.
//   equality_op  - key comparison functor; defaults to a
//                  default-constructed EqualityOpT, which itself defaults
//                  to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): num_items is forwarded below but its declaration is not
// shown in this listing; presumably an int parameter placed before
// equality_op - verify.
// NOTE(review): d_temp_storage, temp_storage_bytes and _stream are
// presumably supplied by the macro / enclosing class, and the macro
// presumably produces the return value - TODO confirm.
// NOTE(review): the trailing `false` looks like CUB's debug_synchronous
// argument - confirm against the CUB version in use.
94 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename EqualityOpT = cub::Equality>
95 DeviceScan& InclusiveSumByKey(KeysInputIteratorT d_keys_in,
96 ValuesInputIteratorT d_values_in,
97 ValuesOutputIteratorT d_values_out,
99 EqualityOpT equality_op = EqualityOpT())
101 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::InclusiveSumByKey(
102 d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, _stream,
false));
// Wrapper that forwards to cub::DeviceScan::InclusiveScanByKey (CUB's
// inclusive scan over key-delimited segments with a caller-supplied
// operator) through MUDA_CUB_WRAPPER_IMPL.
//
// Parameters visible here:
//   d_keys_in    - key input iterator.
//   d_values_in  - value input iterator.
//   d_values_out - value output iterator.
//   equality_op  - key comparison functor; defaults to a
//                  default-constructed EqualityOpT, which itself defaults
//                  to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): the ScanOpT template parameter implies a scan operator
// argument, and CUB needs an item count, but neither declaration is shown
// in this listing - verify against the full header.
// NOTE(review): only the first argument of the CUB call (d_temp_storage)
// is visible; the remaining arguments cannot be confirmed from here.
105 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename ScanOpT,
typename EqualityOpT = cub::Equality>
106 DeviceScan& InclusiveScanByKey(KeysInputIteratorT d_keys_in,
107 ValuesInputIteratorT d_values_in,
108 ValuesOutputIteratorT d_values_out,
111 EqualityOpT equality_op = EqualityOpT())
113 MUDA_CUB_WRAPPER_IMPL(cub::DeviceScan::InclusiveScanByKey(d_temp_storage,
// Overload of ExclusiveSum that takes the temporary storage explicitly and
// forwards to cub::DeviceScan::ExclusiveSum through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_out              - output iterator handed to CUB unchanged.
// Returns: DeviceScan&.
//
// NOTE(review): d_in and num_items are forwarded below but their
// declarations are not shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
127 template <
typename InputIteratorT,
typename OutputIteratorT>
128 DeviceScan& ExclusiveSum(
void* d_temp_storage,
129 size_t& temp_storage_bytes,
131 OutputIteratorT d_out,
134 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::ExclusiveSum(
135 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Overload of ExclusiveScan that takes the temporary storage explicitly and
// forwards to cub::DeviceScan::ExclusiveScan through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_out              - output iterator handed to CUB unchanged.
//   init_value         - initial value handed to CUB (InitValueT).
// Returns: DeviceScan&.
//
// NOTE(review): d_in, scan_op and num_items are forwarded below but their
// declarations are not shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
139 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ScanOpT,
typename InitValueT>
140 DeviceScan& ExclusiveScan(
void* d_temp_storage,
141 size_t& temp_storage_bytes,
143 OutputIteratorT d_out,
145 InitValueT init_value,
148 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::ExclusiveScan(
149 d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, _stream,
false));
// Overload of InclusiveSum that takes the temporary storage explicitly and
// forwards to cub::DeviceScan::InclusiveSum through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_out              - output iterator handed to CUB unchanged.
// Returns: DeviceScan&.
//
// NOTE(review): d_in and num_items are forwarded below but their
// declarations are not shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
153 template <
typename InputIteratorT,
typename OutputIteratorT>
154 DeviceScan& InclusiveSum(
void* d_temp_storage,
155 size_t& temp_storage_bytes,
157 OutputIteratorT d_out,
160 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::InclusiveSum(
161 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Overload of InclusiveScan that takes the temporary storage explicitly and
// forwards to cub::DeviceScan::InclusiveScan through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_out              - output iterator handed to CUB unchanged.
// Returns: DeviceScan&.
//
// NOTE(review): d_in, scan_op and num_items are forwarded below but their
// declarations are not shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
164 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ScanOpT>
165 DeviceScan& InclusiveScan(
void* d_temp_storage,
166 size_t& temp_storage_bytes,
168 OutputIteratorT d_out,
172 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::InclusiveScan(
173 d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, num_items, _stream,
false));
// Overload of ExclusiveSumByKey that takes the temporary storage explicitly
// and forwards to cub::DeviceScan::ExclusiveSumByKey through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_keys_in          - key input iterator handed to CUB unchanged.
//   d_values_in        - value input iterator handed to CUB unchanged.
//   d_values_out       - value output iterator handed to CUB unchanged.
//   equality_op        - key comparison functor; defaults to a
//                        default-constructed EqualityOpT, which itself
//                        defaults to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): num_items is forwarded below but its declaration is not
// shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
176 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename EqualityOpT = cub::Equality>
177 DeviceScan& ExclusiveSumByKey(
void* d_temp_storage,
178 size_t& temp_storage_bytes,
179 KeysInputIteratorT d_keys_in,
180 ValuesInputIteratorT d_values_in,
181 ValuesOutputIteratorT d_values_out,
183 EqualityOpT equality_op = EqualityOpT())
185 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::ExclusiveSumByKey(
186 d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, _stream,
false));
// Overload of ExclusiveScanByKey that takes the temporary storage
// explicitly and forwards to cub::DeviceScan::ExclusiveScanByKey through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference.
//   d_keys_in          - key input iterator.
//   d_values_in        - value input iterator.
//   d_values_out       - value output iterator.
//   init_value         - initial value (InitValueT).
//   equality_op        - key comparison functor; defaults to a
//                        default-constructed EqualityOpT, which itself
//                        defaults to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): the ScanOpT template parameter implies a scan operator
// argument, and CUB needs an item count, but neither declaration is shown
// in this listing - verify against the full header.
// NOTE(review): only the first argument of the CUB call (d_temp_storage)
// is visible; the remaining arguments cannot be confirmed from here.
189 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename ScanOpT,
typename InitValueT,
typename EqualityOpT = cub::Equality>
190 DeviceScan& ExclusiveScanByKey(
void* d_temp_storage,
191 size_t& temp_storage_bytes,
192 KeysInputIteratorT d_keys_in,
193 ValuesInputIteratorT d_values_in,
194 ValuesOutputIteratorT d_values_out,
196 InitValueT init_value,
198 EqualityOpT equality_op = EqualityOpT())
200 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(
201 cub::DeviceScan::ExclusiveScanByKey(d_temp_storage,
// Overload of InclusiveSumByKey that takes the temporary storage explicitly
// and forwards to cub::DeviceScan::InclusiveSumByKey through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_keys_in          - key input iterator handed to CUB unchanged.
//   d_values_in        - value input iterator handed to CUB unchanged.
//   d_values_out       - value output iterator handed to CUB unchanged.
//   equality_op        - key comparison functor; defaults to a
//                        default-constructed EqualityOpT, which itself
//                        defaults to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): num_items is forwarded below but its declaration is not
// shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
214 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename EqualityOpT = cub::Equality>
215 DeviceScan& InclusiveSumByKey(
void* d_temp_storage,
216 size_t& temp_storage_bytes,
217 KeysInputIteratorT d_keys_in,
218 ValuesInputIteratorT d_values_in,
219 ValuesOutputIteratorT d_values_out,
221 EqualityOpT equality_op = EqualityOpT())
223 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::InclusiveSumByKey(
224 d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, num_items, equality_op, _stream,
false));
// Overload of InclusiveScanByKey that takes the temporary storage
// explicitly and forwards to cub::DeviceScan::InclusiveScanByKey through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
//
// Arguments passed on to CUB, in order: d_temp_storage, temp_storage_bytes,
// d_keys_in, d_values_in, d_values_out, scan_op, num_items, equality_op,
// _stream, false.
//
// Parameters visible here:
//   d_temp_storage     - caller-provided scratch pointer, handed to CUB.
//   temp_storage_bytes - scratch size, taken by non-const reference and
//                        handed to CUB.
//   d_keys_in          - key input iterator handed to CUB unchanged.
//   d_values_in        - value input iterator handed to CUB unchanged.
//   d_values_out       - value output iterator handed to CUB unchanged.
//   equality_op        - key comparison functor; defaults to a
//                        default-constructed EqualityOpT, which itself
//                        defaults to cub::Equality.
// Returns: DeviceScan&.
//
// NOTE(review): scan_op and num_items are forwarded below but their
// declarations are not shown in this listing - verify.
// NOTE(review): per CUB's usual convention a null d_temp_storage makes the
// call only report the required size in temp_storage_bytes; whether the
// macro preserves that behaviour cannot be seen here - TODO confirm.
// NOTE(review): _stream is presumably a member of the enclosing class, and
// the trailing `false` looks like CUB's debug_synchronous argument.
227 template <
typename KeysInputIteratorT,
typename ValuesInputIteratorT,
typename ValuesOutputIteratorT,
typename ScanOpT,
typename EqualityOpT = cub::Equality>
228 DeviceScan& InclusiveScanByKey(
void* d_temp_storage,
229 size_t& temp_storage_bytes,
230 KeysInputIteratorT d_keys_in,
231 ValuesInputIteratorT d_values_in,
232 ValuesOutputIteratorT d_values_out,
235 EqualityOpT equality_op = EqualityOpT())
237 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceScan::InclusiveScanByKey(
238 d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, scan_op, num_items, equality_op, _stream,
false));