// Generic reduction wrapper: forwards to cub::DeviceReduce::Reduce through
// MUDA_CUB_WRAPPER_IMPL, passing the caller's reduction_op and init on _stream.
// NOTE(review): the function-name line and the d_in / num_items / init
// parameter lines are not visible in this excerpt; those names appear only in
// the forwarded call. d_temp_storage and temp_storage_bytes are presumably
// provided by the macro or the enclosing class — confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm
// against the CUB version in use.
20 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ReductionOpT,
typename T>
22 OutputIteratorT d_out,
24 ReductionOpT reduction_op,
27 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::Reduce(
28 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, _stream,
false));
// Sum wrapper: forwards d_in, d_out and num_items to cub::DeviceReduce::Sum
// through MUDA_CUB_WRAPPER_IMPL, launching on _stream.
// Declared to return DeviceReduce&.
// d_temp_storage and temp_storage_bytes are not parameters of this overload —
// presumably supplied by the macro or the enclosing class; confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm
// against the CUB version in use.
31 template <
typename InputIteratorT,
typename OutputIteratorT>
32 DeviceReduce& Sum(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
34 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::Sum(
35 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Min wrapper: forwards d_in, d_out and num_items to cub::DeviceReduce::Min
// through MUDA_CUB_WRAPPER_IMPL, launching on _stream.
// Declared to return DeviceReduce&.
// d_temp_storage and temp_storage_bytes are not parameters of this overload —
// presumably supplied by the macro or the enclosing class; confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm
// against the CUB version in use.
39 template <
typename InputIteratorT,
typename OutputIteratorT>
40 DeviceReduce& Min(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
42 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::Min(
43 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// ArgMin wrapper: forwards d_in, d_out and num_items to
// cub::DeviceReduce::ArgMin through MUDA_CUB_WRAPPER_IMPL, launching on _stream.
// Declared to return DeviceReduce&.
// NOTE(review): the element type expected behind d_out is dictated by CUB's
// ArgMin contract, not by anything visible here — check the CUB documentation.
// d_temp_storage and temp_storage_bytes are not parameters of this overload —
// presumably supplied by the macro or the enclosing class; confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
47 template <
typename InputIteratorT,
typename OutputIteratorT>
48 DeviceReduce& ArgMin(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
50 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::ArgMin(
51 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Max wrapper: forwards d_in, d_out and num_items to cub::DeviceReduce::Max
// through MUDA_CUB_WRAPPER_IMPL, launching on _stream.
// Declared to return DeviceReduce&.
// d_temp_storage and temp_storage_bytes are not parameters of this overload —
// presumably supplied by the macro or the enclosing class; confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm
// against the CUB version in use.
55 template <
typename InputIteratorT,
typename OutputIteratorT>
56 DeviceReduce& Max(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
59 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::Max(
60 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// ArgMax wrapper: forwards d_in, d_out and num_items to
// cub::DeviceReduce::ArgMax through MUDA_CUB_WRAPPER_IMPL, launching on _stream.
// Declared to return DeviceReduce&.
// NOTE(review): the element type expected behind d_out is dictated by CUB's
// ArgMax contract, not by anything visible here — check the CUB documentation.
// d_temp_storage and temp_storage_bytes are not parameters of this overload —
// presumably supplied by the macro or the enclosing class; confirm.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
64 template <
typename InputIteratorT,
typename OutputIteratorT>
65 DeviceReduce& ArgMax(InputIteratorT d_in, OutputIteratorT d_out,
int num_items)
67 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::ArgMax(
68 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Keyed-reduction wrapper: takes unique-key, value, aggregate and run-count
// iterators plus a reduction_op, and calls cub::DeviceReduce::ReduceByKey
// through MUDA_CUB_WRAPPER_IMPL with d_temp_storage as the first argument.
// NOTE(review): the function-name line, the keys-input parameter and the
// remaining call arguments are not visible in this excerpt — confirm the
// argument order against the CUB ReduceByKey signature.
71 template <
typename KeysInputIteratorT,
typename UniqueOutputIteratorT,
typename ValuesInputIteratorT,
typename AggregatesOutputIteratorT,
typename NumRunsOutputIteratorT,
typename ReductionOpT>
73 UniqueOutputIteratorT d_unique_out,
74 ValuesInputIteratorT d_values_in,
75 AggregatesOutputIteratorT d_aggregates_out,
76 NumRunsOutputIteratorT d_num_runs_out,
77 ReductionOpT reduction_op,
80 MUDA_CUB_WRAPPER_IMPL(cub::DeviceReduce::ReduceByKey(d_temp_storage,
// Generic reduction wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::Reduce through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL,
// passing the caller's reduction_op and init on _stream.
// temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items / init parameter lines are not visible in this excerpt; those
// names appear only in the forwarded call.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
94 template <
typename InputIteratorT,
typename OutputIteratorT,
typename ReductionOpT,
typename T>
96 size_t& temp_storage_bytes,
98 OutputIteratorT d_out,
100 ReductionOpT reduction_op,
104 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::Reduce(
105 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, _stream,
false));
// Sum wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::Sum through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL on
// _stream. temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items parameter lines are not visible in this excerpt.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
108 template <
typename InputIteratorT,
typename OutputIteratorT>
110 size_t& temp_storage_bytes,
112 OutputIteratorT d_out,
115 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::Sum(
116 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Min wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::Min through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL on
// _stream. temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items parameter lines are not visible in this excerpt.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
120 template <
typename InputIteratorT,
typename OutputIteratorT>
122 size_t& temp_storage_bytes,
124 OutputIteratorT d_out,
127 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::Min(
128 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// ArgMin wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::ArgMin through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL on
// _stream. temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items parameter lines are not visible in this excerpt.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
131 template <
typename InputIteratorT,
typename OutputIteratorT>
133 size_t& temp_storage_bytes,
135 OutputIteratorT d_out,
138 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::ArgMin(
139 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Max wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::Max through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL on
// _stream. temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items parameter lines are not visible in this excerpt.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
142 template <
typename InputIteratorT,
typename OutputIteratorT>
144 size_t& temp_storage_bytes,
146 OutputIteratorT d_out,
150 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::Max(
151 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// ArgMax wrapper (explicit temp-storage variant): forwards to
// cub::DeviceReduce::ArgMax through MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL on
// _stream. temp_storage_bytes is taken by non-const reference and handed to CUB.
// NOTE(review): the function-name line and the d_temp_storage / d_in /
// num_items parameter lines are not visible in this excerpt.
// The trailing `false` is presumably CUB's debug_synchronous flag — confirm.
154 template <
typename InputIteratorT,
typename OutputIteratorT>
156 size_t& temp_storage_bytes,
158 OutputIteratorT d_out,
161 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceReduce::ArgMax(
162 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
165 template <
typename KeysInputIteratorT,
typename UniqueOutputIteratorT,
typename ValuesInputIteratorT,
typename AggregatesOutputIteratorT,
typename NumRunsOutputIteratorT,
typename ReductionOpT>
167 size_t& temp_storage_bytes,
168 KeysInputIteratorT d_keys_in,
169 UniqueOutputIteratorT d_unique_out,
170 ValuesInputIteratorT d_values_in,
171 AggregatesOutputIteratorT d_aggregates_out,
172 NumRunsOutputIteratorT d_num_runs_out,
173 ReductionOpT reduction_op,
176 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(
177 cub::DeviceReduce::ReduceByKey(d_temp_storage,