// Wrapper whose visible body invokes cub::DeviceSegmentedReduce::Reduce inside
// MUDA_CUB_WRAPPER_IMPL.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT), d_end_offsets (EndOffsetIteratorT) and reduction_op
// (ReductionOp).
// NOTE(review): the function-name line, the braces, the remaining parameters
// (presumably including ones typed InputIteratorT and T) and every call
// argument after d_temp_storage are not visible in this extract - confirm
// against the full file before editing.
18 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT,
typename ReductionOp,
typename T>
20 OutputIteratorT d_out,
22 BeginOffsetIteratorT d_begin_offsets,
23 EndOffsetIteratorT d_end_offsets,
24 ReductionOp reduction_op,
29 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::Reduce(d_temp_storage,
// Wrapper around cub::DeviceSegmentedReduce::Sum, invoked through
// MUDA_CUB_WRAPPER_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT) and d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_in, num_segments, d_temp_storage and temp_storage_bytes are not visible in
// this extract; the last two are presumably supplied by the macro or by a
// hidden parameter - confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
42 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
44 OutputIteratorT d_out,
46 BeginOffsetIteratorT d_begin_offsets,
47 EndOffsetIteratorT d_end_offsets)
49 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::Sum(
50 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::Min, invoked through
// MUDA_CUB_WRAPPER_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT) and d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_in, num_segments, d_temp_storage and temp_storage_bytes are not visible in
// this extract; the last two are presumably supplied by the macro or by a
// hidden parameter - confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
53 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
55 OutputIteratorT d_out,
57 BeginOffsetIteratorT d_begin_offsets,
58 EndOffsetIteratorT d_end_offsets)
60 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::Min(
61 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::ArgMin, invoked through
// MUDA_CUB_WRAPPER_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT) and d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_in, num_segments, d_temp_storage and temp_storage_bytes are not visible in
// this extract; the last two are presumably supplied by the macro or by a
// hidden parameter - confirm.
// NOTE(review): the element type d_out must accept is defined by CUB's ArgMin
// contract, not by anything visible here - check the CUB documentation.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
64 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
66 OutputIteratorT d_out,
68 BeginOffsetIteratorT d_begin_offsets,
69 EndOffsetIteratorT d_end_offsets)
71 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::ArgMin(
72 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::Max, invoked through
// MUDA_CUB_WRAPPER_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT) and d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_in, num_segments, d_temp_storage and temp_storage_bytes are not visible in
// this extract; the last two are presumably supplied by the macro or by a
// hidden parameter - confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
75 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
77 OutputIteratorT d_out,
79 BeginOffsetIteratorT d_begin_offsets,
80 EndOffsetIteratorT d_end_offsets)
82 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::Max(
83 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::ArgMax, invoked through
// MUDA_CUB_WRAPPER_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: d_out (OutputIteratorT), d_begin_offsets
// (BeginOffsetIteratorT) and d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_in, num_segments, d_temp_storage and temp_storage_bytes are not visible in
// this extract; the last two are presumably supplied by the macro or by a
// hidden parameter - confirm.
// NOTE(review): the element type d_out must accept is defined by CUB's ArgMax
// contract, not by anything visible here - check the CUB documentation.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
86 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
88 OutputIteratorT d_out,
90 BeginOffsetIteratorT d_begin_offsets,
91 EndOffsetIteratorT d_end_offsets)
93 MUDA_CUB_WRAPPER_IMPL(cub::DeviceSegmentedReduce::ArgMax(
94 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper whose visible body invokes cub::DeviceSegmentedReduce::Reduce inside
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT),
// d_end_offsets (EndOffsetIteratorT) and reduction_op (ReductionOp).
// NOTE(review): the function-name line, the braces, the remaining parameters
// (presumably including d_temp_storage and ones typed InputIteratorT and T)
// and every call argument after d_temp_storage are not visible in this
// extract - confirm against the full file before editing.
99 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT,
typename ReductionOp,
typename T>
101 size_t& temp_storage_bytes,
103 OutputIteratorT d_out,
105 BeginOffsetIteratorT d_begin_offsets,
106 EndOffsetIteratorT d_end_offsets,
107 ReductionOp reduction_op,
110 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(
111 cub::DeviceSegmentedReduce::Reduce(d_temp_storage,
// Wrapper around cub::DeviceSegmentedReduce::Sum, invoked through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT) and
// d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_segments are not visible in this extract -
// presumably they are further parameters; confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
124 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
126 size_t& temp_storage_bytes,
128 OutputIteratorT d_out,
130 BeginOffsetIteratorT d_begin_offsets,
131 EndOffsetIteratorT d_end_offsets)
133 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::Sum(
134 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::Min, invoked through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT) and
// d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_segments are not visible in this extract -
// presumably they are further parameters; confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
137 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
139 size_t& temp_storage_bytes,
141 OutputIteratorT d_out,
143 BeginOffsetIteratorT d_begin_offsets,
144 EndOffsetIteratorT d_end_offsets)
146 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::Min(
147 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::ArgMin, invoked through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT) and
// d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_segments are not visible in this extract -
// presumably they are further parameters; confirm.
// NOTE(review): the element type d_out must accept is defined by CUB's ArgMin
// contract, not by anything visible here - check the CUB documentation.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
150 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
152 size_t& temp_storage_bytes,
154 OutputIteratorT d_out,
156 BeginOffsetIteratorT d_begin_offsets,
157 EndOffsetIteratorT d_end_offsets)
159 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::ArgMin(
160 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::Max, invoked through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT) and
// d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_segments are not visible in this extract -
// presumably they are further parameters; confirm.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
163 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
165 size_t& temp_storage_bytes,
167 OutputIteratorT d_out,
169 BeginOffsetIteratorT d_begin_offsets,
170 EndOffsetIteratorT d_end_offsets)
172 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::Max(
173 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));
// Wrapper that invokes cub::DeviceSegmentedReduce::ArgMax through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL, with only two template parameters
// (InputIteratorT, OutputIteratorT).
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_items, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference) and
// d_out (OutputIteratorT).
// NOTE(review): every other cub::DeviceSegmentedReduce call in this file
// passes num_segments plus begin/end offset iterators; this one passes
// num_items and no offsets. That argument list looks like it was copied from a
// non-segmented reduce wrapper - confirm that CUB actually provides a
// DeviceSegmentedReduce::ArgMax overload with this shape, otherwise this
// template will fail to compile as soon as it is instantiated.
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_items are not visible in this extract; `_stream`
// is presumably a class member and the trailing `false` is presumably CUB's
// debug_synchronous argument - confirm.
177 template <
typename InputIteratorT,
typename OutputIteratorT>
179 size_t& temp_storage_bytes,
181 OutputIteratorT d_out,
184 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::ArgMax(
185 d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, _stream,
false));
// Wrapper around cub::DeviceSegmentedReduce::ArgMax, invoked through
// MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL.
// The call forwards d_temp_storage, temp_storage_bytes, d_in, d_out,
// num_segments, d_begin_offsets, d_end_offsets, _stream and a literal `false`.
// Visible parameters: temp_storage_bytes (size_t&, passed by reference),
// d_out (OutputIteratorT), d_begin_offsets (BeginOffsetIteratorT) and
// d_end_offsets (EndOffsetIteratorT).
// NOTE(review): the function-name line, the braces and the declarations of
// d_temp_storage, d_in and num_segments are not visible in this extract -
// presumably they are further parameters; confirm.
// NOTE(review): the element type d_out must accept is defined by CUB's ArgMax
// contract, not by anything visible here - check the CUB documentation.
// NOTE(review): `_stream` is not declared here (presumably a class member) and
// the trailing `false` is presumably CUB's debug_synchronous argument - confirm
// against the CUB version in use.
188 template <
typename InputIteratorT,
typename OutputIteratorT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
190 size_t& temp_storage_bytes,
192 OutputIteratorT d_out,
194 BeginOffsetIteratorT d_begin_offsets,
195 EndOffsetIteratorT d_end_offsets)
197 MUDA_CUB_WRAPPER_FOR_COMPUTE_GRAPH_IMPL(cub::DeviceSegmentedReduce::ArgMax(
198 d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, _stream,
false));