|
template<typename InputIteratorT , typename OutputIteratorT > |
DeviceScan & | ExclusiveSum (InputIteratorT d_in, OutputIteratorT d_out, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT , typename ScanOpT , typename InitValueT > |
DeviceScan & | ExclusiveScan (InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT > |
DeviceScan & | InclusiveSum (InputIteratorT d_in, OutputIteratorT d_out, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT , typename ScanOpT > |
DeviceScan & | InclusiveScan (InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, int num_items) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename EqualityOpT = cub::Equality> |
DeviceScan & | ExclusiveSumByKey (KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename ScanOpT , typename InitValueT , typename EqualityOpT = cub::Equality> |
DeviceScan & | ExclusiveScanByKey (KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, ScanOpT scan_op, InitValueT init_value, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename EqualityOpT = cub::Equality> |
DeviceScan & | InclusiveSumByKey (KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename ScanOpT , typename EqualityOpT = cub::Equality> |
DeviceScan & | InclusiveScanByKey (KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, ScanOpT scan_op, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename InputIteratorT , typename OutputIteratorT > |
DeviceScan & | ExclusiveSum (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT , typename ScanOpT , typename InitValueT > |
DeviceScan & | ExclusiveScan (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, InitValueT init_value, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT > |
DeviceScan & | InclusiveSum (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, int num_items) |
|
template<typename InputIteratorT , typename OutputIteratorT , typename ScanOpT > |
DeviceScan & | InclusiveScan (void *d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, int num_items) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename EqualityOpT = cub::Equality> |
DeviceScan & | ExclusiveSumByKey (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename ScanOpT , typename InitValueT , typename EqualityOpT = cub::Equality> |
DeviceScan & | ExclusiveScanByKey (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, ScanOpT scan_op, InitValueT init_value, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename EqualityOpT = cub::Equality> |
DeviceScan & | InclusiveSumByKey (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
template<typename KeysInputIteratorT , typename ValuesInputIteratorT , typename ValuesOutputIteratorT , typename ScanOpT , typename EqualityOpT = cub::Equality> |
DeviceScan & | InclusiveScanByKey (void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, ValuesInputIteratorT d_values_in, ValuesOutputIteratorT d_values_out, ScanOpT scan_op, int num_items, EqualityOpT equality_op=EqualityOpT()) |
|
| CubWrapper (Stream &stream=Stream::Default()) |
|
void | kernel_name (std::string_view)=delete |
|
MUDA_GENERIC | LaunchBase (::cudaStream_t stream) MUDA_NOEXCEPT |
|
T & | push_range (const std::string &name) |
|
T & | pop_range () |
|
T & | kernel_name (std::string_view name) |
|
T & | file_line (std::string_view file, int line) |
|
T & | record (cudaEvent_t e, int flag=cudaEventRecordDefault) |
|
T & | record (ComputeGraphVar< cudaEvent_t > &e, const std::vector< ComputeGraphVarBase * > &vars) |
|
template<typename... ViewT> |
T & | record (ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|
T & | when (cudaEvent_t e, int flag=cudaEventWaitDefault) |
|
T & | wait (cudaEvent_t e, int flag=cudaEventWaitDefault) |
|
T & | wait (const ComputeGraphVar< cudaEvent_t > &e, const std::vector< ComputeGraphVarBase * > &vars) |
|
template<typename... ViewT> |
T & | wait (const ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|
T & | wait () |
|
T & | callback (const std::function< void(::cudaStream_t, ::cudaError)> &callback) |
|
template<typename Next > |
Next | next (Next n) |
|
template<typename Next , typename... Args> |
Next | next (Args &&... args) |
|
template<typename T > |
MUDA_GENERIC | LaunchBase (cudaStream_t stream) MUDA_NOEXCEPT |
|
MUDA_GENERIC | LaunchCore (::cudaStream_t stream) MUDA_NOEXCEPT |
|
void | init_stream (::cudaStream_t s) |
|
void | push_range (const std::string &name) |
|
void | pop_range () |
|
void | record (cudaEvent_t e, int flag=cudaEventRecordDefault) |
|
void | record (ComputeGraphVar< cudaEvent_t > &e, const std::vector< ComputeGraphVarBase * > &vars) |
|
template<typename... ViewT> |
void | record (ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|
void | when (cudaEvent_t e, int flag=cudaEventWaitDefault) |
|
void | wait (cudaEvent_t e, int flag=cudaEventWaitDefault) |
|
void | wait (const ComputeGraphVar< cudaEvent_t > &e, const std::vector< ComputeGraphVarBase * > &vars) |
|
template<typename... ViewT> |
void | wait (const ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|
void | wait () |
|
void | callback (const std::function< void(::cudaStream_t, ::cudaError)> &callback) |
|
template<typename... ViewT> |
MUDA_INLINE void | record (ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|
template<typename... ViewT> |
MUDA_INLINE void | wait (const ComputeGraphVar< cudaEvent_t > &e, ComputeGraphVar< ViewT > &... vars) |
|