MUDA
Loading...
Searching...
No Matches
matrix_format_converter_impl.h
1#pragma once
2#include <muda/ext/linear_system/linear_system_handles.h>
3#include <muda/ext/linear_system/device_dense_matrix.h>
4#include <muda/ext/linear_system/device_dense_vector.h>
5#include <muda/ext/linear_system/device_triplet_matrix.h>
6#include <muda/ext/linear_system/device_doublet_vector.h>
7#include <muda/ext/linear_system/device_bcoo_matrix.h>
8#include <muda/ext/linear_system/device_bcoo_vector.h>
9#include <muda/ext/linear_system/device_bsr_matrix.h>
10#include <muda/ext/linear_system/device_csr_matrix.h>
11#include <muda/type_traits/cuda_arch.h>
12#include <muda/buffer/device_var.h>
13
14namespace muda
15{
16namespace details
17{
19 {
20 protected:
21 LinearSystemHandles& m_handles;
22 cudaDataType_t m_data_type;
23 int m_N;
24
25 public:
// Constructs the converter base by storing the three arguments:
//   context   - bound to the reference member m_handles (not copied)
//   data_type - stored in m_data_type
//   N         - stored in m_N (returned later by dim())
// The constructor body is empty; no other work is done here.
// NOTE(review): m_handles is a reference, so `context` presumably has to
// outlive this object -- confirm against the owning code.
26 MatrixFormatConverterBase(LinearSystemHandles& context, cudaDataType_t data_type, int N)
27 : m_handles(context)
28 , m_data_type(data_type)
29 , m_N(N)
30 {
31 }
32
// Virtual, compiler-generated destructor; the class is used as a base of the
// MatrixFormatConverter templates declared later in this header.
33 virtual ~MatrixFormatConverterBase() = default;
34
// Read-only accessors.
// dim() returns the N passed at construction (m_N).
35 auto dim() const { return m_N; }
// data_type() returns the cudaDataType_t passed at construction.
36 auto data_type() const { return m_data_type; }
// The remaining accessors forward to the same-named member function of the
// LinearSystemHandles object referenced by m_handles.
// NOTE(review): presumably these return the cuBLAS / cuSPARSE / cuSOLVER
// library handles -- the return types are not visible in this header.
37 auto cublas() const { return m_handles.cublas(); }
38 auto cusparse() const { return m_handles.cusparse(); }
39 auto cusolver_sp() const { return m_handles.cusolver_sp(); }
40 auto cusolver_dn() const { return m_handles.cusolver_dn(); }
41
// Resizes `buf` to `new_size` elements, reserving extra capacity first when
// the buffer has to grow.
//   buf      - device buffer to resize
//   new_size - requested element count
// If the current capacity is smaller than new_size, reserve() is called with
// new_size * m_handles.reserve_ratio() before resize(); otherwise only
// resize() runs.
// NOTE(review): reserve_ratio() is presumably a growth factor >= 1 so that
// later small increases avoid reallocation -- its type and range are not
// visible here; confirm in LinearSystemHandles.
42 template <typename T>
43 void loose_resize(DeviceBuffer<T>& buf, size_t new_size)
44 {
45 if(buf.capacity() < new_size)
46 buf.reserve(new_size * m_handles.reserve_ratio());
47 buf.resize(new_size);
48 }
49 };
50
51 template <typename T, int N>
53 {
54 using BlockMatrix = typename DeviceTripletMatrix<T, N>::BlockMatrix;
55 using SegmentVector = typename DeviceDoubletVector<T, N>::SegmentVector;
56
57 DeviceBuffer<int> sort_index;
58 DeviceBuffer<int> sort_index_input;
59 DeviceBuffer<int> sort_index_tmp;
60
61 DeviceBuffer<int> col_tmp;
62 DeviceBuffer<int> row_tmp;
63
64 DeviceBCOOMatrix<T, N> temp_bcoo_matrix;
65 DeviceBCOOVector<T, N> temp_bcoo_vector;
66
67 DeviceBuffer<int> unique_indices;
68 DeviceBuffer<int> unique_counts;
69 DeviceBuffer<int> offsets;
70
71 DeviceVar<int> count;
72
73 DeviceBuffer<int2> ij_pairs;
75 DeviceBuffer<uint64_t> ij_hash_input;
76 DeviceBuffer<int2> unique_ij_pairs;
77
79 DeviceBuffer<BlockMatrix> unique_blocks;
80 DeviceBuffer<SegmentVector> unique_segments;
81 DeviceBuffer<SegmentVector> temp_segments;
82
83 DeviceBuffer<T> unique_values;
84
85 public:
87 : MatrixFormatConverterBase(handles, cuda_data_type<T>(), N)
88 {
89 }
90
91 virtual ~MatrixFormatConverter() = default;
92
93
94 // Triplet -> BCOO
95 void convert(const DeviceTripletMatrix<T, N>& from, DeviceBCOOMatrix<T, N>& to);
96
97 void radix_sort_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
99 void make_unique_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
101
102 void merge_sort_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
104 void make_unique_indices(const DeviceTripletMatrix<T, N>& from,
106 void make_unique_blocks(const DeviceTripletMatrix<T, N>& from,
108
109
110 // BCOO -> Dense Matrix
111 void convert(const DeviceBCOOMatrix<T, N>& from,
113 bool clear_dense_matrix = true);
114
115 // BCOO -> COO
116 void convert(const DeviceBCOOMatrix<T, N>& from, DeviceCOOMatrix<T>& to);
117 void expand_blocks(const DeviceBCOOMatrix<T, N>& from, DeviceCOOMatrix<T>& to);
118 void sort_indices_and_values(const DeviceBCOOMatrix<T, N>& from,
120
121 // BCOO -> BSR
122 void convert(const DeviceBCOOMatrix<T, N>& from, DeviceBSRMatrix<T, N>& to);
123 void convert(DeviceBCOOMatrix<T, N>&& from, DeviceBSRMatrix<T, N>& to);
124 void calculate_block_offsets(const DeviceBCOOMatrix<T, N>& from,
126
127 // Doublet -> BCOO
128 void convert(const DeviceDoubletVector<T, N>& from, DeviceBCOOVector<T, N>& to);
129
130 void merge_sort_indices_and_segments(const DeviceDoubletVector<T, N>& from,
132 void make_unique_indices(const DeviceDoubletVector<T, N>& from,
134 void make_unique_segments(const DeviceDoubletVector<T, N>& from,
136
137 // BCOO -> Dense Vector
138 void convert(const DeviceBCOOVector<T, N>& from,
140 bool clear_dense_vector = true);
141 void set_unique_segments_to_dense_vector(const DeviceBCOOVector<T, N>& from,
143 bool clear_dense_vector);
144
145 // Doublet -> Dense Vector
146 void convert(const DeviceDoubletVector<T, N>& from,
148 bool clear_dense_vector = true);
149
150 // BSR -> CSR
151 void convert(const DeviceBSRMatrix<T, N>& from, DeviceCSRMatrix<T>& to);
152 };
153
154 template <typename T>
156 {
157 DeviceBuffer<int> sort_index;
158 DeviceBuffer<int> sort_index_input;
159 DeviceBuffer<int> sort_index_tmp;
160
161 DeviceBuffer<int> col_tmp;
162 DeviceBuffer<int> row_tmp;
163
164 DeviceBuffer<int> unique_indices;
165 DeviceCOOMatrix<T> temp_coo_matrix;
166 DeviceCOOVector<T> temp_coo_vector;
167
168 DeviceBuffer<int> unique_counts;
169 DeviceBuffer<int> offsets;
170
171 DeviceVar<int> count;
172
173 DeviceBuffer<int2> ij_pairs;
175 DeviceBuffer<uint64_t> ij_hash_input;
176 DeviceBuffer<int2> unique_ij_pairs;
177
178 muda::DeviceBuffer<T> values_sorted;
179 DeviceBuffer<T> unique_values;
180 DeviceBuffer<T> temp_values;
181
182 public:
184 : MatrixFormatConverterBase(handles, cuda_data_type<T>(), 1)
185 {
186 }
187
188 virtual ~MatrixFormatConverter() = default;
189
190 // Triplet -> COO
191 void convert(const DeviceTripletMatrix<T, 1>& from, DeviceCOOMatrix<T>& to);
192
193 void radix_sort_indices_and_blocks(const DeviceTripletMatrix<T, 1>& from,
195 void make_unique_indices_and_blocks(const DeviceTripletMatrix<T, 1>& from,
197
198 void merge_sort_indices_and_values(const DeviceTripletMatrix<T, 1>& from,
200 void make_unique_indices(const DeviceTripletMatrix<T, 1>& from,
202 void make_unique_values(const DeviceTripletMatrix<T, 1>& from,
204
205
206 // COO -> Dense Matrix
207 void convert(const DeviceCOOMatrix<T>& from,
209 bool clear_dense_matrix = true);
210
211 // COO -> CSR
212 void convert(const DeviceCOOMatrix<T>& from, DeviceCSRMatrix<T>& to);
213 void convert(DeviceCOOMatrix<T>&& from, DeviceCSRMatrix<T>& to);
214 void calculate_block_offsets(const DeviceCOOMatrix<T>& from,
216
217 // Doublet -> COO
218 void convert(const DeviceDoubletVector<T, 1>& from, DeviceCOOVector<T>& to);
219 void merge_sort_indices_and_values(const DeviceDoubletVector<T, 1>& from,
221 void make_unique_indices(const DeviceDoubletVector<T, 1>& from,
223 void make_unique_values(const DeviceDoubletVector<T, 1>& from,
225
226
227 // COO -> Dense Vector
228 void convert(const DeviceCOOVector<T>& from,
230 bool clear_dense_vector = true);
231 void set_unique_values_to_dense_vector(const DeviceDoubletVector<T, 1>& from,
233 bool clear_dense_vector);
234
235 // Doublet -> Dense Vector
236 void convert(const DeviceDoubletVector<T, 1>& from,
238 bool clear_dense_vector = true);
239 };
240} // namespace details
241} // namespace muda
242
243#include "details/matrix_format_converter_impl_block.inl"
244#include "details/matrix_format_converter_impl.inl"
Definition device_bcoo_matrix.h:18
Definition device_bcoo_vector.h:28
Definition device_bcoo_vector.h:10
Definition device_bsr_matrix.h:16
A std::vector like wrapper of cuda device memory, allows user to:
Definition device_buffer.h:46
Definition device_csr_matrix.h:16
Definition device_dense_matrix.h:16
Definition device_dense_vector.h:16
Definition device_doublet_vector.h:16
Definition device_triplet_matrix.h:14
Definition device_var.h:11
Definition linear_system_handles.h:16
Definition matrix_format_converter_impl.h:19
Definition matrix_format_converter_impl.h:53