File matrix_format_converter_impl.h
File List > ext > linear_system > matrix_format_converter_impl.h
Go to the documentation of this file
#pragma once
#include <muda/ext/linear_system/linear_system_handles.h>
#include <muda/ext/linear_system/device_dense_matrix.h>
#include <muda/ext/linear_system/device_dense_vector.h>
#include <muda/ext/linear_system/device_triplet_matrix.h>
#include <muda/ext/linear_system/device_doublet_vector.h>
#include <muda/ext/linear_system/device_bcoo_matrix.h>
#include <muda/ext/linear_system/device_bcoo_vector.h>
#include <muda/ext/linear_system/device_bsr_matrix.h>
#include <muda/ext/linear_system/device_csr_matrix.h>
#include <muda/type_traits/cuda_arch.h>
#include <muda/buffer/device_var.h>
namespace muda
{
namespace details
{
class MatrixFormatConverterBase
{
protected:
LinearSystemHandles& m_handles;
cudaDataType_t m_data_type;
int m_N;
public:
MatrixFormatConverterBase(LinearSystemHandles& context, cudaDataType_t data_type, int N)
: m_handles(context)
, m_data_type(data_type)
, m_N(N)
{
}
virtual ~MatrixFormatConverterBase() = default;
auto dim() const { return m_N; }
auto data_type() const { return m_data_type; }
auto cublas() const { return m_handles.cublas(); }
auto cusparse() const { return m_handles.cusparse(); }
auto cusolver_sp() const { return m_handles.cusolver_sp(); }
auto cusolver_dn() const { return m_handles.cusolver_dn(); }
template <typename T>
void loose_resize(DeviceBuffer<T>& buf, size_t new_size)
{
if(buf.capacity() < new_size)
buf.reserve(new_size * m_handles.reserve_ratio());
buf.resize(new_size);
}
};
template <typename T, int N>
class MatrixFormatConverter : public MatrixFormatConverterBase
{
using BlockMatrix = typename DeviceTripletMatrix<T, N>::BlockMatrix;
using SegmentVector = typename DeviceDoubletVector<T, N>::SegmentVector;
DeviceBuffer<int> sort_index;
DeviceBuffer<int> sort_index_input;
DeviceBuffer<int> sort_index_tmp;
DeviceBuffer<int> col_tmp;
DeviceBuffer<int> row_tmp;
DeviceBCOOMatrix<T, N> temp_bcoo_matrix;
DeviceBCOOVector<T, N> temp_bcoo_vector;
DeviceBuffer<int> unique_indices;
DeviceBuffer<int> unique_counts;
DeviceBuffer<int> offsets;
DeviceVar<int> count;
DeviceBuffer<int2> ij_pairs;
DeviceBuffer<uint64_t> ij_hash;
DeviceBuffer<uint64_t> ij_hash_input;
DeviceBuffer<int2> unique_ij_pairs;
muda::DeviceBuffer<BlockMatrix> blocks_sorted;
DeviceBuffer<BlockMatrix> unique_blocks;
DeviceBuffer<SegmentVector> unique_segments;
DeviceBuffer<SegmentVector> temp_segments;
DeviceBuffer<T> unique_values;
public:
MatrixFormatConverter(LinearSystemHandles& handles)
: MatrixFormatConverterBase(handles, cuda_data_type<T>(), N)
{
}
virtual ~MatrixFormatConverter() = default;
// Triplet -> BCOO
void convert(const DeviceTripletMatrix<T, N>& from, DeviceBCOOMatrix<T, N>& to);
void radix_sort_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
DeviceBCOOMatrix<T, N>& to);
void make_unique_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
DeviceBCOOMatrix<T, N>& to);
void merge_sort_indices_and_blocks(const DeviceTripletMatrix<T, N>& from,
DeviceBCOOMatrix<T, N>& to);
void make_unique_indices(const DeviceTripletMatrix<T, N>& from,
DeviceBCOOMatrix<T, N>& to);
void make_unique_blocks(const DeviceTripletMatrix<T, N>& from,
DeviceBCOOMatrix<T, N>& to);
// BCOO -> Dense Matrix
void convert(const DeviceBCOOMatrix<T, N>& from,
DeviceDenseMatrix<T>& to,
bool clear_dense_matrix = true);
// BCOO -> COO
void convert(const DeviceBCOOMatrix<T, N>& from, DeviceCOOMatrix<T>& to);
void expand_blocks(const DeviceBCOOMatrix<T, N>& from, DeviceCOOMatrix<T>& to);
void sort_indices_and_values(const DeviceBCOOMatrix<T, N>& from,
DeviceCOOMatrix<T>& to);
// BCOO -> BSR
void convert(const DeviceBCOOMatrix<T, N>& from, DeviceBSRMatrix<T, N>& to);
void convert(DeviceBCOOMatrix<T, N>&& from, DeviceBSRMatrix<T, N>& to);
void calculate_block_offsets(const DeviceBCOOMatrix<T, N>& from,
DeviceBSRMatrix<T, N>& to);
// Doublet -> BCOO
void convert(const DeviceDoubletVector<T, N>& from, DeviceBCOOVector<T, N>& to);
void merge_sort_indices_and_segments(const DeviceDoubletVector<T, N>& from,
DeviceBCOOVector<T, N>& to);
void make_unique_indices(const DeviceDoubletVector<T, N>& from,
DeviceBCOOVector<T, N>& to);
void make_unique_segments(const DeviceDoubletVector<T, N>& from,
DeviceBCOOVector<T, N>& to);
// BCOO -> Dense Vector
void convert(const DeviceBCOOVector<T, N>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector = true);
void set_unique_segments_to_dense_vector(const DeviceBCOOVector<T, N>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector);
// Triplet -> Dense Vector
void convert(const DeviceDoubletVector<T, N>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector = true);
// BSR -> CSR
void convert(const DeviceBSRMatrix<T, N>& from, DeviceCSRMatrix<T>& to);
};
template <typename T>
class MatrixFormatConverter<T, 1> : public MatrixFormatConverterBase
{
DeviceBuffer<int> sort_index;
DeviceBuffer<int> sort_index_input;
DeviceBuffer<int> sort_index_tmp;
DeviceBuffer<int> col_tmp;
DeviceBuffer<int> row_tmp;
DeviceBuffer<int> unique_indices;
DeviceCOOMatrix<T> temp_coo_matrix;
DeviceCOOVector<T> temp_coo_vector;
DeviceBuffer<int> unique_counts;
DeviceBuffer<int> offsets;
DeviceVar<int> count;
DeviceBuffer<int2> ij_pairs;
DeviceBuffer<uint64_t> ij_hash;
DeviceBuffer<uint64_t> ij_hash_input;
DeviceBuffer<int2> unique_ij_pairs;
muda::DeviceBuffer<T> values_sorted;
DeviceBuffer<T> unique_values;
DeviceBuffer<T> temp_values;
public:
MatrixFormatConverter(LinearSystemHandles& handles)
: MatrixFormatConverterBase(handles, cuda_data_type<T>(), 1)
{
}
virtual ~MatrixFormatConverter() = default;
// Triplet -> COO
void convert(const DeviceTripletMatrix<T, 1>& from, DeviceCOOMatrix<T>& to);
void radix_sort_indices_and_blocks(const DeviceTripletMatrix<T, 1>& from,
DeviceBCOOMatrix<T, 1>& to);
void make_unique_indices_and_blocks(const DeviceTripletMatrix<T, 1>& from,
DeviceBCOOMatrix<T, 1>& to);
void merge_sort_indices_and_values(const DeviceTripletMatrix<T, 1>& from,
DeviceCOOMatrix<T>& to);
void make_unique_indices(const DeviceTripletMatrix<T, 1>& from,
DeviceCOOMatrix<T>& to);
void make_unique_values(const DeviceTripletMatrix<T, 1>& from,
DeviceCOOMatrix<T>& to);
// COO -> Dense Matrix
void convert(const DeviceCOOMatrix<T>& from,
DeviceDenseMatrix<T>& to,
bool clear_dense_matrix = true);
// COO -> CSR
void convert(const DeviceCOOMatrix<T>& from, DeviceCSRMatrix<T>& to);
void convert(DeviceCOOMatrix<T>&& from, DeviceCSRMatrix<T>& to);
void calculate_block_offsets(const DeviceCOOMatrix<T>& from,
DeviceCSRMatrix<T>& to);
// Doublet -> COO
void convert(const DeviceDoubletVector<T, 1>& from, DeviceCOOVector<T>& to);
void merge_sort_indices_and_values(const DeviceDoubletVector<T, 1>& from,
DeviceCOOVector<T>& to);
void make_unique_indices(const DeviceDoubletVector<T, 1>& from,
DeviceCOOVector<T>& to);
void make_unique_values(const DeviceDoubletVector<T, 1>& from,
DeviceCOOVector<T>& to);
// COO -> Dense Vector
void convert(const DeviceCOOVector<T>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector = true);
void set_unique_values_to_dense_vector(const DeviceDoubletVector<T, 1>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector);
// Triplet -> Dense Vector
void convert(const DeviceDoubletVector<T, 1>& from,
DeviceDenseVector<T>& to,
bool clear_dense_vector = true);
};
} // namespace details
} // namespace muda
#include "details/matrix_format_converter_impl_block.inl"
#include "details/matrix_format_converter_impl.inl"