muda-doc/linear__system__context_8h_source.html

#pragma once

#include <cublas_v2.h>

#include <cusparse_v2.h>

#include <cusolverDn.h>

#include <cusolverSp.h>

#include <list>

#include <muda/buffer/device_buffer.h>

#include <muda/literal/unit.h>

#include <muda/mstl/span.h>

#include <muda/ext/linear_system/dense_vector_view.h>

#include <muda/ext/linear_system/dense_matrix_view.h>

#include <muda/ext/linear_system/matrix_format_converter.h>

#include <muda/ext/linear_system/linear_system_handles.h>

#include <muda/ext/linear_system/linear_system_solve_tolerance.h>

#include <muda/ext/linear_system/linear_system_solve_reorder.h>

namespace muda

{


class LinearSystemContextCreateInfo

{

  public:

    cudaStream_t stream = nullptr;

    // base size of temp buffer, if buffer is not enough

    // we create a new buffer with size = buffer_byte_size_base * 2 / 4 / 8 / 16 / ...

    // and we will not release the old buffer because of safety

    size_t buffer_byte_size_base = 256_M;

};


class LinearSystemContext

{

  private:

    LinearSystemHandles                m_handles;

    std::list<DeviceBuffer<std::byte>> m_buffers;

    std::list<std::vector<std::byte>>  m_host_buffers;

    DeviceBuffer<std::byte>            m_scalar_buffer;


    LinearSystemContextCreateInfo    m_create_info;

    std::list<std::function<void()>> m_sync_callbacks;

    std::string                      m_current_label;


    void                  set_pointer_mode_device();

    void                  set_pointer_mode_host();

    void                  shrink_temp_buffers();

    void                  add_sync_callback(std::function<void()>&& callback);

    BufferView<std::byte> temp_buffer(size_t size);

    span<std::byte>       temp_host_buffer(size_t size);

    template <typename T>

    BufferView<T> temp_buffer(size_t size);

    template <typename T>

    span<T> temp_host_buffer(size_t size);

    template <typename T>

    std::vector<T*> temp_buffers(size_t size_in_buffer, size_t num_buffer);

    template <typename T>

    std::vector<T*> temp_host_buffers(size_t size_in_buffer, size_t num_buffer);


    LinearSystemSolveTolerance m_tolerance;

    LinearSystemSolveReorder   m_reorder;

    MatrixFormatConverter      m_converter;


  private:

    auto cublas() const { return m_handles.cublas(); }

    auto cusparse() const { return m_handles.cusparse(); }

    auto cusolver_dn() const { return m_handles.cusolver_dn(); }

    auto cusolver_sp() const { return m_handles.cusolver_sp(); }


  public:

    LinearSystemContext(const LinearSystemContextCreateInfo& info = {});

    LinearSystemContext(const LinearSystemContext&)            = delete;

    LinearSystemContext& operator=(const LinearSystemContext&) = delete;

    LinearSystemContext(LinearSystemContext&&)                 = delete;

    LinearSystemContext& operator=(LinearSystemContext&&)      = delete;

    ~LinearSystemContext();


    void label(std::string_view label) { m_current_label = label; }

    auto label() const -> std::string_view { return m_current_label; }

    auto stream() const { return m_handles.stream(); }

    void stream(cudaStream_t stream);

    void sync();


    /***********************************************************************************************

                                                Settings

    ***********************************************************************************************/


    auto& tolerance() { return m_tolerance; }

    auto& reorder() { return m_reorder; }

    auto  reserve_ratio() const { return m_handles.m_reserve_ratio; }

    void  reserve_ratio(float ratio) { m_handles.m_reserve_ratio = ratio; }


  public:

    /***********************************************************************************************

                                              Converter

    ***********************************************************************************************/

    // Triplet -> BCOO

    template <typename T, int N>

    void convert(const DeviceTripletMatrix<T, N>& from, DeviceBCOOMatrix<T, N>& to);


    // BCOO -> Dense Matrix

    template <typename T, int N>

    void convert(const DeviceBCOOMatrix<T, N>& from,

                 DeviceDenseMatrix<T>&         to,

                 bool                          clear_dense_matrix = true);


    // BCOO -> COO

    template <typename T, int N>

    void convert(const DeviceBCOOMatrix<T, N>& from, DeviceCOOMatrix<T>& to);


    // BCOO -> BSR

    template <typename T, int N>

    void convert(const DeviceBCOOMatrix<T, N>& from, DeviceBSRMatrix<T, N>& to);


    // Doublet -> BCOO

    template <typename T, int N>

    void convert(const DeviceDoubletVector<T, N>& from, DeviceBCOOVector<T, N>& to);


    // BCOO -> Dense Vector

    template <typename T, int N>

    void convert(const DeviceBCOOVector<T, N>& from,

                 DeviceDenseVector<T>&         to,

                 bool                          clear_dense_vector = true);


    // Doublet -> Dense Vector

    template <typename T, int N>

    void convert(const DeviceDoubletVector<T, N>& from,

                 DeviceDenseVector<T>&            to,

                 bool                             clear_dense_vector = true);


    // BSR -> CSR

    template <typename T, int N>

    void convert(const DeviceBSRMatrix<T, N>& from, DeviceCSRMatrix<T>& to);


    // Triplet -> COO

    template <typename T>

    void convert(const DeviceTripletMatrix<T, 1>& from, DeviceCOOMatrix<T>& to);


    // COO -> Dense Matrix

    template <typename T>

    void convert(const DeviceCOOMatrix<T>& from,

                 DeviceDenseMatrix<T>&     to,

                 bool                      clear_dense_matrix = true);


    // COO -> CSR

    template <typename T>

    void convert(const DeviceCOOMatrix<T>& from, DeviceCSRMatrix<T>& to);

    template <typename T>

    void convert(DeviceCOOMatrix<T>&& from, DeviceCSRMatrix<T>& to);


    // Doublet -> COO

    template <typename T>

    void convert(const DeviceDoubletVector<T, 1>& from, DeviceCOOVector<T>& to);


    // COO -> Dense Vector

    template <typename T>

    void convert(const DeviceCOOVector<T>& from,

                 DeviceDenseVector<T>&     to,

                 bool                      clear_dense_vector = true);


    // Doublet -> Dense Vector

    template <typename T>

    void convert(const DeviceDoubletVector<T, 1>& from,

                 DeviceDenseVector<T>&            to,

                 bool                             clear_dense_vector = true);


  public:

    /***********************************************************************************************

                                                Norm

    ***********************************************************************************************/

    template <typename T>

    T norm(CDenseVectorView<T> x);

    template <typename T>

    void norm(CDenseVectorView<T> x, VarView<T> result);

    template <typename T>

    void norm(CDenseVectorView<T> x, T* result);


    /***********************************************************************************************

                                                Dot

    ***********************************************************************************************/

    template <typename T>

    T dot(CDenseVectorView<T> x, CDenseVectorView<T> y);

    template <typename T>

    void dot(CDenseVectorView<T> x, CDenseVectorView<T> y, VarView<T> result);

    template <typename T>

    void dot(CDenseVectorView<T> x, CDenseVectorView<T> y, T* result);


    /***********************************************************************************************

                                              Max/Min

    ***********************************************************************************************/

    //TODO:


    /***********************************************************************************************

                                               Axpby

                                      y = alpha * x + beta * y

    ***********************************************************************************************/

    // y = alpha * x + beta * y

    template <typename T>

    void axpby(const T& alpha, CDenseVectorView<T> x, const T& beta, DenseVectorView<T> y);

    // y = alpha * x + beta * y

    template <typename T>

    void axpby(CVarView<T> alpha, CDenseVectorView<T> x, CVarView<T> beta, DenseVectorView<T> y);

    // z = x + y

    template <typename T>

    void plus(CDenseVectorView<T> x, CDenseVectorView<T> y, DenseVectorView<T> z);


    /***********************************************************************************************

                                                Spmv

                                        y = a * A * x + b * y

    ***********************************************************************************************/

    // BSR

    template <typename T, int N>

    void spmv(const T&             a,

              CBSRMatrixView<T, N> A,

              CDenseVectorView<T>  x,

              const T&             b,

              DenseVectorView<T>&  y);

    template <typename T, int N>

    void spmv(CBSRMatrixView<T, N> A, CDenseVectorView<T> x, DenseVectorView<T> y);

    // CSR

    template <typename T>

    void spmv(const T& a, CCSRMatrixView<T> A, CDenseVectorView<T> x, const T& b, DenseVectorView<T>& y);

    template <typename T>

    void spmv(CCSRMatrixView<T> A, CDenseVectorView<T> x, DenseVectorView<T> y);

    // BCOO & Triplet

    template <typename T, int N>

    void spmv(const T&                 a,

              CTripletMatrixView<T, N> A,

              CDenseVectorView<T>      x,

              const T&                 b,

              DenseVectorView<T>&      y);

    template <typename T, int N>

    void spmv(CTripletMatrixView<T, N> A, CDenseVectorView<T> x, DenseVectorView<T> y);

    // COO

    template <typename T>

    void spmv(const T& a, CCOOMatrixView<T> A, CDenseVectorView<T> x, const T& b, DenseVectorView<T>& y);

    template <typename T>

    void spmv(CCOOMatrixView<T> A, CDenseVectorView<T> x, DenseVectorView<T> y);


    /***********************************************************************************************

                                                 Mv

                                        y = a * A * x + b * y

    ***********************************************************************************************/

    template <typename T>

    void mv(CDenseMatrixView<T> A,

            const T&            alpha,

            CDenseVectorView<T> x,

            const T&            beta,

            DenseVectorView<T>  y);

    template <typename T>

    void mv(CDenseMatrixView<T> A,

            CVarView<T>         alpha,

            CDenseVectorView<T> x,

            CVarView<T>         beta,

            DenseVectorView<T>  y);

    template <typename T>

    void mv(CDenseMatrixView<T> A, CDenseVectorView<T> x, DenseVectorView<T> y);


    /***********************************************************************************************

                                                Solve

                                              A * x = b

    ***********************************************************************************************/

    // solve Ax = b, A will be modified for factorization

    // and b will be modified to store the solution

    template <typename T>

    void solve(DenseMatrixView<T> A_to_fact, DenseVectorView<T> b_to_x);

    // solve Ax = b

    // A is the CSR Matrix

    template <typename T>

    void solve(DenseVectorView<T> x, CCSRMatrixView<T> A, CDenseVectorView<T> b);


  private:

    template <typename T>

    void generic_spmv(const T&                  a,

                      cusparseOperation_t       op,

                      cusparseSpMatDescr_t      A,

                      const cusparseDnVecDescr* x,

                      const T&                  b,

                      cusparseDnVecDescr_t      y);

    template <typename T>

    void sysv(DenseMatrixView<T> A_to_fact, DenseVectorView<T> b_to_x);

    template <typename T>

    void gesv(DenseMatrixView<T> A_to_fact, DenseVectorView<T> b_to_x);

};


}  // namespace muda


#include "details/linear_system_context.inl"

#include "details/routines/convert.inl"

#include "details/routines/norm.inl"

#include "details/routines/dot.inl"

#include "details/routines/axpby.inl"

#include "details/routines/spmv.inl"

#include "details/routines/mv.inl"

#include "details/routines/solve.inl"

#include "details/routines/mm.inl"

muda::BSRMatrixViewBase
Definition bsr_matrix_view.h:8

muda::BufferViewT< false, T >

muda::CDenseMatrixView
Definition dense_matrix_view.h:93

muda::COOMatrixViewBase
Definition bcoo_matrix_view.h:15

muda::CSRMatrixViewBase
Definition csr_matrix_view.h:8

muda::DenseMatrixView
Definition dense_matrix_view.h:108

muda::DenseVectorViewBase
Definition dense_vector_view.h:10

muda::DeviceBCOOMatrix
Definition device_bcoo_matrix.h:18

muda::DeviceBCOOVector< T, 1 >
Definition device_bcoo_vector.h:28

muda::DeviceBCOOVector
Definition device_bcoo_vector.h:10

muda::DeviceBSRMatrix
Definition device_bsr_matrix.h:16

muda::DeviceBuffer
A std::vector like wrapper of cuda device memory, allows user to:
Definition device_buffer.h:46

muda::DeviceCSRMatrix
Definition device_csr_matrix.h:16

muda::DeviceDenseMatrix
Definition device_dense_matrix.h:16

muda::DeviceDenseVector
Definition device_dense_vector.h:16

muda::DeviceDoubletVector
Definition device_doublet_vector.h:16

muda::DeviceTripletMatrix
Definition device_triplet_matrix.h:14

muda::LinearSystemContextCreateInfo
Definition linear_system_context.h:19

muda::LinearSystemContext
Definition linear_system_context.h:28

muda::LinearSystemHandles
Definition linear_system_handles.h:16

muda::LinearSystemSolveReorder
Definition linear_system_solve_reorder.h:11

muda::LinearSystemSolveTolerance
Definition linear_system_solve_tolerance.h:7

muda::MatrixFormatConverter
Definition matrix_format_converter.h:48

muda::TripletMatrixViewBase
Definition triplet_matrix_view.h:10

muda::VarViewT
Definition var_view.h:11

device_buffer.h
A light-weight wrapper of cuda device memory. Like std::vector, allow user to resize,...