QMCPACK
syclSolverInverter< T_FP > Class Template Reference

implements matrix inversion via cuSolverDN More...

+ Collaboration diagram for syclSolverInverter< T_FP >:

Public Member Functions

template<typename TMAT , typename TREAL , typename = std::enable_if_t<std::is_same<TMAT, T_FP>::value>>
std::enable_if_t< std::is_same< TMAT, T_FP >::value > invert_transpose (const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
 compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT are the same More...
 
template<typename TMAT , typename TREAL , typename = std::enable_if_t<!std::is_same<TMAT, T_FP>::value>>
std::enable_if_t<!std::is_same< TMAT, T_FP >::value > invert_transpose (const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
 compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT are not the same More...
 

Private Member Functions

void resize (int norb, sycl::queue &m_queue)
 resize the internal storage More...
 

Private Attributes

Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
 scratch memory for cusolverDN More...
 
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
 pivot array + info More...
 
Vector< T_FP, SYCLAllocator< T_FP > > workspace
 workspace More...
 
std::int64_t getrf_ws = 0
 
std::int64_t getri_ws = 0
 

Detailed Description

template<typename T_FP>
class qmcplusplus::syclSolverInverter< T_FP >

implements matrix inversion via cuSolverDN

Template Parameters
T_FPhigh precision for matrix inversion, T_FP >= T

Definition at line 29 of file syclSolverInverter.hpp.

Member Function Documentation

◆ invert_transpose() [1/2]

std::enable_if_t<std::is_same<TMAT, T_FP>::value> invert_transpose ( const Matrix< TMAT > &  logdetT,
Matrix< TMAT > &  Ainv,
Matrix< TMAT, SYCLAllocator< TMAT >> &  Ainv_gpu,
std::complex< TREAL > &  log_value,
sycl::queue m_queue 
)
inline

compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT are the same

Template Parameters
TREALreal type

Definition at line 62 of file syclSolverInverter.hpp.

References Matrix< T, Alloc >::data(), qmcplusplus::cusolver::getrf(), syclSolverInverter< T_FP >::getrf_ws, qmcplusplus::rocsolver::getri(), syclSolverInverter< T_FP >::getri_ws, syclSolverInverter< T_FP >::ipiv, syclSolverInverter< T_FP >::Mat1_gpu, syclSolverInverter< T_FP >::resize(), Matrix< T, Alloc >::rows(), Matrix< T, Alloc >::size(), qmcplusplus::syclBLAS::transpose(), and syclSolverInverter< T_FP >::workspace.

Referenced by qmcplusplus::test_inverse().

67  {
68  const int norb = logdetT.rows();
69  resize(norb, m_queue);
70 
71  m_queue.memcpy(Mat1_gpu.data(), logdetT.data(), logdetT.size() * sizeof(TMAT));
72  syclBLAS::transpose(m_queue, Mat1_gpu.data(), norb, Mat1_gpu.cols(), Ainv_gpu.data(), norb,
73  Ainv_gpu.cols());
74  try
75  {
76  syclSolver::getrf(m_queue, norb, norb, Ainv_gpu.data(), norb, ipiv.data(), workspace.data(), getrf_ws)
77  .wait();
78  }
79  catch (sycl::exception const& ex)
80  {
81  std::ostringstream err;
82  err << "\t\tCaught synchronous SYCL exception during getrf:\n"
83  << ex.what() << " status: " << ex.code() << std::endl;
84  std::cerr << err.str();
85  throw std::runtime_error(err.str());
86  }
87 
88  log_value = computeLogDet_sycl<TREAL>(m_queue, norb, Ainv_gpu.cols(), Ainv_gpu.data(), ipiv.data());
89 
90  syclSolver::getri(m_queue, norb, Ainv_gpu.data(), norb, ipiv.data(), workspace.data(), getri_ws);
91 
92  m_queue.memcpy(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT)).wait();
93  }
Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
scratch memory for cusolverDN
sycl::event transpose(sycl::queue &q, const T1 *restrict in, int m, int lda, T2 *restrict out, int n, int ldb, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:475
rocblas_status getri(rocblas_handle &handle, int n, double *A, int lda, int *ipiv, int *info)
Definition: rocsolver.hpp:143
Vector< T_FP, SYCLAllocator< T_FP > > workspace
workspace
void resize(int norb, sycl::queue &m_queue)
resize the internal storage
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
pivot array + info
cusolverStatus_t getrf(cusolverDnHandle_t &handle, int m, int n, double *A, int lda, double *work, int *ipiv, int *info)
Definition: cusolver.hpp:92

◆ invert_transpose() [2/2]

std::enable_if_t<!std::is_same<TMAT, T_FP>::value> invert_transpose ( const Matrix< TMAT > &  logdetT,
Matrix< TMAT > &  Ainv,
Matrix< TMAT, SYCLAllocator< TMAT >> &  Ainv_gpu,
std::complex< TREAL > &  log_value,
sycl::queue m_queue 
)
inline

compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT are not the same

Template Parameters
TREALreal type

Definition at line 100 of file syclSolverInverter.hpp.

References qmcplusplus::syclBLAS::copy_n(), Matrix< T, Alloc >::data(), qmcplusplus::cusolver::getrf(), syclSolverInverter< T_FP >::getrf_ws, qmcplusplus::rocsolver::getri(), syclSolverInverter< T_FP >::getri_ws, syclSolverInverter< T_FP >::ipiv, qmcplusplus::isnan(), syclSolverInverter< T_FP >::Mat1_gpu, norm(), syclSolverInverter< T_FP >::resize(), Matrix< T, Alloc >::rows(), Matrix< T, Alloc >::size(), qmcplusplus::syclBLAS::transpose(), and syclSolverInverter< T_FP >::workspace.

105  {
106  const int norb = logdetT.rows();
107  resize(norb, m_queue);
108  //use Ainv_gpu for transpose
109  m_queue.memcpy(Ainv_gpu.data(), logdetT.data(), logdetT.size() * sizeof(TMAT));
110  //transpose
111  syclBLAS::transpose(m_queue, Ainv_gpu.data(), norb, Ainv_gpu.cols(), Mat1_gpu.data(), norb,
112  Mat1_gpu.cols());
113 
114  //getrf (LU) -> getri (inverse)
115  try
116  {
117  syclSolver::getrf(m_queue, norb, norb, Mat1_gpu.data(), norb, ipiv.data(), workspace.data(), getrf_ws)
118  .wait();
119  }
120  catch (sycl::exception const& ex)
121  {
122  std::ostringstream err;
123  err << "\t\tCaught synchronous SYCL exception during getrf:\n"
124  << ex.what() << " status: " << ex.code() << std::endl;
125  std::cerr << err.str();
126  throw std::runtime_error(err.str());
127  }
128 
129  log_value = computeLogDet_sycl<TREAL>(m_queue, norb, Mat1_gpu.cols(), Mat1_gpu.data(), ipiv.data());
130 
131  syclSolver::getri(m_queue, norb, Mat1_gpu.data(), norb, ipiv.data(), workspace.data(), getri_ws);
132 
133  syclBLAS::copy_n(m_queue, Mat1_gpu.data(), Mat1_gpu.size(), Ainv_gpu.data());
134 
135  m_queue.memcpy(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT)).wait();
136 
137  for(int i = 0; i < norb; i++)
138  if (qmcplusplus::isnan(std::norm(Ainv[i][i])))
139  throw std::runtime_error("Ainv[i][i] is NaN. i = " + std::to_string(i));
140  }
Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
scratch memory for cusolverDN
sycl::event transpose(sycl::queue &q, const T1 *restrict in, int m, int lda, T2 *restrict out, int n, int ldb, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:475
double norm(const zVec &c)
Definition: VectorOps.h:118
rocblas_status getri(rocblas_handle &handle, int n, double *A, int lda, int *ipiv, int *info)
Definition: rocsolver.hpp:143
Vector< T_FP, SYCLAllocator< T_FP > > workspace
workspace
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
void resize(int norb, sycl::queue &m_queue)
resize the internal storage
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
pivot array + info
cusolverStatus_t getrf(cusolverDnHandle_t &handle, int m, int n, double *A, int lda, double *work, int *ipiv, int *info)
Definition: cusolver.hpp:92
bool isnan(float a)
return true if the value is NaN.
Definition: math.cpp:18

◆ resize()

void resize ( int  norb,
sycl::queue m_queue 
)
inlineprivate

resize the internal storage

Parameters
norbnumber of electrons/orbitals
delay,maximumdelay 0<delay<=norb

Definition at line 44 of file syclSolverInverter.hpp.

References syclSolverInverter< T_FP >::getrf_ws, syclSolverInverter< T_FP >::getri_ws, syclSolverInverter< T_FP >::ipiv, syclSolverInverter< T_FP >::Mat1_gpu, and syclSolverInverter< T_FP >::workspace.

Referenced by syclSolverInverter< T_FP >::invert_transpose().

45  {
46  if (Mat1_gpu.rows() != norb)
47  {
48  Mat1_gpu.resize(norb, norb);
49  ipiv.resize(norb);
50  getrf_ws = syclSolver::getrf_scratchpad_size<T_FP>(m_queue, norb, norb, norb);
51  getri_ws = syclSolver::getri_scratchpad_size<T_FP>(m_queue, norb, norb);
52  workspace.resize(std::max(getrf_ws, getri_ws));
53  }
54  }
Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
scratch memory for cusolverDN
Vector< T_FP, SYCLAllocator< T_FP > > workspace
workspace
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
pivot array + info

Member Data Documentation

◆ getrf_ws

std::int64_t getrf_ws = 0
private

◆ getri_ws

std::int64_t getri_ws = 0
private

◆ ipiv

Vector<std::int64_t, SYCLAllocator<std::int64_t> > ipiv
private

pivot array + info

Definition at line 34 of file syclSolverInverter.hpp.

Referenced by syclSolverInverter< T_FP >::invert_transpose(), and syclSolverInverter< T_FP >::resize().

◆ Mat1_gpu

Matrix<T_FP, SYCLAllocator<T_FP> > Mat1_gpu
private

scratch memory for cusolverDN

Definition at line 32 of file syclSolverInverter.hpp.

Referenced by syclSolverInverter< T_FP >::invert_transpose(), and syclSolverInverter< T_FP >::resize().

◆ workspace

Vector<T_FP, SYCLAllocator<T_FP> > workspace
private

The documentation for this class was generated from the following file: