QMCPACK
syclSolverInverter.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 #ifndef QMCPLUSPLUS_SYCL_MKL_SOLVERINVERTOR_H
13 #define QMCPLUSPLUS_SYCL_MKL_SOLVERINVERTOR_H
14 
15 #include "OhmmsPETE/OhmmsVector.h"
16 #include "OhmmsPETE/OhmmsMatrix.h"
17 #include "SYCL/SYCLallocator.hpp"
18 #include "SYCL/syclBLAS.hpp"
19 #include "SYCL/syclSolver.hpp"
21 #include "CPU/math.hpp"
22 
23 namespace qmcplusplus
24 {
25 /** implements matrix inversion via cuSolverDN
26  * @tparam T_FP high precision for matrix inversion, T_FP >= T
27  */
28 template<typename T_FP>
30 {
31  /// scratch memory for cusolverDN
33  /// pivot array + info
35  /// workspace
37  std::int64_t getrf_ws = 0;
38  std::int64_t getri_ws = 0;
39 
40  /** resize the internal storage
41  * @param norb number of electrons/orbitals
42  * @param delay, maximum delay 0<delay<=norb
43  */
44  inline void resize(int norb, sycl::queue& m_queue)
45  {
46  if (Mat1_gpu.rows() != norb)
47  {
48  Mat1_gpu.resize(norb, norb);
49  ipiv.resize(norb);
50  getrf_ws = syclSolver::getrf_scratchpad_size<T_FP>(m_queue, norb, norb, norb);
51  getri_ws = syclSolver::getri_scratchpad_size<T_FP>(m_queue, norb, norb);
52  workspace.resize(std::max(getrf_ws, getri_ws));
53  }
54  }
55 
56 public:
57  /** compute the inverse of the transpose of matrix A and its determinant value in log
58  * when T_FP and TMAT are the same
59  * @tparam TREAL real type
60  */
61  template<typename TMAT, typename TREAL, typename = std::enable_if_t<std::is_same<TMAT, T_FP>::value>>
62  std::enable_if_t<std::is_same<TMAT, T_FP>::value> invert_transpose(const Matrix<TMAT>& logdetT,
63  Matrix<TMAT>& Ainv,
64  Matrix<TMAT, SYCLAllocator<TMAT>>& Ainv_gpu,
65  std::complex<TREAL>& log_value,
66  sycl::queue& m_queue)
67  {
68  const int norb = logdetT.rows();
69  resize(norb, m_queue);
70 
71  m_queue.memcpy(Mat1_gpu.data(), logdetT.data(), logdetT.size() * sizeof(TMAT));
72  syclBLAS::transpose(m_queue, Mat1_gpu.data(), norb, Mat1_gpu.cols(), Ainv_gpu.data(), norb,
73  Ainv_gpu.cols());
74  try
75  {
76  syclSolver::getrf(m_queue, norb, norb, Ainv_gpu.data(), norb, ipiv.data(), workspace.data(), getrf_ws)
77  .wait();
78  }
79  catch (sycl::exception const& ex)
80  {
81  std::ostringstream err;
82  err << "\t\tCaught synchronous SYCL exception during getrf:\n"
83  << ex.what() << " status: " << ex.code() << std::endl;
84  std::cerr << err.str();
85  throw std::runtime_error(err.str());
86  }
87 
88  log_value = computeLogDet_sycl<TREAL>(m_queue, norb, Ainv_gpu.cols(), Ainv_gpu.data(), ipiv.data());
89 
90  syclSolver::getri(m_queue, norb, Ainv_gpu.data(), norb, ipiv.data(), workspace.data(), getri_ws);
91 
92  m_queue.memcpy(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT)).wait();
93  }
94 
95  /** compute the inverse of the transpose of matrix A and its determinant value in log
96  * when T_FP and TMAT are not the same
97  * @tparam TREAL real type
98  */
99  template<typename TMAT, typename TREAL, typename = std::enable_if_t<!std::is_same<TMAT, T_FP>::value>>
100  std::enable_if_t<!std::is_same<TMAT, T_FP>::value> invert_transpose(const Matrix<TMAT>& logdetT,
101  Matrix<TMAT>& Ainv,
102  Matrix<TMAT, SYCLAllocator<TMAT>>& Ainv_gpu,
103  std::complex<TREAL>& log_value,
104  sycl::queue& m_queue)
105  {
106  const int norb = logdetT.rows();
107  resize(norb, m_queue);
108  //use Ainv_gpu for transpose
109  m_queue.memcpy(Ainv_gpu.data(), logdetT.data(), logdetT.size() * sizeof(TMAT));
110  //transpose
111  syclBLAS::transpose(m_queue, Ainv_gpu.data(), norb, Ainv_gpu.cols(), Mat1_gpu.data(), norb,
112  Mat1_gpu.cols());
113 
114  //getrf (LU) -> getri (inverse)
115  try
116  {
117  syclSolver::getrf(m_queue, norb, norb, Mat1_gpu.data(), norb, ipiv.data(), workspace.data(), getrf_ws)
118  .wait();
119  }
120  catch (sycl::exception const& ex)
121  {
122  std::ostringstream err;
123  err << "\t\tCaught synchronous SYCL exception during getrf:\n"
124  << ex.what() << " status: " << ex.code() << std::endl;
125  std::cerr << err.str();
126  throw std::runtime_error(err.str());
127  }
128 
129  log_value = computeLogDet_sycl<TREAL>(m_queue, norb, Mat1_gpu.cols(), Mat1_gpu.data(), ipiv.data());
130 
131  syclSolver::getri(m_queue, norb, Mat1_gpu.data(), norb, ipiv.data(), workspace.data(), getri_ws);
132 
133  syclBLAS::copy_n(m_queue, Mat1_gpu.data(), Mat1_gpu.size(), Ainv_gpu.data());
134 
135  m_queue.memcpy(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT)).wait();
136 
137  for(int i = 0; i < norb; i++)
138  if (qmcplusplus::isnan(std::norm(Ainv[i][i])))
139  throw std::runtime_error("Ainv[i][i] is NaN. i = " + std::to_string(i));
140  }
141 };
142 } // namespace qmcplusplus
143 
144 #endif // QMCPLUSPLUS_CUSOLVERINVERTOR_H
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
allocator for SYCL device memory T data type ALIGN alignment in bytes
Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
scratch memory for cusolverDN
std::enable_if_t<!std::is_same< TMAT, T_FP >::value > invert_transpose(const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
implements matrix inversion via cuSolverDN
sycl::event transpose(sycl::queue &q, const T1 *restrict in, int m, int lda, T2 *restrict out, int n, int ldb, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:475
size_type size() const
Definition: OhmmsMatrix.h:76
double norm(const zVec &c)
Definition: VectorOps.h:118
rocblas_status getri(rocblas_handle &handle, int n, double *A, int lda, int *ipiv, int *info)
Definition: rocsolver.hpp:143
Vector< T_FP, SYCLAllocator< T_FP > > workspace
workspace
size_type rows() const
Definition: OhmmsMatrix.h:77
Declaraton of Vector<T,Alloc> Manage memory through Alloc directly and allow referencing an existing ...
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
this file provides three C++ memory allocators using SYCL specific memory allocation functions...
std::enable_if_t< std::is_same< TMAT, T_FP >::value > invert_transpose(const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
void resize(int norb, sycl::queue &m_queue)
resize the internal storage
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
pivot array + info
cusolverStatus_t getrf(cusolverDnHandle_t &handle, int m, int n, double *A, int lda, double *work, int *ipiv, int *info)
Definition: cusolver.hpp:92
bool isnan(float a)
return true if the value is NaN.
Definition: math.cpp:18