QMCPACK
DiracMatrix.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 #ifndef QMCPLUSPLUS_DIRAC_MATRIX_H
13 #define QMCPLUSPLUS_DIRAC_MATRIX_H
14 
15 #include "CPU/Blasf.h"
16 #include "CPU/BlasThreadingEnv.h"
17 #include "OhmmsPETE/OhmmsMatrix.h"
19 #include "Concurrency/OpenMP.h"
20 #include "CPU/SIMD/algorithm.hpp"
21 
22 namespace qmcplusplus
23 {
24 /** wrappers around xgetrf lapack routines
25  * \param[in] n rows
26  * \param[in] m cols
27  * \param[inout] a matrix contains LU matrix after call
28  * \param[in] lda leading dimension of a
29  * \param[out] piv pivot vector
30  */
31 inline int Xgetrf(int n, int m, float* restrict a, int lda, int* restrict piv)
32 {
33  int status;
34  sgetrf(n, m, a, lda, piv, status);
35  return status;
36 }
37 
38 inline int Xgetrf(int n, int m, std::complex<float>* restrict a, int lda, int* restrict piv)
39 {
40  int status;
41  cgetrf(n, m, a, lda, piv, status);
42  return status;
43 }
44 
45 inline int Xgetrf(int n, int m, double* restrict a, int lda, int* restrict piv)
46 {
47  int status;
48  dgetrf(n, m, a, lda, piv, status);
49  return status;
50 }
51 
52 inline int Xgetrf(int n, int m, std::complex<double>* restrict a, int lda, int* restrict piv)
53 {
54  int status;
55  zgetrf(n, m, a, lda, piv, status);
56  return status;
57 }
58 
59 /** inversion of a float matrix after lu factorization*/
60 inline int Xgetri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork)
61 {
62  int status;
63  sgetri(n, a, lda, piv, work, lwork, status);
64  return status;
65 }
66 
67 inline int Xgetri(int n,
68  std::complex<float>* restrict a,
69  int lda,
70  int* restrict piv,
71  std::complex<float>* restrict work,
72  int& lwork)
73 {
74  int status;
75  cgetri(n, a, lda, piv, work, lwork, status);
76  return status;
77 }
78 
79 inline int Xgetri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork)
80 {
81  int status;
82  dgetri(n, a, lda, piv, work, lwork, status);
83  return status;
84 }
85 
86 /** inversion of a std::complex<double> matrix after lu factorization*/
87 inline int Xgetri(int n,
88  std::complex<double>* restrict a,
89  int lda,
90  int* restrict piv,
91  std::complex<double>* restrict work,
92  int& lwork)
93 {
94  int status;
95  zgetri(n, a, lda, piv, work, lwork, status);
96  return status;
97 }
98 
99 template<typename T, typename T_FP>
100 inline void computeLogDet(const T* restrict diag, int n, const int* restrict pivot, std::complex<T_FP>& logdet)
101 {
102  logdet = std::complex<T_FP>();
103  for (size_t i = 0; i < n; i++)
104  logdet += std::log(std::complex<T_FP>((pivot[i] == i + 1) ? diag[i] : -diag[i]));
105 }
106 
107 /** helper class to compute matrix inversion and the log value of determinant
108  * @tparam T_FP the datatype used in the actual computation of matrix inversion
109  */
110 template<typename T_FP>
112 {
116  int Lwork;
117  /// scratch space used for mixed precision
119  /// LU diagonal elements
121 
122  /// reset internal work space
123  inline void reset(T_FP* invMat_ptr, const int lda)
124  {
125  m_pivot.resize(lda);
126  Lwork = -1;
127  T_FP tmp;
128  Real_FP lw;
129  int status = Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork);
130  if (status != 0)
131  {
132  std::ostringstream msg;
133  msg << "Xgetri failed with error " << status << std::endl;
134  throw std::runtime_error(msg.str());
135  }
136 
137  lw = std::real(tmp);
138  Lwork = static_cast<int>(lw);
139  m_work.resize(Lwork);
140  LU_diag.resize(lda);
141  }
142 
143  /** compute the inverse of invMat (in place) and the log value of determinant
144  * @tparam TREAL real type
145  * @param n invMat is n x n matrix
146  * @param lda the first dimension of invMat
147  * @param LogDet log determinant value of invMat before inversion
148  */
149  template<typename TREAL>
150  inline void computeInvertAndLog(T_FP* invMat, const int n, const int lda, std::complex<TREAL>& LogDet)
151  {
153  if (Lwork < lda)
154  reset(invMat, lda);
155  int status = Xgetrf(n, n, invMat, lda, m_pivot.data());
156  if (status != 0)
157  {
158  std::ostringstream msg;
159  msg << "Xgetrf failed with error " << status << std::endl;
160  throw std::runtime_error(msg.str());
161  }
162  for (int i = 0; i < n; i++)
163  LU_diag[i] = invMat[i * lda + i];
164  computeLogDet(LU_diag.data(), n, m_pivot.data(), LogDet);
165  status = Xgetri(n, invMat, lda, m_pivot.data(), m_work.data(), Lwork);
166  if (status != 0)
167  {
168  std::ostringstream msg;
169  msg << "Xgetri failed with error " << status << std::endl;
170  throw std::runtime_error(msg.str());
171  }
172  }
173 
174 public:
175  DiracMatrix() : Lwork(0) {}
176 
177  /** compute the inverse of the transpose of matrix A and its determinant value in log
178  * when T_FP and TMAT are the same
179  * @tparam TMAT matrix value type
180  * @tparam TREAL real type
181  */
182  template<typename TMAT,
183  typename ALLOC1,
184  typename ALLOC2,
185  typename TREAL,
186  typename = std::enable_if_t<qmc_allocator_traits<ALLOC1>::is_host_accessible>,
187  typename = std::enable_if_t<qmc_allocator_traits<ALLOC2>::is_host_accessible>>
188  inline std::enable_if_t<std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT, ALLOC1>& amat,
189  Matrix<TMAT, ALLOC2>& invMat,
190  std::complex<TREAL>& LogDet)
191  {
192  const int n = invMat.rows();
193  const int lda = invMat.cols();
194  simd::transpose(amat.data(), n, amat.cols(), invMat.data(), n, lda);
195  computeInvertAndLog(invMat.data(), n, lda, LogDet);
196  }
197 
198  /** compute the inverse of the transpose of matrix A and its determinant value in log
199  * when T_FP and TMAT are not the same and need scratch space psiM_fp
200  * @tparam TMAT matrix value type
201  * @tparam TREAL real type
202  */
203  template<typename TMAT,
204  typename ALLOC1,
205  typename ALLOC2,
206  typename TREAL,
207  typename = std::enable_if_t<qmc_allocator_traits<ALLOC1>::is_host_accessible>,
208  typename = std::enable_if_t<qmc_allocator_traits<ALLOC2>::is_host_accessible>>
209  inline std::enable_if_t<!std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT, ALLOC1>& amat,
210  Matrix<TMAT, ALLOC2>& invMat,
211  std::complex<TREAL>& LogDet)
212  {
213  const int n = invMat.rows();
214  const int lda = invMat.cols();
215  psiM_fp.resize(n, lda);
216  simd::transpose(amat.data(), n, amat.cols(), psiM_fp.data(), n, lda);
217  computeInvertAndLog(psiM_fp.data(), n, lda, LogDet);
218  invMat = psiM_fp;
219  }
220 };
221 } // namespace qmcplusplus
222 
223 #endif // QMCPLUSPLUS_DIRAC_MATRIX_H
void reset(T_FP *invMat_ptr, const int lda)
reset internal work space
Definition: DiracMatrix.h:123
std::vector< T, aligned_allocator< T > > aligned_vector
QMCTraits::RealType real
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
aligned_vector< T_FP > LU_diag
LU diagonal elements.
Definition: DiracMatrix.h:120
service class for explicitly managing the threading of BLAS/LAPACK calls from OpenMP parallel region ...
std::enable_if_t<!std::is_same< T_FP, TMAT >::value > invert_transpose(const Matrix< TMAT, ALLOC1 > &amat, Matrix< TMAT, ALLOC2 > &invMat, std::complex< TREAL > &LogDet)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
Definition: DiracMatrix.h:209
void cgetri(const int &n, std::complex< float > *a, const int &n0, int const *piv, std::complex< float > *work, const int &, int &st)
void transpose(const T *restrict A, size_t m, size_t lda, TO *restrict B, size_t n, size_t ldb)
transpose of A(m,n) to B(n,m)
Definition: algorithm.hpp:97
void resize(size_type n, size_type m)
Resize the container.
Definition: OhmmsMatrix.h:99
int Xgetrf(int n, int m, float *restrict a, int lda, int *restrict piv)
wrappers around xgetrf lapack routines
Definition: DiracMatrix.h:31
size_type cols() const
Definition: OhmmsMatrix.h:78
int Xgetri(int n, float *restrict a, int lda, int *restrict piv, float *restrict work, int &lwork)
inversion of a float matrix after lu factorization
Definition: DiracMatrix.h:60
void sgetri(const int &n, float *a, const int &n0, int const *piv, float *work, const int &, int &st)
helper class to compute matrix inversion and the log value of determinant
Definition: DiracMatrix.h:111
void dgetrf(const int &n, const int &m, double *a, const int &n0, int *piv, int &st)
aligned_vector< int > m_pivot
Definition: DiracMatrix.h:115
void cgetrf(const int &n, const int &m, std::complex< float > *a, const int &n0, int *piv, int &st)
void computeLogDet(const T *restrict diag, int n, const int *restrict pivot, std::complex< T_FP > &logdet)
Definition: DiracMatrix.h:100
void zgetri(const int &n, std::complex< double > *a, const int &n0, int const *piv, std::complex< double > *work, const int &, int &st)
void sgetrf(const int &n, const int &m, float *a, const int &n0, int *piv, int &st)
size_type rows() const
Definition: OhmmsMatrix.h:77
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)
typename RealAlias_impl< T >::value_type RealAlias
If you have a function templated on a value that can be real or complex and you need to get the base ...
aligned_vector< T_FP > m_work
Definition: DiracMatrix.h:114
void computeInvertAndLog(T_FP *invMat, const int n, const int lda, std::complex< TREAL > &LogDet)
compute the inverse of invMat (in place) and the log value of determinant
Definition: DiracMatrix.h:150
SIMD version of functions in algorithm.
std::enable_if_t< std::is_same< T_FP, TMAT >::value > invert_transpose(const Matrix< TMAT, ALLOC1 > &amat, Matrix< TMAT, ALLOC2 > &invMat, std::complex< TREAL > &LogDet)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
Definition: DiracMatrix.h:188
int getNextLevelNumThreads()
get the number of threads at the next parallel level
Definition: OpenMP.h:35
Matrix< T_FP > psiM_fp
scratch space used for mixed precision
Definition: DiracMatrix.h:118
void zgetrf(const int &n, const int &m, std::complex< double > *a, const int &n0, int *piv, int &st)
void dgetri(const int &n, double *a, const int &n0, int const *piv, double *work, const int &, int &st)