12 #ifndef QMCPLUSPLUS_DIRAC_MATRIX_COMPUTE_OMPTARGET_H 13 #define QMCPLUSPLUS_DIRAC_MATRIX_COMPUTE_OMPTARGET_H 44 template<
typename VALUE_FP>
82 for (
int iw = 0; iw < nw; ++iw)
87 auto psi_M_ptr = psi_Ms.
data() + iw *
n *
n;
90 lwork_ =
static_cast<int>(lw);
109 lwork_ =
static_cast<int>(lw);
120 template<
typename TMAT>
127 for (
int i = 0; i <
n; i++)
129 log_value = {0.0, 0.0};
134 template<
typename TMAT>
146 for (
int iw = 0; iw < nw; ++iw)
148 VALUE_FP* LU_M = psi_Ms.
data() + iw *
n *
n;
150 for (
int i = 0; i <
n; i++)
/// Create a copy of this object: copy-constructs a new DiracMatrixComputeOMPTarget
/// from *this and returns it owned by a std::unique_ptr<Resource>.
165 std::unique_ptr<Resource>
makeClone()
const override {
return std::make_unique<DiracMatrixComputeOMPTarget>(*this); }
177 template<
typename TMAT>
183 const int n = a_mat.
rows();
185 const int ldb = inv_a_mat.
cols();
197 template<
typename TMAT>
203 const int n = a_mat.
rows();
205 const int ldb = inv_a_mat.
cols();
223 template<
typename TMAT, PlatformKind PL>
229 for (
int iw = 0; iw < a_mats.size(); iw++)
231 auto& Ainv = inv_a_mats[iw].get();
257 #endif // QMCPLUSPLUS_DIRAC_MATRIX_COMPUTE_OMPTARGET_H std::unique_ptr< Resource > makeClone() const override
OffloadPinnedVector< int > infos_
std::vector< T, aligned_allocator< T > > aligned_vector
helper functions for EinsplineSetBuilder
Class to compute the matrix inversion and the log value of the determinant for a batch of Dirac matrices.
std::vector< StdComp, CUDAHostAllocator< StdComp > > log_values(batch_size)
void computeInvertAndLog(OffloadPinnedMatrix< TMAT > &a_mat, const int n, const int lda, LogValue &log_value)
compute the inverse of invMat (in place) and the log value of determinant
RealAlias< VALUE_FP > FullPrecReal
void convert(const PL &lat, const PV &pin, PV &pout)
void mw_invertTranspose(compute::Queue< PL > &resource_ignored, const RefVector< const OffloadPinnedMatrix< TMAT >> &a_mats, const RefVector< OffloadPinnedMatrix< TMAT >> &inv_a_mats, OffloadPinnedVector< LogValue > &log_values)
This covers both mixed and Full precision case.
service class for explicitly managing the threading of BLAS/LAPACK calls from OpenMP parallel region ...
void transpose(const T *restrict A, size_t m, size_t lda, TO *restrict B, size_t n, size_t ldb)
transpose of A(m,n) to B(n,m)
OffloadPinnedVector< int > pivots_
int Xgetrf(int n, int m, float *restrict a, int lda, int *restrict piv)
wrappers around xgetrf lapack routines
OffloadPinnedVector< VALUE_FP > psiM_fp_
Matrices held in memory: n^2 * nw elements in total.
DiracMatrix< VALUE_FP > detEng_
matrix inversion engine
int Xgetri(int n, float *restrict a, int lda, int *restrict piv, float *restrict work, int &lwork)
inversion of a float matrix after lu factorization
void remapCopy(size_t m, size_t n, const T *restrict A, size_t lda, TO *restrict B, size_t ldb)
copy of A(m,n) to B(m,n)
OffloadPinnedVector< VALUE_FP > LU_diags_fp_
void computeLogDet(const T *restrict diag, int n, const int *restrict pivot, std::complex< T_FP > &logdet)
void reset(OffloadPinnedVector< VALUE_FP > &psi_Ms, const int n, const int lda, const int batch_size)
reset internal work space.
OMPallocator is an allocator with fused device and dualspace allocator functionality.
DiracMatrixComputeOMPTarget()
std::enable_if_t< std::is_same< VALUE_FP, TMAT >::value > invert_transpose(HandleResource &resource, const OffloadPinnedMatrix< TMAT > &a_mat, OffloadPinnedMatrix< TMAT > &inv_a_mat, LogValue &log_value)
compute the inverse of the transpose of matrix A and its determinant value in log when VALUE_FP and TMAT are the same type
std::vector< std::reference_wrapper< T > > RefVector
std::complex< FullPrecReal > LogValue
typename RealAlias_impl< T >::value_type RealAlias
If you have a function templated on a value that can be real or complex and you need to get the base ...
void reset(OffloadPinnedMatrix< VALUE_FP > &psi_M, const int n, const int lda)
reset internal work space for the single-walker case. My understanding might be off.
void computeInvertAndLog(OffloadPinnedVector< TMAT > &psi_Ms, const int n, const int lda, OffloadPinnedVector< LogValue > &log_values)
std::enable_if_t<!std::is_same< VALUE_FP, TMAT >::value > invert_transpose(HandleResource &resource, const OffloadPinnedMatrix< TMAT > &a_mat, OffloadPinnedMatrix< TMAT > &inv_a_mat, LogValue &log_value)
compute the inverse of the transpose of matrix A and its determinant value in log when VALUE_FP and TMAT are different types
SIMD version of functions in algorithm.
std::enable_if_t< std::is_same< T_FP, TMAT >::value > invert_transpose(const Matrix< TMAT, ALLOC1 > &amat, Matrix< TMAT, ALLOC2 > &invMat, std::complex< TREAL > &LogDet)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT are the same type
int getNextLevelNumThreads()
get the number of threads at the next parallel level
aligned_vector< VALUE_FP > m_work_