12 #ifndef QMCPLUSPLUS_DELAYED_UPDATE_H 13 #define QMCPLUSPLUS_DELAYED_UPDATE_H 28 template<
typename T,
typename T_FP>
58 inline void resize(
int norb,
int delay)
73 template<
typename TREAL>
76 detEng.invert_transpose(logdetT, Ainv, log_value);
96 template<
typename VVT>
106 constexpr T
czero(0);
107 const int norb = Ainv.
rows();
112 BLAS::gemv(
'T', norb,
delay_count,
cone,
U.
data(), norb, invRow.data(), 1,
czero,
p.
data(), 1);
113 BLAS::gemv(
'N',
delay_count,
delay_count, -
cone,
Binv.
data(), lda_Binv,
p.
data(), 1,
czero,
Binv[
delay_count], 1);
114 BLAS::gemv(
'N', norb,
delay_count,
cone,
V.
data(), norb,
Binv[
delay_count], 1,
cone, invRow.data(), 1);
124 template<
typename VVT,
typename RATIOT>
128 constexpr T
czero(0);
129 const int norb = Ainv.
rows();
135 BLAS::gemv(
'T', norb,
delay_count + 1, -
cone,
V.
data(), norb, psiV.data(), 1,
czero,
p.
data(), 1);
137 const T sigma =
static_cast<T
>(RATIOT(1) / ratio_new);
163 constexpr T
czero(0);
164 const int norb = Ainv.
rows();
169 BLAS::gemv(
'T', norb, norb,
cone, Ainv.
data(), norb,
U[0], 1,
czero,
temp.
data(), 1);
182 BLAS::gemm(
'T',
'N',
delay_count, norb, norb,
cone,
U.
data(), norb, Ainv.
data(), norb,
czero,
tempMat.
data(),
186 BLAS::gemm(
'N',
'N', norb,
delay_count,
delay_count,
cone,
V.
data(), norb,
Binv.
data(), lda_Binv,
czero,
188 BLAS::gemm(
'N',
'N', norb, norb,
delay_count, -
cone,
U.
data(), norb,
tempMat.
data(), lda_Binv,
cone,
196 const int block_size = getAlignedSize<T>((norb + num_threads - 1) / num_threads);
197 int num_block = (norb + block_size - 1) / block_size;
199 for (
int ix = 0; ix < num_block; ix++)
201 int x_offset = ix * block_size;
203 Ainv[x_offset], norb,
czero,
tempMat[x_offset], lda_Binv);
209 for (
int iy = 0; iy < num_block; iy++)
211 int y_offset = iy * block_size;
215 #pragma omp for collapse(2) nowait 216 for (
int iy = 0; iy < num_block; iy++)
217 for (
int ix = 0; ix < num_block; ix++)
219 int x_offset = ix * block_size;
220 int y_offset = iy * block_size;
223 Ainv[x_offset] + y_offset, norb);
233 #endif // QMCPLUSPLUS_DELAYED_UPDATE_H void resize(size_type n, Type_t val=Type_t())
Resize the container.
helper functions for EinsplineSetBuilder
void acceptRow(Matrix< T > &Ainv, int rowchanged, const VVT &psiV, const RATIOT ratio_new)
accept a move with the update delayed
service class for explicitly managing the threading of BLAS/LAPACK calls from OpenMP parallel region ...
DiracMatrix< T_FP > detEng
matrix inversion engine
constexpr std::complex< float > czero
constexpr std::complex< float > cone
DelayedUpdate()
default constructor
static bool NestedThreadingSupported()
void invert_transpose(const Matrix< T > &logdetT, Matrix< T > &Ainv, std::complex< TREAL > &log_value)
compute the inverse of the transpose of matrix A
void resize(size_type n, size_type m)
Resize the container.
implements delayed update on CPU using BLAS
static void gemv(int n, int m, const double *restrict amat, const double *restrict x, double *restrict y)
Matrix< T > Binv
Matrix inverse of B, at maximum KxK.
helper class to compute matrix inversion and the log value of determinant
std::vector< int > delay_list
list of delayed electrons
static void ger(int m, int n, double alpha, const double *x, int incx, const double *y, int incy, double *a, int lda)
void initializeInv(const Matrix< T > &Ainv)
initialize internal objects when Ainv is refreshed
void updateInvMat(Matrix< T > &Ainv)
update the full Ainv and reset delay_count
Vector< T > temp
temporary scratch space used by SM-1
Declaration of Vector<T,Alloc>. Manages memory through Alloc directly and allows referencing an existing ...
int delay_count
current number of delays; increases by one for each acceptance and is reset to 0 after updating Ainv ...
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
int getDelayCount() const
void resize(int norb, int delay)
resize the internal storage
Matrix< T > tempMat
scratch space, used during inverse update
Matrix< T > U
orbital values of delayed electrons
static void gemm(char Atrans, char Btrans, int M, int N, int K, double alpha, const double *A, int lda, const double *restrict B, int ldb, double beta, double *restrict C, int ldc)
Matrix< T > V
rows of Ainv corresponding to delayed electrons
int getNextLevelNumThreads()
get the number of threads at the next parallel level
Vector< T > p
new column of B
void getInvRow(const Matrix< T > &Ainv, int rowchanged, VVT &invRow)
compute the row of up-to-date Ainv