31 std::unique_ptr<Resource>
makeClone()
const override {
return std::make_unique<LCAOMultiWalkerMem>(*this); }
43 #if defined(ENABLE_CUDA) && defined(ENABLE_OFFLOAD) 53 std::unique_ptr<basis_type>&& bs,
58 BasisSetSize(bs ? bs->getBasisSetSize() : 0),
60 useOMPoffload_(use_offload),
65 throw std::runtime_error(
"LCAOrbitalSet cannot take nullptr as its basis set!");
84 myBasisSet(in.myBasisSet->makeClone()),
86 BasisSetSize(in.BasisSetSize),
88 Identity(in.Identity),
89 useOMPoffload_(in.useOMPoffload_),
90 basis_timer_(in.basis_timer_),
91 mo_timer_(in.mo_timer_)
107 throw std::runtime_error(
"LCAOrbitalSet::setOrbitalSetSize should not be called");
115 throw std::runtime_error(
116 "LCAOrbitalSet::checkObject OrbitalSetSize and BasisSetSize must be equal if Identity = true!");
118 throw std::runtime_error(
"LCAOrbitalSet::checkObject C should be nullptr if Identity = true!");
123 throw std::runtime_error(
"LCAOrbitalSet::checkObject C should not be nullptr if Identity = false!");
125 throw std::runtime_error(
"LCAOrbitalSet::checkObject C rows doesn't match OrbitalSetSize.");
127 throw std::runtime_error(
"LCAOrbitalSet::checkObject C columns doesn't match BasisSetSize.");
141 auto resource_index = collection.
addResource(std::make_unique<LCAOMultiWalkerMem>());
168 basis_list.reserve(spo_list.size());
169 for (
size_t iw = 0; iw < spo_list.size(); iw++)
192 template<
typename T,
unsigned D,
typename Alloc>
195 constexpr
char transa =
't';
196 constexpr
char transb =
'n';
199 BLAS::gemm(transa, transb,
B.rows(), D,
B.cols(),
zone,
B.data(),
B.cols(),
A.data(),
A.capacity(), zero,
C.data(),
208 const size_t output_size = psi.size();
213 for (
size_t j = 0; j < output_size; j++)
227 const size_t output_size = psi.size();
239 for (
size_t j = 0; j < output_size; j++)
245 d2psi[j](0, 0) = hxx[j];
246 d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
247 d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
248 d2psi[j](1, 1) = hyy[j];
249 d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
250 d2psi[j](2, 2) = hzz[j];
261 const size_t output_size = psi.cols();
283 for (
size_t j = 0; j < output_size; j++)
285 dpsi[i][j][0] = gx[j];
286 dpsi[i][j][1] = gy[j];
287 dpsi[i][j][2] = gz[j];
289 d2psi[i][j](0, 0) = hxx[j];
290 d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
291 d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
292 d2psi[i][j](1, 1) = hyy[j];
293 d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
294 d2psi[i][j](2, 2) = hzz[j];
296 dghpsi[i][j][0](0, 0) = gh_xxx[j];
297 dghpsi[i][j][0](0, 1) = gh_xxy[j];
298 dghpsi[i][j][0](0, 2) = gh_xxz[j];
299 dghpsi[i][j][0](1, 0) = gh_xxy[j];
300 dghpsi[i][j][0](1, 1) = gh_xyy[j];
301 dghpsi[i][j][0](1, 2) = gh_xyz[j];
302 dghpsi[i][j][0](2, 0) = gh_xxz[j];
303 dghpsi[i][j][0](2, 1) = gh_xyz[j];
304 dghpsi[i][j][0](2, 2) = gh_xzz[j];
306 dghpsi[i][j][1](0, 0) = gh_xxy[j];
307 dghpsi[i][j][1](0, 1) = gh_xyy[j];
308 dghpsi[i][j][1](0, 2) = gh_xyz[j];
309 dghpsi[i][j][1](1, 0) = gh_xyy[j];
310 dghpsi[i][j][1](1, 1) = gh_yyy[j];
311 dghpsi[i][j][1](1, 2) = gh_yyz[j];
312 dghpsi[i][j][1](2, 0) = gh_xyz[j];
313 dghpsi[i][j][1](2, 1) = gh_yyz[j];
314 dghpsi[i][j][1](2, 2) = gh_yzz[j];
316 dghpsi[i][j][2](0, 0) = gh_xxz[j];
317 dghpsi[i][j][2](0, 1) = gh_xyz[j];
318 dghpsi[i][j][2](0, 2) = gh_xzz[j];
319 dghpsi[i][j][2](1, 0) = gh_xyz[j];
320 dghpsi[i][j][2](1, 1) = gh_yyz[j];
321 dghpsi[i][j][2](1, 2) = gh_yzz[j];
322 dghpsi[i][j][2](2, 0) = gh_xzz[j];
323 dghpsi[i][j][2](2, 1) = gh_yzz[j];
324 dghpsi[i][j][2](2, 2) = gh_zzz[j];
334 const size_t output_size = psi.size();
356 for (
size_t j = 0; j < output_size; j++)
362 d2psi[j](0, 0) = hxx[j];
363 d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
364 d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
365 d2psi[j](1, 1) = hyy[j];
366 d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
367 d2psi[j](2, 2) = hzz[j];
369 dghpsi[j][0](0, 0) = gh_xxx[j];
370 dghpsi[j][0](0, 1) = gh_xxy[j];
371 dghpsi[j][0](0, 2) = gh_xxz[j];
372 dghpsi[j][0](1, 0) = gh_xxy[j];
373 dghpsi[j][0](1, 1) = gh_xyy[j];
374 dghpsi[j][0](1, 2) = gh_xyz[j];
375 dghpsi[j][0](2, 0) = gh_xxz[j];
376 dghpsi[j][0](2, 1) = gh_xyz[j];
377 dghpsi[j][0](2, 2) = gh_xzz[j];
// Gradient-of-Hessian entries for orbital j. Following the naming of the gh_* arrays,
// dghpsi[j][k](a, b) receives the third derivative taken along (k, a, b), so each entry
// must be invariant under any permutation of those three indices.
379 dghpsi[j][1](0, 0) = gh_xxy[j];
380 dghpsi[j][1](0, 1) = gh_xyy[j];
381 dghpsi[j][1](0, 2) = gh_xyz[j];
382 dghpsi[j][1](1, 0) = gh_xyy[j];
383 dghpsi[j][1](1, 1) = gh_yyy[j];
384 dghpsi[j][1](1, 2) = gh_yyz[j];
385 dghpsi[j][1](2, 0) = gh_xyz[j];
386 dghpsi[j][1](2, 1) = gh_yyz[j]; // (y,z,y) component: was gh_xyy, which broke symmetry with [1](1, 2)
387 dghpsi[j][1](2, 2) = gh_yzz[j];
389 dghpsi[j][2](0, 0) = gh_xxz[j]; // (z,x,x) component: was gh_xzz, which broke symmetry with [0](0, 2)
390 dghpsi[j][2](0, 1) = gh_xyz[j];
391 dghpsi[j][2](0, 2) = gh_xzz[j];
392 dghpsi[j][2](1, 0) = gh_xyz[j];
393 dghpsi[j][2](1, 1) = gh_yyz[j];
394 dghpsi[j][2](1, 2) = gh_yzz[j];
395 dghpsi[j][2](2, 0) = gh_xzz[j];
396 dghpsi[j][2](2, 1) = gh_yzz[j];
397 dghpsi[j][2](2, 2) = gh_zzz[j];
403 const size_t output_size = dpsi.size();
408 for (
size_t j = 0; j < output_size; j++)
457 auto& phi_vgl_v = spo_leader.
mw_mem_handle_.getResource().phi_vgl_v;
462 const size_t nw = phi_vgl_v.
size(1);
463 phi_vgl_v.updateFrom();
466 for (
int iw = 0; iw < nw; iw++)
468 const size_t output_size = psi_v_list[iw].get().
size();
469 std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
470 std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
473 for (
size_t idim = 0; idim <
DIM; idim++)
474 BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim],
DIM);
486 auto& basis_vgl_mw = mw_res.basis_vgl_mw;
491 auto basis_list = spo_leader.extractBasisRefList(spo_list);
492 myBasisSet->mw_evaluateVGL(basis_list, P_list, iat, basis_vgl_mw);
498 const size_t output_size = phi_vgl_v.
size(2);
499 const size_t nw = phi_vgl_v.
size(1);
502 int dummy_handle = 0;
504 success =
ompBLAS::copy(dummy_handle, output_size * nw *
DIM_VGL, basis_vgl_mw.device_data(), 1,
507 throw std::runtime_error(
"In LCAOrbitalSet::mw_evaluateVGLImplGEMM ompBLAS::copy failed.");
515 const size_t requested_orb_size = phi_vgl_v.
size(2);
520 auto* c_devptr =
C->device_data();
553 auto& vp_basis_v_mw = mw_res.vp_basis_v_mw;
555 const size_t nVPs = vp_phi_v.
size(0);
558 auto basis_list = spo_leader.extractBasisRefList(spo_list);
559 myBasisSet->mw_evaluateValueVPs(basis_list, vp_list, vp_basis_v_mw);
565 int dummy_handle = 0;
570 throw std::runtime_error(
"In LCAOrbitalSet::mw_evaluateValueVPsImplGEMM ompBLAS::copy failed.");
578 const size_t requested_orb_size = vp_phi_v.
size(1);
583 auto* c_devptr =
C->device_data();
622 const size_t output_size = phi_v.
size(1);
623 const size_t nw = phi_v.
size(0);
626 for (
int iw = 0; iw < nw; iw++)
627 std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
637 const size_t nw = spo_list.size();
638 auto& mw_res = spo_leader.mw_mem_handle_.getResource();
639 auto& basis_v_mw = mw_res.basis_v_mw;
642 auto basis_list = spo_leader.extractBasisRefList(spo_list);
643 myBasisSet->mw_evaluateValue(basis_list, P_list, iat, basis_v_mw);
649 int dummy_handle = 0;
653 throw std::runtime_error(
"In LCAOrbitalSet::mw_evaluateValueImplGEMM ompBLAS::copy failed.");
661 const size_t requested_orb_size = phi_v.
size(1);
666 auto* c_devptr =
C->device_data();
691 const std::vector<const ValueType*>& invRow_ptr_list,
692 std::vector<std::vector<ValueType>>& ratios_list)
const 702 auto& vp_phi_v = spo_leader.
mw_mem_handle_.getResource().vp_phi_v;
705 const size_t requested_orb_size = psi_list[0].get().size();
706 vp_phi_v.resize(nVPs, requested_orb_size);
712 const size_t nw = vp_list.size();
713 auto& invRow_deviceptr_list = spo_leader.mw_mem_handle_.getResource().invRow_deviceptr_list;
714 auto& rg_buffer = spo_leader.mw_mem_handle_.getResource().rg_buffer;
716 invRow_deviceptr_list.resize(nVPs);
717 rg_buffer.resize(4, nVPs);
719 for (
size_t iw = 0, istart = 0; iw < nw; iw++)
721 const size_t nvp_i = vp_list[iw].getTotalNum();
722 std::fill_n(invRow_deviceptr_list.begin() + istart, nvp_i, invRow_ptr_list[iw]);
725 auto* invRow_deviceptr_list_ptr = invRow_deviceptr_list.data();
726 auto* vp_phi_v_ptr = vp_phi_v.data();
727 auto* rg_buffer_ptr = rg_buffer.data();
728 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for \ 729 map(always,to: invRow_deviceptr_list_ptr[:nVPs]) \ 730 map(to: vp_phi_v_ptr[:nVPs*requested_orb_size]) \ 731 map(always, from: rg_buffer_ptr[:nVPs])")
732 for (
size_t ivp = 0; ivp < nVPs; ivp++)
734 rg_buffer_ptr[ivp] = 0;
735 for (
size_t iorb = 0; iorb < requested_orb_size; iorb++)
736 rg_buffer_ptr[ivp] += vp_phi_v_ptr[ivp * requested_orb_size + iorb] * invRow_deviceptr_list_ptr[ivp][iorb];
739 for (
size_t iw = 0, index = 0; iw < nw; iw++)
740 for (
size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
741 ratios_list[iw][iat] = rg_buffer[0][index++];
744 for (
size_t iw = 0, index = 0; iw < vp_list.size(); iw++)
745 for (
size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
746 ratios_list[iw][iat] =
simd::dot(vp_phi_v.data_at(index++, 0), invRow_ptr_list[iw], requested_orb_size);
752 std::vector<ValueType>& ratios)
758 std::copy_n(psiinv.data(), psiinv.size(), invTemp.data());
780 const std::vector<const ValueType*>& invRow_ptr_list,
782 std::vector<ValueType>& ratios,
783 std::vector<GradType>& grads)
const 787 assert(phi_vgl_v.
size(1) == spo_list.size());
799 const size_t nw = spo_list.size();
800 const size_t norb_requested = phi_vgl_v.
size(2);
804 auto& invRow_deviceptr_list = spo_leader.
mw_mem_handle_.getResource().invRow_deviceptr_list;
805 auto& rg_buffer = spo_leader.mw_mem_handle_.getResource().rg_buffer;
807 invRow_deviceptr_list.resize(nw);
808 rg_buffer.resize(4, nw);
810 for (
size_t iw = 0; iw < nw; iw++)
811 invRow_deviceptr_list[iw] = invRow_ptr_list[iw];
813 auto* invRow_deviceptr_list_ptr = invRow_deviceptr_list.data();
814 auto* phi_vgl_v_ptr = phi_vgl_v.
data();
815 auto* rg_buffer_ptr = rg_buffer.data();
816 const size_t phi_vgl_stride = nw * norb_requested;
818 PRAGMA_OFFLOAD(
"omp target teams distribute \ 819 map(always,to: invRow_deviceptr_list_ptr[:nw]) \ 820 map(to: phi_vgl_v_ptr[:nw*norb_requested]) \ 821 map(always, from: rg_buffer_ptr[:rg_buffer.size()])")
822 for (
size_t iw = 0; iw < nw; iw++)
824 auto* phi_v_ptr = phi_vgl_v_ptr + iw * norb_requested;
825 auto* phi_gx_ptr = phi_v_ptr + phi_vgl_stride;
826 auto* phi_gy_ptr = phi_gx_ptr + phi_vgl_stride;
827 auto* phi_gz_ptr = phi_gy_ptr + phi_vgl_stride;
828 auto* invRow = invRow_deviceptr_list_ptr[iw];
830 ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
831 PRAGMA_OFFLOAD(
"omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)")
832 for (
size_t iorb = 0; iorb < norb_requested; iorb++)
834 ratio += phi_v_ptr[iorb] * invRow[iorb];
835 grad_x += phi_gx_ptr[iorb] * invRow[iorb];
836 grad_y += phi_gy_ptr[iorb] * invRow[iorb];
837 grad_z += phi_gz_ptr[iorb] * invRow[iorb];
840 rg_buffer_ptr[iw] = ratio;
841 rg_buffer_ptr[iw + nw] = grad_x / ratio;
842 rg_buffer_ptr[iw + nw * 2] = grad_y / ratio;
843 rg_buffer_ptr[iw + nw * 3] = grad_z / ratio;
846 for (
size_t iw = 0; iw < nw; iw++)
848 ratios[iw] = rg_buffer[0][iw];
849 grads[iw] = {rg_buffer[1][iw], rg_buffer[2][iw], rg_buffer[3][iw]};
853 for (
int iw = 0; iw < nw; iw++)
855 ratios[iw] =
simd::dot(invRow_ptr_list[iw], phi_vgl_v.
data_at(0, iw, 0), norb_requested);
857 for (
size_t idim = 0; idim <
DIM; idim++)
858 dphi[idim] =
simd::dot(invRow_ptr_list[iw], phi_vgl_v.
data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
907 const size_t output_size = logdet.cols();
912 for (
size_t j = 0; j < output_size; j++)
914 dlogdet[i][j][0] = gx[j];
915 dlogdet[i][j][1] = gy[j];
916 dlogdet[i][j][2] = gz[j];
927 const size_t output_size = psi.cols();
939 for (
size_t j = 0; j < output_size; j++)
941 dpsi[i][j][0] = gx[j];
942 dpsi[i][j][1] = gy[j];
943 dpsi[i][j][2] = gz[j];
945 d2psi[i][j](0, 0) = hxx[j];
946 d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
947 d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
948 d2psi[i][j](1, 1) = hyy[j];
949 d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
950 d2psi[i][j](2, 2) = hzz[j];
956 const size_t output_size = dpsi.cols();
961 for (
size_t j = 0; j < output_size; j++)
966 dpsi[i][j][0] = -gx[j];
967 dpsi[i][j][1] = -gy[j];
968 dpsi[i][j][2] = -gz[j];
978 const size_t output_size = dpsi.cols();
998 for (
size_t j = 0; j < output_size; j++)
1003 dpsi[i][j][0] = -gx[j];
1004 dpsi[i][j][1] = -gy[j];
1005 dpsi[i][j][2] = -gz[j];
1007 dgpsi[i][j](0, 0) = -hxx[j];
1008 dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
1009 dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
1010 dgpsi[i][j](1, 1) = -hyy[j];
1011 dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
1012 dgpsi[i][j](2, 2) = -hzz[j];
1015 dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
1016 dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
1017 dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
1030 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1040 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1058 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1068 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1087 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1097 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1115 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1123 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1143 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1151 for (
size_t i = 0, iat = first; iat < last; i++, iat++)
1182 if (!use_stored_copy)
base class for Single-particle orbital sets
OrbitalSetTraits< ValueType >::HessVector HessVector
OffloadMWVGLArray basis_vgl_mw
void mw_evaluateVGL(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const final
evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walker...
void gemm(BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const float &alpha, const float *A, int lda, const float *B, int ldb, const float &beta, float *C, int ldc)
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
void takebackResource(ResourceHandle< RS > &res_handle)
helper functions for EinsplineSetBuilder
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
QTBase::RealType RealType
OffloadMWVGLArray phi_vgl_v
void evaluateGradSourceRow(const ParticleSet &P, int iel, const ParticleSet &source, int iat_src, GradVector &grad_phi) final
Returns a row of d/dR_iat phi_j(r) evaluated at position r.
Type_t * data_at(const std::array< SIZET, D > &indices)
std::unique_ptr< SPOSet > makeClone() const final
make a clone of itself; every derived class must implement this to have threading working correctly...
void fill_n(T *x, size_t count, const T &value)
OffloadMatrix< ValueType > rg_buffer
OffloadVector< const ValueType * > invRow_deviceptr_list
void evaluate_ionderiv_v_row_impl(const vgl_type &temp, GradVector &dlogdet) const
Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogd...
const bool Identity
true if C is an identity matrix
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
NewTimer & mo_timer_
timer for MO
std::unique_ptr< basis_type > myBasisSet
pointer to the basis set
OrbitalSetTraits< ValueType >::ValueMatrix ValueMatrix
size_t getTotalNum() const
std::shared_ptr< OffloadValueMatrix > C_copy
a copy of the original C before orbital rotation is applied;
SoA adaptor class for Vector<TinyVector<T,D> >
int size() const
return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); ...
std::unique_ptr< Resource > makeClone() const override
void applyRotation(const ValueMatrix &rot_mat, bool use_stored_copy) final
apply rotation to all the orbitals
void product_Atx(const Matrix< T > &A, const Vector< T > &x, Vector< T > &y)
static function to perform y = A^t x for generic matrix and vector
void evaluateGradSource(const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &grad_phi) final
Calculate ion derivatives of SPO's.
void evaluateVGL(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) final
evaluate the values, gradients and laplacians of this single-particle orbital set ...
void evaluate_vghgh_impl(const vghgh_type &temp, ValueVector &psi, GradVector &dpsi, HessVector &d2psi, GGGVector &dghpsi) const
Unpacks data in vghgh_type temp object into wavefunction friendly data structures for value...
LCAOrbitalSet(const std::string &my_name, std::unique_ptr< basis_type > &&bs, size_t norbs, bool identity, bool use_offload)
constructor
OffloadVector< size_t > nVP_index_list
void mw_evaluateVGLImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, OffloadMWVGLArray &phi_vgl_v) const
vgh_type Temph
These are temporary VectorSoAContainers to hold value, gradient, and hessian for all basis or SPO fun...
void evaluateVGH(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) final
evaluate the values, gradients and hessians of this single-particle orbital set
OrbitalSetTraits< ValueType >::GradMatrix GradMatrix
constexpr std::complex< double > zone
Specialized particle class for atomistic simulations.
virtual void mw_evaluateValue(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const
evaluate the values this single-particle orbital sets of multiple walkers
Array< ValueType, 3, OffloadPinnedAllocator< ValueType > > OffloadMWVGLArray
void finalizeConstruction() override
update C on device
void evaluateValue(const ParticleSet &P, int iat, ValueVector &psi) final
evaluate the values of this single-particle orbital set
QTBase::ValueType ValueType
void product(const Matrix< T > &A, const Matrix< T > &B, Matrix< T > &C)
static function to perform C=AB for real matrices
Array< ValueType, 2, OffloadPinnedAllocator< ValueType > > OffloadMWVArray
const IndexType BasisSetSize
number of basis functions in the basis set
virtual void mw_evaluateVGL(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walker...
virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const
evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ra...
OrbitalSetTraits< ValueType >::ValueVector ValueVector
CASTTYPE & getCastedElement(size_t i) const
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
std::shared_ptr< OffloadValueMatrix > C
pointer to matrix containing the coefficients
vgl_type Tempv
Tempv(OrbitalSetSize) Tempv=C*Temp.
RefVectorWithLeader< basis_type > extractBasisRefList(const RefVectorWithLeader< SPOSet > &spo_list) const
helper function for extracting a list of basis sets from a list of LCAOrbitalSet
IndexType OrbitalSetSize
number of Single-particle orbitals
void mw_evaluateValue(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const final
evaluate the values this single-particle orbital sets of multiple walkers
void evaluate_vgh_impl(const vgh_type &temp, ValueVector &psi, GradVector &dpsi, HessVector &d2psi) const
These two functions unpack the data in vgh_type temp object into wavefunction friendly data structure...
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const final
acquire a shared resource from collection
void mw_evaluateValueVPsImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, OffloadMWVArray &phi_v) const
packed walker GEMM implementation with multi virtual particle sets
void checkObject() const final
check consistency between Identity and C
vghgh_type Tempghv
Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)].
OffloadMWVArray basis_v_mw
void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) final
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
T * data()
return the base
LCAOMultiWalkerMem(const LCAOMultiWalkerMem &)
compute::Queue< PlatformKind::OMPTARGET > queue
OffloadMWVArray vp_basis_v_mw
void Product_ABt(const VectorSoaContainer< T, D > &A, const Matrix< T, Alloc > &B, VectorSoaContainer< T, D > &C)
Find a better place for other user classes, Matrix should be padded as well.
std::vector< std::reference_wrapper< T > > RefVector
vgh_type Temphv
Norbitals x [1(value)+3(gradient)+6(hessian)].
CASTTYPE & getCastedLeader() const
compute::BLASHandle< PlatformKind::OMPTARGET > blas_handle
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const final
return a shared resource to collection
ResourceHandle< LCAOMultiWalkerMem > mw_mem_handle_
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
void evaluate_ionderiv_v_impl(const vgl_type &temp, int i, GradMatrix &dlogdet) const
Unpacks data in vgl object and calculates/places ionic gradient result into dlogdet.
OrbitalSetTraits< ValueType >::GradVector GradVector
void createResource(ResourceCollection &collection) const final
initialize a shared resource and hand it to collection
const bool useOMPoffload_
whether offload is on or off at runtime.
void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const final
evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ra...
static void copy(int n, const T *restrict a, T *restrict b)
virtual void mw_evaluateDetRatios(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers ...
void evaluate_ionderiv_vgl_impl(const vghgh_type &temp, int i, GradMatrix &dlogdet, HessMatrix &dglogdet, GradMatrix &dllogdet) const
Unpacks data in vgl object and calculates/places ionic gradient of value, electron gradient...
void evaluate_vgl_impl(const vgl_type &temp, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) const
helper functions to handle Identity
vghgh_type Tempgh
These are temporary VectorSoAContainers to hold value, gradient, hessian, and gradient hessian for al...
vgl_type Temp
Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L.
static size_t countVPs(const RefVectorWithLeader< const VirtualParticleSet > &vp_list)
class to handle linear combinations of basis orbitals used to evaluate the Dirac determinants.
ResourceHandle< RS > lendResource()
void setOrbitalSetSize(int norbs) final
set the OrbitalSetSize and Identity=false and initialize internal storages
static void gemm(char Atrans, char Btrans, int M, int N, int K, double alpha, const double *A, int lda, const double *restrict B, int ldb, double beta, double *restrict C, int ldc)
ompBLAS_status copy(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
double B(double x, int k, int i, const std::vector< double > &t)
void evaluate_notranspose(const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet) final
evaluate the values, gradients and laplacians of this single-particle orbital for [first...
OrbitalSetTraits< ValueType >::GradHessMatrix GGGMatrix
void resize(size_type n)
resize myData
NewTimer & basis_timer_
timer for basis set
A D-dimensional Array class based on PETE.
void mw_evaluateValueImplGEMM(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, OffloadMWVArray &phi_v) const
packed walker GEMM implementation
void mw_evaluateDetRatios(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const final
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers ...
void evaluateVGHGH(const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) final
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
OrbitalSetTraits< ValueType >::HessMatrix HessMatrix