14 #ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H 15 #define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H 30 template<
typename ROT,
typename SH>
46 :
Ylm(lmax, addsignforM),
135 s_orbitals[i] = (
RnlID[
NL[i]][1] == 0);
141 template<
typename LAT,
typename T,
typename PosType,
typename VGL>
144 int TransX, TransY, TransZ;
150 #if not defined(QMC_COMPLEX) 170 auto* restrict psi = vgl.data(0) + offset;
171 const T* restrict ylm_v =
Ylm[0];
172 auto* restrict dpsi_x = vgl.data(1) + offset;
173 const T* restrict ylm_x =
Ylm[1];
174 auto* restrict dpsi_y = vgl.data(2) + offset;
175 const T* restrict ylm_y =
Ylm[2];
176 auto* restrict dpsi_z = vgl.data(3) + offset;
177 const T* restrict ylm_z =
Ylm[3];
178 auto* restrict d2psi = vgl.data(4) + offset;
179 const T* restrict ylm_l =
Ylm[4];
194 TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
198 TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
202 TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
204 dr_new[0] = dr[0] + (TransX *
lattice.R(0, 0) + TransY *
lattice.R(1, 0) + TransZ *
lattice.R(2, 0));
205 dr_new[1] = dr[1] + (TransX *
lattice.R(0, 1) + TransY *
lattice.R(1, 1) + TransZ *
lattice.R(2, 1));
206 dr_new[2] = dr[2] + (TransX *
lattice.R(0, 2) + TransY *
lattice.R(1, 2) + TransZ *
lattice.R(2, 2));
215 const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
216 Ylm.evaluateVGL(x, y, z);
218 MultiRnl.evaluate(r_new, phi, dphi, d2phi);
220 const T rinv =
cone / r_new;
227 const int nl(
NL[ib]);
228 const int lm(
LM[ib]);
229 const T drnloverr = rinv * dphi[nl];
230 const T ang = ylm_v[lm];
231 const T gr_x = drnloverr * x;
232 const T gr_y = drnloverr * y;
233 const T gr_z = drnloverr * z;
234 const T ang_x = ylm_x[lm];
235 const T ang_y = ylm_y[lm];
236 const T ang_z = ylm_z[lm];
237 const T vr = phi[nl];
239 psi[ib] += ang * vr * Phase;
240 dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
241 dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
242 dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
243 d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) +
252 template<
typename LAT,
typename T,
typename PosType,
typename VGH>
255 int TransX, TransY, TransZ;
263 constexpr T
cone(1.);
264 constexpr T ctwo(2.);
266 #if not defined(QMC_COMPLEX) 282 auto* restrict psi = vgh.data(0) + offset;
283 const T* restrict ylm_v =
Ylm[0];
284 auto* restrict dpsi_x = vgh.data(1) + offset;
285 const T* restrict ylm_x =
Ylm[1];
286 auto* restrict dpsi_y = vgh.data(2) + offset;
287 const T* restrict ylm_y =
Ylm[2];
288 auto* restrict dpsi_z = vgh.data(3) + offset;
289 const T* restrict ylm_z =
Ylm[3];
291 auto* restrict dhpsi_xx = vgh.data(4) + offset;
292 const T* restrict ylm_xx =
Ylm[4];
293 auto* restrict dhpsi_xy = vgh.data(5) + offset;
294 const T* restrict ylm_xy =
Ylm[5];
295 auto* restrict dhpsi_xz = vgh.data(6) + offset;
296 const T* restrict ylm_xz =
Ylm[6];
297 auto* restrict dhpsi_yy = vgh.data(7) + offset;
298 const T* restrict ylm_yy =
Ylm[7];
299 auto* restrict dhpsi_yz = vgh.data(8) + offset;
300 const T* restrict ylm_yz =
Ylm[8];
301 auto* restrict dhpsi_zz = vgh.data(9) + offset;
302 const T* restrict ylm_zz =
Ylm[9];
324 TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
328 TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
332 TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
344 const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
345 Ylm.evaluateVGH(x, y, z);
347 MultiRnl.evaluate(r_new, phi, dphi, d2phi);
349 const T rinv =
cone / r_new;
356 const int nl(
NL[ib]);
357 const int lm(
LM[ib]);
358 const T drnloverr = rinv * dphi[nl];
359 const T ang = ylm_v[lm];
360 const T gr_x = drnloverr * x;
361 const T gr_y = drnloverr * y;
362 const T gr_z = drnloverr * z;
368 const T gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr);
369 const T gr_xx = x * x * gr2_tmp + drnloverr;
370 const T gr_xy = x * y * gr2_tmp;
371 const T gr_xz = x * z * gr2_tmp;
372 const T gr_yy = y * y * gr2_tmp + drnloverr;
373 const T gr_yz = y * z * gr2_tmp;
374 const T gr_zz = z * z * gr2_tmp + drnloverr;
376 const T ang_x = ylm_x[lm];
377 const T ang_y = ylm_y[lm];
378 const T ang_z = ylm_z[lm];
379 const T ang_xx = ylm_xx[lm];
380 const T ang_xy = ylm_xy[lm];
381 const T ang_xz = ylm_xz[lm];
382 const T ang_yy = ylm_yy[lm];
383 const T ang_yz = ylm_yz[lm];
384 const T ang_zz = ylm_zz[lm];
386 const T vr = phi[nl];
388 psi[ib] += ang * vr * Phase;
389 dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
390 dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
391 dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
396 dhpsi_xx[ib] += (gr_xx * ang + ang_xx * vr + ctwo * gr_x * ang_x) * Phase;
397 dhpsi_xy[ib] += (gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x) * Phase;
398 dhpsi_xz[ib] += (gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x) * Phase;
399 dhpsi_yy[ib] += (gr_yy * ang + ang_yy * vr + ctwo * gr_y * ang_y) * Phase;
400 dhpsi_yz[ib] += (gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y) * Phase;
401 dhpsi_zz[ib] += (gr_zz * ang + ang_zz * vr + ctwo * gr_z * ang_z) * Phase;
408 template<
typename LAT,
typename T,
typename PosType,
typename VGHGH>
416 int TransX, TransY, TransZ;
424 constexpr T
cone(1.0);
425 constexpr T ctwo(2.0);
426 constexpr T cthree(3.0);
428 #if not defined(QMC_COMPLEX) 445 auto* restrict psi = vghgh.data(0) + offset;
446 const T* restrict ylm_v =
Ylm[0];
447 auto* restrict dpsi_x = vghgh.data(1) + offset;
448 const T* restrict ylm_x =
Ylm[1];
449 auto* restrict dpsi_y = vghgh.data(2) + offset;
450 const T* restrict ylm_y =
Ylm[2];
451 auto* restrict dpsi_z = vghgh.data(3) + offset;
452 const T* restrict ylm_z =
Ylm[3];
454 auto* restrict dhpsi_xx = vghgh.data(4) + offset;
455 const T* restrict ylm_xx =
Ylm[4];
456 auto* restrict dhpsi_xy = vghgh.data(5) + offset;
457 const T* restrict ylm_xy =
Ylm[5];
458 auto* restrict dhpsi_xz = vghgh.data(6) + offset;
459 const T* restrict ylm_xz =
Ylm[6];
460 auto* restrict dhpsi_yy = vghgh.data(7) + offset;
461 const T* restrict ylm_yy =
Ylm[7];
462 auto* restrict dhpsi_yz = vghgh.data(8) + offset;
463 const T* restrict ylm_yz =
Ylm[8];
464 auto* restrict dhpsi_zz = vghgh.data(9) + offset;
465 const T* restrict ylm_zz =
Ylm[9];
467 auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
468 const T* restrict ylm_xxx =
Ylm[10];
469 auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
470 const T* restrict ylm_xxy =
Ylm[11];
471 auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
472 const T* restrict ylm_xxz =
Ylm[12];
473 auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
474 const T* restrict ylm_xyy =
Ylm[13];
475 auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
476 const T* restrict ylm_xyz =
Ylm[14];
477 auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
478 const T* restrict ylm_xzz =
Ylm[15];
479 auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
480 const T* restrict ylm_yyy =
Ylm[16];
481 auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
482 const T* restrict ylm_yyz =
Ylm[17];
483 auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
484 const T* restrict ylm_yzz =
Ylm[18];
485 auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
486 const T* restrict ylm_zzz =
Ylm[19];
520 TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
524 TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
528 TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
540 const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
541 Ylm.evaluateVGHGH(x, y, z);
543 MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
545 const T rinv =
cone / r_new;
546 const T xu = x * rinv, yu = y * rinv, zu = z * rinv;
553 const int nl(
NL[ib]);
554 const int lm(
LM[ib]);
555 const T drnloverr = rinv * dphi[nl];
556 const T ang = ylm_v[lm];
557 const T gr_x = drnloverr * x;
558 const T gr_y = drnloverr * y;
559 const T gr_z = drnloverr * z;
565 const T gr2_tmp = rinv * (d2phi[nl] - drnloverr);
567 const T gr_xx = x * xu * gr2_tmp + drnloverr;
568 const T gr_xy = x * yu * gr2_tmp;
569 const T gr_xz = x * zu * gr2_tmp;
570 const T gr_yy = y * yu * gr2_tmp + drnloverr;
571 const T gr_yz = y * zu * gr2_tmp;
572 const T gr_zz = z * zu * gr2_tmp + drnloverr;
575 const T gr3_tmp = d3phi[nl] - cthree * gr2_tmp;
577 const T gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
578 const T gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu;
579 const T gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu;
580 const T gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu;
581 const T gr_xyz = xu * yu * zu * gr3_tmp;
582 const T gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu;
583 const T gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
584 const T gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu;
585 const T gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu;
586 const T gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
590 const T ang_x = ylm_x[lm];
591 const T ang_y = ylm_y[lm];
592 const T ang_z = ylm_z[lm];
594 const T ang_xx = ylm_xx[lm];
595 const T ang_xy = ylm_xy[lm];
596 const T ang_xz = ylm_xz[lm];
597 const T ang_yy = ylm_yy[lm];
598 const T ang_yz = ylm_yz[lm];
599 const T ang_zz = ylm_zz[lm];
601 const T ang_xxx = ylm_xxx[lm];
602 const T ang_xxy = ylm_xxy[lm];
603 const T ang_xxz = ylm_xxz[lm];
604 const T ang_xyy = ylm_xyy[lm];
605 const T ang_xyz = ylm_xyz[lm];
606 const T ang_xzz = ylm_xzz[lm];
607 const T ang_yyy = ylm_yyy[lm];
608 const T ang_yyz = ylm_yyz[lm];
609 const T ang_yzz = ylm_yzz[lm];
610 const T ang_zzz = ylm_zzz[lm];
612 const T vr = phi[nl];
614 psi[ib] += ang * vr * Phase;
615 dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
616 dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
617 dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
622 dhpsi_xx[ib] += (gr_xx * ang + ang_xx * vr + ctwo * gr_x * ang_x) * Phase;
623 dhpsi_xy[ib] += (gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x) * Phase;
624 dhpsi_xz[ib] += (gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x) * Phase;
625 dhpsi_yy[ib] += (gr_yy * ang + ang_yy * vr + ctwo * gr_y * ang_y) * Phase;
626 dhpsi_yz[ib] += (gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y) * Phase;
627 dhpsi_zz[ib] += (gr_zz * ang + ang_zz * vr + ctwo * gr_z * ang_z) * Phase;
629 dghpsi_xxx[ib] += (gr_xxx * ang + vr * ang_xxx + cthree * gr_xx * ang_x + cthree * gr_x * ang_xx) * Phase;
630 dghpsi_xxy[ib] += (gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + ctwo * gr_xy * ang_x +
631 ctwo * ang_xy * gr_x) *
633 dghpsi_xxz[ib] += (gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + ctwo * gr_xz * ang_x +
634 ctwo * ang_xz * gr_x) *
636 dghpsi_xyy[ib] += (gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + ctwo * gr_xy * ang_y +
637 ctwo * ang_xy * gr_y) *
639 dghpsi_xyz[ib] += (gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
640 ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y) *
642 dghpsi_xzz[ib] += (gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + ctwo * gr_xz * ang_z +
643 ctwo * ang_xz * gr_z) *
645 dghpsi_yyy[ib] += (gr_yyy * ang + vr * ang_yyy + cthree * gr_yy * ang_y + cthree * gr_y * ang_yy) * Phase;
646 dghpsi_yyz[ib] += (gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + ctwo * gr_yz * ang_y +
647 ctwo * ang_yz * gr_y) *
649 dghpsi_yzz[ib] += (gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + ctwo * gr_yz * ang_z +
650 ctwo * ang_yz * gr_z) *
652 dghpsi_zzz[ib] += (gr_zzz * ang + vr * ang_zzz + cthree * gr_zz * ang_z + cthree * gr_z * ang_zz) * Phase;
661 template<
typename LAT,
typename T,
typename PosType,
typename VT>
664 int TransX, TransY, TransZ;
669 #if not defined(QMC_COMPLEX) 690 TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
694 TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
698 TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
700 dr_new[0] = dr[0] + (TransX *
lattice.R(0, 0) + TransY *
lattice.R(1, 0) + TransZ *
lattice.R(2, 0));
701 dr_new[1] = dr[1] + (TransX *
lattice.R(0, 1) + TransY *
lattice.R(1, 1) + TransZ *
lattice.R(2, 1));
702 dr_new[2] = dr[2] + (TransX *
lattice.R(0, 2) + TransY *
lattice.R(1, 2) + TransZ *
lattice.R(2, 2));
709 Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
714 psi[ib] += ylm_v[
LM[ib]] * phi_r[
NL[ib]] * Phase;
740 template<
typename LAT,
typename VT>
747 const size_t nBasTot,
748 const size_t center_idx,
749 const size_t BasisOffset,
750 const size_t NumCenters)
752 assert(
this == &atom_bs_list.
getLeader());
753 auto& atom_bs_leader = atom_bs_list.template getCastedLeader<SoaAtomicBasisSet<ROT, SH>>();
758 const int Nxyz = Nx * Ny * Nz;
760 assert(psi_vgl.size(0) == 5);
761 assert(psi_vgl.size(1) == nElec);
762 assert(psi_vgl.size(2) == nBasTot);
765 auto& ylm_vgl = atom_bs_leader.mw_mem_handle_.getResource().ylm_vgl;
766 auto& rnl_vgl = atom_bs_leader.mw_mem_handle_.getResource().rnl_vgl;
767 auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr;
768 auto& r = atom_bs_leader.mw_mem_handle_.getResource().r;
770 size_t nRnl =
RnlID.size();
771 size_t nYlm =
Ylm.size();
773 ylm_vgl.resize(5, nElec, Nxyz, nYlm);
774 rnl_vgl.resize(3, nElec, Nxyz, nRnl);
775 dr.resize(nElec, Nxyz, 3);
776 r.resize(nElec, Nxyz);
780 auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase;
781 correctphase.resize(nElec);
783 auto* dr_ptr = dr.data();
784 auto* r_ptr = r.data();
786 auto* correctphase_ptr = correctphase.data();
788 auto* Tv_list_ptr = Tv_list.data();
789 auto* displ_list_ptr = displ_list.data();
795 auto* restrict psi_ptr = psi_vgl.data_at(0, 0, 0);
796 auto* restrict dpsi_x_ptr = psi_vgl.data_at(1, 0, 0);
797 auto* restrict dpsi_y_ptr = psi_vgl.data_at(2, 0, 0);
798 auto* restrict dpsi_z_ptr = psi_vgl.data_at(3, 0, 0);
799 auto* restrict d2psi_ptr = psi_vgl.data_at(4, 0, 0);
803 #if not defined(QMC_COMPLEX) 805 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ")
806 for (
size_t i_e = 0; i_e < nElec; i_e++)
807 correctphase_ptr[i_e] = 1.0;
812 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ 813 Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ")
814 for (
size_t i_e = 0; i_e < nElec; i_e++)
818 for (
size_t i_dim = 0; i_dim < 3; i_dim++)
819 phasearg +=
SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)];
830 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) \ 831 map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ 832 map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ")
833 for (
size_t i_e = 0; i_e < nElec; i_e++)
834 for (
int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
837 for (
size_t i_dim = 0; i_dim < 3; i_dim++)
839 dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] +
840 periodic_image_displacements_ptr[i_dim + 3 * i_xyz]);
841 tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)];
843 r_ptr[i_xyz + Nxyz * i_e] =
std::sqrt(tmp_r2);
855 Ylm.batched_evaluateVGL(dr, ylm_vgl);
865 RealType* restrict phi_ptr = rnl_vgl.data_at(0, 0, 0, 0);
866 RealType* restrict dphi_ptr = rnl_vgl.data_at(1, 0, 0, 0);
867 RealType* restrict d2phi_ptr = rnl_vgl.data_at(2, 0, 0, 0);
870 const RealType* restrict ylm_v_ptr = ylm_vgl.data_at(0, 0, 0, 0);
871 const RealType* restrict ylm_x_ptr = ylm_vgl.data_at(1, 0, 0, 0);
872 const RealType* restrict ylm_y_ptr = ylm_vgl.data_at(2, 0, 0, 0);
873 const RealType* restrict ylm_z_ptr = ylm_vgl.data_at(3, 0, 0, 0);
874 const RealType* restrict ylm_l_ptr = ylm_vgl.data_at(4, 0, 0, 0);
875 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) \ 876 map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ 877 map(to:ylm_v_ptr[:nYlm*nElec*Nxyz], ylm_x_ptr[:nYlm*nElec*Nxyz], ylm_y_ptr[:nYlm*nElec*Nxyz], ylm_z_ptr[:nYlm*nElec*Nxyz], ylm_l_ptr[:nYlm*nElec*Nxyz], \ 878 phi_ptr[:nRnl*nElec*Nxyz], dphi_ptr[:nRnl*nElec*Nxyz], d2phi_ptr[:nRnl*nElec*Nxyz], \ 879 psi_ptr[:nBasTot*nElec], dpsi_x_ptr[:nBasTot*nElec], dpsi_y_ptr[:nBasTot*nElec], dpsi_z_ptr[:nBasTot*nElec], d2psi_ptr[:nBasTot*nElec], \ 880 correctphase_ptr[:nElec], r_ptr[:nElec*Nxyz], dr_ptr[:3*nElec*Nxyz]) ")
881 for (
int i_e = 0; i_e < nElec; i_e++)
882 for (
int ib = 0; ib < bset_size; ++ib)
884 const int nl(NL_ptr[ib]);
885 const int lm(LM_ptr[ib]);
892 for (
int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
894 const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e];
896 const RealType x = dr_ptr[0 + 3 * (i_xyz + Nxyz * i_e)];
897 const RealType y = dr_ptr[1 + 3 * (i_xyz + Nxyz * i_e)];
898 const RealType z = dr_ptr[2 + 3 * (i_xyz + Nxyz * i_e)];
899 const RealType drnloverr = rinv * dphi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)];
900 const RealType ang = ylm_v_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
901 const RealType gr_x = drnloverr * x;
902 const RealType gr_y = drnloverr * y;
903 const RealType gr_z = drnloverr * z;
904 const RealType ang_x = ylm_x_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
905 const RealType ang_y = ylm_y_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
906 const RealType ang_z = ylm_z_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
907 const RealType vr = phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)];
909 psi += ang * vr * Phase;
910 dpsi_x += (ang * gr_x + vr * ang_x) * Phase;
911 dpsi_y += (ang * gr_y + vr * ang_y) * Phase;
912 dpsi_z += (ang * gr_z + vr * ang_z) * Phase;
913 d2psi += (ang * (ctwo * drnloverr + d2phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]) +
914 ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) +
915 vr * ylm_l_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]) *
919 psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi;
920 dpsi_x_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_x;
921 dpsi_y_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_y;
922 dpsi_z_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_z;
923 d2psi_ptr[BasisOffset + ib + i_e * nBasTot] = d2psi;
947 template<
typename LAT,
typename VT>
954 const size_t nBasTot,
955 const size_t center_idx,
956 const size_t BasisOffset,
957 const size_t NumCenters)
959 assert(
this == &atom_bs_list.
getLeader());
960 auto& atom_bs_leader = atom_bs_list.template getCastedLeader<SoaAtomicBasisSet<ROT, SH>>();
966 const int Nxyz = Nx * Ny * Nz;
967 assert(psi.size(0) == nElec);
968 assert(psi.size(1) == nBasTot);
971 auto& ylm_v = atom_bs_leader.mw_mem_handle_.getResource().ylm_v;
972 auto& rnl_v = atom_bs_leader.mw_mem_handle_.getResource().rnl_v;
973 auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr;
974 auto& r = atom_bs_leader.mw_mem_handle_.getResource().r;
976 const size_t nRnl =
RnlID.size();
977 const size_t nYlm =
Ylm.size();
979 ylm_v.resize(nElec, Nxyz, nYlm);
980 rnl_v.resize(nElec, Nxyz, nRnl);
981 dr.resize(nElec, Nxyz, 3);
982 r.resize(nElec, Nxyz);
985 auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase;
986 correctphase.resize(nElec);
988 auto* dr_ptr = dr.data();
989 auto* r_ptr = r.data();
991 auto* correctphase_ptr = correctphase.data();
993 auto* Tv_list_ptr = Tv_list.data();
994 auto* displ_list_ptr = displ_list.data();
997 auto* latR_ptr =
lattice.R.data();
1002 #if not defined(QMC_COMPLEX) 1004 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ")
1005 for (
size_t i_e = 0; i_e < nElec; i_e++)
1006 correctphase_ptr[i_e] = 1.0;
1011 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \ 1012 Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ")
1013 for (
size_t i_e = 0; i_e < nElec; i_e++)
1017 for (
size_t i_dim = 0; i_dim < 3; i_dim++)
1018 phasearg +=
SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)];
1029 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) \ 1030 map(to:periodic_image_displacements_ptr[:3*Nxyz]) \ 1031 map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ")
1032 for (
size_t i_e = 0; i_e < nElec; i_e++)
1033 for (
int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
1036 for (
size_t i_dim = 0; i_dim < 3; i_dim++)
1038 dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] +
1039 periodic_image_displacements_ptr[i_dim + 3 * i_xyz]);
1040 tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)];
1042 r_ptr[i_xyz + Nxyz * i_e] =
std::sqrt(tmp_r2);
1054 Ylm.batched_evaluateV(dr, ylm_v);
1061 auto* LM_ptr =
LM.
data();
1062 auto* NL_ptr =
NL.
data();
1063 auto* psi_ptr = psi.data();
1066 auto* ylm_ptr = ylm_v.data();
1067 auto* rnl_ptr = rnl_v.data();
1068 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for collapse(2) \ 1069 map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \ 1070 map(to:ylm_ptr[:nYlm*nElec*Nxyz], rnl_ptr[:nRnl*nElec*Nxyz], psi_ptr[:nBasTot*nElec], correctphase_ptr[:nElec])")
1071 for (
int i_e = 0; i_e < nElec; i_e++)
1072 for (
int ib = 0; ib < bset_size; ++ib)
1075 for (
int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
1077 const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e];
1078 psi += ylm_ptr[(i_xyz + Nxyz * i_e) * nYlm + LM_ptr[ib]] *
1079 rnl_ptr[(i_xyz + Nxyz * i_e) * nRnl + NL_ptr[ib]] * Phase;
1081 psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi;
1088 collection.
addResource(std::make_unique<SoaAtomicBSetMultiWalkerMem>());
1094 assert(
this == &atom_basis_list.
getLeader());
1095 atom_basis_list.template getCastedLeader<SoaAtomicBasisSet>().
mw_mem_handle_ =
1102 assert(
this == &atom_basis_list.
getLeader());
1103 collection.
takebackResource(atom_basis_list.template getCastedLeader<SoaAtomicBasisSet>().mw_mem_handle_);
1116 return std::make_unique<SoaAtomicBSetMultiWalkerMem>(*this);
1170 template<
typename COT>
1172 template<
typename COT>
RealType Rmax
maximum radius of this center
typename ROT::GridType GridType
void setCenter(int c, int offset)
set the current offset
void evaluateVGH(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGH &vgh, PosType Tv)
Declaration of OptimizableObject.
NewTimer & nelec_pbc_timer_
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
void takebackResource(ResourceHandle< RS > &res_handle)
SoaAtomicBSetMultiWalkerMem()
helper functions for EinsplineSetBuilder
TinyVector< double, 3 > SuperTwist
Coordinates of SuperTwist.
const std::shared_ptr< OffloadIntVector > NL_ptr_
index of the corresponding radial orbital with quantum numbers
std::vector< QuantumNumberType > RnlID
container for the quantum-numbers
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< SoaAtomicBasisSet > &atom_basis_list) const
ResourceHandle manages the temporary resource referenced from a collection.
Build a set of radial orbitals at the origin.
constexpr std::complex< float > cone
int BasisSetSize
size of the basis set
void mw_evaluateVGL(const RefVectorWithLeader< SoaAtomicBasisSet > &atom_bs_list, const LAT &lattice, Array< VT, 3, OffloadPinnedAllocator< VT >> &psi_vgl, const Vector< RealType, OffloadPinnedAllocator< RealType >> &displ_list, const Vector< RealType, OffloadPinnedAllocator< RealType >> &Tv_list, const size_t nElec, const size_t nBasTot, const size_t center_idx, const size_t BasisOffset, const size_t NumCenters)
evaluate VGL for multiple electrons
Timer accumulates time and call counts.
void evaluateVGHGH(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGHGH &vghgh, PosType Tv)
VectorSoaContainer< RealType, 4 > tempS
temporary storage
OffloadVector correctphase
SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem &)
int getBasisSetSize() const
return the number of basis functions
TinyVector< int, 3 > PBCImages
Number of Cell images for the evaluation of the orbital with PBC. If No PBC, should be 0;...
std::unique_ptr< Resource > makeClone() const override
void createResource(ResourceCollection &collection) const
SH Ylm
spherical harmonics
CrystalLattice< OHMMS_PRECISION, OHMMS_DIM > lattice
multi walker shared memory buffer
const std::shared_ptr< OffloadIntVector > LM_ptr_
index of the corresponding real Spherical Harmonic with quantum numbers
size_type size() const
return the current size
QTBase::ValueType ValueType
OffloadIntVector & NL
reference to NL_ptr_
void finalize()
implement a BasisSetBase virtual function
std::shared_ptr< OffloadVector > periodic_image_phase_factors_ptr_
Phase Factor array of images.
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
void setPBCParams(const TinyVector< int, 3 > &pbc_images, const TinyVector< double, 3 > supertwist, const OffloadVector &PeriodicImagePhaseFactors, const OffloadArray2D &PeriodicImageDisplacements)
Set the number of periodic image for the evaluation of the orbitals and the phase factor...
class to handle a set of variables that can be modified during optimizations
SoaAtomicBasisSet(int lmax, bool addsignforM=false)
the constructor
void setRmax(T rmax)
Set Rmax.
typename QMCTraits::ValueType ValueType
void checkOutVariables(const opt_variables_type &active)
void checkInVariables(opt_variables_type &active)
OMPallocator is an allocator with fused device and dualspace allocator functionality.
std::shared_ptr< OffloadArray2D > periodic_image_displacements_ptr_
Displacements of images.
T * data()
return the base
OffloadVector & periodic_image_phase_factors_
reference to the phase factor array of images
OffloadArray2D & periodic_image_displacements_
reference to the displacements of images
void evaluateVGL(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGL &vgl, PosType Tv)
evaluate VGL
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
ROT MultiRnl
radial orbitals
QMCTraits::RealType RealType
GridType
The different types of grids that we currently allow.
void resetParameters(const opt_variables_type &active)
void mw_evaluateV(const RefVectorWithLeader< SoaAtomicBasisSet > &atom_bs_list, const LAT &lattice, Array< VT, 2, OffloadPinnedAllocator< VT >> &psi, const Vector< RealType, OffloadPinnedAllocator< RealType >> &displ_list, const Vector< RealType, OffloadPinnedAllocator< RealType >> &Tv_list, const size_t nElec, const size_t nBasTot, const size_t center_idx, const size_t BasisOffset, const size_t NumCenters)
evaluate for multiple electrons
OffloadIntVector & LM
reference to LM_ptr_
void updateTo(size_t size=0, std::ptrdiff_t offset=0)
void evaluateV(const LAT &lattice, const T r, const PosType &dr, VT *restrict psi, PosType Tv)
evaluate V
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
ResourceHandle< RS > lendResource()
void updateTo(size_type size=0, std::ptrdiff_t offset=0)
void resize(size_type n)
resize myData
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< SoaAtomicBasisSet > &atom_basis_list) const
typename ROT::RealType RealType
ResourceHandle< SoaAtomicBSetMultiWalkerMem > mw_mem_handle_
multi walker resource handle
void queryOrbitalsForSType(std::vector< bool > &s_orbitals) const
Sets a boolean vector for S-type orbitals. Used for cusp correction.