QMCPACK
SoaAtomicBasisSet.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 /** @file SoaAtomicBasisSet.h
13  */
14 #ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H
15 #define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSET_H
16 
17 #include "CPU/math.hpp"
18 #include "OptimizableObject.h"
19 #include <ResourceCollection.h>
20 
21 namespace qmcplusplus
22 {
23 /* A basis set for a center type
24  *
25  * @tparam ROT : radial function type, e.g.,NGFunctor<T>
26  * @tparam SH : spherical or carteisan Harmonics for (l,m) expansion
27  *
28  * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$
29  */
30 template<typename ROT, typename SH>
32 {
33 public:
34  using RadialOrbital_t = ROT;
35  using RealType = typename ROT::RealType;
36  using GridType = typename ROT::GridType;
37  using ValueType = typename QMCTraits::ValueType;
43 
44  ///the constructor
45  explicit SoaAtomicBasisSet(int lmax, bool addsignforM = false)
46  : Ylm(lmax, addsignforM),
51  NL_ptr_(std::make_shared<OffloadIntVector>()),
52  LM_ptr_(std::make_shared<OffloadIntVector>()),
53  NL(*NL_ptr_),
54  LM(*LM_ptr_),
55  ylm_timer_(createGlobalTimer("SoaAtomicBasisSet::Ylm", timer_level_fine)),
56  rnl_timer_(createGlobalTimer("SoaAtomicBasisSet::Rnl", timer_level_fine)),
57  pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::pbc_images", timer_level_fine)),
58  nelec_pbc_timer_(createGlobalTimer("SoaAtomicBasisSet::nelec_pbc_images", timer_level_fine)),
59  phase_timer_(createGlobalTimer("SoaAtomicBasisSet::phase", timer_level_fine)),
60  psi_timer_(createGlobalTimer("SoaAtomicBasisSet::psi", timer_level_fine))
61  {}
62 
64  {
65  //for(size_t nl=0; nl<Rnl.size(); nl++)
66  // Rnl[nl]->checkInVariables(active);
67  }
68 
70  {
71  //for(size_t nl=0; nl<Rnl.size(); nl++)
72  // Rnl[nl]->checkOutVariables(active);
73  }
74 
75  void resetParameters(const opt_variables_type& active)
76  {
77  //for(size_t nl=0; nl<Rnl.size(); nl++)
78  // Rnl[nl]->resetParameters(active);
79  }
80 
81  /** return the number of basis functions
82  */
83  inline int getBasisSetSize() const
84  {
85  //=NL.size();
86  return BasisSetSize;
87  }
88 
89  /** Set the number of periodic image for the evaluation of the orbitals and the phase factor.
90  * In the case of Non-PBC, PBCImages=(1,1,1), SuperTwist(0,0,0) and the PhaseFactor=1.
91  */
92  void setPBCParams(const TinyVector<int, 3>& pbc_images,
93  const TinyVector<double, 3> supertwist,
94  const OffloadVector& PeriodicImagePhaseFactors,
95  const OffloadArray2D& PeriodicImageDisplacements)
96  {
97  PBCImages = pbc_images;
98  periodic_image_phase_factors_ = PeriodicImagePhaseFactors;
99  periodic_image_displacements_ = PeriodicImageDisplacements;
100  SuperTwist = supertwist;
101 
104  }
105 
106 
107  /** implement a BasisSetBase virtual function
108  *
109  * Set Rmax and BasisSetSize
110  * @todo Should be able to overwrite Rmax to be much smaller than the maximum grid
111  */
112  inline void finalize()
113  {
114  BasisSetSize = LM.size();
115  NL.updateTo();
116  LM.updateTo();
117  tempS.resize(std::max(Ylm.size(), RnlID.size()));
118  }
119 
120  /** Set Rmax */
121  template<typename T>
122  inline void setRmax(T rmax)
123  {
124  Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
125  }
126 
127  ///set the current offset
128  inline void setCenter(int c, int offset) {}
129 
130  /// Sets a boolean vector for S-type orbitals. Used for cusp correction.
131  void queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
132  {
133  for (int i = 0; i < BasisSetSize; i++)
134  {
135  s_orbitals[i] = (RnlID[NL[i]][1] == 0);
136  }
137  }
138 
139  /** evaluate VGL
140  */
141  template<typename LAT, typename T, typename PosType, typename VGL>
142  inline void evaluateVGL(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGL& vgl, PosType Tv)
143  {
144  int TransX, TransY, TransZ;
145 
146  PosType dr_new;
147  T r_new;
148  // T psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
149 
150 #if not defined(QMC_COMPLEX)
151  const ValueType correctphase = 1;
152 #else
153 
154  RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2];
155  RealType s, c;
156  qmcplusplus::sincos(-phasearg, &s, &c);
157  const ValueType correctphase(c, s);
158 #endif
159 
160  constexpr T cone(1);
161  constexpr T ctwo(2);
162 
163 
164  //one can assert the alignment
165  RealType* restrict phi = tempS.data(0);
166  RealType* restrict dphi = tempS.data(1);
167  RealType* restrict d2phi = tempS.data(2);
168 
169  //V,Gx,Gy,Gz,L
170  auto* restrict psi = vgl.data(0) + offset;
171  const T* restrict ylm_v = Ylm[0]; //value
172  auto* restrict dpsi_x = vgl.data(1) + offset;
173  const T* restrict ylm_x = Ylm[1]; //gradX
174  auto* restrict dpsi_y = vgl.data(2) + offset;
175  const T* restrict ylm_y = Ylm[2]; //gradY
176  auto* restrict dpsi_z = vgl.data(3) + offset;
177  const T* restrict ylm_z = Ylm[3]; //gradZ
178  auto* restrict d2psi = vgl.data(4) + offset;
179  const T* restrict ylm_l = Ylm[4]; //lap
180 
181  for (size_t ib = 0; ib < BasisSetSize; ++ib)
182  {
183  psi[ib] = 0;
184  dpsi_x[ib] = 0;
185  dpsi_y[ib] = 0;
186  dpsi_z[ib] = 0;
187  d2psi[ib] = 0;
188  }
189  //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax)
190  int iter = -1;
191  for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X
192  {
193  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
194  TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
195  for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y
196  {
197  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
198  TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
199  for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z
200  {
201  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
202  TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
203 
204  dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0));
205  dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1));
206  dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2));
207 
208  r_new = std::sqrt(dot(dr_new, dr_new));
209 
210  iter++;
211  if (r_new >= Rmax)
212  continue;
213 
214  //SIGN Change!!
215  const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
216  Ylm.evaluateVGL(x, y, z);
217 
218  MultiRnl.evaluate(r_new, phi, dphi, d2phi);
219 
220  const T rinv = cone / r_new;
221 
222  ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table.
223  const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase;
224 
225  for (size_t ib = 0; ib < BasisSetSize; ++ib)
226  {
227  const int nl(NL[ib]);
228  const int lm(LM[ib]);
229  const T drnloverr = rinv * dphi[nl];
230  const T ang = ylm_v[lm];
231  const T gr_x = drnloverr * x;
232  const T gr_y = drnloverr * y;
233  const T gr_z = drnloverr * z;
234  const T ang_x = ylm_x[lm];
235  const T ang_y = ylm_y[lm];
236  const T ang_z = ylm_z[lm];
237  const T vr = phi[nl];
238 
239  psi[ib] += ang * vr * Phase;
240  dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
241  dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
242  dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
243  d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) +
244  vr * ylm_l[lm]) *
245  Phase;
246  }
247  }
248  }
249  }
250  }
251 
252  template<typename LAT, typename T, typename PosType, typename VGH>
253  inline void evaluateVGH(const LAT& lattice, const T r, const PosType& dr, const size_t offset, VGH& vgh, PosType Tv)
254  {
255  int TransX, TransY, TransZ;
256 
257  PosType dr_new;
258  T r_new;
259 
260  //This is needed because of mixed precision builds. Putting 1.0 and 2.0 directly into final
261  //dpsi and dhpsi assignments at the end of this function can occasionally cause whole RHS
262  //to be typed as double, which is bad.
263  constexpr T cone(1.);
264  constexpr T ctwo(2.);
265 
266 #if not defined(QMC_COMPLEX)
267  const ValueType correctphase = 1;
268 #else
269 
270  RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2];
271  RealType s, c;
272  qmcplusplus::sincos(-phasearg, &s, &c);
273  const ValueType correctphase(c, s);
274 #endif
275 
276  //one can assert the alignment
277  RealType* restrict phi = tempS.data(0);
278  RealType* restrict dphi = tempS.data(1);
279  RealType* restrict d2phi = tempS.data(2);
280 
281  //V,Gx,Gy,Gz,L
282  auto* restrict psi = vgh.data(0) + offset;
283  const T* restrict ylm_v = Ylm[0]; //value
284  auto* restrict dpsi_x = vgh.data(1) + offset;
285  const T* restrict ylm_x = Ylm[1]; //gradX
286  auto* restrict dpsi_y = vgh.data(2) + offset;
287  const T* restrict ylm_y = Ylm[2]; //gradY
288  auto* restrict dpsi_z = vgh.data(3) + offset;
289  const T* restrict ylm_z = Ylm[3]; //gradZ
290 
291  auto* restrict dhpsi_xx = vgh.data(4) + offset;
292  const T* restrict ylm_xx = Ylm[4];
293  auto* restrict dhpsi_xy = vgh.data(5) + offset;
294  const T* restrict ylm_xy = Ylm[5];
295  auto* restrict dhpsi_xz = vgh.data(6) + offset;
296  const T* restrict ylm_xz = Ylm[6];
297  auto* restrict dhpsi_yy = vgh.data(7) + offset;
298  const T* restrict ylm_yy = Ylm[7];
299  auto* restrict dhpsi_yz = vgh.data(8) + offset;
300  const T* restrict ylm_yz = Ylm[8];
301  auto* restrict dhpsi_zz = vgh.data(9) + offset;
302  const T* restrict ylm_zz = Ylm[9];
303 
304  for (size_t ib = 0; ib < BasisSetSize; ++ib)
305  {
306  psi[ib] = 0;
307  dpsi_x[ib] = 0;
308  dpsi_y[ib] = 0;
309  dpsi_z[ib] = 0;
310  dhpsi_xx[ib] = 0;
311  dhpsi_xy[ib] = 0;
312  dhpsi_xz[ib] = 0;
313  dhpsi_yy[ib] = 0;
314  dhpsi_yz[ib] = 0;
315  dhpsi_zz[ib] = 0;
316  // d2psi[ib] = 0;
317  }
318 
319  //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax)
320  int iter = -1;
321  for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X
322  {
323  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
324  TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
325  for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y
326  {
327  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
328  TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
329  for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z
330  {
331  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
332  TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
333  dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
334  dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
335  dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
336  r_new = std::sqrt(dot(dr_new, dr_new));
337 
338  //const size_t ib_max=NL.size();
339  iter++;
340  if (r_new >= Rmax)
341  continue;
342 
343  //SIGN Change!!
344  const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
345  Ylm.evaluateVGH(x, y, z);
346 
347  MultiRnl.evaluate(r_new, phi, dphi, d2phi);
348 
349  const T rinv = cone / r_new;
350 
351  ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table.
352  const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase;
353 
354  for (size_t ib = 0; ib < BasisSetSize; ++ib)
355  {
356  const int nl(NL[ib]);
357  const int lm(LM[ib]);
358  const T drnloverr = rinv * dphi[nl];
359  const T ang = ylm_v[lm];
360  const T gr_x = drnloverr * x;
361  const T gr_y = drnloverr * y;
362  const T gr_z = drnloverr * z;
363 
364  //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is
365  // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r})
366  // To save recomputation, I evaluate everything except the x_i*x_j term once, and store it in
367  // gr2_tmp. The full term is obtained by x_i*x_j*gr2_tmp.
368  const T gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr);
369  const T gr_xx = x * x * gr2_tmp + drnloverr;
370  const T gr_xy = x * y * gr2_tmp;
371  const T gr_xz = x * z * gr2_tmp;
372  const T gr_yy = y * y * gr2_tmp + drnloverr;
373  const T gr_yz = y * z * gr2_tmp;
374  const T gr_zz = z * z * gr2_tmp + drnloverr;
375 
376  const T ang_x = ylm_x[lm];
377  const T ang_y = ylm_y[lm];
378  const T ang_z = ylm_z[lm];
379  const T ang_xx = ylm_xx[lm];
380  const T ang_xy = ylm_xy[lm];
381  const T ang_xz = ylm_xz[lm];
382  const T ang_yy = ylm_yy[lm];
383  const T ang_yz = ylm_yz[lm];
384  const T ang_zz = ylm_zz[lm];
385 
386  const T vr = phi[nl];
387 
388  psi[ib] += ang * vr * Phase;
389  dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
390  dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
391  dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
392 
393 
394  // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y
395  // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y)
396  dhpsi_xx[ib] += (gr_xx * ang + ang_xx * vr + ctwo * gr_x * ang_x) * Phase;
397  dhpsi_xy[ib] += (gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x) * Phase;
398  dhpsi_xz[ib] += (gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x) * Phase;
399  dhpsi_yy[ib] += (gr_yy * ang + ang_yy * vr + ctwo * gr_y * ang_y) * Phase;
400  dhpsi_yz[ib] += (gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y) * Phase;
401  dhpsi_zz[ib] += (gr_zz * ang + ang_zz * vr + ctwo * gr_z * ang_z) * Phase;
402  }
403  }
404  }
405  }
406  }
407 
408  template<typename LAT, typename T, typename PosType, typename VGHGH>
409  inline void evaluateVGHGH(const LAT& lattice,
410  const T r,
411  const PosType& dr,
412  const size_t offset,
413  VGHGH& vghgh,
414  PosType Tv)
415  {
416  int TransX, TransY, TransZ;
417 
418  PosType dr_new;
419  T r_new;
420 
421  //This is needed because of mixed precision builds. Putting 1.0, 2.0, and 3.0 directly into final
422  //dpsi, dhpsi, and dghpsi assignments at the end of this function can occasionally cause whole RHS
423  //to be typed as double, which is bad.
424  constexpr T cone(1.0);
425  constexpr T ctwo(2.0);
426  constexpr T cthree(3.0);
427 
428 #if not defined(QMC_COMPLEX)
429  const ValueType correctphase = 1.0;
430 #else
431 
432  RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2];
433  RealType s, c;
434  qmcplusplus::sincos(-phasearg, &s, &c);
435  const ValueType correctphase(c, s);
436 #endif
437 
438  //one can assert the alignment
439  RealType* restrict phi = tempS.data(0);
440  RealType* restrict dphi = tempS.data(1);
441  RealType* restrict d2phi = tempS.data(2);
442  RealType* restrict d3phi = tempS.data(3);
443 
444  //V,Gx,Gy,Gz,L
445  auto* restrict psi = vghgh.data(0) + offset;
446  const T* restrict ylm_v = Ylm[0]; //value
447  auto* restrict dpsi_x = vghgh.data(1) + offset;
448  const T* restrict ylm_x = Ylm[1]; //gradX
449  auto* restrict dpsi_y = vghgh.data(2) + offset;
450  const T* restrict ylm_y = Ylm[2]; //gradY
451  auto* restrict dpsi_z = vghgh.data(3) + offset;
452  const T* restrict ylm_z = Ylm[3]; //gradZ
453 
454  auto* restrict dhpsi_xx = vghgh.data(4) + offset;
455  const T* restrict ylm_xx = Ylm[4];
456  auto* restrict dhpsi_xy = vghgh.data(5) + offset;
457  const T* restrict ylm_xy = Ylm[5];
458  auto* restrict dhpsi_xz = vghgh.data(6) + offset;
459  const T* restrict ylm_xz = Ylm[6];
460  auto* restrict dhpsi_yy = vghgh.data(7) + offset;
461  const T* restrict ylm_yy = Ylm[7];
462  auto* restrict dhpsi_yz = vghgh.data(8) + offset;
463  const T* restrict ylm_yz = Ylm[8];
464  auto* restrict dhpsi_zz = vghgh.data(9) + offset;
465  const T* restrict ylm_zz = Ylm[9];
466 
467  auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
468  const T* restrict ylm_xxx = Ylm[10];
469  auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
470  const T* restrict ylm_xxy = Ylm[11];
471  auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
472  const T* restrict ylm_xxz = Ylm[12];
473  auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
474  const T* restrict ylm_xyy = Ylm[13];
475  auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
476  const T* restrict ylm_xyz = Ylm[14];
477  auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
478  const T* restrict ylm_xzz = Ylm[15];
479  auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
480  const T* restrict ylm_yyy = Ylm[16];
481  auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
482  const T* restrict ylm_yyz = Ylm[17];
483  auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
484  const T* restrict ylm_yzz = Ylm[18];
485  auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
486  const T* restrict ylm_zzz = Ylm[19];
487 
488  for (size_t ib = 0; ib < BasisSetSize; ++ib)
489  {
490  psi[ib] = 0;
491 
492  dpsi_x[ib] = 0;
493  dpsi_y[ib] = 0;
494  dpsi_z[ib] = 0;
495 
496  dhpsi_xx[ib] = 0;
497  dhpsi_xy[ib] = 0;
498  dhpsi_xz[ib] = 0;
499  dhpsi_yy[ib] = 0;
500  dhpsi_yz[ib] = 0;
501  dhpsi_zz[ib] = 0;
502 
503  dghpsi_xxx[ib] = 0;
504  dghpsi_xxy[ib] = 0;
505  dghpsi_xxz[ib] = 0;
506  dghpsi_xyy[ib] = 0;
507  dghpsi_xyz[ib] = 0;
508  dghpsi_xzz[ib] = 0;
509  dghpsi_yyy[ib] = 0;
510  dghpsi_yyz[ib] = 0;
511  dghpsi_yzz[ib] = 0;
512  dghpsi_zzz[ib] = 0;
513  }
514 
515  //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax)
516  int iter = -1;
517  for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X
518  {
519  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
520  TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
521  for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y
522  {
523  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
524  TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
525  for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z
526  {
527  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
528  TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
529  dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
530  dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
531  dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
532  r_new = std::sqrt(dot(dr_new, dr_new));
533 
534  //const size_t ib_max=NL.size();
535  iter++;
536  if (r_new >= Rmax)
537  continue;
538 
539  //SIGN Change!!
540  const T x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
541  Ylm.evaluateVGHGH(x, y, z);
542 
543  MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
544 
545  const T rinv = cone / r_new;
546  const T xu = x * rinv, yu = y * rinv, zu = z * rinv;
547 
548  ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table.
549  const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase;
550 
551  for (size_t ib = 0; ib < BasisSetSize; ++ib)
552  {
553  const int nl(NL[ib]);
554  const int lm(LM[ib]);
555  const T drnloverr = rinv * dphi[nl];
556  const T ang = ylm_v[lm];
557  const T gr_x = drnloverr * x;
558  const T gr_y = drnloverr * y;
559  const T gr_z = drnloverr * z;
560 
561  //The non-strictly diagonal term in \partial_i \partial_j R_{nl} is
562  // \frac{x_i x_j}{r^2}\left(\frac{\partial^2 R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial R_{nl}}{\partial r})
563  // To save recomputation, I evaluate everything except the x_i*x_j term once, and store it in
564  // gr2_tmp. The full term is obtained by x_i*x_j*gr2_tmp. This is p(r) in the notes.
565  const T gr2_tmp = rinv * (d2phi[nl] - drnloverr);
566 
567  const T gr_xx = x * xu * gr2_tmp + drnloverr;
568  const T gr_xy = x * yu * gr2_tmp;
569  const T gr_xz = x * zu * gr2_tmp;
570  const T gr_yy = y * yu * gr2_tmp + drnloverr;
571  const T gr_yz = y * zu * gr2_tmp;
572  const T gr_zz = z * zu * gr2_tmp + drnloverr;
573 
574  //This is q(r) in the notes.
575  const T gr3_tmp = d3phi[nl] - cthree * gr2_tmp;
576 
577  const T gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
578  const T gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu;
579  const T gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu;
580  const T gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu;
581  const T gr_xyz = xu * yu * zu * gr3_tmp;
582  const T gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu;
583  const T gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
584  const T gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu;
585  const T gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu;
586  const T gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
587 
588 
589  //Angular derivatives up to third
590  const T ang_x = ylm_x[lm];
591  const T ang_y = ylm_y[lm];
592  const T ang_z = ylm_z[lm];
593 
594  const T ang_xx = ylm_xx[lm];
595  const T ang_xy = ylm_xy[lm];
596  const T ang_xz = ylm_xz[lm];
597  const T ang_yy = ylm_yy[lm];
598  const T ang_yz = ylm_yz[lm];
599  const T ang_zz = ylm_zz[lm];
600 
601  const T ang_xxx = ylm_xxx[lm];
602  const T ang_xxy = ylm_xxy[lm];
603  const T ang_xxz = ylm_xxz[lm];
604  const T ang_xyy = ylm_xyy[lm];
605  const T ang_xyz = ylm_xyz[lm];
606  const T ang_xzz = ylm_xzz[lm];
607  const T ang_yyy = ylm_yyy[lm];
608  const T ang_yyz = ylm_yyz[lm];
609  const T ang_yzz = ylm_yzz[lm];
610  const T ang_zzz = ylm_zzz[lm];
611 
612  const T vr = phi[nl];
613 
614  psi[ib] += ang * vr * Phase;
615  dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
616  dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
617  dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
618 
619 
620  // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R + R \partial_i \partial_j Y
621  // + (\partial_i R) (\partial_j Y) + (\partial_j R)(\partial_i Y)
622  dhpsi_xx[ib] += (gr_xx * ang + ang_xx * vr + ctwo * gr_x * ang_x) * Phase;
623  dhpsi_xy[ib] += (gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x) * Phase;
624  dhpsi_xz[ib] += (gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x) * Phase;
625  dhpsi_yy[ib] += (gr_yy * ang + ang_yy * vr + ctwo * gr_y * ang_y) * Phase;
626  dhpsi_yz[ib] += (gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y) * Phase;
627  dhpsi_zz[ib] += (gr_zz * ang + ang_zz * vr + ctwo * gr_z * ang_z) * Phase;
628 
629  dghpsi_xxx[ib] += (gr_xxx * ang + vr * ang_xxx + cthree * gr_xx * ang_x + cthree * gr_x * ang_xx) * Phase;
630  dghpsi_xxy[ib] += (gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + ctwo * gr_xy * ang_x +
631  ctwo * ang_xy * gr_x) *
632  Phase;
633  dghpsi_xxz[ib] += (gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + ctwo * gr_xz * ang_x +
634  ctwo * ang_xz * gr_x) *
635  Phase;
636  dghpsi_xyy[ib] += (gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + ctwo * gr_xy * ang_y +
637  ctwo * ang_xy * gr_y) *
638  Phase;
639  dghpsi_xyz[ib] += (gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
640  ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y) *
641  Phase;
642  dghpsi_xzz[ib] += (gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + ctwo * gr_xz * ang_z +
643  ctwo * ang_xz * gr_z) *
644  Phase;
645  dghpsi_yyy[ib] += (gr_yyy * ang + vr * ang_yyy + cthree * gr_yy * ang_y + cthree * gr_y * ang_yy) * Phase;
646  dghpsi_yyz[ib] += (gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + ctwo * gr_yz * ang_y +
647  ctwo * ang_yz * gr_y) *
648  Phase;
649  dghpsi_yzz[ib] += (gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + ctwo * gr_yz * ang_z +
650  ctwo * ang_yz * gr_z) *
651  Phase;
652  dghpsi_zzz[ib] += (gr_zzz * ang + vr * ang_zzz + cthree * gr_zz * ang_z + cthree * gr_z * ang_zz) * Phase;
653  }
654  }
655  }
656  }
657  }
658 
659  /** evaluate V
660  */
661  template<typename LAT, typename T, typename PosType, typename VT>
662  inline void evaluateV(const LAT& lattice, const T r, const PosType& dr, VT* restrict psi, PosType Tv)
663  {
664  int TransX, TransY, TransZ;
665 
666  PosType dr_new;
667  T r_new;
668 
669 #if not defined(QMC_COMPLEX)
670  const ValueType correctphase = 1.0;
671 #else
672 
673  RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] + SuperTwist[2] * Tv[2];
674  RealType s, c;
675  qmcplusplus::sincos(-phasearg, &s, &c);
676  const ValueType correctphase(c, s);
677 
678 #endif
679 
680  RealType* restrict ylm_v = tempS.data(0);
681  RealType* restrict phi_r = tempS.data(1);
682 
683  for (size_t ib = 0; ib < BasisSetSize; ++ib)
684  psi[ib] = 0;
685  //Phase_idx (iter) needs to be initialized at -1 as it has to be incremented first to comply with the if statement (r_new >=Rmax)
686  int iter = -1;
687  for (int i = 0; i <= PBCImages[0]; i++) //loop Translation over X
688  {
689  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
690  TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
691  for (int j = 0; j <= PBCImages[1]; j++) //loop Translation over Y
692  {
693  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
694  TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
695  for (int k = 0; k <= PBCImages[2]; k++) //loop Translation over Z
696  {
697  //Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
698  TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
699 
700  dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0));
701  dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1));
702  dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2));
703 
704  r_new = std::sqrt(dot(dr_new, dr_new));
705  iter++;
706  if (r_new >= Rmax)
707  continue;
708 
709  Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
710  MultiRnl.evaluate(r_new, phi_r);
711  ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table.
712  const ValueType Phase = periodic_image_phase_factors_[iter] * correctphase;
713  for (size_t ib = 0; ib < BasisSetSize; ++ib)
714  psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
715  }
716  }
717  }
718  }
719 
720  /**
721  * @brief evaluate VGL for multiple electrons
722  *
723  * This function should only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]].
724  * These elements are assumed to be zero when passed to this function.
725  * This function only uses only one center (center_idx) from displ_list
726  *
727  * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers]
728  * @param [in] lattice crystal lattice
729  * @param [in,out] psi_vgl wavefunction vgl for all electrons [5, nElec, nBasTot]
730  * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened)
731  * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened)
732  * @param [in] nElec number of electrons
733  * @param [in] nBasTot total number of basis functions represented in psi_vgl
734  * @param [in] center_idx current center index (for indexing into displ_list)
735  * @param [in] BasisOffset index of first basis function of this center (for indexing into psi_vgl)
736  * @param [in] NumCenters total number of centers in system (for indexing into displ_list)
737  *
738  */
739 
740  template<typename LAT, typename VT>
742  const LAT& lattice,
743  Array<VT, 3, OffloadPinnedAllocator<VT>>& psi_vgl,
746  const size_t nElec,
747  const size_t nBasTot,
748  const size_t center_idx,
749  const size_t BasisOffset,
750  const size_t NumCenters)
751  {
752  assert(this == &atom_bs_list.getLeader());
753  auto& atom_bs_leader = atom_bs_list.template getCastedLeader<SoaAtomicBasisSet<ROT, SH>>();
754 
755  int Nx = PBCImages[0] + 1;
756  int Ny = PBCImages[1] + 1;
757  int Nz = PBCImages[2] + 1;
758  const int Nxyz = Nx * Ny * Nz;
759 
760  assert(psi_vgl.size(0) == 5);
761  assert(psi_vgl.size(1) == nElec);
762  assert(psi_vgl.size(2) == nBasTot);
763 
764 
765  auto& ylm_vgl = atom_bs_leader.mw_mem_handle_.getResource().ylm_vgl;
766  auto& rnl_vgl = atom_bs_leader.mw_mem_handle_.getResource().rnl_vgl;
767  auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr;
768  auto& r = atom_bs_leader.mw_mem_handle_.getResource().r;
769 
770  size_t nRnl = RnlID.size();
771  size_t nYlm = Ylm.size();
772 
773  ylm_vgl.resize(5, nElec, Nxyz, nYlm);
774  rnl_vgl.resize(3, nElec, Nxyz, nRnl);
775  dr.resize(nElec, Nxyz, 3);
776  r.resize(nElec, Nxyz);
777 
778 
779  // TODO: move these outside?
780  auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase;
781  correctphase.resize(nElec);
782 
783  auto* dr_ptr = dr.data();
784  auto* r_ptr = r.data();
785 
786  auto* correctphase_ptr = correctphase.data();
787 
788  auto* Tv_list_ptr = Tv_list.data();
789  auto* displ_list_ptr = displ_list.data();
790 
791  constexpr RealType cone(1);
792  constexpr RealType ctwo(2);
793 
794  //V,Gx,Gy,Gz,L
795  auto* restrict psi_ptr = psi_vgl.data_at(0, 0, 0);
796  auto* restrict dpsi_x_ptr = psi_vgl.data_at(1, 0, 0);
797  auto* restrict dpsi_y_ptr = psi_vgl.data_at(2, 0, 0);
798  auto* restrict dpsi_z_ptr = psi_vgl.data_at(3, 0, 0);
799  auto* restrict d2psi_ptr = psi_vgl.data_at(4, 0, 0);
800 
801  {
802  ScopedTimer local_timer(phase_timer_);
803 #if not defined(QMC_COMPLEX)
804 
805  PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ")
806  for (size_t i_e = 0; i_e < nElec; i_e++)
807  correctphase_ptr[i_e] = 1.0;
808 
809 #else
810  auto* SuperTwist_ptr = SuperTwist.data();
811 
812  PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \
813  Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ")
814  for (size_t i_e = 0; i_e < nElec; i_e++)
815  {
816  //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1);
817  RealType phasearg = 0;
818  for (size_t i_dim = 0; i_dim < 3; i_dim++)
819  phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)];
820  RealType s, c;
821  qmcplusplus::sincos(-phasearg, &s, &c);
822  correctphase_ptr[i_e] = ValueType(c, s);
823  }
824 #endif
825  }
826 
827  {
828  ScopedTimer local_timer(nelec_pbc_timer_);
829  auto* periodic_image_displacements_ptr = periodic_image_displacements_.data();
830  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \
831  map(to:periodic_image_displacements_ptr[:3*Nxyz]) \
832  map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ")
833  for (size_t i_e = 0; i_e < nElec; i_e++)
834  for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
835  {
836  RealType tmp_r2 = 0.0;
837  for (size_t i_dim = 0; i_dim < 3; i_dim++)
838  {
839  dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] +
840  periodic_image_displacements_ptr[i_dim + 3 * i_xyz]);
841  tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)];
842  }
843  r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2);
844  //printf("particle %lu image %d, %lf, %lf\n", i_e, i_xyz, tmp_r2, dr_ptr[3 * (i_xyz + Nxyz * i_e)]);
845  }
846  }
847 
848  {
849  ScopedTimer local(rnl_timer_);
850  MultiRnl.batched_evaluateVGL(r, rnl_vgl, Rmax);
851  }
852 
853  {
854  ScopedTimer local(ylm_timer_);
855  Ylm.batched_evaluateVGL(dr, ylm_vgl);
856  }
857 
858  {
859  ScopedTimer local_timer(psi_timer_);
860  auto* phase_fac_ptr = periodic_image_phase_factors_.data();
861  auto* LM_ptr = LM.data();
862  auto* NL_ptr = NL.data();
863  const int bset_size = BasisSetSize;
864 
865  RealType* restrict phi_ptr = rnl_vgl.data_at(0, 0, 0, 0);
866  RealType* restrict dphi_ptr = rnl_vgl.data_at(1, 0, 0, 0);
867  RealType* restrict d2phi_ptr = rnl_vgl.data_at(2, 0, 0, 0);
868 
869 
870  const RealType* restrict ylm_v_ptr = ylm_vgl.data_at(0, 0, 0, 0); //value
871  const RealType* restrict ylm_x_ptr = ylm_vgl.data_at(1, 0, 0, 0); //gradX
872  const RealType* restrict ylm_y_ptr = ylm_vgl.data_at(2, 0, 0, 0); //gradY
873  const RealType* restrict ylm_z_ptr = ylm_vgl.data_at(3, 0, 0, 0); //gradZ
874  const RealType* restrict ylm_l_ptr = ylm_vgl.data_at(4, 0, 0, 0); //lap
875  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \
876  map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \
877  map(to:ylm_v_ptr[:nYlm*nElec*Nxyz], ylm_x_ptr[:nYlm*nElec*Nxyz], ylm_y_ptr[:nYlm*nElec*Nxyz], ylm_z_ptr[:nYlm*nElec*Nxyz], ylm_l_ptr[:nYlm*nElec*Nxyz], \
878  phi_ptr[:nRnl*nElec*Nxyz], dphi_ptr[:nRnl*nElec*Nxyz], d2phi_ptr[:nRnl*nElec*Nxyz], \
879  psi_ptr[:nBasTot*nElec], dpsi_x_ptr[:nBasTot*nElec], dpsi_y_ptr[:nBasTot*nElec], dpsi_z_ptr[:nBasTot*nElec], d2psi_ptr[:nBasTot*nElec], \
880  correctphase_ptr[:nElec], r_ptr[:nElec*Nxyz], dr_ptr[:3*nElec*Nxyz]) ")
881  for (int i_e = 0; i_e < nElec; i_e++)
882  for (int ib = 0; ib < bset_size; ++ib)
883  {
884  const int nl(NL_ptr[ib]);
885  const int lm(LM_ptr[ib]);
886  VT psi = 0;
887  VT dpsi_x = 0;
888  VT dpsi_y = 0;
889  VT dpsi_z = 0;
890  VT d2psi = 0;
891 
892  for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
893  {
894  const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e];
895  const RealType rinv = cone / r_ptr[i_xyz + Nxyz * i_e];
896  const RealType x = dr_ptr[0 + 3 * (i_xyz + Nxyz * i_e)];
897  const RealType y = dr_ptr[1 + 3 * (i_xyz + Nxyz * i_e)];
898  const RealType z = dr_ptr[2 + 3 * (i_xyz + Nxyz * i_e)];
899  const RealType drnloverr = rinv * dphi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)];
900  const RealType ang = ylm_v_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
901  const RealType gr_x = drnloverr * x;
902  const RealType gr_y = drnloverr * y;
903  const RealType gr_z = drnloverr * z;
904  const RealType ang_x = ylm_x_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
905  const RealType ang_y = ylm_y_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
906  const RealType ang_z = ylm_z_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)];
907  const RealType vr = phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)];
908 
909  psi += ang * vr * Phase;
910  dpsi_x += (ang * gr_x + vr * ang_x) * Phase;
911  dpsi_y += (ang * gr_y + vr * ang_y) * Phase;
912  dpsi_z += (ang * gr_z + vr * ang_z) * Phase;
913  d2psi += (ang * (ctwo * drnloverr + d2phi_ptr[nl + nRnl * (i_xyz + Nxyz * i_e)]) +
914  ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) +
915  vr * ylm_l_ptr[lm + nYlm * (i_xyz + Nxyz * i_e)]) *
916  Phase;
917  }
918 
919  psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi;
920  dpsi_x_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_x;
921  dpsi_y_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_y;
922  dpsi_z_ptr[BasisOffset + ib + i_e * nBasTot] = dpsi_z;
923  d2psi_ptr[BasisOffset + ib + i_e * nBasTot] = d2psi;
924  }
925  }
926  }
927 
928  /**
929  * @brief evaluate for multiple electrons
930  *
931  * This function should only assign to elements of psi in the range [[0:nElec],[BasisOffset:BasisOffset+BasisSetSize]].
932  * These elements are assumed to be zero when passed to this function.
933  * This function only uses only one center (center_idx) from displ_list
934  *
935  * @param [in] atom_bs_list multi-walker list of SoaAtomicBasisSet [nWalkers]
936  * @param [in] lattice crystal lattice
937  * @param [in,out] psi wavefunction values for all electrons [nElec, nBasTot]
938  * @param [in] displ_list displacement from each electron to each center [NumCenters, nElec, 3] (flattened)
939  * @param [in] Tv_list translation vectors for computing overall phase factor [NumCenters, nElec, 3] (flattened)
940  * @param [in] nElec number of electrons
941  * @param [in] nBasTot total number of basis functions represented in psi
942  * @param [in] center_idx current center index (for indexing into displ_list)
943  * @param [in] BasisOffset index of first basis function of this center (for indexing into psi)
944  * @param [in] NumCenters total number of centers in system (for indexing into displ_list)
945  *
946  */
947  template<typename LAT, typename VT>
948  inline void mw_evaluateV(const RefVectorWithLeader<SoaAtomicBasisSet>& atom_bs_list,
949  const LAT& lattice,
950  Array<VT, 2, OffloadPinnedAllocator<VT>>& psi,
953  const size_t nElec,
954  const size_t nBasTot,
955  const size_t center_idx,
956  const size_t BasisOffset,
957  const size_t NumCenters)
958  {
959  assert(this == &atom_bs_list.getLeader());
960  auto& atom_bs_leader = atom_bs_list.template getCastedLeader<SoaAtomicBasisSet<ROT, SH>>();
961  //TODO: use QMCTraits::DIM instead of 3?
962  // DIM==3 is baked into so many parts here that it's probably not worth it for now
963  const int Nx = PBCImages[0] + 1;
964  const int Ny = PBCImages[1] + 1;
965  const int Nz = PBCImages[2] + 1;
966  const int Nxyz = Nx * Ny * Nz;
967  assert(psi.size(0) == nElec);
968  assert(psi.size(1) == nBasTot);
969 
970 
971  auto& ylm_v = atom_bs_leader.mw_mem_handle_.getResource().ylm_v;
972  auto& rnl_v = atom_bs_leader.mw_mem_handle_.getResource().rnl_v;
973  auto& dr = atom_bs_leader.mw_mem_handle_.getResource().dr;
974  auto& r = atom_bs_leader.mw_mem_handle_.getResource().r;
975 
976  const size_t nRnl = RnlID.size();
977  const size_t nYlm = Ylm.size();
978 
979  ylm_v.resize(nElec, Nxyz, nYlm);
980  rnl_v.resize(nElec, Nxyz, nRnl);
981  dr.resize(nElec, Nxyz, 3);
982  r.resize(nElec, Nxyz);
983 
984  // TODO: move these outside?
985  auto& correctphase = atom_bs_leader.mw_mem_handle_.getResource().correctphase;
986  correctphase.resize(nElec);
987 
988  auto* dr_ptr = dr.data();
989  auto* r_ptr = r.data();
990 
991  auto* correctphase_ptr = correctphase.data();
992 
993  auto* Tv_list_ptr = Tv_list.data();
994  auto* displ_list_ptr = displ_list.data();
995 
996  // need to map Tensor<T,3> vals to device
997  auto* latR_ptr = lattice.R.data();
998 
999 
1000  {
1001  ScopedTimer local_timer(phase_timer_);
1002 #if not defined(QMC_COMPLEX)
1003 
1004  PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:correctphase_ptr[:nElec]) ")
1005  for (size_t i_e = 0; i_e < nElec; i_e++)
1006  correctphase_ptr[i_e] = 1.0;
1007 
1008 #else
1009  auto* SuperTwist_ptr = SuperTwist.data();
1010 
1011  PRAGMA_OFFLOAD("omp target teams distribute parallel for map(to:SuperTwist_ptr[:SuperTwist.size()], \
1012  Tv_list_ptr[3*nElec*center_idx:3*nElec], correctphase_ptr[:nElec]) ")
1013  for (size_t i_e = 0; i_e < nElec; i_e++)
1014  {
1015  //RealType phasearg = dot(3, SuperTwist.data(), 1, Tv_list.data() + 3 * i_e, 1);
1016  RealType phasearg = 0;
1017  for (size_t i_dim = 0; i_dim < 3; i_dim++)
1018  phasearg += SuperTwist[i_dim] * Tv_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)];
1019  RealType s, c;
1020  qmcplusplus::sincos(-phasearg, &s, &c);
1021  correctphase_ptr[i_e] = ValueType(c, s);
1022  }
1023 #endif
1024  }
1025 
1026  {
1027  ScopedTimer local_timer(nelec_pbc_timer_);
1028  auto* periodic_image_displacements_ptr = periodic_image_displacements_.data();
1029  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \
1030  map(to:periodic_image_displacements_ptr[:3*Nxyz]) \
1031  map(to: dr_ptr[:3*nElec*Nxyz], r_ptr[:nElec*Nxyz], displ_list_ptr[3*nElec*center_idx:3*nElec]) ")
1032  for (size_t i_e = 0; i_e < nElec; i_e++)
1033  for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
1034  {
1035  RealType tmp_r2 = 0.0;
1036  for (size_t i_dim = 0; i_dim < 3; i_dim++)
1037  {
1038  dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] = -(displ_list_ptr[i_dim + 3 * (i_e + center_idx * nElec)] +
1039  periodic_image_displacements_ptr[i_dim + 3 * i_xyz]);
1040  tmp_r2 += dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)] * dr_ptr[i_dim + 3 * (i_xyz + Nxyz * i_e)];
1041  }
1042  r_ptr[i_xyz + Nxyz * i_e] = std::sqrt(tmp_r2);
1043  }
1044  }
1045 
1046 
1047  {
1048  ScopedTimer local(rnl_timer_);
1049  MultiRnl.batched_evaluate(r, rnl_v, Rmax);
1050  }
1051 
1052  {
1053  ScopedTimer local(ylm_timer_);
1054  Ylm.batched_evaluateV(dr, ylm_v);
1055  }
1056 
1057  {
1058  ScopedTimer local_timer(psi_timer_);
1059  ///Phase for PBC containing the phase for the nearest image displacement and the correction due to the Distance table.
1060  auto* phase_fac_ptr = periodic_image_phase_factors_.data();
1061  auto* LM_ptr = LM.data();
1062  auto* NL_ptr = NL.data();
1063  auto* psi_ptr = psi.data();
1064  const int bset_size = BasisSetSize;
1065 
1066  auto* ylm_ptr = ylm_v.data();
1067  auto* rnl_ptr = rnl_v.data();
1068  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) \
1069  map(to:phase_fac_ptr[:Nxyz], LM_ptr[:BasisSetSize], NL_ptr[:BasisSetSize]) \
1070  map(to:ylm_ptr[:nYlm*nElec*Nxyz], rnl_ptr[:nRnl*nElec*Nxyz], psi_ptr[:nBasTot*nElec], correctphase_ptr[:nElec])")
1071  for (int i_e = 0; i_e < nElec; i_e++)
1072  for (int ib = 0; ib < bset_size; ++ib)
1073  {
1074  VT psi = 0;
1075  for (int i_xyz = 0; i_xyz < Nxyz; i_xyz++)
1076  {
1077  const ValueType Phase = phase_fac_ptr[i_xyz] * correctphase_ptr[i_e];
1078  psi += ylm_ptr[(i_xyz + Nxyz * i_e) * nYlm + LM_ptr[ib]] *
1079  rnl_ptr[(i_xyz + Nxyz * i_e) * nRnl + NL_ptr[ib]] * Phase;
1080  }
1081  psi_ptr[BasisOffset + ib + i_e * nBasTot] = psi;
1082  }
1083  }
1084  }
1085 
1086  void createResource(ResourceCollection& collection) const
1087  {
1088  collection.addResource(std::make_unique<SoaAtomicBSetMultiWalkerMem>());
1089  }
1090 
1092  const RefVectorWithLeader<SoaAtomicBasisSet>& atom_basis_list) const
1093  {
1094  assert(this == &atom_basis_list.getLeader());
1095  atom_basis_list.template getCastedLeader<SoaAtomicBasisSet>().mw_mem_handle_ =
1097  }
1098 
1100  const RefVectorWithLeader<SoaAtomicBasisSet>& atom_basis_list) const
1101  {
1102  assert(this == &atom_basis_list.getLeader());
1103  collection.takebackResource(atom_basis_list.template getCastedLeader<SoaAtomicBasisSet>().mw_mem_handle_);
1104  }
1105 
1106 private:
1107  /// multi walker shared memory buffer
1109  {
1110  SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {}
1111 
1113 
1114  std::unique_ptr<Resource> makeClone() const override
1115  {
1116  return std::make_unique<SoaAtomicBSetMultiWalkerMem>(*this);
1117  }
1118 
1119  OffloadArray4D ylm_vgl; // [5][Nelec][PBC][NYlm]
1120  OffloadArray4D rnl_vgl; // [5][Nelec][PBC][NRnl]
1121  OffloadArray3D ylm_v; // [Nelec][PBC][NYlm]
1122  OffloadArray3D rnl_v; // [Nelec][PBC][NRnl]
1123  OffloadArray3D dr; // [Nelec][PBC][xyz] ion->elec displacement for each image
1124  OffloadArray2D r; // [Nelec][PBC] ion->elec distance for each image
1125  OffloadVector correctphase; // [Nelec] overall phase
1126  };
1127 
1128  /// multi walker resource handle
1130  ///size of the basis set
1132  ///Number of Cell images for the evaluation of the orbital with PBC. If No PBC, should be 0;
1134  ///Coordinates of SuperTwist
1136  ///maximum radius of this center
1138  ///spherical harmonics
1139  SH Ylm;
1140  ///radial orbitals
1142  ///container for the quantum-numbers
1143  std::vector<QuantumNumberType> RnlID;
1144  ///temporary storage
1146  ///Phase Factor array of images
1147  std::shared_ptr<OffloadVector> periodic_image_phase_factors_ptr_;
1148  ///Displacements of images
1149  std::shared_ptr<OffloadArray2D> periodic_image_displacements_ptr_;
1150  ///reference to the phase factor array of images
1152  ///reference to the displacements of images
1154  /**index of the corresponding radial orbital with quantum numbers \f$ (n,l) \f$ */
1155  const std::shared_ptr<OffloadIntVector> NL_ptr_;
1156  ///index of the corresponding real Spherical Harmonic with quantum numbers \f$ (l,m) \f$
1157  const std::shared_ptr<OffloadIntVector> LM_ptr_;
1158  /// reference to NL_ptr_
1160  /// reference to LM_ptr_
1162  // timers
1169 
1170  template<typename COT>
1171  friend class AOBasisBuilder;
1172  template<typename COT>
1174 };
1175 
1176 } // namespace qmcplusplus
1177 #endif
RealType Rmax
maximum radius of this center
typename ROT::GridType GridType
void setCenter(int c, int offset)
set the current offset
void evaluateVGH(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGH &vgh, PosType Tv)
Declaration of OptimizableObject.
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
void takebackResource(ResourceHandle< RS > &res_handle)
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
TinyVector< double, 3 > SuperTwist
Coordinates of SuperTwist.
const std::shared_ptr< OffloadIntVector > NL_ptr_
index of the corresponding radial orbital with quantum numbers
std::vector< QuantumNumberType > RnlID
container for the quantum-numbers
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< SoaAtomicBasisSet > &atom_basis_list) const
ResourceHandle manages the temporary resource referenced from a collection.
Type_t * data()
Definition: OhmmsArray.h:87
Build a set of radial orbitals at the origin.
constexpr std::complex< float > cone
Definition: BLAS.hpp:50
int BasisSetSize
size of the basis set
void mw_evaluateVGL(const RefVectorWithLeader< SoaAtomicBasisSet > &atom_bs_list, const LAT &lattice, Array< VT, 3, OffloadPinnedAllocator< VT >> &psi_vgl, const Vector< RealType, OffloadPinnedAllocator< RealType >> &displ_list, const Vector< RealType, OffloadPinnedAllocator< RealType >> &Tv_list, const size_t nElec, const size_t nBasTot, const size_t center_idx, const size_t BasisOffset, const size_t NumCenters)
evaluate VGL for multiple electrons
Timer accumulates time and call counts.
Definition: NewTimer.h:135
void evaluateVGHGH(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGHGH &vghgh, PosType Tv)
VectorSoaContainer< RealType, 4 > tempS
temporary storage
SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem &)
int getBasisSetSize() const
return the number of basis functions
TinyVector< int, 3 > PBCImages
Number of Cell images for the evaluation of the orbital with PBC. If No PBC, should be 0;...
std::unique_ptr< Resource > makeClone() const override
void createResource(ResourceCollection &collection) const
CrystalLattice< OHMMS_PRECISION, OHMMS_DIM > lattice
atomic basisset builder
const std::shared_ptr< OffloadIntVector > LM_ptr_
index of the corresponding real Spherical Harmonic with quantum numbers
size_type size() const
return the current size
Definition: OhmmsVector.h:162
QTBase::ValueType ValueType
Definition: Configuration.h:60
OffloadIntVector & NL
reference to NL_ptr_
void finalize()
implement a BasisSetBase virtual function
std::shared_ptr< OffloadVector > periodic_image_phase_factors_ptr_
Phase Factor array of images.
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
void setPBCParams(const TinyVector< int, 3 > &pbc_images, const TinyVector< double, 3 > supertwist, const OffloadVector &PeriodicImagePhaseFactors, const OffloadArray2D &PeriodicImageDisplacements)
Set the number of periodic image for the evaluation of the orbitals and the phase factor...
class to handle a set of variables that can be modified during optimizations
Definition: VariableSet.h:49
SoaAtomicBasisSet(int lmax, bool addsignforM=false)
the constructor
void setRmax(T rmax)
Set Rmax.
typename QMCTraits::ValueType ValueType
void checkOutVariables(const opt_variables_type &active)
void checkInVariables(opt_variables_type &active)
OMPallocator is an allocator with fused device and dualspace allocator functionality.
std::shared_ptr< OffloadArray2D > periodic_image_displacements_ptr_
Displacements of images.
OffloadVector & periodic_image_phase_factors_
reference to the phase factor array of images
OffloadArray2D & periodic_image_displacements_
reference to the displacements of images
void evaluateVGL(const LAT &lattice, const T r, const PosType &dr, const size_t offset, VGL &vgl, PosType Tv)
evaluate VGL
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
QMCTraits::RealType RealType
GridType
The different types of grids that we currently allow.
Definition: Grid.h:29
void resetParameters(const opt_variables_type &active)
void mw_evaluateV(const RefVectorWithLeader< SoaAtomicBasisSet > &atom_bs_list, const LAT &lattice, Array< VT, 2, OffloadPinnedAllocator< VT >> &psi, const Vector< RealType, OffloadPinnedAllocator< RealType >> &displ_list, const Vector< RealType, OffloadPinnedAllocator< RealType >> &Tv_list, const size_t nElec, const size_t nBasTot, const size_t center_idx, const size_t BasisOffset, const size_t NumCenters)
evaluate for multiple electrons
OffloadIntVector & LM
reference to LM_ptr_
void updateTo(size_t size=0, std::ptrdiff_t offset=0)
Definition: OhmmsArray.h:224
void evaluateV(const LAT &lattice, const T r, const PosType &dr, VT *restrict psi, PosType Tv)
evaluate V
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
ResourceHandle< RS > lendResource()
void updateTo(size_type size=0, std::ptrdiff_t offset=0)
Definition: OhmmsVector.h:263
void resize(size_type n)
resize myData
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< SoaAtomicBasisSet > &atom_basis_list) const
typename ROT::RealType RealType
ResourceHandle< SoaAtomicBSetMultiWalkerMem > mw_mem_handle_
multi walker resource handle
void queryOrbitalsForSType(std::vector< bool > &s_orbitals) const
Sets a boolean vector for S-type orbitals. Used for cusp correction.