QMCPACK
SplineC2C.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
8 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
9 //
10 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
14 #include <complex>
15 #include "Concurrency/OpenMP.h"
16 #include "SplineC2C.h"
17 #include "spline2/MultiBsplineEval.hpp"
19 #include "CPU/math.hpp"
21 #include "CPU/BLAS.hpp"
22 
23 namespace qmcplusplus
24 {
25 template<typename ST>
26 SplineC2C<ST>::SplineC2C(const SplineC2C& in) = default;
27 
28 template<typename ST>
30  SingleSplineType* spline_i,
31  int twist,
32  int ispline,
33  int level)
34 {
35  SplineInst->copy_spline(spline_r, 2 * ispline);
36  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
37 }
38 
39 template<typename ST>
41 {
42  std::ostringstream o;
43  o << "spline_" << MyIndex;
44  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
45  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
46 }
47 
48 template<typename ST>
50 {
51  std::ostringstream o;
52  o << "spline_" << MyIndex;
53  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
54  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
55 }
56 
57 template<typename ST>
59 {
60  const auto spline_ptr = SplineInst->getSplinePtr();
61  const auto coefs_tot_size = spline_ptr->coefs_size;
62  coef_copy_ = std::make_shared<std::vector<ST>>(coefs_tot_size);
63 
64  std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
65 }
66 
67 /*
68  ~~ Notes for rotation ~~
69  spl_coefs = Raw pointer to spline coefficients
70  basis_set_size = Number of spline coefs per orbital
71  OrbitalSetSize = Number of orbitals (excluding padding)
72 
73  spl_coefs has a complicated layout depending on dimensionality of splines.
74  Luckily, for our purposes, we can think of spl_coefs as pointing to a
75  matrix of size BasisSetSize x (OrbitalSetSize + padding), with the spline
76  index adjacent in memory. The orbital index is SIMD aligned and therefore
77  may include padding.
78 
79  As a result, due to SIMD alignment, Nsplines may be larger than the
80  actual number of splined orbitals. This means that in practice rot_mat
81  may be smaller than the number of 'columns' in the coefs array!
82 
83  SplineR2R spl_coef layout:
84  ^ | sp1 | ... | spN | pad |
85  | |=====|=====|=====|=====|
86  | | c11 | ... | c1N | 0 |
87  basis_set_size | c21 | ... | c2N | 0 |
88  | | ... | ... | ... | 0 |
89  | | cM1 | ... | cMN | 0 |
90  v |=====|=====|=====|=====|
91  <------ Nsplines ------>
92 
93  SplineC2C spl_coef layout:
94  ^ | sp1_r | sp1_i | ... | spN_r | spN_i | pad |
95  | |=======|=======|=======|=======|=======|=======|
96  | | c11_r | c11_i | ... | c1N_r | c1N_i | 0 |
97  basis_set_size | c21_r | c21_i | ... | c2N_r | c2N_i | 0 |
98  | | ... | ... | ... | ... | ... | ... |
99  | | cM1_r | cM1_i | ... | cMN_r | cMN_i | 0 |
100  v |=======|=======|=======|=======|=======|=======|
101  <------------------ Nsplines ------------------>
102 
103  NB: For splines (typically) BasisSetSize >> OrbitalSetSize, so the spl_coefs
104  "matrix" is very tall and skinny.
105 */
106 template<typename ST>
107 void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
108 {
109  // SplineInst is a MultiBspline. See src/spline2/MultiBspline.hpp
110  const auto spline_ptr = SplineInst->getSplinePtr();
111  assert(spline_ptr != nullptr);
112  const auto spl_coefs = spline_ptr->coefs;
113  const auto Nsplines = spline_ptr->num_splines; // May include padding
114  const auto coefs_tot_size = spline_ptr->coefs_size;
115  const auto basis_set_size = coefs_tot_size / Nsplines;
116  assert(OrbitalSetSize == rot_mat.rows());
117  assert(OrbitalSetSize == rot_mat.cols());
118 
119  if (!use_stored_copy)
120  {
121  assert(coef_copy_ != nullptr);
122  std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
123  }
124 
125  if constexpr (std::is_same_v<ST, RealType>)
126  {
127  //if ST is double, go ahead and use blas to make things faster
128  //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals.
129  //Also casting them as ValueType so they are complex to do the correct gemm
130  BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(),
131  OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0), (ValueType*)spl_coefs,
132  Nsplines / 2);
133  }
134  else
135  {
136  // if ST is float, RealType is double and ValueType is std::complex<double> for C2C
137  // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix
138  for (IndexType i = 0; i < basis_set_size; i++)
139  for (IndexType j = 0; j < OrbitalSetSize; j++)
140  {
141  // cur_elem points to the real componend of the coefficient.
142  // Imag component is adjacent in memory.
143  const auto cur_elem = Nsplines * i + 2 * j;
144  ST newval_r{0.};
145  ST newval_i{0.};
146  for (IndexType k = 0; k < OrbitalSetSize; k++)
147  {
148  const auto index = Nsplines * i + 2 * k;
149  ST zr = (*coef_copy_)[index];
150  ST zi = (*coef_copy_)[index + 1];
151  ST wr = rot_mat[k][j].real();
152  ST wi = rot_mat[k][j].imag();
153  newval_r += zr * wr - zi * wi;
154  newval_i += zr * wi + zi * wr;
155  }
156  spl_coefs[cur_elem] = newval_r;
157  spl_coefs[cur_elem + 1] = newval_i;
158  }
159  }
160 }
161 
162 template<typename ST>
163 inline void SplineC2C<ST>::assign_v(const PointType& r,
164  const vContainer_type& myV,
165  ValueVector& psi,
166  int first,
167  int last) const
168 {
169  // protect last
170  const size_t last_cplx = std::min(kPoints.size(), psi.size());
171  last = last > last_cplx ? last_cplx : last;
172 
173  const ST x = r[0], y = r[1], z = r[2];
174  const ST* restrict kx = myKcart.data(0);
175  const ST* restrict ky = myKcart.data(1);
176  const ST* restrict kz = myKcart.data(2);
177 #pragma omp simd
178  for (size_t j = first; j < last; ++j)
179  {
180  ST s, c;
181  const ST val_r = myV[2 * j];
182  const ST val_i = myV[2 * j + 1];
183  qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
184  psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s);
185  }
186 }
187 
188 template<typename ST>
189 void SplineC2C<ST>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
190 {
191  const PointType& r = P.activeR(iat);
192  PointType ru(PrimLattice.toUnit_floor(r));
193 
194 #pragma omp parallel
195  {
196  int first, last;
197  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
198  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
199 
200  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
201  assign_v(r, myV, psi, first / 2, last / 2);
202  }
203 }
204 
205 template<typename ST>
207  ValueVector& psi,
208  const ValueVector& psiinv,
209  std::vector<ValueType>& ratios)
210 {
211  const bool need_resize = ratios_private.rows() < VP.getTotalNum();
212 
213 #pragma omp parallel
214  {
215  int tid = omp_get_thread_num();
216  // initialize thread private ratios
217  if (need_resize)
218  {
219  if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime
220  ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
221 #pragma omp barrier
222  }
223  int first, last;
224  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
225  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), tid, first, last);
226  const int first_cplx = first / 2;
227  const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2;
228 
229  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
230  {
231  const PointType& r = VP.activeR(iat);
232  PointType ru(PrimLattice.toUnit_floor(r));
233 
234  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
235  assign_v(r, myV, psi, first_cplx, last_cplx);
236  ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx);
237  }
238  }
239 
240  // do the reduction manually
241  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
242  {
243  ratios[iat] = ComplexT(0);
244  for (int tid = 0; tid < ratios_private.cols(); tid++)
245  ratios[iat] += ratios_private[iat][tid];
246  }
247 }
248 
249 /** assign_vgl
250  */
251 template<typename ST>
253  ValueVector& psi,
254  GradVector& dpsi,
255  ValueVector& d2psi,
256  int first,
257  int last) const
258 {
259  // protect last
260  const int last_cplx = std::min(kPoints.size(), psi.size());
261  last = last > last_cplx ? last_cplx : last;
262 
263  constexpr ST zero(0);
264  constexpr ST two(2);
265  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
266  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
267  g22 = PrimLattice.G(8);
268  const ST x = r[0], y = r[1], z = r[2];
269  const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]};
270 
271  const ST* restrict k0 = myKcart.data(0);
272  const ST* restrict k1 = myKcart.data(1);
273  const ST* restrict k2 = myKcart.data(2);
274 
275  const ST* restrict g0 = myG.data(0);
276  const ST* restrict g1 = myG.data(1);
277  const ST* restrict g2 = myG.data(2);
278  const ST* restrict h00 = myH.data(0);
279  const ST* restrict h01 = myH.data(1);
280  const ST* restrict h02 = myH.data(2);
281  const ST* restrict h11 = myH.data(3);
282  const ST* restrict h12 = myH.data(4);
283  const ST* restrict h22 = myH.data(5);
284 
285 #pragma omp simd
286  for (size_t j = first; j < last; ++j)
287  {
288  const size_t jr = j << 1;
289  const size_t ji = jr + 1;
290 
291  const ST kX = k0[j];
292  const ST kY = k1[j];
293  const ST kZ = k2[j];
294  const ST val_r = myV[jr];
295  const ST val_i = myV[ji];
296 
297  //phase
298  ST s, c;
299  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
300 
301  //dot(PrimLattice.G,myG[j])
302  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
303  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
304  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
305 
306  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
307  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
308  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
309 
310  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
311  const ST gX_r = dX_r + val_i * kX;
312  const ST gY_r = dY_r + val_i * kY;
313  const ST gZ_r = dZ_r + val_i * kZ;
314  const ST gX_i = dX_i - val_r * kX;
315  const ST gY_i = dY_i - val_r * kY;
316  const ST gZ_i = dZ_i - val_r * kZ;
317 
318  const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
319  const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
320  const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
321  const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
322  const size_t psiIndex = j + first_spo;
323  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
324  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
325  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
326  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
327  d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
328  }
329 }
330 
331 /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
332  */
333 template<typename ST>
335 {
336  constexpr ST two(2);
337  const ST x = r[0], y = r[1], z = r[2];
338 
339  const ST* restrict k0 = myKcart.data(0);
340  const ST* restrict k1 = myKcart.data(1);
341  const ST* restrict k2 = myKcart.data(2);
342 
343  const ST* restrict g0 = myG.data(0);
344  const ST* restrict g1 = myG.data(1);
345  const ST* restrict g2 = myG.data(2);
346 
347  const size_t last_cplx = last_spo > psi.size() ? psi.size() : last_spo;
348  const size_t N = last_cplx - first_spo;
349 #pragma omp simd
350  for (size_t j = 0; j < N; ++j)
351  {
352  const size_t jr = j << 1;
353  const size_t ji = jr + 1;
354 
355  const ST kX = k0[j];
356  const ST kY = k1[j];
357  const ST kZ = k2[j];
358  const ST val_r = myV[jr];
359  const ST val_i = myV[ji];
360 
361  //phase
362  ST s, c;
363  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
364 
365  //dot(PrimLattice.G,myG[j])
366  const ST dX_r = g0[jr];
367  const ST dY_r = g1[jr];
368  const ST dZ_r = g2[jr];
369 
370  const ST dX_i = g0[ji];
371  const ST dY_i = g1[ji];
372  const ST dZ_i = g2[ji];
373 
374  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
375  const ST gX_r = dX_r + val_i * kX;
376  const ST gY_r = dY_r + val_i * kY;
377  const ST gZ_r = dZ_r + val_i * kZ;
378  const ST gX_i = dX_i - val_r * kX;
379  const ST gY_i = dY_i - val_r * kY;
380  const ST gZ_i = dZ_i - val_r * kZ;
381 
382  const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
383  const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
384 
385  const size_t psiIndex = j + first_spo;
386  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
387  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
388  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
389  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
390  d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
391  }
392 }
393 
394 template<typename ST>
396  const int iat,
397  ValueVector& psi,
398  GradVector& dpsi,
399  ValueVector& d2psi)
400 {
401  const PointType& r = P.activeR(iat);
402  PointType ru(PrimLattice.toUnit_floor(r));
403 
404 #pragma omp parallel
405  {
406  int first, last;
407  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
408  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
409 
410  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
411  assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2);
412  }
413 }
414 
415 template<typename ST>
417  ValueVector& psi,
418  GradVector& dpsi,
419  HessVector& grad_grad_psi,
420  int first,
421  int last) const
422 {
423  // protect last
424  const size_t last_cplx = std::min(kPoints.size(), psi.size());
425  last = last > last_cplx ? last_cplx : last;
426 
427  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
428  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
429  g22 = PrimLattice.G(8);
430  const ST x = r[0], y = r[1], z = r[2];
431 
432  const ST* restrict k0 = myKcart.data(0);
433  const ST* restrict k1 = myKcart.data(1);
434  const ST* restrict k2 = myKcart.data(2);
435 
436  const ST* restrict g0 = myG.data(0);
437  const ST* restrict g1 = myG.data(1);
438  const ST* restrict g2 = myG.data(2);
439  const ST* restrict h00 = myH.data(0);
440  const ST* restrict h01 = myH.data(1);
441  const ST* restrict h02 = myH.data(2);
442  const ST* restrict h11 = myH.data(3);
443  const ST* restrict h12 = myH.data(4);
444  const ST* restrict h22 = myH.data(5);
445 
446 #pragma omp simd
447  for (size_t j = first; j < last; ++j)
448  {
449  int jr = j << 1;
450  int ji = jr + 1;
451 
452  const ST kX = k0[j];
453  const ST kY = k1[j];
454  const ST kZ = k2[j];
455  const ST val_r = myV[jr];
456  const ST val_i = myV[ji];
457 
458  //phase
459  ST s, c;
460  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
461 
462  //dot(PrimLattice.G,myG[j])
463  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
464  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
465  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
466 
467  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
468  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
469  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
470 
471  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
472  const ST gX_r = dX_r + val_i * kX;
473  const ST gY_r = dY_r + val_i * kY;
474  const ST gZ_r = dZ_r + val_i * kZ;
475  const ST gX_i = dX_i - val_r * kX;
476  const ST gY_i = dY_i - val_r * kY;
477  const ST gZ_i = dZ_i - val_r * kZ;
478 
479  const size_t psiIndex = j + first_spo;
480  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
481  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
482  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
483  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
484 
485  const ST h_xx_r =
486  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
487  const ST h_xy_r =
488  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
489  const ST h_xz_r =
490  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
491  const ST h_yx_r =
492  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
493  const ST h_yy_r =
494  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
495  const ST h_yz_r =
496  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
497  const ST h_zx_r =
498  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
499  const ST h_zy_r =
500  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
501  const ST h_zz_r =
502  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
503 
504  const ST h_xx_i =
505  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
506  const ST h_xy_i =
507  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
508  const ST h_xz_i =
509  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
510  const ST h_yx_i =
511  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
512  const ST h_yy_i =
513  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
514  const ST h_yz_i =
515  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
516  const ST h_zx_i =
517  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
518  const ST h_zy_i =
519  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
520  const ST h_zz_i =
521  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
522 
523  grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
524  grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
525  grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
526  grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
527  grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
528  grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
529  grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
530  grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
531  grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
532  }
533 }
534 
535 template<typename ST>
537  const int iat,
538  ValueVector& psi,
539  GradVector& dpsi,
540  HessVector& grad_grad_psi)
541 {
542  const PointType& r = P.activeR(iat);
543  PointType ru(PrimLattice.toUnit_floor(r));
544 
545 #pragma omp parallel
546  {
547  int first, last;
548  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
549  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
550 
551  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
552  assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
553  }
554 }
555 
556 template<typename ST>
558  ValueVector& psi,
559  GradVector& dpsi,
560  HessVector& grad_grad_psi,
561  GGGVector& grad_grad_grad_psi,
562  int first,
563  int last) const
564 {
565  // protect last
566  const size_t last_cplx = std::min(kPoints.size(), psi.size());
567  last = last < 0 ? last_cplx : (last > last_cplx ? last_cplx : last);
568 
569  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
570  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
571  g22 = PrimLattice.G(8);
572  const ST x = r[0], y = r[1], z = r[2];
573 
574  const ST* restrict k0 = myKcart.data(0);
575  const ST* restrict k1 = myKcart.data(1);
576  const ST* restrict k2 = myKcart.data(2);
577 
578  const ST* restrict g0 = myG.data(0);
579  const ST* restrict g1 = myG.data(1);
580  const ST* restrict g2 = myG.data(2);
581  const ST* restrict h00 = myH.data(0);
582  const ST* restrict h01 = myH.data(1);
583  const ST* restrict h02 = myH.data(2);
584  const ST* restrict h11 = myH.data(3);
585  const ST* restrict h12 = myH.data(4);
586  const ST* restrict h22 = myH.data(5);
587 
588  const ST* restrict gh000 = mygH.data(0);
589  const ST* restrict gh001 = mygH.data(1);
590  const ST* restrict gh002 = mygH.data(2);
591  const ST* restrict gh011 = mygH.data(3);
592  const ST* restrict gh012 = mygH.data(4);
593  const ST* restrict gh022 = mygH.data(5);
594  const ST* restrict gh111 = mygH.data(6);
595  const ST* restrict gh112 = mygH.data(7);
596  const ST* restrict gh122 = mygH.data(8);
597  const ST* restrict gh222 = mygH.data(9);
598 
599 //SIMD doesn't work quite right yet. Comment out until further debugging.
600 #pragma omp simd
601  for (size_t j = first; j < last; ++j)
602  {
603  int jr = j << 1;
604  int ji = jr + 1;
605 
606  const ST kX = k0[j];
607  const ST kY = k1[j];
608  const ST kZ = k2[j];
609  const ST val_r = myV[jr];
610  const ST val_i = myV[ji];
611 
612  //phase
613  ST s, c;
614  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
615 
616  //dot(PrimLattice.G,myG[j])
617  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
618  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
619  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
620 
621  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
622  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
623  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
624 
625  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
626  const ST gX_r = dX_r + val_i * kX;
627  const ST gY_r = dY_r + val_i * kY;
628  const ST gZ_r = dZ_r + val_i * kZ;
629  const ST gX_i = dX_i - val_r * kX;
630  const ST gY_i = dY_i - val_r * kY;
631  const ST gZ_i = dZ_i - val_r * kZ;
632 
633  const size_t psiIndex = j + first_spo;
634  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
635  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
636  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
637  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
638 
639  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
640  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
641  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
642  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
643  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
644  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
645  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
646 
647  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
648  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
649  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
650  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
651  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
652  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
653 
654  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
655  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
656  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
657  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
658  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
659  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
660 
661  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
662  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
663  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
664  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
665  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
666  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
667 
668  grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
669  grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
670  grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
671  grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
672  grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
673  grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
674  grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
675  grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
676  grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
677 
678  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
679  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
680 
681  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
682  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
683  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
684  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
685  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
686  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
687  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
688  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
689  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
690  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
691  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
692  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
693  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
694  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
695  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
696  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
697  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
698  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
699  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
700  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
701 
702  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
703  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
704  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
705  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
706  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
707  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
708  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
709  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
710  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
711  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
712  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
713  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
714  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
715  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
716  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
717  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
718  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
719  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
720  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
721  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
722 
723  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
724  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
725  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
726  const ST gh_xxy_r =
727  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
728  const ST gh_xxy_i =
729  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
730  const ST gh_xxz_r =
731  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
732  const ST gh_xxz_i =
733  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
734  const ST gh_xyy_r =
735  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
736  const ST gh_xyy_i =
737  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
738  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
739  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
740  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
741  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
742  const ST gh_xzz_r =
743  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
744  const ST gh_xzz_i =
745  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
746  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
747  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
748  const ST gh_yyz_r =
749  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
750  const ST gh_yyz_i =
751  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
752  const ST gh_yzz_r =
753  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
754  const ST gh_yzz_i =
755  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
756  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
757  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
758 
759  grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
760  grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
761  grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
762  grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
763  grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
764  grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
765  grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
766  grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
767  grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
768 
769  grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
770  grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
771  grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
772  grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
773  grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
774  grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
775  grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
776  grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
777  grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
778 
779 
780  grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
781  grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
782  grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
783  grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
784  grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
785  grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
786  grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
787  grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
788  grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
789  }
790 }
791 
792 template<typename ST>
794  const int iat,
795  ValueVector& psi,
796  GradVector& dpsi,
797  HessVector& grad_grad_psi,
798  GGGVector& grad_grad_grad_psi)
799 {
800  const PointType& r = P.activeR(iat);
801  PointType ru(PrimLattice.toUnit_floor(r));
802 #pragma omp parallel
803  {
804  int first, last;
805  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
806  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
807 
808  spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
809  assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
810  }
811 }
812 
813 template class SplineC2C<float>;
814 template class SplineC2C<double>;
815 
816 } // namespace qmcplusplus
void set_spline(SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
Definition: SplineC2C.cpp:29
OrbitalSetTraits< ValueType >::HessVector HessVector
Definition: SPOSet.h:53
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
UBspline_3d_d SingleSplineType
Definition: SplineC2C.h:44
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
void storeParamsBeforeRotation() override
Store an original copy of the spline coefficients for orbital rotation.
Definition: SplineC2C.cpp:58
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
void evaluateVGL(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
evaluate the values, gradients and laplacians of this single-particle orbital set ...
Definition: SplineC2C.cpp:395
void evaluateValue(const ParticleSet &P, const int iat, ValueVector &psi) override
evaluate the values of this single-particle orbital set
Definition: SplineC2C.cpp:189
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
OrbitalSetTraits< ValueType >::ValueMatrix ValueMatrix
Definition: SPOSet.h:50
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
T t3_contract(T h000, T h001, T h002, T h011, T h012, T h022, T h111, T h112, T h122, T h222, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z, T g3x, T g3y, T g3z)
Coordinate transform for a 3rd rank symmetric tensor representing coordinate derivatives (hence t3_co...
class to handle hdf file
Definition: hdf_archive.h:51
void assign_v(ST x, ST y, ST z, TT *restrict results_scratch_ptr, const ST *restrict offload_scratch_ptr, const ST *restrict myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
T min(T a, T b)
bool write_splines(hdf_archive &h5f)
Definition: SplineC2C.cpp:49
void assign_vgl_from_l(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian ...
Definition: SplineC2C.cpp:334
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
QTBase::ValueType ValueType
Definition: Configuration.h:60
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
Definition: SplineC2C.cpp:416
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
typename BsplineSet::ValueType ComplexT
Definition: SplineC2C.h:47
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
Definition: SplineC2C.cpp:163
const PosType & activeR(int iat) const
return the active position if the particle is active or the return current position if not ...
Definition: ParticleSet.h:265
bool read_splines(hdf_archive &h5f)
Definition: SplineC2C.cpp:40
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
OHMMS_INDEXTYPE IndexType
define other types
Definition: Configuration.h:65
void evaluateVGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
evaluate the values, gradients and hessians of this single-particle orbital set
Definition: SplineC2C.cpp:536
void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) override
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
Definition: SplineC2C.cpp:206
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
Definition: SPOSet.h:55
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
void evaluateVGHGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
Definition: SplineC2C.cpp:793
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
SplineC2C(const std::string &my_name)
Definition: SplineC2C.h:84
void assign_vgl(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi, int first, int last) const
assign_vgl
Definition: SplineC2C.cpp:252
void applyRotation(const ValueMatrix &rot_mat, bool use_stored_copy) override
apply rotation to all the orbitals
Definition: SplineC2C.cpp:107
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
LatticeGaussianProduct::ValueType ValueType
static void gemm(char Atrans, char Btrans, int M, int N, int K, double alpha, const double *A, int lda, const double *restrict B, int ldb, double beta, double *restrict C, int ldc)
Definition: BLAS.hpp:235
bool readEntry(T &data, const std::string &aname)
read the data from the group aname and return status use read() for inbuilt error checking ...
Definition: hdf_archive.h:293
class to handle complex splines to complex orbitals with splines of arbitrary precision ...
class to match std::complex<ST> spline with BsplineSet::ValueType (complex) SPOs
Definition: SplineC2C.h:37
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
Definition: SplineC2C.cpp:557
bool writeEntry(T &data, const std::string &aname)
write the data to the group aname and return status use write() for inbuilt error checking ...
Definition: hdf_archive.h:244