QMCPACK
SplineC2R.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
8 // Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign
9 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 // Anouar Benali, benali@anl.gov, Argonne National Laboratory
11 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
12 //
13 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
14 //////////////////////////////////////////////////////////////////////////////////////
15 
16 
17 #include "Concurrency/OpenMP.h"
18 #include "SplineC2R.h"
19 #include "spline2/MultiBsplineEval.hpp"
21 #include "CPU/math.hpp"
23 
24 namespace qmcplusplus
25 {
26 template<typename ST>
27 SplineC2R<ST>::SplineC2R(const SplineC2R& in) = default;
28 
29 template<typename ST>
31  SingleSplineType* spline_i,
32  int twist,
33  int ispline,
34  int level)
35 {
36  SplineInst->copy_spline(spline_r, 2 * ispline);
37  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
38 }
39 
40 template<typename ST>
42 {
43  std::ostringstream o;
44  o << "spline_" << MyIndex;
45  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
46  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
47 }
48 
49 template<typename ST>
51 {
52  std::ostringstream o;
53  o << "spline_" << MyIndex;
54  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
55  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
56 }
57 
58 template<typename ST>
59 inline void SplineC2R<ST>::assign_v(const PointType& r,
60  const vContainer_type& myV,
61  ValueVector& psi,
62  int first,
63  int last) const
64 {
65  // protect last
66  last = last > kPoints.size() ? kPoints.size() : last;
67 
68  const ST x = r[0], y = r[1], z = r[2];
69  const ST* restrict kx = myKcart.data(0);
70  const ST* restrict ky = myKcart.data(1);
71  const ST* restrict kz = myKcart.data(2);
72 
73  TT* restrict psi_s = psi.data() + first_spo;
74  const size_t requested_orb_size = psi.size();
75 #pragma omp simd
76  for (size_t j = first; j < std::min(nComplexBands, last); j++)
77  {
78  ST s, c;
79  const size_t jr = j << 1;
80  const size_t ji = jr + 1;
81  const ST val_r = myV[jr];
82  const ST val_i = myV[ji];
83  qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
84  if (jr < requested_orb_size)
85  psi_s[jr] = val_r * c - val_i * s;
86  if (ji < requested_orb_size)
87  psi_s[ji] = val_i * c + val_r * s;
88  }
89 
90  psi_s += nComplexBands;
91 #pragma omp simd
92  for (size_t j = std::max(nComplexBands, first); j < last; j++)
93  {
94  ST s, c;
95  const ST val_r = myV[2 * j];
96  const ST val_i = myV[2 * j + 1];
97  qmcplusplus::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
98  if (j + nComplexBands < requested_orb_size)
99  psi_s[j] = val_r * c - val_i * s;
100  }
101 }
102 
103 template<typename ST>
104 void SplineC2R<ST>::evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi)
105 {
106  const PointType& r = P.activeR(iat);
107  PointType ru(PrimLattice.toUnit_floor(r));
108 
109 #pragma omp parallel
110  {
111  int first, last;
112  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
113 
114  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
115  assign_v(r, myV, psi, first / 2, last / 2);
116  }
117 }
118 
119 template<typename ST>
121  ValueVector& psi,
122  const ValueVector& psiinv,
123  std::vector<TT>& ratios)
124 {
125  const bool need_resize = ratios_private.rows() < VP.getTotalNum();
126 
127 #pragma omp parallel
128  {
129  int tid = omp_get_thread_num();
130  // initialize thread private ratios
131  if (need_resize)
132  {
133  if (tid == 0) // just like #pragma omp master, but one fewer call to the runtime
134  ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
135 #pragma omp barrier
136  }
137  int first, last;
138  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), tid, first, last);
139  const int first_cplx = first / 2;
140  const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2;
141 
142  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
143  {
144  const PointType& r = VP.activeR(iat);
145  PointType ru(PrimLattice.toUnit_floor(r));
146 
147  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
148  assign_v(r, myV, psi, first_cplx, last_cplx);
149 
150  const int first_real = first_cplx + std::min(nComplexBands, first_cplx);
151  const int last_real = last_cplx + std::min(nComplexBands, last_cplx);
152  ratios_private[iat][tid] = simd::dot(psi.data() + first_real, psiinv.data() + first_real, last_real - first_real);
153  }
154  }
155 
156  // do the reduction manually
157  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
158  {
159  ratios[iat] = TT(0);
160  for (int tid = 0; tid < ratios_private.cols(); tid++)
161  ratios[iat] += ratios_private[iat][tid];
162  }
163 }
164 
165 /** assign_vgl
166  */
167 template<typename ST>
169  ValueVector& psi,
170  GradVector& dpsi,
171  ValueVector& d2psi,
172  int first,
173  int last) const
174 {
175  // protect last
176  last = last > kPoints.size() ? kPoints.size() : last;
177 
178  constexpr ST two(2);
179  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
180  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
181  g22 = PrimLattice.G(8);
182  const ST x = r[0], y = r[1], z = r[2];
183  const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]};
184 
185  const ST* restrict k0 = myKcart.data(0);
186  ASSUME_ALIGNED(k0);
187  const ST* restrict k1 = myKcart.data(1);
188  ASSUME_ALIGNED(k1);
189  const ST* restrict k2 = myKcart.data(2);
190  ASSUME_ALIGNED(k2);
191 
192  const ST* restrict g0 = myG.data(0);
193  ASSUME_ALIGNED(g0);
194  const ST* restrict g1 = myG.data(1);
195  ASSUME_ALIGNED(g1);
196  const ST* restrict g2 = myG.data(2);
197  ASSUME_ALIGNED(g2);
198  const ST* restrict h00 = myH.data(0);
199  ASSUME_ALIGNED(h00);
200  const ST* restrict h01 = myH.data(1);
201  ASSUME_ALIGNED(h01);
202  const ST* restrict h02 = myH.data(2);
203  ASSUME_ALIGNED(h02);
204  const ST* restrict h11 = myH.data(3);
205  ASSUME_ALIGNED(h11);
206  const ST* restrict h12 = myH.data(4);
207  ASSUME_ALIGNED(h12);
208  const ST* restrict h22 = myH.data(5);
209  ASSUME_ALIGNED(h22);
210 
211  const size_t requested_orb_size = psi.size();
212 #pragma omp simd
213  for (size_t j = first; j < std::min(nComplexBands, last); j++)
214  {
215  const size_t jr = j << 1;
216  const size_t ji = jr + 1;
217 
218  const ST kX = k0[j];
219  const ST kY = k1[j];
220  const ST kZ = k2[j];
221  const ST val_r = myV[jr];
222  const ST val_i = myV[ji];
223 
224  //phase
225  ST s, c;
226  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
227 
228  //dot(PrimLattice.G,myG[j])
229  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
230  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
231  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
232 
233  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
234  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
235  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
236 
237  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
238  const ST gX_r = dX_r + val_i * kX;
239  const ST gY_r = dY_r + val_i * kY;
240  const ST gZ_r = dZ_r + val_i * kZ;
241  const ST gX_i = dX_i - val_r * kX;
242  const ST gY_i = dY_i - val_r * kY;
243  const ST gZ_i = dZ_i - val_r * kZ;
244 
245  const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
246  const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
247  const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
248  const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
249 
250  const size_t psiIndex = first_spo + jr;
251  if (psiIndex < requested_orb_size)
252  {
253  psi[psiIndex] = c * val_r - s * val_i;
254  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
255  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
256  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
257  d2psi[psiIndex] = c * lap_r - s * lap_i;
258  }
259  if (psiIndex + 1 < requested_orb_size)
260  {
261  psi[psiIndex + 1] = c * val_i + s * val_r;
262  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
263  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
264  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
265  d2psi[psiIndex + 1] = c * lap_i + s * lap_r;
266  }
267  }
268 
269 #pragma omp simd
270  for (size_t j = std::max(nComplexBands, first); j < last; j++)
271  {
272  const size_t jr = j << 1;
273  const size_t ji = jr + 1;
274 
275  const ST kX = k0[j];
276  const ST kY = k1[j];
277  const ST kZ = k2[j];
278  const ST val_r = myV[jr];
279  const ST val_i = myV[ji];
280 
281  //phase
282  ST s, c;
283  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
284 
285  //dot(PrimLattice.G,myG[j])
286  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
287  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
288  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
289 
290  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
291  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
292  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
293 
294  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
295  const ST gX_r = dX_r + val_i * kX;
296  const ST gY_r = dY_r + val_i * kY;
297  const ST gZ_r = dZ_r + val_i * kZ;
298  const ST gX_i = dX_i - val_r * kX;
299  const ST gY_i = dY_i - val_r * kY;
300  const ST gZ_i = dZ_i - val_r * kZ;
301 
302  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
303  {
304  psi[psiIndex] = c * val_r - s * val_i;
305  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
306  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
307  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
308 
309  const ST lcart_r = SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
310  const ST lcart_i = SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
311  const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
312  const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
313  d2psi[psiIndex] = c * lap_r - s * lap_i;
314  }
315  }
316 }
317 
318 /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
319  */
320 template<typename ST>
322 {
323  constexpr ST two(2);
324  const ST x = r[0], y = r[1], z = r[2];
325 
326  const ST* restrict k0 = myKcart.data(0);
327  ASSUME_ALIGNED(k0);
328  const ST* restrict k1 = myKcart.data(1);
329  ASSUME_ALIGNED(k1);
330  const ST* restrict k2 = myKcart.data(2);
331  ASSUME_ALIGNED(k2);
332 
333  const ST* restrict g0 = myG.data(0);
334  ASSUME_ALIGNED(g0);
335  const ST* restrict g1 = myG.data(1);
336  ASSUME_ALIGNED(g1);
337  const ST* restrict g2 = myG.data(2);
338  ASSUME_ALIGNED(g2);
339 
340  const size_t N = kPoints.size();
341 
342  const size_t requested_orb_size = psi.size();
343 #pragma omp simd
344  for (size_t j = 0; j < nComplexBands; j++)
345  {
346  const size_t jr = j << 1;
347  const size_t ji = jr + 1;
348 
349  const ST kX = k0[j];
350  const ST kY = k1[j];
351  const ST kZ = k2[j];
352  const ST val_r = myV[jr];
353  const ST val_i = myV[ji];
354 
355  //phase
356  ST s, c;
357  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
358 
359  //dot(PrimLattice.G,myG[j])
360  const ST dX_r = g0[jr];
361  const ST dY_r = g1[jr];
362  const ST dZ_r = g2[jr];
363 
364  const ST dX_i = g0[ji];
365  const ST dY_i = g1[ji];
366  const ST dZ_i = g2[ji];
367 
368  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
369  const ST gX_r = dX_r + val_i * kX;
370  const ST gY_r = dY_r + val_i * kY;
371  const ST gZ_r = dZ_r + val_i * kZ;
372  const ST gX_i = dX_i - val_r * kX;
373  const ST gY_i = dY_i - val_r * kY;
374  const ST gZ_i = dZ_i - val_r * kZ;
375 
376  const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
377  const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
378 
379  const size_t psiIndex = first_spo + jr;
380  if (psiIndex < requested_orb_size)
381  {
382  psi[psiIndex] = c * val_r - s * val_i;
383  d2psi[psiIndex] = c * lap_r - s * lap_i;
384  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
385  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
386  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
387  }
388  if (psiIndex + 1 < requested_orb_size)
389  {
390  psi[psiIndex + 1] = c * val_i + s * val_r;
391  d2psi[psiIndex + 1] = c * lap_i + s * lap_r;
392  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
393  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
394  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
395  }
396  }
397 
398 #pragma omp simd
399  for (size_t j = nComplexBands; j < N; j++)
400  {
401  const size_t jr = j << 1;
402  const size_t ji = jr + 1;
403 
404  const ST kX = k0[j];
405  const ST kY = k1[j];
406  const ST kZ = k2[j];
407  const ST val_r = myV[jr];
408  const ST val_i = myV[ji];
409 
410  //phase
411  ST s, c;
412  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
413 
414  //dot(PrimLattice.G,myG[j])
415  const ST dX_r = g0[jr];
416  const ST dY_r = g1[jr];
417  const ST dZ_r = g2[jr];
418 
419  const ST dX_i = g0[ji];
420  const ST dY_i = g1[ji];
421  const ST dZ_i = g2[ji];
422 
423  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
424  const ST gX_r = dX_r + val_i * kX;
425  const ST gY_r = dY_r + val_i * kY;
426  const ST gZ_r = dZ_r + val_i * kZ;
427  const ST gX_i = dX_i - val_r * kX;
428  const ST gY_i = dY_i - val_r * kY;
429  const ST gZ_i = dZ_i - val_r * kZ;
430  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
431  {
432  psi[psiIndex] = c * val_r - s * val_i;
433  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
434  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
435  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
436 
437  const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
438  const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
439  d2psi[psiIndex] = c * lap_r - s * lap_i;
440  }
441  }
442 }
443 
444 template<typename ST>
446  const int iat,
447  ValueVector& psi,
448  GradVector& dpsi,
449  ValueVector& d2psi)
450 {
451  const PointType& r = P.activeR(iat);
452  PointType ru(PrimLattice.toUnit_floor(r));
453 
454 #pragma omp parallel
455  {
456  int first, last;
457  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
458 
459  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
460  assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2);
461  }
462 }
463 
464 template<typename ST>
466  ValueVector& psi,
467  GradVector& dpsi,
468  HessVector& grad_grad_psi,
469  int first,
470  int last) const
471 {
472  // protect last
473  last = last > kPoints.size() ? kPoints.size() : last;
474 
475  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
476  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
477  g22 = PrimLattice.G(8);
478  const ST x = r[0], y = r[1], z = r[2];
479 
480  const ST* restrict k0 = myKcart.data(0);
481  const ST* restrict k1 = myKcart.data(1);
482  const ST* restrict k2 = myKcart.data(2);
483 
484  const ST* restrict g0 = myG.data(0);
485  const ST* restrict g1 = myG.data(1);
486  const ST* restrict g2 = myG.data(2);
487  const ST* restrict h00 = myH.data(0);
488  const ST* restrict h01 = myH.data(1);
489  const ST* restrict h02 = myH.data(2);
490  const ST* restrict h11 = myH.data(3);
491  const ST* restrict h12 = myH.data(4);
492  const ST* restrict h22 = myH.data(5);
493 
494  const size_t requested_orb_size = psi.size();
495 #pragma omp simd
496  for (size_t j = first; j < std::min(nComplexBands, last); j++)
497  {
498  int jr = j << 1;
499  int ji = jr + 1;
500 
501  const ST kX = k0[j];
502  const ST kY = k1[j];
503  const ST kZ = k2[j];
504  const ST val_r = myV[jr];
505  const ST val_i = myV[ji];
506 
507  //phase
508  ST s, c;
509  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
510 
511  //dot(PrimLattice.G,myG[j])
512  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
513  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
514  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
515 
516  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
517  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
518  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
519 
520  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
521  const ST gX_r = dX_r + val_i * kX;
522  const ST gY_r = dY_r + val_i * kY;
523  const ST gZ_r = dZ_r + val_i * kZ;
524  const ST gX_i = dX_i - val_r * kX;
525  const ST gY_i = dY_i - val_r * kY;
526  const ST gZ_i = dZ_i - val_r * kZ;
527 
528  const size_t psiIndex = first_spo + jr;
529  if (psiIndex < requested_orb_size)
530  {
531  psi[psiIndex] = c * val_r - s * val_i;
532  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
533  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
534  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
535  }
536  if (psiIndex + 1 < requested_orb_size)
537  {
538  psi[psiIndex + 1] = c * val_i + s * val_r;
539  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
540  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
541  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
542  }
543 
544  const ST h_xx_r =
545  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
546  const ST h_xy_r =
547  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
548  const ST h_xz_r =
549  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
550  const ST h_yx_r =
551  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
552  const ST h_yy_r =
553  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
554  const ST h_yz_r =
555  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
556  const ST h_zx_r =
557  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
558  const ST h_zy_r =
559  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
560  const ST h_zz_r =
561  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
562 
563  const ST h_xx_i =
564  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
565  const ST h_xy_i =
566  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
567  const ST h_xz_i =
568  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
569  const ST h_yx_i =
570  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
571  const ST h_yy_i =
572  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
573  const ST h_yz_i =
574  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
575  const ST h_zx_i =
576  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
577  const ST h_zy_i =
578  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
579  const ST h_zz_i =
580  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
581 
582  if (psiIndex < requested_orb_size)
583  {
584  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
585  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
586  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
587  grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
588  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
589  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
590  grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
591  grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
592  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
593  }
594  if (psiIndex + 1 < requested_orb_size)
595  {
596  grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
597  grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
598  grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
599  grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r;
600  grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
601  grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
602  grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r;
603  grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r;
604  grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
605  }
606  }
607 
608 #pragma omp simd
609  for (size_t j = std::max(nComplexBands, first); j < last; j++)
610  {
611  int jr = j << 1;
612  int ji = jr + 1;
613 
614  const ST kX = k0[j];
615  const ST kY = k1[j];
616  const ST kZ = k2[j];
617  const ST val_r = myV[jr];
618  const ST val_i = myV[ji];
619 
620  //phase
621  ST s, c;
622  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
623 
624  //dot(PrimLattice.G,myG[j])
625  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
626  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
627  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
628 
629  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
630  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
631  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
632 
633  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
634  const ST gX_r = dX_r + val_i * kX;
635  const ST gY_r = dY_r + val_i * kY;
636  const ST gZ_r = dZ_r + val_i * kZ;
637  const ST gX_i = dX_i - val_r * kX;
638  const ST gY_i = dY_i - val_r * kY;
639  const ST gZ_i = dZ_i - val_r * kZ;
640 
641  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
642  {
643  psi[psiIndex] = c * val_r - s * val_i;
644  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
645  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
646  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
647 
648  const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) +
649  kX * (gX_i + dX_i);
650  const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) +
651  kX * (gY_i + dY_i);
652  const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) +
653  kX * (gZ_i + dZ_i);
654  const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) +
655  kY * (gX_i + dX_i);
656  const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) +
657  kY * (gY_i + dY_i);
658  const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) +
659  kY * (gZ_i + dZ_i);
660  const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) +
661  kZ * (gX_i + dX_i);
662  const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) +
663  kZ * (gY_i + dY_i);
664  const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) +
665  kZ * (gZ_i + dZ_i);
666 
667  const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) -
668  kX * (gX_r + dX_r);
669  const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) -
670  kX * (gY_r + dY_r);
671  const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) -
672  kX * (gZ_r + dZ_r);
673  const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) -
674  kY * (gX_r + dX_r);
675  const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) -
676  kY * (gY_r + dY_r);
677  const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) -
678  kY * (gZ_r + dZ_r);
679  const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) -
680  kZ * (gX_r + dX_r);
681  const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) -
682  kZ * (gY_r + dY_r);
683  const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) -
684  kZ * (gZ_r + dZ_r);
685 
686  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
687  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
688  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
689  grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
690  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
691  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
692  grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
693  grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
694  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
695  }
696  }
697 }
698 
699 template<typename ST>
701  const int iat,
702  ValueVector& psi,
703  GradVector& dpsi,
704  HessVector& grad_grad_psi)
705 {
706  const PointType& r = P.activeR(iat);
707  PointType ru(PrimLattice.toUnit_floor(r));
708 #pragma omp parallel
709  {
710  int first, last;
711  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
712 
713  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
714  assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
715  }
716 }
717 
718 template<typename ST>
720  ValueVector& psi,
721  GradVector& dpsi,
722  HessVector& grad_grad_psi,
723  GGGVector& grad_grad_grad_psi,
724  int first,
725  int last) const
726 {
727  // protect last
728  last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last);
729 
730  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
731  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
732  g22 = PrimLattice.G(8);
733  const ST x = r[0], y = r[1], z = r[2];
734 
735  const ST* restrict k0 = myKcart.data(0);
736  const ST* restrict k1 = myKcart.data(1);
737  const ST* restrict k2 = myKcart.data(2);
738 
739  const ST* restrict g0 = myG.data(0);
740  const ST* restrict g1 = myG.data(1);
741  const ST* restrict g2 = myG.data(2);
742  const ST* restrict h00 = myH.data(0);
743  const ST* restrict h01 = myH.data(1);
744  const ST* restrict h02 = myH.data(2);
745  const ST* restrict h11 = myH.data(3);
746  const ST* restrict h12 = myH.data(4);
747  const ST* restrict h22 = myH.data(5);
748 
749  const ST* restrict gh000 = mygH.data(0);
750  const ST* restrict gh001 = mygH.data(1);
751  const ST* restrict gh002 = mygH.data(2);
752  const ST* restrict gh011 = mygH.data(3);
753  const ST* restrict gh012 = mygH.data(4);
754  const ST* restrict gh022 = mygH.data(5);
755  const ST* restrict gh111 = mygH.data(6);
756  const ST* restrict gh112 = mygH.data(7);
757  const ST* restrict gh122 = mygH.data(8);
758  const ST* restrict gh222 = mygH.data(9);
759 
760  const size_t requested_orb_size = psi.size();
761 //SIMD doesn't work quite right yet. Comment out until further debugging.
762 #pragma omp simd
763  for (size_t j = first; j < std::min(nComplexBands, last); j++)
764  {
765  int jr = j << 1;
766  int ji = jr + 1;
767 
768  const ST kX = k0[j];
769  const ST kY = k1[j];
770  const ST kZ = k2[j];
771  const ST val_r = myV[jr];
772  const ST val_i = myV[ji];
773 
774  //phase
775  ST s, c;
776  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
777 
778  //dot(PrimLattice.G,myG[j])
779  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
780  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
781  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
782 
783  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
784  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
785  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
786 
787  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
788  const ST gX_r = dX_r + val_i * kX;
789  const ST gY_r = dY_r + val_i * kY;
790  const ST gZ_r = dZ_r + val_i * kZ;
791  const ST gX_i = dX_i - val_r * kX;
792  const ST gY_i = dY_i - val_r * kY;
793  const ST gZ_i = dZ_i - val_r * kZ;
794 
795  const size_t psiIndex = first_spo + jr;
796  if (psiIndex < requested_orb_size)
797  {
798  psi[psiIndex] = c * val_r - s * val_i;
799  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
800  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
801  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
802  }
803  if (psiIndex + 1 < requested_orb_size)
804  {
805  psi[psiIndex + 1] = c * val_i + s * val_r;
806  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
807  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
808  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
809  }
810 
811  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
812  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
813  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
814  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
815  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
816  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
817  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
818 
819  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
820  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
821  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
822  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
823  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
824  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
825 
826  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
827  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
828  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
829  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
830  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
831  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
832 
833  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
834  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
835  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
836  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
837  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
838  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
839 
840  if (psiIndex < requested_orb_size)
841  {
842  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
843  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
844  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
845  grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
846  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
847  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
848  grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
849  grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
850  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
851  }
852 
853  if (psiIndex + 1 < requested_orb_size)
854  {
855  grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
856  grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
857  grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
858  grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r;
859  grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
860  grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
861  grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r;
862  grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r;
863  grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
864  }
865 
866  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
867  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
868 
869  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
870  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
871  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
872  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
873  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
874  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
875  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
876  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
877  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
878  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
879  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
880  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
881  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
882  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
883  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
884  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
885  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
886  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
887  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
888  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
889 
890  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
891  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
892  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
893  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
894  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
895  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
896  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
897  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
898  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
899  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
900  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
901  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
902  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
903  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
904  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
905  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
906  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
907  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
908  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
909  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
910 
911  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
912  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
913  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
914  const ST gh_xxy_r =
915  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
916  const ST gh_xxy_i =
917  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
918  const ST gh_xxz_r =
919  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
920  const ST gh_xxz_i =
921  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
922  const ST gh_xyy_r =
923  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
924  const ST gh_xyy_i =
925  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
926  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
927  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
928  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
929  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
930  const ST gh_xzz_r =
931  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
932  const ST gh_xzz_i =
933  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
934  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
935  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
936  const ST gh_yyz_r =
937  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
938  const ST gh_yyz_i =
939  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
940  const ST gh_yzz_r =
941  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
942  const ST gh_yzz_i =
943  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
944  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
945  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
946 
947  if (psiIndex < requested_orb_size)
948  {
949  grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
950  grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
951  grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
952  grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
953  grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
954  grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
955  grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
956  grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
957  grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
958 
959  grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
960  grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
961  grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
962  grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
963  grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
964  grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
965  grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
966  grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
967  grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
968 
969  grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
970  grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
971  grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
972  grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
973  grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
974  grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
975  grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
976  grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
977  grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
978  }
979 
980  if (psiIndex + 1 < requested_orb_size)
981  {
982  grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r;
983  grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r;
984  grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r;
985  grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r;
986  grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r;
987  grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r;
988  grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r;
989  grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r;
990  grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r;
991 
992  grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r;
993  grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r;
994  grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r;
995  grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r;
996  grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r;
997  grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r;
998  grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r;
999  grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r;
1000  grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r;
1001 
1002  grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r;
1003  grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r;
1004  grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r;
1005  grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r;
1006  grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r;
1007  grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r;
1008  grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r;
1009  grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r;
1010  grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r;
1011  }
1012  }
1013 #pragma omp simd
1014  for (size_t j = std::max(nComplexBands, first); j < last; j++)
1015  {
1016  int jr = j << 1;
1017  int ji = jr + 1;
1018 
1019  const ST kX = k0[j];
1020  const ST kY = k1[j];
1021  const ST kZ = k2[j];
1022  const ST val_r = myV[jr];
1023  const ST val_i = myV[ji];
1024 
1025  //phase
1026  ST s, c;
1027  qmcplusplus::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
1028 
1029  //dot(PrimLattice.G,myG[j])
1030  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
1031  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
1032  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
1033 
1034  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
1035  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
1036  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
1037 
1038  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
1039  const ST gX_r = dX_r + val_i * kX;
1040  const ST gY_r = dY_r + val_i * kY;
1041  const ST gZ_r = dZ_r + val_i * kZ;
1042  const ST gX_i = dX_i - val_r * kX;
1043  const ST gY_i = dY_i - val_r * kY;
1044  const ST gZ_i = dZ_i - val_r * kZ;
1045 
1046  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
1047  {
1048  psi[psiIndex] = c * val_r - s * val_i;
1049  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
1050  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
1051  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
1052 
1053  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
1054  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
1055  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
1056  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
1057  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
1058  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
1059  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
1060 
1061  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
1062  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
1063  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
1064  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
1065  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
1066  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
1067 
1068  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
1069  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
1070  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
1071  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
1072  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
1073  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
1074 
1075  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
1076  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
1077  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
1078  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
1079  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
1080  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
1081 
1082  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
1083  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
1084  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
1085  grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
1086  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
1087  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
1088  grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
1089  grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
1090  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
1091 
1092  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
1093  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
1094 
1095  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1096  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1097  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1098  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1099  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1100  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1101  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1102  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1103  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1104  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1105  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1106  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1107  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1108  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1109  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1110  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1111  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1112  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1113  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1114  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1115 
1116  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1117  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1118  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1119  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1120  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1121  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1122  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1123  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1124  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1125  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1126  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1127  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1128  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1129  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1130  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1131  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1132  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1133  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1134  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1135  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1136 
1137  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
1138  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
1139  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
1140  const ST gh_xxy_r =
1141  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
1142  const ST gh_xxy_i =
1143  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
1144  const ST gh_xxz_r =
1145  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
1146  const ST gh_xxz_i =
1147  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
1148  const ST gh_xyy_r =
1149  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
1150  const ST gh_xyy_i =
1151  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
1152  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
1153  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
1154  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
1155  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
1156  const ST gh_xzz_r =
1157  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
1158  const ST gh_xzz_i =
1159  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
1160  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
1161  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
1162  const ST gh_yyz_r =
1163  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
1164  const ST gh_yyz_i =
1165  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
1166  const ST gh_yzz_r =
1167  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
1168  const ST gh_yzz_i =
1169  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
1170  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
1171  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
1172  //[x][xx] //These are the unique entries
1173  grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
1174  grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
1175  grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
1176  grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
1177  grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
1178  grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
1179  grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
1180  grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
1181  grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
1182 
1183  grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
1184  grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
1185  grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
1186  grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
1187  grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
1188  grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
1189  grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
1190  grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
1191  grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
1192 
1193  grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
1194  grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
1195  grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
1196  grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
1197  grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
1198  grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
1199  grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
1200  grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
1201  grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
1202  }
1203  }
1204 }
1205 
1206 template<typename ST>
1208  const int iat,
1209  ValueVector& psi,
1210  GradVector& dpsi,
1211  HessVector& grad_grad_psi,
1212  GGGVector& grad_grad_grad_psi)
1213 {
1214  const PointType& r = P.activeR(iat);
1215  PointType ru(PrimLattice.toUnit_floor(r));
1216 #pragma omp parallel
1217  {
1218  int first, last;
1219  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
1220 
1221  spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
1222  assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
1223  }
1224 }
1225 
1226 template class SplineC2R<float>;
1227 template class SplineC2R<double>;
1228 
1229 } // namespace qmcplusplus
OrbitalSetTraits< ValueType >::HessVector HessVector
Definition: SPOSet.h:53
class to match std::complex<ST> spline with BsplineSet::ValueType (real) SPOs
Definition: SplineC2R.h:42
bool read_splines(hdf_archive &h5f)
Definition: SplineC2R.cpp:41
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
void evaluateVGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
evaluate the values, gradients and hessians of this single-particle orbital set
Definition: SplineC2R.cpp:700
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
SplineC2R(const std::string &my_name)
Definition: SplineC2R.h:87
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
Definition: SplineC2R.cpp:719
void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< TT > &ratios) override
Definition: SplineC2R.cpp:120
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
void assign_vgl_from_l(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian ...
Definition: SplineC2R.cpp:321
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
Definition: SplineC2R.cpp:59
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
T t3_contract(T h000, T h001, T h002, T h011, T h012, T h022, T h111, T h112, T h122, T h222, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z, T g3x, T g3y, T g3z)
Coordinate transform for a 3rd rank symmetric tensor representing coordinate derivatives (hence t3_co...
class to handle hdf file
Definition: hdf_archive.h:51
void assign_v(ST x, ST y, ST z, TT *restrict results_scratch_ptr, const ST *restrict offload_scratch_ptr, const ST *restrict myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
Definition: SplineC2R.cpp:465
T min(T a, T b)
void evaluateVGHGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
Definition: SplineC2R.cpp:1207
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
class to handle complex splines to real orbitals with splines of arbitrary precision ...
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
bool write_splines(hdf_archive &h5f)
Definition: SplineC2R.cpp:50
UBspline_3d_d SingleSplineType
Definition: SplineC2R.h:49
const PosType & activeR(int iat) const
return the active position if the particle is active or the return current position if not ...
Definition: ParticleSet.h:265
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
Definition: SPOSet.h:55
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
void set_spline(SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
Definition: SplineC2R.cpp:30
#define ASSUME_ALIGNED(x)
void evaluateVGL(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
evaluate the values, gradients and laplacians of this single-particle orbital set ...
Definition: SplineC2R.cpp:445
bool readEntry(T &data, const std::string &aname)
read the data from the group aname and return status use read() for inbuilt error checking ...
Definition: hdf_archive.h:293
void evaluateValue(const ParticleSet &P, const int iat, ValueVector &psi) override
evaluate the values of this single-particle orbital set
Definition: SplineC2R.cpp:104
void assign_vgl(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi, int first, int last) const
assign_vgl
Definition: SplineC2R.cpp:168
bool writeEntry(T &data, const std::string &aname)
write the data to the group aname and return status use write() for inbuilt error checking ...
Definition: hdf_archive.h:244
typename BsplineSet::ValueType TT
Definition: SplineC2R.h:51