17 #include "spline2/MultiBsplineEval.hpp" 35 SplineInst->copy_spline(spline_r, 2 * ispline);
36 SplineInst->copy_spline(spline_i, 2 * ispline + 1);
43 o <<
"spline_" << MyIndex;
44 einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
45 return h5f.
readEntry(bigtable, o.str().c_str());
52 o <<
"spline_" << MyIndex;
53 einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
54 return h5f.
writeEntry(bigtable, o.str().c_str());
60 const auto spline_ptr = SplineInst->getSplinePtr();
61 const auto coefs_tot_size = spline_ptr->coefs_size;
62 coef_copy_ = std::make_shared<std::vector<ST>>(coefs_tot_size);
64 std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
106 template<
typename ST>
110 const auto spline_ptr = SplineInst->getSplinePtr();
111 assert(spline_ptr !=
nullptr);
112 const auto spl_coefs = spline_ptr->coefs;
113 const auto Nsplines = spline_ptr->num_splines;
114 const auto coefs_tot_size = spline_ptr->coefs_size;
115 const auto basis_set_size = coefs_tot_size / Nsplines;
116 assert(OrbitalSetSize == rot_mat.rows());
117 assert(OrbitalSetSize == rot_mat.cols());
119 if (!use_stored_copy)
121 assert(coef_copy_ !=
nullptr);
122 std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
125 if constexpr (std::is_same_v<ST, RealType>)
130 BLAS::gemm(
'N',
'N', OrbitalSetSize, basis_set_size, OrbitalSetSize,
ValueType(1.0, 0.0), rot_mat.data(),
138 for (
IndexType i = 0; i < basis_set_size; i++)
139 for (
IndexType j = 0; j < OrbitalSetSize; j++)
143 const auto cur_elem = Nsplines * i + 2 * j;
146 for (
IndexType k = 0; k < OrbitalSetSize; k++)
148 const auto index = Nsplines * i + 2 * k;
149 ST zr = (*coef_copy_)[index];
150 ST zi = (*coef_copy_)[index + 1];
151 ST wr = rot_mat[k][j].real();
152 ST wi = rot_mat[k][j].imag();
153 newval_r += zr * wr - zi * wi;
154 newval_i += zr * wi + zi * wr;
156 spl_coefs[cur_elem] = newval_r;
157 spl_coefs[cur_elem + 1] = newval_i;
162 template<
typename ST>
170 const size_t last_cplx =
std::min(kPoints.size(), psi.size());
171 last = last > last_cplx ? last_cplx : last;
173 const ST x = r[0], y = r[1], z = r[2];
174 const ST* restrict kx = myKcart.
data(0);
175 const ST* restrict ky = myKcart.data(1);
176 const ST* restrict kz = myKcart.data(2);
178 for (
size_t j = first; j < last; ++j)
181 const ST val_r = myV[2 * j];
182 const ST val_i = myV[2 * j + 1];
184 psi[j + first_spo] =
ComplexT(val_r * c - val_i *
s, val_i * c + val_r *
s);
188 template<
typename ST>
192 PointType ru(PrimLattice.toUnit_floor(r));
200 spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
201 assign_v(r, myV, psi, first / 2, last / 2);
205 template<
typename ST>
209 std::vector<ValueType>& ratios)
211 const bool need_resize = ratios_private.rows() < VP.
getTotalNum();
226 const int first_cplx = first / 2;
227 const int last_cplx = kPoints.size() < last / 2 ? kPoints.size() : last / 2;
232 PointType ru(PrimLattice.toUnit_floor(r));
234 spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
235 assign_v(r, myV, psi, first_cplx, last_cplx);
236 ratios_private[iat][tid] =
simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx);
244 for (
int tid = 0; tid < ratios_private.cols(); tid++)
245 ratios[iat] += ratios_private[iat][tid];
251 template<
typename ST>
260 const int last_cplx =
std::min(kPoints.size(), psi.size());
261 last = last > last_cplx ? last_cplx : last;
263 constexpr ST zero(0);
265 const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
266 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
267 g22 = PrimLattice.G(8);
268 const ST x = r[0], y = r[1], z = r[2];
269 const ST symGG[6] = {GGt[0], GGt[1] + GGt[3], GGt[2] + GGt[6], GGt[4], GGt[5] + GGt[7], GGt[8]};
271 const ST* restrict k0 = myKcart.
data(0);
272 const ST* restrict k1 = myKcart.data(1);
273 const ST* restrict k2 = myKcart.data(2);
275 const ST* restrict g0 = myG.data(0);
276 const ST* restrict g1 = myG.data(1);
277 const ST* restrict g2 = myG.data(2);
278 const ST* restrict h00 = myH.data(0);
279 const ST* restrict h01 = myH.data(1);
280 const ST* restrict h02 = myH.data(2);
281 const ST* restrict h11 = myH.data(3);
282 const ST* restrict h12 = myH.data(4);
283 const ST* restrict h22 = myH.data(5);
286 for (
size_t j = first; j < last; ++j)
288 const size_t jr = j << 1;
289 const size_t ji = jr + 1;
294 const ST val_r = myV[jr];
295 const ST val_i = myV[ji];
302 const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
303 const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
304 const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
306 const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
307 const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
308 const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
311 const ST gX_r = dX_r + val_i * kX;
312 const ST gY_r = dY_r + val_i * kY;
313 const ST gZ_r = dZ_r + val_i * kZ;
314 const ST gX_i = dX_i - val_r * kX;
315 const ST gY_i = dY_i - val_r * kY;
316 const ST gZ_i = dZ_i - val_r * kZ;
318 const ST lcart_r =
SymTrace(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], symGG);
319 const ST lcart_i =
SymTrace(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], symGG);
320 const ST lap_r = lcart_r + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
321 const ST lap_i = lcart_i + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
322 const size_t psiIndex = j + first_spo;
323 psi[psiIndex] =
ComplexT(c * val_r -
s * val_i, c * val_i +
s * val_r);
324 dpsi[psiIndex][0] =
ComplexT(c * gX_r -
s * gX_i, c * gX_i +
s * gX_r);
325 dpsi[psiIndex][1] =
ComplexT(c * gY_r -
s * gY_i, c * gY_i +
s * gY_r);
326 dpsi[psiIndex][2] =
ComplexT(c * gZ_r -
s * gZ_i, c * gZ_i +
s * gZ_r);
327 d2psi[psiIndex] =
ComplexT(c * lap_r -
s * lap_i, c * lap_i +
s * lap_r);
333 template<
typename ST>
337 const ST x = r[0], y = r[1], z = r[2];
339 const ST* restrict k0 = myKcart.
data(0);
340 const ST* restrict k1 = myKcart.data(1);
341 const ST* restrict k2 = myKcart.data(2);
343 const ST* restrict g0 = myG.data(0);
344 const ST* restrict g1 = myG.data(1);
345 const ST* restrict g2 = myG.data(2);
347 const size_t last_cplx = last_spo > psi.size() ? psi.size() : last_spo;
348 const size_t N = last_cplx - first_spo;
350 for (
size_t j = 0; j <
N; ++j)
352 const size_t jr = j << 1;
353 const size_t ji = jr + 1;
358 const ST val_r = myV[jr];
359 const ST val_i = myV[ji];
366 const ST dX_r = g0[jr];
367 const ST dY_r = g1[jr];
368 const ST dZ_r = g2[jr];
370 const ST dX_i = g0[ji];
371 const ST dY_i = g1[ji];
372 const ST dZ_i = g2[ji];
375 const ST gX_r = dX_r + val_i * kX;
376 const ST gY_r = dY_r + val_i * kY;
377 const ST gZ_r = dZ_r + val_i * kZ;
378 const ST gX_i = dX_i - val_r * kX;
379 const ST gY_i = dY_i - val_r * kY;
380 const ST gZ_i = dZ_i - val_r * kZ;
382 const ST lap_r = myL[jr] + mKK[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
383 const ST lap_i = myL[ji] + mKK[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
385 const size_t psiIndex = j + first_spo;
386 psi[psiIndex] =
ComplexT(c * val_r -
s * val_i, c * val_i +
s * val_r);
387 dpsi[psiIndex][0] =
ComplexT(c * gX_r -
s * gX_i, c * gX_i +
s * gX_r);
388 dpsi[psiIndex][1] =
ComplexT(c * gY_r -
s * gY_i, c * gY_i +
s * gY_r);
389 dpsi[psiIndex][2] =
ComplexT(c * gZ_r -
s * gZ_i, c * gZ_i +
s * gZ_r);
390 d2psi[psiIndex] =
ComplexT(c * lap_r -
s * lap_i, c * lap_i +
s * lap_r);
394 template<
typename ST>
402 PointType ru(PrimLattice.toUnit_floor(r));
410 spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
411 assign_vgl(r, psi, dpsi, d2psi, first / 2, last / 2);
415 template<
typename ST>
424 const size_t last_cplx =
std::min(kPoints.size(), psi.size());
425 last = last > last_cplx ? last_cplx : last;
427 const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
428 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
429 g22 = PrimLattice.G(8);
430 const ST x = r[0], y = r[1], z = r[2];
432 const ST* restrict k0 = myKcart.
data(0);
433 const ST* restrict k1 = myKcart.data(1);
434 const ST* restrict k2 = myKcart.data(2);
436 const ST* restrict g0 = myG.data(0);
437 const ST* restrict g1 = myG.data(1);
438 const ST* restrict g2 = myG.data(2);
439 const ST* restrict h00 = myH.data(0);
440 const ST* restrict h01 = myH.data(1);
441 const ST* restrict h02 = myH.data(2);
442 const ST* restrict h11 = myH.data(3);
443 const ST* restrict h12 = myH.data(4);
444 const ST* restrict h22 = myH.data(5);
447 for (
size_t j = first; j < last; ++j)
455 const ST val_r = myV[jr];
456 const ST val_i = myV[ji];
463 const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
464 const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
465 const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
467 const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
468 const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
469 const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
472 const ST gX_r = dX_r + val_i * kX;
473 const ST gY_r = dY_r + val_i * kY;
474 const ST gZ_r = dZ_r + val_i * kZ;
475 const ST gX_i = dX_i - val_r * kX;
476 const ST gY_i = dY_i - val_r * kY;
477 const ST gZ_i = dZ_i - val_r * kZ;
479 const size_t psiIndex = j + first_spo;
480 psi[psiIndex] =
ComplexT(c * val_r -
s * val_i, c * val_i +
s * val_r);
481 dpsi[psiIndex][0] =
ComplexT(c * gX_r -
s * gX_i, c * gX_i +
s * gX_r);
482 dpsi[psiIndex][1] =
ComplexT(c * gY_r -
s * gY_i, c * gY_i +
s * gY_r);
483 dpsi[psiIndex][2] =
ComplexT(c * gZ_r -
s * gZ_i, c * gZ_i +
s * gZ_r);
486 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
488 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
490 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
492 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
494 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
496 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
498 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
500 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
502 v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
505 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
507 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
509 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
511 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
513 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
515 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
517 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
519 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
521 v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
523 grad_grad_psi[psiIndex][0] =
ComplexT(c * h_xx_r -
s * h_xx_i, c * h_xx_i +
s * h_xx_r);
524 grad_grad_psi[psiIndex][1] =
ComplexT(c * h_xy_r -
s * h_xy_i, c * h_xy_i +
s * h_xy_r);
525 grad_grad_psi[psiIndex][2] =
ComplexT(c * h_xz_r -
s * h_xz_i, c * h_xz_i +
s * h_xz_r);
526 grad_grad_psi[psiIndex][3] =
ComplexT(c * h_yx_r -
s * h_yx_i, c * h_yx_i +
s * h_yx_r);
527 grad_grad_psi[psiIndex][4] =
ComplexT(c * h_yy_r -
s * h_yy_i, c * h_yy_i +
s * h_yy_r);
528 grad_grad_psi[psiIndex][5] =
ComplexT(c * h_yz_r -
s * h_yz_i, c * h_yz_i +
s * h_yz_r);
529 grad_grad_psi[psiIndex][6] =
ComplexT(c * h_zx_r -
s * h_zx_i, c * h_zx_i +
s * h_zx_r);
530 grad_grad_psi[psiIndex][7] =
ComplexT(c * h_zy_r -
s * h_zy_i, c * h_zy_i +
s * h_zy_r);
531 grad_grad_psi[psiIndex][8] =
ComplexT(c * h_zz_r -
s * h_zz_i, c * h_zz_i +
s * h_zz_r);
535 template<
typename ST>
543 PointType ru(PrimLattice.toUnit_floor(r));
551 spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
552 assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
556 template<
typename ST>
566 const size_t last_cplx =
std::min(kPoints.size(), psi.size());
567 last = last < 0 ? last_cplx : (last > last_cplx ? last_cplx : last);
569 const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
570 g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
571 g22 = PrimLattice.G(8);
572 const ST x = r[0], y = r[1], z = r[2];
574 const ST* restrict k0 = myKcart.
data(0);
575 const ST* restrict k1 = myKcart.data(1);
576 const ST* restrict k2 = myKcart.data(2);
578 const ST* restrict g0 = myG.data(0);
579 const ST* restrict g1 = myG.data(1);
580 const ST* restrict g2 = myG.data(2);
581 const ST* restrict h00 = myH.data(0);
582 const ST* restrict h01 = myH.data(1);
583 const ST* restrict h02 = myH.data(2);
584 const ST* restrict h11 = myH.data(3);
585 const ST* restrict h12 = myH.data(4);
586 const ST* restrict h22 = myH.data(5);
588 const ST* restrict gh000 = mygH.data(0);
589 const ST* restrict gh001 = mygH.data(1);
590 const ST* restrict gh002 = mygH.data(2);
591 const ST* restrict gh011 = mygH.data(3);
592 const ST* restrict gh012 = mygH.data(4);
593 const ST* restrict gh022 = mygH.data(5);
594 const ST* restrict gh111 = mygH.data(6);
595 const ST* restrict gh112 = mygH.data(7);
596 const ST* restrict gh122 = mygH.data(8);
597 const ST* restrict gh222 = mygH.data(9);
601 for (
size_t j = first; j < last; ++j)
609 const ST val_r = myV[jr];
610 const ST val_i = myV[ji];
617 const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
618 const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
619 const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
621 const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
622 const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
623 const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
626 const ST gX_r = dX_r + val_i * kX;
627 const ST gY_r = dY_r + val_i * kY;
628 const ST gZ_r = dZ_r + val_i * kZ;
629 const ST gX_i = dX_i - val_r * kX;
630 const ST gY_i = dY_i - val_r * kY;
631 const ST gZ_i = dZ_i - val_r * kZ;
633 const size_t psiIndex = j + first_spo;
634 psi[psiIndex] =
ComplexT(c * val_r -
s * val_i, c * val_i +
s * val_r);
635 dpsi[psiIndex][0] =
ComplexT(c * gX_r -
s * gX_i, c * gX_i +
s * gX_r);
636 dpsi[psiIndex][1] =
ComplexT(c * gY_r -
s * gY_i, c * gY_i +
s * gY_r);
637 dpsi[psiIndex][2] =
ComplexT(c * gZ_r -
s * gZ_i, c * gZ_i +
s * gZ_r);
640 const ST f_xx_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
641 const ST f_xy_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
642 const ST f_xz_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
643 const ST f_yy_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
644 const ST f_yz_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
645 const ST f_zz_r =
v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
647 const ST f_xx_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
648 const ST f_xy_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
649 const ST f_xz_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
650 const ST f_yy_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
651 const ST f_yz_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
652 const ST f_zz_i =
v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
654 const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
655 const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
656 const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
657 const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
658 const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
659 const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
661 const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
662 const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
663 const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
664 const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
665 const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
666 const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
668 grad_grad_psi[psiIndex][0] =
ComplexT(c * h_xx_r -
s * h_xx_i, c * h_xx_i +
s * h_xx_r);
669 grad_grad_psi[psiIndex][1] =
ComplexT(c * h_xy_r -
s * h_xy_i, c * h_xy_i +
s * h_xy_r);
670 grad_grad_psi[psiIndex][2] =
ComplexT(c * h_xz_r -
s * h_xz_i, c * h_xz_i +
s * h_xz_r);
671 grad_grad_psi[psiIndex][3] =
ComplexT(c * h_xy_r -
s * h_xy_i, c * h_xy_i +
s * h_xy_r);
672 grad_grad_psi[psiIndex][4] =
ComplexT(c * h_yy_r -
s * h_yy_i, c * h_yy_i +
s * h_yy_r);
673 grad_grad_psi[psiIndex][5] =
ComplexT(c * h_yz_r -
s * h_yz_i, c * h_yz_i +
s * h_yz_r);
674 grad_grad_psi[psiIndex][6] =
ComplexT(c * h_xz_r -
s * h_xz_i, c * h_xz_i +
s * h_xz_r);
675 grad_grad_psi[psiIndex][7] =
ComplexT(c * h_yz_r -
s * h_yz_i, c * h_yz_i +
s * h_yz_r);
676 grad_grad_psi[psiIndex][8] =
ComplexT(c * h_zz_r -
s * h_zz_i, c * h_zz_i +
s * h_zz_r);
681 const ST f3_xxx_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
682 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
683 const ST f3_xxy_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
684 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
685 const ST f3_xxz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
686 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
687 const ST f3_xyy_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
688 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
689 const ST f3_xyz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
690 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
691 const ST f3_xzz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
692 gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
693 const ST f3_yyy_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
694 gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
695 const ST f3_yyz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
696 gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
697 const ST f3_yzz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
698 gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
699 const ST f3_zzz_r =
t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
700 gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
702 const ST f3_xxx_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
703 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
704 const ST f3_xxy_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
705 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
706 const ST f3_xxz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
707 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
708 const ST f3_xyy_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
709 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
710 const ST f3_xyz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
711 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
712 const ST f3_xzz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
713 gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
714 const ST f3_yyy_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
715 gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
716 const ST f3_yyz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
717 gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
718 const ST f3_yzz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
719 gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
720 const ST f3_zzz_i =
t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
721 gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
724 const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
725 const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
727 f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
729 f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
731 f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
733 f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
735 f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
737 f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
738 const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
739 (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
740 const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
741 (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
743 f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
745 f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
746 const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
747 const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
749 f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
751 f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
753 f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
755 f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
756 const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
757 const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
759 grad_grad_grad_psi[psiIndex][0][0] =
ComplexT(c * gh_xxx_r -
s * gh_xxx_i, c * gh_xxx_i +
s * gh_xxx_r);
760 grad_grad_grad_psi[psiIndex][0][1] =
ComplexT(c * gh_xxy_r -
s * gh_xxy_i, c * gh_xxy_i +
s * gh_xxy_r);
761 grad_grad_grad_psi[psiIndex][0][2] =
ComplexT(c * gh_xxz_r -
s * gh_xxz_i, c * gh_xxz_i +
s * gh_xxz_r);
762 grad_grad_grad_psi[psiIndex][0][3] =
ComplexT(c * gh_xxy_r -
s * gh_xxy_i, c * gh_xxy_i +
s * gh_xxy_r);
763 grad_grad_grad_psi[psiIndex][0][4] =
ComplexT(c * gh_xyy_r -
s * gh_xyy_i, c * gh_xyy_i +
s * gh_xyy_r);
764 grad_grad_grad_psi[psiIndex][0][5] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
765 grad_grad_grad_psi[psiIndex][0][6] =
ComplexT(c * gh_xxz_r -
s * gh_xxz_i, c * gh_xxz_i +
s * gh_xxz_r);
766 grad_grad_grad_psi[psiIndex][0][7] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
767 grad_grad_grad_psi[psiIndex][0][8] =
ComplexT(c * gh_xzz_r -
s * gh_xzz_i, c * gh_xzz_i +
s * gh_xzz_r);
769 grad_grad_grad_psi[psiIndex][1][0] =
ComplexT(c * gh_xxy_r -
s * gh_xxy_i, c * gh_xxy_i +
s * gh_xxy_r);
770 grad_grad_grad_psi[psiIndex][1][1] =
ComplexT(c * gh_xyy_r -
s * gh_xyy_i, c * gh_xyy_i +
s * gh_xyy_r);
771 grad_grad_grad_psi[psiIndex][1][2] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
772 grad_grad_grad_psi[psiIndex][1][3] =
ComplexT(c * gh_xyy_r -
s * gh_xyy_i, c * gh_xyy_i +
s * gh_xyy_r);
773 grad_grad_grad_psi[psiIndex][1][4] =
ComplexT(c * gh_yyy_r -
s * gh_yyy_i, c * gh_yyy_i +
s * gh_yyy_r);
774 grad_grad_grad_psi[psiIndex][1][5] =
ComplexT(c * gh_yyz_r -
s * gh_yyz_i, c * gh_yyz_i +
s * gh_yyz_r);
775 grad_grad_grad_psi[psiIndex][1][6] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
776 grad_grad_grad_psi[psiIndex][1][7] =
ComplexT(c * gh_yyz_r -
s * gh_yyz_i, c * gh_yyz_i +
s * gh_yyz_r);
777 grad_grad_grad_psi[psiIndex][1][8] =
ComplexT(c * gh_yzz_r -
s * gh_yzz_i, c * gh_yzz_i +
s * gh_yzz_r);
780 grad_grad_grad_psi[psiIndex][2][0] =
ComplexT(c * gh_xxz_r -
s * gh_xxz_i, c * gh_xxz_i +
s * gh_xxz_r);
781 grad_grad_grad_psi[psiIndex][2][1] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
782 grad_grad_grad_psi[psiIndex][2][2] =
ComplexT(c * gh_xzz_r -
s * gh_xzz_i, c * gh_xzz_i +
s * gh_xzz_r);
783 grad_grad_grad_psi[psiIndex][2][3] =
ComplexT(c * gh_xyz_r -
s * gh_xyz_i, c * gh_xyz_i +
s * gh_xyz_r);
784 grad_grad_grad_psi[psiIndex][2][4] =
ComplexT(c * gh_yyz_r -
s * gh_yyz_i, c * gh_yyz_i +
s * gh_yyz_r);
785 grad_grad_grad_psi[psiIndex][2][5] =
ComplexT(c * gh_yzz_r -
s * gh_yzz_i, c * gh_yzz_i +
s * gh_yzz_r);
786 grad_grad_grad_psi[psiIndex][2][6] =
ComplexT(c * gh_xzz_r -
s * gh_xzz_i, c * gh_xzz_i +
s * gh_xzz_r);
787 grad_grad_grad_psi[psiIndex][2][7] =
ComplexT(c * gh_yzz_r -
s * gh_yzz_i, c * gh_yzz_i +
s * gh_yzz_r);
788 grad_grad_grad_psi[psiIndex][2][8] =
ComplexT(c * gh_zzz_r -
s * gh_zzz_i, c * gh_zzz_i +
s * gh_zzz_r);
792 template<
typename ST>
801 PointType ru(PrimLattice.toUnit_floor(r));
808 spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
809 assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
void set_spline(SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
OrbitalSetTraits< ValueType >::HessVector HessVector
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
UBspline_3d_d SingleSplineType
helper functions for EinsplineSetBuilder
void storeParamsBeforeRotation() override
Store an original copy of the spline coefficients for orbital rotation.
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
void evaluateVGL(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
evaluate the values, gradients and laplacians of this single-particle orbital set ...
void evaluateValue(const ParticleSet &P, const int iat, ValueVector &psi) override
evaluate the values of this single-particle orbital set
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
OrbitalSetTraits< ValueType >::ValueMatrix ValueMatrix
size_t getTotalNum() const
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
T t3_contract(T h000, T h001, T h002, T h011, T h012, T h022, T h111, T h112, T h122, T h222, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z, T g3x, T g3y, T g3z)
Coordinate transform for a 3rd rank symmetric tensor representing coordinate derivatives (hence t3_co...
void assign_v(ST x, ST y, ST z, TT *restrict results_scratch_ptr, const ST *restrict offload_scratch_ptr, const ST *restrict myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
bool write_splines(hdf_archive &h5f)
void assign_vgl_from_l(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian ...
omp_int_t omp_get_thread_num()
Specialized paritlce class for atomistic simulations.
QTBase::ValueType ValueType
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
OrbitalSetTraits< ValueType >::ValueVector ValueVector
typename BsplineSet::ValueType ComplexT
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
const PosType & activeR(int iat) const
return the active position if the particle is active or the return current position if not ...
bool read_splines(hdf_archive &h5f)
omp_int_t omp_get_num_threads()
OHMMS_INDEXTYPE IndexType
define other types
void evaluateVGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
evaluate the values, gradients and hessians of this single-particle orbital set
void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) override
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
OrbitalSetTraits< ValueType >::GradVector GradVector
void evaluateVGHGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
SplineC2C(const std::string &my_name)
void assign_vgl(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi, int first, int last) const
assign_vgl
void applyRotation(const ValueMatrix &rot_mat, bool use_stored_copy) override
apply rotation to all the orbitals
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
LatticeGaussianProduct::ValueType ValueType
static void gemm(char Atrans, char Btrans, int M, int N, int K, double alpha, const double *A, int lda, const double *restrict B, int ldb, double beta, double *restrict C, int ldc)
bool readEntry(T &data, const std::string &aname)
read the data from the group aname and return status use read() for inbuilt error checking ...
class to handle complex splines to complex orbitals with splines of arbitrary precision ...
class to match std::complex<ST> spline with BsplineSet::ValueType (complex) SPOs
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
bool writeEntry(T &data, const std::string &aname)
write the data to the group aname and return status use write() for inbuilt error checking ...