13 #ifndef QMCPLUSPLUS_SOA_SPHERICAL_CARTESIAN_TENSOR_H 14 #define QMCPLUSPLUS_SOA_SPHERICAL_CARTESIAN_TENSOR_H 91 for (
int i = 0, nl =
cYlm.
size(); i < nl; i++)
103 const size_t nElec = xyz.
size(0);
104 const size_t Npbc = xyz.
size(1);
105 assert(xyz.
size(2) == 3);
107 assert(
Ylm.size(0) == nElec);
108 assert(
Ylm.size(1) == Npbc);
109 const size_t Nlm =
Ylm.size(2);
111 size_t nR = nElec * Npbc;
113 auto* xyz_ptr = xyz.
data();
114 auto* Ylm_ptr =
Ylm.data();
119 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for \ 120 map(to:factorLM__ptr[:Nlm], factorL__ptr[:Lmax+1], norm_factor__ptr[:Nlm]) \ 121 map(to: xyz_ptr[:3*nR], Ylm_ptr[:Nlm*nR])")
122 for (uint32_t ir = 0; ir < nR; ir++)
124 evaluate_bare(xyz_ptr[0 + 3 * ir], xyz_ptr[1 + 3 * ir], xyz_ptr[2 + 3 * ir], Ylm_ptr + (ir * Nlm),
Lmax,
125 factorL__ptr, factorLM__ptr);
126 for (
int i = 0; i < Nlm; i++)
127 Ylm_ptr[ir * Nlm + i] *= norm_factor__ptr[i];
142 const size_t nElec = xyz.
size(0);
143 const size_t Npbc = xyz.
size(1);
144 assert(xyz.
size(2) == 3);
146 assert(Ylm_vgl.
size(0) == 5);
147 assert(Ylm_vgl.
size(1) == nElec);
148 assert(Ylm_vgl.
size(2) == Npbc);
149 const size_t Nlm = Ylm_vgl.
size(3);
152 const size_t nR = nElec * Npbc;
153 const size_t offset = Nlm * nR;
155 auto* xyz_ptr = xyz.
data();
156 auto* Ylm_vgl_ptr = Ylm_vgl.
data();
162 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for \ 163 map(to:factorLM__ptr[:Nlm], factorL__ptr[:Lmax+1], norm_factor__ptr[:Nlm], factor2L__ptr[:Lmax+1]) \ 164 map(to: xyz_ptr[:nR*3], Ylm_vgl_ptr[:5*nR*Nlm])")
165 for (uint32_t ir = 0; ir < nR; ir++)
166 evaluateVGL_impl(xyz_ptr[0 + 3 * ir], xyz_ptr[1 + 3 * ir], xyz_ptr[2 + 3 * ir], Ylm_vgl_ptr + (ir * Nlm),
Lmax,
167 factorL__ptr, factorLM__ptr, factor2L__ptr, norm_factor__ptr, offset);
175 for (
int i = 0, nl =
cYlm.
size(); i < nl; i++)
189 static inline int index(
int l,
int m) {
return (l * (l + 1)) +
m; }
231 norm_factor_(*norm_factor_ptr_),
232 factorLM_(*factorLM_ptr_),
233 factorL_(*factorL_ptr_),
234 factor2L_(*factor2L_ptr_)
236 constexpr T
czero(0);
238 const int ntot = (Lmax + 1) * (Lmax + 1);
240 norm_factor_.resize(ntot,
cone);
244 for (
int l = 0; l <= Lmax; l++)
246 norm_factor_[index(l, 0)] =
cone;
247 for (
int m = 1;
m <= l;
m++)
256 for (
int l = 0; l <= Lmax; l++)
258 for (
int m = 1;
m <= l;
m++)
260 norm_factor_[index(l,
m)] = sqrt2;
261 norm_factor_[index(l, -
m)] = sqrt2;
265 factorL_.resize(Lmax + 1);
267 for (
int l = 1; l <= Lmax; l++)
268 factorL_[l] =
std::sqrt(static_cast<T>(2 * l + 1)) * omega;
269 factor2L_.resize(Lmax + 1);
270 for (
int l = 1; l <= Lmax; l++)
271 factor2L_[l] = static_cast<T>(2 * l + 1) /
static_cast<T
>(2 * l - 1);
272 factorLM_.resize(ntot);
273 for (
int l = 1; l <= Lmax; l++)
274 for (
int m = 1;
m <= l;
m++)
276 T fac2 = 1.0 /
std::sqrt(static_cast<T>((l +
m) * (l + 1 -
m)));
277 factorLM_[index(l,
m)] = fac2;
278 factorLM_[index(l, -
m)] = fac2;
280 norm_factor_.updateTo();
281 factorLM_.updateTo();
283 factor2L_.updateTo();
286 PRAGMA_OFFLOAD(
"omp declare target")
296 constexpr T
czero(0);
300 constexpr T eps2 = std::numeric_limits<T>::epsilon() * std::numeric_limits<T>::epsilon();
305 T cphi, sphi, ctheta;
306 T r2xy = x * x + y * y;
334 for (
int l = 1; l <= lmax; l++)
338 int ll = index(l, l);
339 int l1 = index(l, l - 1);
340 int l2 = index(l - 1, l - 1);
342 Ylm[l1] = j * ctheta *
Ylm[l2];
345 for (
int m = 0;
m < lmax - 1;
m++)
348 for (
int l =
m + 2; l <= lmax; l++)
351 int lm = index(l,
m);
352 int l1 = index(l - 1,
m);
353 int l2 = index(l - 2,
m);
354 Ylm[lm] = (ctheta * j *
Ylm[l1] - (l +
m - 1) *
Ylm[l2]) / (l -
m);
358 T sphim, cphim, temp;
361 for (
int l = 1; l <= lmax; l++)
366 fac = rpow * factorL[l];
367 int l0 = index(l, 0);
371 for (
int m = 1;
m <= l;
m++)
373 temp = cphim * cphi - sphim * sphi;
374 sphim = sphim * cphi + cphim * sphi;
376 int lm = index(l,
m);
378 temp = fac *
Ylm[lm];
379 Ylm[lm] = temp * cphim;
381 Ylm[lm] = temp * sphim;
387 PRAGMA_OFFLOAD(
"omp end declare target")
390 PRAGMA_OFFLOAD("omp declare target")
403 T* restrict
Ylm = Ylm_vgl;
405 evaluate_bare(x, y, z,
Ylm, lmax, factorL, factorLM);
406 const size_t Nlm = (lmax + 1) * (lmax + 1);
408 constexpr T
czero(0);
409 constexpr T ahalf(0.5);
410 T* restrict gYlmX = Ylm_vgl + offset * 1;
411 T* restrict gYlmY = Ylm_vgl + offset * 2;
412 T* restrict gYlmZ = Ylm_vgl + offset * 3;
413 T* restrict lYlm = Ylm_vgl + offset * 4;
421 for (
int l = 1; l <= lmax; l++)
425 for (
int m = -l;
m <= l;
m++)
427 int lm = index(l - 1, 0);
428 T gx, gy, gz, dpr, dpi, dmr, dmi;
430 const T cp =
std::sqrt(fac * (l - ma - 1) * (l - ma));
431 const T cm =
std::sqrt(fac * (l + ma - 1) * (l + ma));
432 const T c0 =
std::sqrt(fac * (l - ma) * (l + ma));
433 gz = (l > ma) ? c0 *
Ylm[lm +
m] :
czero;
436 dpr = cp *
Ylm[lm + ma + 1];
437 dpi = cp *
Ylm[lm - ma - 1];
449 dmr = -cm *
Ylm[lm + 1];
450 dmi = cm *
Ylm[lm - 1];
457 dmr = cm *
Ylm[lm + ma - 1];
458 dmi = cm *
Ylm[lm - ma + 1];
470 gx = ahalf * (dpi - dmi);
471 gy = -ahalf * (dpr + dmr);
475 gx = ahalf * (dpr - dmr);
476 gy = ahalf * (dpi + dmi);
481 gYlmX[lm] = normfactor[lm] * gx;
482 gYlmY[lm] = normfactor[lm] * gy;
483 gYlmZ[lm] = normfactor[lm] * gz;
493 for (
int i = 0; i < Nlm; i++)
495 Ylm[i] *= normfactor[i];
500 PRAGMA_OFFLOAD(
"omp end declare target")
505 evaluateVGL_impl(x, y, z, cYlm.data(), Lmax, factorL_.data(), factorLM_.data(), factor2L_.data(), norm_factor_.data(),
512 throw std::runtime_error(
"SoaSphericalTensor<T>::evaluateVGH(x,y,z): Not implemented\n");
518 throw std::runtime_error(
"SoaSphericalTensor<T>::evaluateVGHGH(x,y,z): Not implemented\n");
OffloadVector & norm_factor_
norm_factor reference
const std::shared_ptr< OffloadVector > norm_factor_ptr_
Normalization factors.
helper functions for EinsplineSetBuilder
SoaSphericalTensor(const int l_max, bool addsign=false)
constructor
void batched_evaluateVGL(const OffloadArray3D &xyz, OffloadArray4D &Ylm_vgl) const
evaluate VGL for multiple electrons and multiple pbc images
const std::shared_ptr< OffloadVector > factor2L_ptr_
pre-evaluated factor
void evaluateV(T x, T y, T z)
compute Ylm
std::complex< T > Ylm(int l, int m, const TinyVector< T, 3 > &r)
calculates Ylm param[in] l angular momentum param[in] m magnetic quantum number param[in] r position ...
MakeReturn< UnaryNode< FnFabs, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t abs(const Vector< T1, C1 > &l)
OffloadVector & factorL_
factorL reference
constexpr std::complex< float > czero
constexpr std::complex< float > cone
Soa Container for D-dim vectors.
const T * operator[](size_t component) const
return the starting address of the component
VectorSoaContainer< T, 5 > cYlm
composite
SoaSphericalTensor that evaluates the Real Spherical Harmonics.
void evaluateVGL(T x, T y, T z)
makes a table of and their gradients up to Lmax.
static void evaluate_bare(T x, T y, T z, T *Ylm, int lmax, const T *factorL, const T *factorLM)
compute Ylm for single position
void evaluateVGHGH(T x, T y, T z)
makes a table of and their gradients up to Lmax.
MakeReturn< BinaryNode< FnPow, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t, typename CreateLeaf< Vector< T2, C2 > >::Leaf_t > >::Expression_t pow(const Vector< T1, C1 > &l, const Vector< T2, C2 > &r)
size_type size() const
return the current size
MakeReturn< UnaryNode< FnArcTan, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t atan(const Vector< T1, C1 > &l)
std::complex< double > Ylm(int l, int m, const std::vector< double > &sph)
T * data()
return the base
void batched_evaluateV(const OffloadArray3D &xyz, OffloadArray3D &Ylm) const
evaluate V for multiple electrons and multiple pbc images
void evaluateV(T x, T y, T z, T *Ylm) const
compute Ylm
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
const std::shared_ptr< OffloadVector > factorL_ptr_
pre-evaluated factor
void evaluateVGH(T x, T y, T z)
makes a table of and their gradients up to Lmax.
int Lmax
maximum angular momentum for the center
static int index(int l, int m)
returns the index/locator for ( ) combo,
A D-dimensional Array class based on PETE.
size_type size() const
return the physical size
static void evaluateVGL_impl(const T x, const T y, const T z, T *restrict Ylm_vgl, int lmax, const T *factorL, const T *factorLM, const T *factor2L, const T *normfactor, size_t offset)
compute Ylm_vgl for single position
OffloadVector & factor2L_
factor2L reference
const std::shared_ptr< OffloadVector > factorLM_ptr_
pre-evaluated factor
OffloadVector & factorLM_
factorLM reference