22 #ifndef QMCPLUSPLUS_MULTI_FUNCTOR_QUINTIC_SPLINE_SET_H 23 #define QMCPLUSPLUS_MULTI_FUNCTOR_QUINTIC_SPLINE_SET_H 46 inline void set(T ri, T rf,
int n)
50 double ratio = rf / ri;
52 double dlog_ratio = log_ratio /
static_cast<double>(
n - 1);
56 for (
int i = 0; i <
n; i++)
60 PRAGMA_OFFLOAD(
"omp declare target")
61 static inline T
getCL(T r,
int& loc,
double one_over_log_delta, T
lower_bound,
double log_delta)
66 PRAGMA_OFFLOAD(
"omp end declare target")
72 throw std::domain_error(
"r value " + std::to_string(r) +
">=" + std::to_string(
upper_bound) +
"\n");
139 if (r <
myGrid.lower_bound)
141 const T dr = r -
myGrid.lower_bound;
142 const T* restrict a =
coeffs_[0];
149 const auto cL =
myGrid.getCLForQuintic(r, loc);
150 const size_t offset = loc * 6;
153 const T* restrict a =
coeffs_[offset + 0];
154 const T* restrict b =
coeffs_[offset + 1];
155 const T* restrict c =
coeffs_[offset + 2];
156 const T* restrict d =
coeffs_[offset + 3];
157 const T* restrict
e =
coeffs_[offset + 4];
158 const T* restrict f =
coeffs_[offset + 5];
160 u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (
e[i] + cL * f[i]))));
173 const size_t nElec = r.
size(0);
174 const size_t Nxyz = r.
size(1);
175 assert(nElec == u.
size(0));
176 assert(Nxyz == u.
size(1));
177 const size_t nRnl = u.
size(2);
178 const size_t nR = nElec * Nxyz;
180 double one_over_log_delta =
myGrid.OneOverLogDelta;
182 T log_delta =
myGrid.LogDelta;
184 auto* r_ptr = r.
data();
185 auto* u_ptr = u.
data();
193 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for \ 194 map(to:coeff_ptr[:coefsize], first_deriv_ptr[:nsplines]) \ 195 map(to:r_ptr[:nR], u_ptr[:nRnl*nR])")
196 for (
int ir = 0; ir < nR; ir++)
198 if (r_ptr[ir] >= Rmax)
200 for (
int i = 0; i < nsplines; ++i)
201 u_ptr[ir * nRnl + i] = 0.0;
206 for (
int i = 0; i < nsplines; ++i)
207 u_ptr[ir * nRnl + i] = coeff_ptr[i] + first_deriv_ptr[i] * dr;
213 const size_t offset = loc * 6;
214 const T* restrict a = coeff_ptr + nCols * (offset + 0);
215 const T* restrict b = coeff_ptr + nCols * (offset + 1);
216 const T* restrict c = coeff_ptr + nCols * (offset + 2);
217 const T* restrict d = coeff_ptr + nCols * (offset + 3);
218 const T* restrict
e = coeff_ptr + nCols * (offset + 4);
219 const T* restrict f = coeff_ptr + nCols * (offset + 5);
220 for (
int i = 0; i < nsplines; ++i)
221 u_ptr[ir * nRnl + i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (
e[i] + cL * f[i]))));
238 const size_t nElec = r.
size(0);
239 const size_t Nxyz = r.
size(1);
240 assert(3 == vgl.
size(0));
241 assert(nElec == vgl.
size(1));
242 assert(Nxyz == vgl.
size(2));
243 const size_t nRnl = vgl.
size(3);
244 const size_t nR = nElec * Nxyz;
246 double one_over_log_delta =
myGrid.OneOverLogDelta;
248 T dlog_ratio =
myGrid.LogDelta;
250 auto* r_ptr = r.
data();
251 auto* u_ptr = vgl.
data_at(0, 0, 0, 0);
252 auto* du_ptr = vgl.
data_at(1, 0, 0, 0);
253 auto* d2u_ptr = vgl.
data_at(2, 0, 0, 0);
262 constexpr T cthree(3);
263 constexpr T cfour(4);
264 constexpr T cfive(5);
269 PRAGMA_OFFLOAD(
"omp target teams distribute parallel for \ 270 map(to: first_deriv_ptr[:nsplines], coeff_ptr[:coefsize]) \ 271 map(to: r_ptr[:nR], u_ptr[:nRnl*nR], du_ptr[:nRnl*nR], d2u_ptr[:nRnl*nR])")
272 for (
size_t ir = 0; ir < nR; ir++)
274 if (r_ptr[ir] >= Rmax)
276 for (
size_t i = 0; i < nsplines; ++i)
278 u_ptr[ir * nRnl + i] = 0.0;
279 du_ptr[ir * nRnl + i] = 0.0;
280 d2u_ptr[ir * nRnl + i] = 0.0;
286 const T* restrict a = coeff_ptr;
288 for (
size_t i = 0; i < nsplines; ++i)
290 u_ptr[ir * nRnl + i] = a[i] + first_deriv_ptr[i] * dr;
291 du_ptr[ir * nRnl + i] = first_deriv_ptr[i];
292 d2u_ptr[ir * nRnl + i] = 0.0;
300 const size_t offset = loc * 6;
301 const T* restrict a = coeff_ptr + nCols * (offset + 0);
302 const T* restrict b = coeff_ptr + nCols * (offset + 1);
303 const T* restrict c = coeff_ptr + nCols * (offset + 2);
304 const T* restrict d = coeff_ptr + nCols * (offset + 3);
305 const T* restrict
e = coeff_ptr + nCols * (offset + 4);
306 const T* restrict f = coeff_ptr + nCols * (offset + 5);
307 for (
size_t i = 0; i < nsplines; ++i)
309 u_ptr[ir * nRnl + i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (
e[i] + cL * f[i]))));
310 du_ptr[ir * nRnl + i] =
311 b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour *
e[i] + cL * f[i] * cfive)));
312 d2u_ptr[ir * nRnl + i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 *
e[i] + cL * f[i] * c20));
317 inline void evaluate(T r, T* restrict u, T* restrict du, T* restrict d2u)
const 319 if (r <
myGrid.lower_bound)
321 const T dr = r -
myGrid.lower_bound;
322 const T* restrict a =
coeffs_[0];
333 const auto cL =
myGrid.getCLForQuintic(r, loc);
334 const size_t offset = loc * 6;
337 constexpr T cthree(3);
338 constexpr T cfour(4);
339 constexpr T cfive(5);
344 const T* restrict a =
coeffs_[offset + 0];
345 const T* restrict b =
coeffs_[offset + 1];
346 const T* restrict c =
coeffs_[offset + 2];
347 const T* restrict d =
coeffs_[offset + 3];
348 const T* restrict
e =
coeffs_[offset + 4];
349 const T* restrict f =
coeffs_[offset + 5];
353 u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (
e[i] + cL * f[i]))));
354 du[i] = b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour *
e[i] + cL * f[i] * cfive)));
355 d2u[i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 *
e[i] + cL * f[i] * c20));
361 inline void evaluate(T r, T* restrict u, T* restrict du, T* restrict d2u, T* restrict d3u)
const 363 if (r <
myGrid.lower_bound)
365 const T dr = r -
myGrid.lower_bound;
366 const T* restrict a =
coeffs_[0];
378 const auto cL =
myGrid.getCLForQuintic(r, loc);
379 const size_t offset = loc * 6;
382 constexpr T cthree(3);
383 constexpr T cfour(4);
384 constexpr T cfive(5);
391 const T* restrict a =
coeffs_[offset + 0];
392 const T* restrict b =
coeffs_[offset + 1];
393 const T* restrict c =
coeffs_[offset + 2];
394 const T* restrict d =
coeffs_[offset + 3];
395 const T* restrict
e =
coeffs_[offset + 4];
396 const T* restrict f =
coeffs_[offset + 5];
400 u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (
e[i] + cL * f[i]))));
401 du[i] = b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour *
e[i] + cL * f[i] * cfive)));
402 d2u[i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 *
e[i] + cL * f[i] * c20));
403 d3u[i] = csix * d[i] + cL * (c24 *
e[i] + cL * f[i] * c60);
415 template<
typename GT>
418 myGrid.set(agrid.rmin(), agrid.rmax(), agrid.size());
421 throw std::runtime_error(
"MultiQuinticSpline1D::initialize cannot reinitialize coeffs.");
425 coeffs_.
resize((order + 1) * agrid.size(), getAlignedSize<T>(norbs));
429 template<
typename T1>
435 const T1* restrict
A = in.
m_Y.data();
436 const T1* restrict
B = in.
B.data();
437 const T1* restrict
C = in.
m_Y2.data();
438 const T1* restrict D = in.
D.data();
439 const T1* restrict E = in.
E.data();
440 const T1* restrict F = in.
F.data();
443 const size_t num_points = in.
size();
444 for (
size_t i = 0; i < num_points; ++i)
446 out[(i * 6 + 0) * ncols + ispline] = static_cast<T>(
A[i]);
447 out[(i * 6 + 1) * ncols + ispline] = static_cast<T>(
B[i]);
448 out[(i * 6 + 2) * ncols + ispline] = static_cast<T>(
C[i]);
449 out[(i * 6 + 3) * ncols + ispline] = static_cast<T>(D[i]);
450 out[(i * 6 + 4) * ncols + ispline] = static_cast<T>(E[i]);
451 out[(i * 6 + 5) * ncols + ispline] = static_cast<T>(F[i]);
467 extern template class MultiQuinticSpline1D<float>;
468 extern template class MultiQuinticSpline1D<double>;
multivalue implementation for OneDimQuintic Real valued only calling any eval method with r >= r_max ...
data_type m_Y
data for the function on the grid
helper functions for EinsplineSetBuilder
void batched_evaluateVGL(const OffloadArray2D &r, OffloadArray4D &vgl, T Rmax) const
evaluate value, first deriv, second deriv of MultiQuinticSpline1D for multiple electrons and multiple...
Type_t * data_at(const std::array< SIZET, D > &indices)
size_t num_splines_
number of splines
LogGridLight< T > myGrid
will be the real grid
Vector< T, OffloadPinnedAllocator< T > > & first_deriv_
void resize(size_type n, size_type m)
Resize the container.
TinyVector< T, 3 > lower_bound(const TinyVector< T, 3 > &a, const TinyVector< T, 3 > &b)
helper function to determine the lower bound of a domain (need to move up)
An abstract base class to implement a One-Dimensional grid.
Decalaration of One-Dimesional grids.
void evaluate(T r, T *restrict u, T *restrict du, T *restrict d2u) const
void updateTo(size_type size=0, std::ptrdiff_t offset=0)
size_t spline_order
order of spline
void evaluate(T r, T *restrict u, T *restrict du, T *restrict d2u, T *restrict d3u) const
compute upto 3rd derivatives
const std::shared_ptr< CoeffType > coeffs_ptr_
coeffs[6*spline_points][num_splines+padding]
MakeReturn< UnaryNode< FnExp, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t exp(const Vector< T1, C1 > &l)
std::vector< T > r_values
OMPallocator is an allocator with fused device and dualspace allocator functionality.
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)
void finalize()
finalize the construction
void batched_evaluate(const OffloadArray2D &r, OffloadArray3D &u, T Rmax) const
evaluate MultiQuinticSpline1D for multiple electrons and multiple pbc images
void evaluate(T r, T *restrict u) const
void add_spline(int ispline, OneDimQuinticSpline< T1 > &in)
int size() const
return the number of data points
double B(double x, int k, int i, const std::vector< double > &t)
static T getCL(T r, int &loc, double one_over_log_delta, T lower_bound, double log_delta)
A D-dimensional Array class based on PETE.
int getNumSplines() const
T getCLForQuintic(T r, int &loc) const
void initialize(GT &agrid, int norbs, int order=5)
initialize grid and container
const std::shared_ptr< Vector< T, OffloadPinnedAllocator< T > > > first_deriv_ptr_
void setNumSplines(int num_splines)