QMCPACK
MultiQuinticSpline1D.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
8 // Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
9 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
10 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
11 //
12 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
13 //////////////////////////////////////////////////////////////////////////////////////
14 
15 
16 /** @file
17  * Assume that coeffs.D1 and the LogLightGrid
18  * r_values.size() are equal
19  * Therefore r must be < r_max
20  */
21 
22 #ifndef QMCPLUSPLUS_MULTI_FUNCTOR_QUINTIC_SPLINE_SET_H
23 #define QMCPLUSPLUS_MULTI_FUNCTOR_QUINTIC_SPLINE_SET_H
24 
25 #include <algorithm>
26 
29 #include "OhmmsPETE/OhmmsMatrix.h"
30 #include "OhmmsPETE/OhmmsArray.h"
33 #include <cassert>
34 
35 namespace qmcplusplus
36 {
37 template<typename T>
39 {
43  double LogDelta;
44  std::vector<T> r_values;
45 
46  inline void set(T ri, T rf, int n)
47  {
48  lower_bound = ri;
49  upper_bound = rf;
50  double ratio = rf / ri;
51  double log_ratio = std::log(ratio);
52  double dlog_ratio = log_ratio / static_cast<double>(n - 1);
53  OneOverLogDelta = 1.0 / dlog_ratio;
54  LogDelta = dlog_ratio;
55  r_values.resize(n);
56  for (int i = 0; i < n; i++)
57  r_values[i] = ri * std::exp(i * dlog_ratio);
58  }
59 
60  PRAGMA_OFFLOAD("omp declare target")
61  static inline T getCL(T r, int& loc, double one_over_log_delta, T lower_bound, double log_delta)
62  {
63  loc = static_cast<int>(std::log(r / lower_bound) * one_over_log_delta);
64  return r - lower_bound * std::exp(loc * log_delta);
65  }
66  PRAGMA_OFFLOAD("omp end declare target")
67 
68  inline int locate(T r) const
69  {
70  int loc = static_cast<int>(std::log(r / lower_bound) * OneOverLogDelta);
71  if (loc >= r_values.size())
72  throw std::domain_error("r value " + std::to_string(r) + ">=" + std::to_string(upper_bound) + "\n");
73  return loc;
74  }
75 
76  inline T operator()(int i)
77  {
78  //return static_cast<T>(lower_bound*std::exp(i*LogDelta));
79  return r_values[i];
80  }
81 
82  //CHECK MIXED PRECISION SENSITIVITY
83  inline T getCLForQuintic(T r, int& loc) const
84  {
85  loc = locate(r);
86  return r - r_values[loc];
87  }
88 
89  inline int size() { return r_values.size(); }
90 };
91 
92 /** multivalue implementation for OneDimQuintic
93  * Real valued only
94  * calling any eval method with r >= r_max will throw an exception
95  */
96 template<typename T>
98 {
99 public:
100  using RealType = T;
106 
107 private:
108  ///number of splines
109  size_t num_splines_;
110  ///order of spline
111  size_t spline_order;
112  ///maximum radius
113  T r_max;
114 
115  ///will be the real grid
117 
118  ///coeffs[6*spline_points][num_splines+padding]
119  const std::shared_ptr<CoeffType> coeffs_ptr_;
120  const std::shared_ptr<Vector<T, OffloadPinnedAllocator<T>>> first_deriv_ptr_;
121 
124 
125 public:
127  : coeffs_ptr_(std::make_shared<CoeffType>()),
128  first_deriv_ptr_(std::make_shared<Vector<T, OffloadPinnedAllocator<T>>>()),
131  {}
132 
133  MultiQuinticSpline1D(const MultiQuinticSpline1D& in) = default;
134 
135  inline T rmax() const { return myGrid.upper_bound; }
136 
137  inline void evaluate(T r, T* restrict u) const
138  {
139  if (r < myGrid.lower_bound)
140  {
141  const T dr = r - myGrid.lower_bound;
142  const T* restrict a = coeffs_[0];
143  for (size_t i = 0; i < num_splines_; ++i)
144  u[i] = a[i] + first_deriv_[i] * dr;
145  }
146  else
147  {
148  int loc;
149  const auto cL = myGrid.getCLForQuintic(r, loc);
150  const size_t offset = loc * 6;
151  //coeffs is an OhmmsMatrix and [] is a row access operator
152  //returning a pointer to 'row' which is normal type pointer []
153  const T* restrict a = coeffs_[offset + 0];
154  const T* restrict b = coeffs_[offset + 1];
155  const T* restrict c = coeffs_[offset + 2];
156  const T* restrict d = coeffs_[offset + 3];
157  const T* restrict e = coeffs_[offset + 4];
158  const T* restrict f = coeffs_[offset + 5];
159  for (size_t i = 0; i < num_splines_; ++i)
160  u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (e[i] + cL * f[i]))));
161  }
162  }
163 
164  /**
165  * @brief evaluate MultiQuinticSpline1D for multiple electrons and multiple pbc images
166  *
167  * @param [in] r electron distances [Nelec, Npbc]
168  * @param [out] u value of all splines at all electron distances [Nelec, Npbc, Nsplines]
169  * @param Rmax spline will evaluate to zero for any distance greater than or equal to Rmax
170  */
171  inline void batched_evaluate(const OffloadArray2D& r, OffloadArray3D& u, T Rmax) const
172  {
173  const size_t nElec = r.size(0);
174  const size_t Nxyz = r.size(1); // number of PBC images
175  assert(nElec == u.size(0));
176  assert(Nxyz == u.size(1));
177  const size_t nRnl = u.size(2); // number of splines
178  const size_t nR = nElec * Nxyz; // total number of positions to evaluate
179 
180  double one_over_log_delta = myGrid.OneOverLogDelta;
181  T lower_bound = myGrid.lower_bound;
182  T log_delta = myGrid.LogDelta;
183 
184  auto* r_ptr = r.data();
185  auto* u_ptr = u.data();
186 
187  auto* coeff_ptr = coeffs_.data();
188  auto* first_deriv_ptr = first_deriv_.data();
189  const size_t nCols = coeffs_.cols();
190  const size_t coefsize = coeffs_.size();
191  const int nsplines = num_splines_;
192 
193  PRAGMA_OFFLOAD("omp target teams distribute parallel for \
194  map(to:coeff_ptr[:coefsize], first_deriv_ptr[:nsplines]) \
195  map(to:r_ptr[:nR], u_ptr[:nRnl*nR])")
196  for (int ir = 0; ir < nR; ir++)
197  {
198  if (r_ptr[ir] >= Rmax)
199  {
200  for (int i = 0; i < nsplines; ++i)
201  u_ptr[ir * nRnl + i] = 0.0;
202  }
203  else if (r_ptr[ir] < lower_bound)
204  {
205  const T dr = r_ptr[ir] - lower_bound;
206  for (int i = 0; i < nsplines; ++i)
207  u_ptr[ir * nRnl + i] = coeff_ptr[i] + first_deriv_ptr[i] * dr;
208  }
209  else
210  {
211  int loc;
212  const auto cL = LogGridLight<T>::getCL(r_ptr[ir], loc, one_over_log_delta, lower_bound, log_delta);
213  const size_t offset = loc * 6;
214  const T* restrict a = coeff_ptr + nCols * (offset + 0);
215  const T* restrict b = coeff_ptr + nCols * (offset + 1);
216  const T* restrict c = coeff_ptr + nCols * (offset + 2);
217  const T* restrict d = coeff_ptr + nCols * (offset + 3);
218  const T* restrict e = coeff_ptr + nCols * (offset + 4);
219  const T* restrict f = coeff_ptr + nCols * (offset + 5);
220  for (int i = 0; i < nsplines; ++i)
221  u_ptr[ir * nRnl + i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (e[i] + cL * f[i]))));
222  }
223  }
224  }
225 
226  /**
227  * @brief evaluate value, first deriv, second deriv of MultiQuinticSpline1D for multiple electrons and multiple pbc images
228  *
229  * r is assumed to be up-to-date on the device when entering this function, and
230  * vgl will be up to date on the device when exiting this function
231  *
232  * @param [in] r electron distances [Nelec, Npbc]
233  * @param [out] vgl val/d1/d2 of all splines at all electron distances [3(val,d1,d2), Nelec, Npbc, Nsplines]
234  * @param Rmax spline and derivatives will evaluate to zero for any distance greater than or equal to Rmax
235  */
236  inline void batched_evaluateVGL(const OffloadArray2D& r, OffloadArray4D& vgl, T Rmax) const
237  {
238  const size_t nElec = r.size(0);
239  const size_t Nxyz = r.size(1); // number of PBC images
240  assert(3 == vgl.size(0));
241  assert(nElec == vgl.size(1));
242  assert(Nxyz == vgl.size(2));
243  const size_t nRnl = vgl.size(3); // number of splines
244  const size_t nR = nElec * Nxyz; // total number of positions to evaluate
245 
246  double one_over_log_delta = myGrid.OneOverLogDelta;
247  T lower_bound = myGrid.lower_bound;
248  T dlog_ratio = myGrid.LogDelta;
249 
250  auto* r_ptr = r.data();
251  auto* u_ptr = vgl.data_at(0, 0, 0, 0);
252  auto* du_ptr = vgl.data_at(1, 0, 0, 0);
253  auto* d2u_ptr = vgl.data_at(2, 0, 0, 0);
254 
255  auto* coeff_ptr = coeffs_.data();
256  auto* first_deriv_ptr = first_deriv_.data();
257  const size_t nCols = coeffs_.cols();
258  const size_t coefsize = coeffs_.size();
259  const auto nsplines = num_splines_;
260 
261  constexpr T ctwo(2);
262  constexpr T cthree(3);
263  constexpr T cfour(4);
264  constexpr T cfive(5);
265  constexpr T csix(6);
266  constexpr T c12(12);
267  constexpr T c20(20);
268 
269  PRAGMA_OFFLOAD("omp target teams distribute parallel for \
270  map(to: first_deriv_ptr[:nsplines], coeff_ptr[:coefsize]) \
271  map(to: r_ptr[:nR], u_ptr[:nRnl*nR], du_ptr[:nRnl*nR], d2u_ptr[:nRnl*nR])")
272  for (size_t ir = 0; ir < nR; ir++)
273  {
274  if (r_ptr[ir] >= Rmax)
275  {
276  for (size_t i = 0; i < nsplines; ++i)
277  {
278  u_ptr[ir * nRnl + i] = 0.0;
279  du_ptr[ir * nRnl + i] = 0.0;
280  d2u_ptr[ir * nRnl + i] = 0.0;
281  }
282  }
283  else if (r_ptr[ir] < lower_bound)
284  {
285  const T dr = r_ptr[ir] - lower_bound;
286  const T* restrict a = coeff_ptr;
287  // const T* restrict a = coeffs_[0];
288  for (size_t i = 0; i < nsplines; ++i)
289  {
290  u_ptr[ir * nRnl + i] = a[i] + first_deriv_ptr[i] * dr;
291  du_ptr[ir * nRnl + i] = first_deriv_ptr[i];
292  d2u_ptr[ir * nRnl + i] = 0.0;
293  }
294  }
295  else
296  {
297  int loc;
298  const auto cL = LogGridLight<T>::getCL(r_ptr[ir], loc, one_over_log_delta, lower_bound, dlog_ratio);
299  // const auto cL = myGrid.getCLForQuintic(r_list[ir], loc);
300  const size_t offset = loc * 6;
301  const T* restrict a = coeff_ptr + nCols * (offset + 0);
302  const T* restrict b = coeff_ptr + nCols * (offset + 1);
303  const T* restrict c = coeff_ptr + nCols * (offset + 2);
304  const T* restrict d = coeff_ptr + nCols * (offset + 3);
305  const T* restrict e = coeff_ptr + nCols * (offset + 4);
306  const T* restrict f = coeff_ptr + nCols * (offset + 5);
307  for (size_t i = 0; i < nsplines; ++i)
308  {
309  u_ptr[ir * nRnl + i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (e[i] + cL * f[i]))));
310  du_ptr[ir * nRnl + i] =
311  b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour * e[i] + cL * f[i] * cfive)));
312  d2u_ptr[ir * nRnl + i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 * e[i] + cL * f[i] * c20));
313  }
314  }
315  }
316  }
317  inline void evaluate(T r, T* restrict u, T* restrict du, T* restrict d2u) const
318  {
319  if (r < myGrid.lower_bound)
320  {
321  const T dr = r - myGrid.lower_bound;
322  const T* restrict a = coeffs_[0];
323  for (size_t i = 0; i < num_splines_; ++i)
324  {
325  u[i] = a[i] + first_deriv_[i] * dr;
326  du[i] = first_deriv_[i];
327  d2u[i] = 0.0;
328  }
329  }
330  else
331  {
332  int loc;
333  const auto cL = myGrid.getCLForQuintic(r, loc);
334  const size_t offset = loc * 6;
335 
336  constexpr T ctwo(2);
337  constexpr T cthree(3);
338  constexpr T cfour(4);
339  constexpr T cfive(5);
340  constexpr T csix(6);
341  constexpr T c12(12);
342  constexpr T c20(20);
343 
344  const T* restrict a = coeffs_[offset + 0];
345  const T* restrict b = coeffs_[offset + 1];
346  const T* restrict c = coeffs_[offset + 2];
347  const T* restrict d = coeffs_[offset + 3];
348  const T* restrict e = coeffs_[offset + 4];
349  const T* restrict f = coeffs_[offset + 5];
350 
351  for (size_t i = 0; i < num_splines_; ++i)
352  {
353  u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (e[i] + cL * f[i]))));
354  du[i] = b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour * e[i] + cL * f[i] * cfive)));
355  d2u[i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 * e[i] + cL * f[i] * c20));
356  }
357  }
358  }
359 
360  /** compute upto 3rd derivatives */
361  inline void evaluate(T r, T* restrict u, T* restrict du, T* restrict d2u, T* restrict d3u) const
362  {
363  if (r < myGrid.lower_bound)
364  {
365  const T dr = r - myGrid.lower_bound;
366  const T* restrict a = coeffs_[0];
367  for (size_t i = 0; i < num_splines_; ++i)
368  {
369  u[i] = a[i] + first_deriv_[i] * dr;
370  du[i] = first_deriv_[i];
371  d2u[i] = 0.0;
372  d3u[i] = 0.0;
373  }
374  }
375  else
376  {
377  int loc;
378  const auto cL = myGrid.getCLForQuintic(r, loc);
379  const size_t offset = loc * 6;
380 
381  constexpr T ctwo(2);
382  constexpr T cthree(3);
383  constexpr T cfour(4);
384  constexpr T cfive(5);
385  constexpr T csix(6);
386  constexpr T c12(12);
387  constexpr T c20(20);
388  constexpr T c24(24);
389  constexpr T c60(60);
390 
391  const T* restrict a = coeffs_[offset + 0];
392  const T* restrict b = coeffs_[offset + 1];
393  const T* restrict c = coeffs_[offset + 2];
394  const T* restrict d = coeffs_[offset + 3];
395  const T* restrict e = coeffs_[offset + 4];
396  const T* restrict f = coeffs_[offset + 5];
397 
398  for (size_t i = 0; i < num_splines_; ++i)
399  {
400  u[i] = a[i] + cL * (b[i] + cL * (c[i] + cL * (d[i] + cL * (e[i] + cL * f[i]))));
401  du[i] = b[i] + cL * (ctwo * c[i] + cL * (cthree * d[i] + cL * (cfour * e[i] + cL * f[i] * cfive)));
402  d2u[i] = ctwo * c[i] + cL * (csix * d[i] + cL * (c12 * e[i] + cL * f[i] * c20));
403  d3u[i] = csix * d[i] + cL * (c24 * e[i] + cL * f[i] * c60);
404  }
405  }
406  }
407 
408  /** initialize grid and container
409  * @param ri minimum grid point
410  * @param rf maximum grid point
411  * @param npts number of grid points
412  * @param n number of splines
413  * @param oreder 5=quintic and 3=cubic
414  */
415  template<typename GT>
416  void initialize(GT& agrid, int norbs, int order = 5)
417  {
418  myGrid.set(agrid.rmin(), agrid.rmax(), agrid.size());
419  r_max = myGrid.upper_bound;
420  if (coeffs_.size())
421  throw std::runtime_error("MultiQuinticSpline1D::initialize cannot reinitialize coeffs.");
422 
423  spline_order = order;
424  num_splines_ = norbs;
425  coeffs_.resize((order + 1) * agrid.size(), getAlignedSize<T>(norbs));
426  first_deriv_.resize(num_splines_);
427  }
428 
429  template<typename T1>
430  void add_spline(int ispline, OneDimQuinticSpline<T1>& in)
431  {
432  first_deriv_[ispline] = in.first_deriv;
433  //if(spline_order==QUINTIC)
434  {
435  const T1* restrict A = in.m_Y.data();
436  const T1* restrict B = in.B.data();
437  const T1* restrict C = in.m_Y2.data();
438  const T1* restrict D = in.D.data();
439  const T1* restrict E = in.E.data();
440  const T1* restrict F = in.F.data();
441  T* restrict out = coeffs_.data();
442  const size_t ncols = coeffs_.cols();
443  const size_t num_points = in.size();
444  for (size_t i = 0; i < num_points; ++i)
445  {
446  out[(i * 6 + 0) * ncols + ispline] = static_cast<T>(A[i]);
447  out[(i * 6 + 1) * ncols + ispline] = static_cast<T>(B[i]);
448  out[(i * 6 + 2) * ncols + ispline] = static_cast<T>(C[i]);
449  out[(i * 6 + 3) * ncols + ispline] = static_cast<T>(D[i]);
450  out[(i * 6 + 4) * ncols + ispline] = static_cast<T>(E[i]);
451  out[(i * 6 + 5) * ncols + ispline] = static_cast<T>(F[i]);
452  }
453  }
454  }
455 
456  /// finalize the construction
457  void finalize()
458  {
459  first_deriv_.updateTo();
460  coeffs_.updateTo();
461  }
462 
463  int getNumSplines() const { return num_splines_; }
464  void setNumSplines(int num_splines) { num_splines_ = num_splines; }
465 };
466 
467 extern template class MultiQuinticSpline1D<float>;
468 extern template class MultiQuinticSpline1D<double>;
469 } // namespace qmcplusplus
470 #endif
multivalue implementation for OneDimQuintic Real valued only calling any eval method with r >= r_max ...
data_type m_Y
data for the function on the grid
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
void batched_evaluateVGL(const OffloadArray2D &r, OffloadArray4D &vgl, T Rmax) const
evaluate value, first deriv, second deriv of MultiQuinticSpline1D for multiple electrons and multiple...
Type_t * data_at(const std::array< SIZET, D > &indices)
Definition: OhmmsArray.h:104
Type_t * data()
Definition: OhmmsArray.h:87
size_t num_splines_
number of splines
LogGridLight< T > myGrid
will be the real grid
Vector< T, OffloadPinnedAllocator< T > > & first_deriv_
void resize(size_type n, size_type m)
Resize the container.
Definition: OhmmsMatrix.h:99
TinyVector< T, 3 > lower_bound(const TinyVector< T, 3 > &a, const TinyVector< T, 3 > &b)
helper function to determine the lower bound of a domain (need to move up)
size_type cols() const
Definition: OhmmsMatrix.h:78
An abstract base class to implement a One-Dimensional grid.
Decalaration of One-Dimesional grids.
void evaluate(T r, T *restrict u, T *restrict du, T *restrict d2u) const
size_type size() const
Definition: OhmmsMatrix.h:76
void updateTo(size_type size=0, std::ptrdiff_t offset=0)
Definition: OhmmsMatrix.h:371
void evaluate(T r, T *restrict u, T *restrict du, T *restrict d2u, T *restrict d3u) const
compute upto 3rd derivatives
size_t size() const
Definition: OhmmsArray.h:57
const std::shared_ptr< CoeffType > coeffs_ptr_
coeffs[6*spline_points][num_splines+padding]
MakeReturn< UnaryNode< FnExp, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t exp(const Vector< T1, C1 > &l)
OMPallocator is an allocator with fused device and dualspace allocator functionality.
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)
void finalize()
finalize the construction
void batched_evaluate(const OffloadArray2D &r, OffloadArray3D &u, T Rmax) const
evaluate MultiQuinticSpline1D for multiple electrons and multiple pbc images
void evaluate(T r, T *restrict u) const
void add_spline(int ispline, OneDimQuinticSpline< T1 > &in)
int size() const
return the number of data points
double B(double x, int k, int i, const std::vector< double > &t)
static T getCL(T r, int &loc, double one_over_log_delta, T lower_bound, double log_delta)
A D-dimensional Array class based on PETE.
Definition: OhmmsArray.h:25
T getCLForQuintic(T r, int &loc) const
void initialize(GT &agrid, int norbs, int order=5)
initialize grid and container
const std::shared_ptr< Vector< T, OffloadPinnedAllocator< T > > > first_deriv_ptr_