QMCPACK
SplineC2ROMPTarget.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 /** @file SplineC2ROMPTarget.h
14  *
15  * class to handle complex splines to real orbitals with splines of arbitrary precision
16  * splines storage and computation is offloaded to accelerators using OpenMP target
17  */
18 #ifndef QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H
19 #define QMCPLUSPLUS_SPLINE_C2R_OMPTARGET_H
20 
21 #include <memory>
24 #include "spline2/MultiBspline.hpp"
26 #include "Utilities/FairDivide.h"
27 #include "Utilities/TimerManager.h"
28 #include <ResourceHandle.h>
30 
31 namespace qmcplusplus
32 {
33 /** class to match std::complex<ST> spline with BsplineSet::ValueType (real) SPOs with OpenMP offload
34  * @tparam ST precision of spline
35  *
36  * Requires temporary storage and multiplication of phase vectors
37  * The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded.
38  * Calling assign_v and assign_vgl should be restricted to the actual number of complex splines (kPoints.size()).
39  * The first nComplexBands complex splines produce 2 real orbitals.
40  * The remaining complex splines produce 1 real orbital.
41  * All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize.
42  */
43 template<typename ST>
45 {
46 public:
47  using SplineType = typename bspline_traits<ST, 3>::SplineType;
48  using BCType = typename bspline_traits<ST, 3>::BCType;
49  using DataType = ST;
51  using SingleSplineType = UBspline_3d_d;
52  // types for evaluation results
53  using TT = typename BsplineSet::ValueType;
58 
63 
64  template<typename DT>
66  template<typename DT>
68 
69 private:
70  /// timer for offload portion
72  ///primitive cell
74  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
76  ///number of complex bands
78  ///multi bspline set
79  std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>> SplineInst;
80 
81  std::shared_ptr<OffloadVector<ST>> mKK;
82  std::shared_ptr<OffloadPosVector<ST>> myKcart;
83  std::shared_ptr<OffloadVector<ST>> GGt_offload;
84  std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload;
85 
87 
88  ///team private ratios for reduction, numVP x numTeams
90  ///offload scratch space, dynamically resized to the maximal need
92  ///result scratch space, dynamically resized to the maximal need
94  ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer
96  ///position scratch space, used to avoid allocation on the fly and faster transfer
98 
102  const RefVector<ValueVector>& psi_v_list,
103  const RefVector<GradVector>& dpsi_v_list,
104  const RefVector<ValueVector>& d2psi_v_list) const;
105 
106 protected:
107  /// intermediate result vectors
113 
114 public:
115  SplineC2ROMPTarget(const std::string& my_name)
116  : BsplineSet(my_name),
117  offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)),
118  nComplexBands(0),
119  GGt_offload(std::make_shared<OffloadVector<ST>>(9)),
120  PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9))
121  {}
122 
124 
125  virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; }
126  virtual std::string getKeyword() const override { return "SplineC2R"; }
127  bool isComplex() const override { return true; };
128  virtual bool isOMPoffload() const override { return true; }
129 
130  void createResource(ResourceCollection& collection) const override
131  {
132  auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, TT>>());
133  }
134 
135  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override
136  {
137  assert(this == &spo_list.getLeader());
138  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
140  }
141 
142  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override
143  {
144  assert(this == &spo_list.getLeader());
145  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
146  collection.takebackResource(phi_leader.mw_mem_handle_);
147  }
148 
149  std::unique_ptr<SPOSet> makeClone() const override { return std::make_unique<SplineC2ROMPTarget>(*this); }
150 
151  inline void resizeStorage(size_t n, size_t nvals)
152  {
153  init_base(n);
154  size_t npad = getAlignedSize<ST>(2 * n);
155  myV.resize(npad);
156  myG.resize(npad);
157  myL.resize(npad);
158  myH.resize(npad);
159  mygH.resize(npad);
160  }
161 
162  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
163 
165  {
166  if (comm->size() == 1)
167  return;
168  const int Nbands = kPoints.size();
169  const int Nbandgroups = comm->size();
170  offset.resize(Nbandgroups + 1, 0);
171  FairDivideLow(Nbands, Nbandgroups, offset);
172 
173  for (size_t ib = 0; ib < offset.size(); ib++)
174  offset[ib] = offset[ib] * 2;
175  gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset);
176  }
177 
178  template<typename GT, typename BCT>
179  void create_spline(GT& xyz_g, BCT& xyz_bc)
180  {
181  resize_kpoints();
182  SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>();
183  SplineInst->create(xyz_g, xyz_bc, myV.size());
184 
185  app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
186  << "for the coefficients in 3D spline orbital representation" << std::endl;
187  }
188 
189  /// this routine can not be called from threaded region
190  void finalizeConstruction() override
191  {
192  // map the SplineInst->getSplinePtr() structure to GPU
193  auto* MultiSpline = SplineInst->getSplinePtr();
194  auto* restrict coefs = MultiSpline->coefs;
195  // attach pointers on the device to achieve deep copy
196  PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])")
197  {
198  MultiSpline->coefs = coefs;
199  }
200 
201  // transfer static data to GPU
202  auto* mKK_ptr = mKK->data();
203  PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
204  auto* myKcart_ptr = myKcart->data();
205  PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
206  for (uint32_t i = 0; i < 9; i++)
207  {
208  (*GGt_offload)[i] = GGt[i];
209  (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
210  }
211  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
212  PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
213  auto* GGt_ptr = GGt_offload->data();
214  PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
215  }
216 
217  inline void flush_zero() { SplineInst->flush_zero(); }
218 
219  /** remap kPoints to pack the double copy */
220  inline void resize_kpoints()
221  {
222  nComplexBands = this->remap_kpoints();
223  const int nk = kPoints.size();
224  mKK = std::make_shared<OffloadVector<ST>>(nk);
225  myKcart = std::make_shared<OffloadPosVector<ST>>(nk);
226  for (size_t i = 0; i < nk; ++i)
227  {
228  (*mKK)[i] = -dot(kPoints[i], kPoints[i]);
229  (*myKcart)(i) = kPoints[i];
230  }
231  }
232 
233  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
234 
235  bool read_splines(hdf_archive& h5f);
236 
237  bool write_splines(hdf_archive& h5f);
238 
239  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
240 
241  virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector& psi) override;
242 
243  virtual void evaluateDetRatios(const VirtualParticleSet& VP,
244  ValueVector& psi,
245  const ValueVector& psiinv,
246  std::vector<ValueType>& ratios) override;
247 
248  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSet>& spo_list,
250  const RefVector<ValueVector>& psi_list,
251  const std::vector<const ValueType*>& invRow_ptr_list,
252  std::vector<std::vector<ValueType>>& ratios_list) const override;
253 
254  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian
255  */
256  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
257 
258  virtual void evaluateVGL(const ParticleSet& P,
259  const int iat,
260  ValueVector& psi,
261  GradVector& dpsi,
262  ValueVector& d2psi) override;
263 
264  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& sa_list,
265  const RefVectorWithLeader<ParticleSet>& P_list,
266  int iat,
267  const RefVector<ValueVector>& psi_v_list,
268  const RefVector<GradVector>& dpsi_v_list,
269  const RefVector<ValueVector>& d2psi_v_list) const override;
270 
271  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list,
272  const RefVectorWithLeader<ParticleSet>& P_list,
273  int iat,
274  const std::vector<const ValueType*>& invRow_ptr_list,
275  OffloadMWVGLArray& phi_vgl_v,
276  std::vector<ValueType>& ratios,
277  std::vector<GradType>& grads) const override;
278 
279  void assign_vgh(const PointType& r,
280  ValueVector& psi,
281  GradVector& dpsi,
282  HessVector& grad_grad_psi,
283  int first,
284  int last) const;
285 
286  virtual void evaluateVGH(const ParticleSet& P,
287  const int iat,
288  ValueVector& psi,
289  GradVector& dpsi,
290  HessVector& grad_grad_psi) override;
291 
292  void assign_vghgh(const PointType& r,
293  ValueVector& psi,
294  GradVector& dpsi,
295  HessVector& grad_grad_psi,
296  GGGVector& grad_grad_grad_psi,
297  int first = 0,
298  int last = -1) const;
299 
300  virtual void evaluateVGHGH(const ParticleSet& P,
301  const int iat,
302  ValueVector& psi,
303  GradVector& dpsi,
304  HessVector& grad_grad_psi,
305  GGGVector& grad_grad_grad_psi) override;
306 
307  virtual void evaluate_notranspose(const ParticleSet& P,
308  int first,
309  int last,
310  ValueMatrix& logdet,
311  GradMatrix& dlogdet,
312  ValueMatrix& d2logdet) override;
313 
314  template<class BSPLINESPO>
315  friend class SplineSetReader;
316  friend struct BsplineReader;
317 };
318 
319 extern template class SplineC2ROMPTarget<float>;
320 extern template class SplineC2ROMPTarget<double>;
321 
322 } // namespace qmcplusplus
323 #endif
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
acquire a shared resource from collection
void resize(size_type n, Type_t val=Type_t())
Resize the container.
Definition: OhmmsVector.h:166
OrbitalSetTraits< ValueType >::HessVector HessVector
Definition: SPOSet.h:53
Vector< ST, OffloadPinnedAllocator< ST > > multi_pos_copy
position scratch space, used to avoid allocation on the fly and faster transfer
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
Matrix< TT, OffloadPinnedAllocator< TT > > ratios_private
team private ratios for reduction, numVP x numTeams
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
void takebackResource(ResourceHandle< RS > &res_handle)
Vector< ST, aligned_allocator< ST > > vContainer_type
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
timer_manager class.
BsplineSet is the base class for SplineC2C, SplineC2R, SplineR2R.
Definition: BsplineSet.h:34
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const override
evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ra...
std::ostream & app_log()
Definition: OutputManager.h:65
virtual void evaluateVGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
evaluate the values, gradients and hessians of this single-particle orbital set
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
virtual void mw_evaluateDetRatios(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const override
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers ...
ResourceHandle manages the temporary resource referenced from a collection.
CrystalLattice< ST, 3 > PrimLattice
primitive cell
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
Soa Container for D-dim vectors.
OrbitalSetTraits< ValueType >::ValueMatrix ValueMatrix
Definition: SPOSet.h:50
void assign_vgl_from_l(const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian ...
NewTimer & offload_timer_
timer for offload portion
class to handle hdf file
Definition: hdf_archive.h:51
Timer accumulates time and call counts.
Definition: NewTimer.h:135
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
void finalizeConstruction() override
this routine can not be called from threaded region
OrbitalSetTraits< ValueType >::ValueVector ValueVector
int size() const
return the number of tasks
Definition: Communicate.h:118
bool read_splines(hdf_archive &h5f)
void evaluateVGLMultiPos(const Vector< ST, OffloadPinnedAllocator< ST >> &multi_pos_copy, Vector< ST, OffloadPinnedAllocator< ST >> &offload_scratch, Vector< TT, OffloadPinnedAllocator< TT >> &results_scratch, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
Tensor< ST, 3 > GGt
$GGt = G^t G$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
void FairDivideLow(int ntot, int npart, IV &adist)
partition ntot elements among npart
Definition: FairDivide.h:114
OrbitalSetTraits< ValueType >::GradMatrix GradMatrix
Definition: SPOSet.h:52
Wrapping information on parallelism.
Definition: Communicate.h:68
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
void bcast_tables(Communicate *comm)
Each SplineC2X needs a reader derived from BsplineReader.
Definition: BsplineReader.h:39
Specialized particle class for atomistic simulations.
Definition: ParticleSet.h:55
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
General SplineSetReader to handle any unitcell.
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
std::vector< int > offset
band offsets used for communication
Definition: BsplineSet.h:56
vContainer_type myV
intermediate result vectors
A collection of functions for dividing fairly.
size_type size() const
return the current size
Definition: OhmmsVector.h:162
class to match std::complex<ST> spline with BsplineSet::ValueType (real) SPOs with OpenMP offload ...
Array< ValueType, 3, OffloadPinnedAllocator< ValueType > > OffloadMWVGLArray
Definition: SPOSet.h:58
virtual void mw_evaluateVGL(const RefVectorWithLeader< SPOSet > &sa_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const override
evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walker...
bool write_splines(hdf_archive &h5f)
QTBase::ValueType ValueType
Definition: Configuration.h:60
ResourceHandle< SplineOMPTargetMultiWalkerMem< ST, TT > > mw_mem_handle_
void create_spline(GT &xyz_g, BCT &xyz_bc)
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
virtual void evaluateDetRatios(const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) override
evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
std::unique_ptr< SPOSet > makeClone() const override
make a clone of itself every derived class must implement this to have threading working correctly...
typename bspline_traits< ST, 3 >::BCType BCType
virtual std::string getKeyword() const override
BsplineSet is a SPOSet derived class and serves as a base class for B-spline SPO C2C/C2R/R2R implemen...
virtual void evaluateVGHGH(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set ...
std::shared_ptr< OffloadVector< ST > > mKK
OMPallocator is an allocator with fused device and dualspace allocator functionality.
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
void createResource(ResourceCollection &collection) const override
initialize a shared resource and hand it to collection
std::vector< std::reference_wrapper< T > > RefVector
Vector< TT, OffloadPinnedAllocator< TT > > psiinv_pos_copy
psiinv and position scratch space, used to avoid allocation on the fly and faster transfer ...
void gather_tables(Communicate *comm)
virtual bool isOMPoffload() const override
Query if this SPOSet uses OpenMP offload.
void set_spline(SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
OrbitalSetTraits< ValueType >::GradHessVector GGGVector
Definition: SPOSet.h:55
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
typename BsplineSet::ValueType TT
void init_base(int n)
Definition: BsplineSet.h:66
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
ResourceHandle< RS > lendResource()
virtual void evaluate_notranspose(const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet) override
evaluate the values, gradients and laplacians of this single-particle orbital for [first...
void resize_kpoints()
remap kPoints to pack the double copy
void resize(size_type n)
resize myData
virtual void evaluateVGL(const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
evaluate the values, gradients and laplacians of this single-particle orbital set ...
SplineC2ROMPTarget(const std::string &my_name)
void gatherv(T *sb, T *rb, int n, IT &counts, IT &displ, int dest)
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
return a shared resource to collection
int remap_kpoints()
remap kpoints to group general kpoints & special kpoints
Definition: BsplineSet.h:76
virtual void evaluateValue(const ParticleSet &P, const int iat, ValueVector &psi) override
evaluate the values of this single-particle orbital set
void resizeStorage(size_t n, size_t nvals)
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
typename bspline_traits< ST, 3 >::SplineType SplineType
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52
virtual std::string getClassName() const override
return class name