QMCPACK
SplineC2COMPTarget< ST > Class Template Reference

class to match std::complex<ST> spline with BsplineSet::ValueType (complex) SPOs with OpenMP offload More...

+ Inheritance diagram for SplineC2COMPTarget< ST >:
+ Collaboration diagram for SplineC2COMPTarget< ST >:

Public Types

using SplineType = typename bspline_traits< ST, 3 >::SplineType
 
using BCType = typename bspline_traits< ST, 3 >::BCType
 
using DataType = ST
 
using PointType = TinyVector< ST, 3 >
 
using SingleSplineType = UBspline_3d_d
 
using ComplexT = typename BsplineSet::ValueType
 
using vContainer_type = Vector< ST, aligned_allocator< ST > >
 
using gContainer_type = VectorSoaContainer< ST, 3 >
 
using hContainer_type = VectorSoaContainer< ST, 6 >
 
using ghContainer_type = VectorSoaContainer< ST, 10 >
 
template<typename DT >
using OffloadVector = Vector< DT, OffloadAllocator< DT > >
 
template<typename DT >
using OffloadPosVector = VectorSoaContainer< DT, 3, OffloadAllocator< DT > >
 
- Public Types inherited from SPOSet
using ValueVector = OrbitalSetTraits< ValueType >::ValueVector
 
using ValueMatrix = OrbitalSetTraits< ValueType >::ValueMatrix
 
using GradVector = OrbitalSetTraits< ValueType >::GradVector
 
using GradMatrix = OrbitalSetTraits< ValueType >::GradMatrix
 
using HessVector = OrbitalSetTraits< ValueType >::HessVector
 
using HessMatrix = OrbitalSetTraits< ValueType >::HessMatrix
 
using GGGVector = OrbitalSetTraits< ValueType >::GradHessVector
 
using GGGMatrix = OrbitalSetTraits< ValueType >::GradHessMatrix
 
using SPOMap = std::map< std::string, const std::unique_ptr< const SPOSet > >
 
using OffloadMWVGLArray = Array< ValueType, 3, OffloadPinnedAllocator< ValueType > >
 
using OffloadMWVArray = Array< ValueType, 2, OffloadPinnedAllocator< ValueType > >
 
template<typename DT >
using OffloadMatrix = Matrix< DT, OffloadPinnedAllocator< DT > >
 
- Public Types inherited from QMCTraits
enum  { DIM = OHMMS_DIM, DIM_VGL = OHMMS_DIM + 2 }
 
using QTBase = QMCTypes< OHMMS_PRECISION, DIM >
 
using QTFull = QMCTypes< OHMMS_PRECISION_FULL, DIM >
 
using RealType = QTBase::RealType
 
using ComplexType = QTBase::ComplexType
 
using ValueType = QTBase::ValueType
 
using PosType = QTBase::PosType
 
using GradType = QTBase::GradType
 
using TensorType = QTBase::TensorType
 
using IndexType = OHMMS_INDEXTYPE
 define other types More...
 
using FullPrecRealType = QTFull::RealType
 
using FullPrecValueType = QTFull::ValueType
 
using PropertySetType = RecordNamedProperty< FullPrecRealType >
 define PropertyList_t More...
 
using PtclGrpIndexes = std::vector< std::pair< int, int > >
 

Public Member Functions

 SplineC2COMPTarget (const std::string &my_name)
 
 SplineC2COMPTarget (const SplineC2COMPTarget &in)
 
virtual std::string getClassName () const override
 return class name More...
 
virtual std::string getKeyword () const override
 
bool isComplex () const override
 
virtual bool isOMPoffload () const override
 Query if this SPOSet uses OpenMP offload. More...
 
void createResource (ResourceCollection &collection) const override
 initialize a shared resource and hand it to collection More...
 
void acquireResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
 acquire a shared resource from collection More...
 
void releaseResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
 return a shared resource to collection More...
 
std::unique_ptr< SPOSetmakeClone () const override
 make a clone of itself every derived class must implement this to have threading working correctly. More...
 
void resizeStorage (size_t n, size_t nvals)
 
void bcast_tables (Communicate *comm)
 
void gather_tables (Communicate *comm)
 
template<typename GT , typename BCT >
void create_spline (GT &xyz_g, BCT &xyz_bc)
 
void finalizeConstruction () override
 this routine can not be called from threaded region More...
 
void flush_zero ()
 
void resize_kpoints ()
 remap kPoints to pack the double copy More...
 
void set_spline (SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
 
bool read_splines (hdf_archive &h5f)
 
bool write_splines (hdf_archive &h5f)
 
void assign_v (const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
 
virtual void evaluateValue (const ParticleSet &P, const int iat, ValueVector &psi) override
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateDetRatios (const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) override
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP More...
 
virtual void mw_evaluateDetRatios (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const override
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers More...
 
void assign_vgl_from_l (const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
 assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian More...
 
virtual void evaluateVGL (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
 evaluate the values, gradients and laplacians of this single-particle orbital set More...
 
virtual void mw_evaluateVGL (const RefVectorWithLeader< SPOSet > &sa_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const override
 evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGrads (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const override
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
void assign_vgh (const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
 
virtual void evaluateVGH (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
 evaluate the values, gradients and hessians of this single-particle orbital set More...
 
void assign_vghgh (const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
 
virtual void evaluateVGHGH (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
 evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set More...
 
virtual void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet) override
 evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles More...
 
- Public Member Functions inherited from BsplineSet
 BsplineSet (const std::string &my_name)
 
auto & getHalfG () const
 
void init_base (int n)
 
int remap_kpoints ()
 remap kpoints to group general kpoints & special kpoints More...
 
void setOrbitalSetSize (int norbs) override
 set the OrbitalSetSize More...
 
void mw_evaluate_notranspose (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int first, int last, const RefVector< ValueMatrix > &logdet_list, const RefVector< GradMatrix > &dlogdet_list, const RefVector< ValueMatrix > &d2logdet_list) const override
 
void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, HessMatrix &grad_grad_logdet) override
 evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles More...
 
void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, HessMatrix &grad_grad_logdet, GGGMatrix &grad_grad_grad_logdet) override
 evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles More...
 
void evaluateGradSource (const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &gradphi) override
 evaluate the gradients of this single-particle orbital for [first,last) target particles with respect to the given source particle More...
 
void evaluateGradSource (const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &grad_phi, HessMatrix &grad_grad_phi, GradMatrix &grad_lapl_phi) override
 evaluate the gradients of values, gradients, laplacians of this single-particle orbital for [first,last) target particles with respect to the given source particle More...
 
virtual void evaluateDetRatios (const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios)
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP More...
 
virtual void evaluateValue (const ParticleSet &P, int iat, ValueVector &psi)=0
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateVGH (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi)
 evaluate the values, gradients and hessians of this single-particle orbital set More...
 
virtual void evaluateVGHGH (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi)
 evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set More...
 
virtual void evaluateVGL (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)=0
 evaluate the values, gradients and laplacians of this single-particle orbital set More...
 
virtual void finalizeConstruction ()
 finalize the construction of SPOSet More...
 
virtual void mw_evaluateDetRatios (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType * > &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers More...
 
virtual void mw_evaluateVGL (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGrads (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType * > &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
virtual void acquireResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const
 acquire a shared resource from collection More...
 
virtual void createResource (ResourceCollection &collection) const
 initialize a shared resource and hand it to collection More...
 
virtual void releaseResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const
 return a shared resource to collection More...
 
- Public Member Functions inherited from SPOSet
 SPOSet (const std::string &my_name)
 constructor More...
 
virtual ~SPOSet ()=default
 destructor More...
 
int size () const
 return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); More...
 
void basic_report (const std::string &pad="") const
 print basic SPOSet information More...
 
virtual void report (const std::string &pad="") const
 print SPOSet information More...
 
int getOrbitalSetSize () const
 return the size of the orbitals More...
 
virtual bool isOptimizable () const
 Query if this SPOSet is optimizable. More...
 
virtual void extractOptimizableObjectRefs (UniqueOptObjRefs &opt_obj_refs)
 extract underlying OptimizableObject references More...
 
virtual void checkOutVariables (const opt_variables_type &active)
 check out variational optimizable variables More...
 
virtual bool hasIonDerivs () const
 Query if this SPOSet has an explicit ion dependence. More...
 
virtual void checkObject () const
 check a few key parameters before putting the SPO into a determinant More...
 
virtual bool isRotationSupported () const
 return true if this SPOSet can be wrappered by RotatedSPO More...
 
virtual void storeParamsBeforeRotation ()
 store parameters before getting destroyed by rotation. More...
 
virtual void applyRotation (const ValueMatrix &rot_mat, bool use_stored_copy=false)
 apply rotation to all the orbitals More...
 
virtual void evaluateDerivatives (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, Vector< ValueType > &dhpsioverpsi, const int &FirstIndex, const int &LastIndex)
 Parameter derivatives of the wavefunction and the Laplacian of the wavefunction. More...
 
virtual void evaluateDerivativesWF (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, int FirstIndex, int LastIndex)
 Parameter derivatives of the wavefunction. More...
 
virtual void evaluateDerivatives (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, Vector< ValueType > &dhpsioverpsi, const ValueType &psiCurrent, const std::vector< ValueType > &Coeff, const std::vector< size_t > &C2node_up, const std::vector< size_t > &C2node_dn, const ValueVector &detValues_up, const ValueVector &detValues_dn, const GradMatrix &grads_up, const GradMatrix &grads_dn, const ValueMatrix &lapls_up, const ValueMatrix &lapls_dn, const ValueMatrix &M_up, const ValueMatrix &M_dn, const ValueMatrix &Minv_up, const ValueMatrix &Minv_dn, const GradMatrix &B_grad, const ValueMatrix &B_lapl, const std::vector< int > &detData_up, const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, const std::vector< std::vector< int >> &lookup_tbl)
 Evaluate the derivative of the optimized orbitals with respect to the parameters this is used only for MSD, to be refined for better serving both single and multi SD. More...
 
virtual void evaluateDerivativesWF (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, const QTFull::ValueType &psiCurrent, const std::vector< ValueType > &Coeff, const std::vector< size_t > &C2node_up, const std::vector< size_t > &C2node_dn, const ValueVector &detValues_up, const ValueVector &detValues_dn, const ValueMatrix &M_up, const ValueMatrix &M_dn, const ValueMatrix &Minv_up, const ValueMatrix &Minv_dn, const std::vector< int > &detData_up, const std::vector< std::vector< int >> &lookup_tbl)
 Evaluate the derivative of the optimized orbitals with respect to the parameters this is used only for MSD, to be refined for better serving both single and multi SD. More...
 
virtual void evaluateDetSpinorRatios (const VirtualParticleSet &VP, ValueVector &psi, const std::pair< ValueVector, ValueVector > &spinor_multiplier, const ValueVector &invrow, std::vector< ValueType > &ratios)
 evaluate determinant ratios for virtual moves, specifically for Spinor SPOSets More...
 
virtual void evaluateDerivRatios (const VirtualParticleSet &VP, const opt_variables_type &optvars, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios, Matrix< ValueType > &dratios, int FirstIndex, int LastIndex)
 Determinant ratios and parameter derivatives of the wavefunction for virtual moves. More...
 
virtual void evaluateVGL_spin (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi, ValueVector &dspin)
 evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set More...
 
virtual void mw_evaluateValue (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const
 evaluate the values this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLWithSpin (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list, OffloadMatrix< ComplexType > &mw_dspin) const
 evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGradsWithSpin (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads, std::vector< ValueType > &spingrads) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
virtual void evaluate_spin (const ParticleSet &P, int iat, ValueVector &psi, ValueVector &dpsi)
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateThirdDeriv (const ParticleSet &P, int first, int last, GGGMatrix &grad_grad_grad_logdet)
 evaluate the third derivatives of this single-particle orbital set More...
 
virtual void evaluate_notranspose_spin (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet, ValueMatrix &dspinlogdet)
 evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient More...
 
virtual void evaluateGradSourceRow (const ParticleSet &P, int iel, const ParticleSet &source, int iat_src, GradVector &gradphi)
 Returns a row of d/dR_iat phi_j(r) evaluated at position r. More...
 
virtual PosType get_k (int orb)
 access the k point related to the given orbital More...
 
virtual bool transformSPOSet ()
 Used only by cusp correction in AOS LCAO. More...
 
const std::string & getName () const
 return object name More...
 

Protected Attributes

vContainer_type myV
 intermediate result vectors More...
 
vContainer_type myL
 
gContainer_type myG
 
hContainer_type myH
 
ghContainer_type mygH
 
- Protected Attributes inherited from BsplineSet
size_t MyIndex
 Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases. More...
 
size_t first_spo
 first index of the SPOs this Spline handles More...
 
size_t last_spo
 last index of the SPOs this Spline handles More...
 
TinyVector< int, DHalfG
 sign bits at the G/2 boundaries More...
 
std::vector< bool > MakeTwoCopies
 flags to unpack sin/cos More...
 
std::vector< SPOSet::PosTypekPoints
 kpoints for each unique orbitals. More...
 
aligned_vector< int > BandIndexMap
 remap splines to orbitals More...
 
std::vector< int > offset
 band offsets used for communication More...
 
- Protected Attributes inherited from SPOSet
const std::string my_name_
 name of the object, unique identifier More...
 
IndexType OrbitalSetSize
 number of Single-particle orbitals More...
 
opt_variables_type myVars
 Optimizable variables. More...
 

Private Member Functions

void evaluateVGLMultiPos (const Vector< ST, OffloadPinnedAllocator< ST >> &multi_pos_copy, Vector< ST, OffloadPinnedAllocator< ST >> &offload_scratch, Vector< ComplexT, OffloadPinnedAllocator< ComplexT >> &results_scratch, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
 

Private Attributes

NewTimeroffload_timer_
 timer for offload portion More...
 
CrystalLattice< ST, 3 > PrimLattice
 primitive cell More...
 
Tensor< ST, 3 > GGt
 $GGt=G^t G $, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian More...
 
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
 multi bspline set More...
 
std::shared_ptr< OffloadVector< ST > > mKK
 
std::shared_ptr< OffloadPosVector< ST > > myKcart
 
std::shared_ptr< OffloadVector< ST > > GGt_offload
 
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
 
ResourceHandle< SplineOMPTargetMultiWalkerMem< ST, ComplexT > > mw_mem_handle_
 
Matrix< ComplexT, OffloadPinnedAllocator< ComplexT > > ratios_private
 team private ratios for reduction, numVP x numTeams More...
 
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
 offload scratch space, dynamically resized to the maximal need More...
 
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > results_scratch
 result scratch space, dynamically resized to the maximal need More...
 
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > psiinv_pos_copy
 psiinv and position scratch space, used to avoid allocation on the fly and faster transfer More...
 
Vector< ST, OffloadPinnedAllocator< ST > > multi_pos_copy
 position scratch space, used to avoid allocation on the fly and faster transfer More...
 

Friends

template<class BSPLINESPO >
class SplineSetReader
 
struct BsplineReader
 

Additional Inherited Members

- Static Protected Attributes inherited from BsplineSet
static const int D = DIM
 

Detailed Description

template<typename ST>
class qmcplusplus::SplineC2COMPTarget< ST >

class to match std::complex<ST> spline with BsplineSet::ValueType (complex) SPOs with OpenMP offload

Template Parameters
STprecision of spline

Requires temporage storage and multiplication of phase vectors The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. All the output orbitals are complex.

Definition at line 41 of file SplineC2COMPTarget.h.

Member Typedef Documentation

◆ BCType

using BCType = typename bspline_traits<ST, 3>::BCType

Definition at line 45 of file SplineC2COMPTarget.h.

◆ ComplexT

using ComplexT = typename BsplineSet::ValueType

Definition at line 50 of file SplineC2COMPTarget.h.

◆ DataType

using DataType = ST

Definition at line 46 of file SplineC2COMPTarget.h.

◆ gContainer_type

Definition at line 57 of file SplineC2COMPTarget.h.

◆ ghContainer_type

Definition at line 59 of file SplineC2COMPTarget.h.

◆ hContainer_type

Definition at line 58 of file SplineC2COMPTarget.h.

◆ OffloadPosVector

Definition at line 64 of file SplineC2COMPTarget.h.

◆ OffloadVector

Definition at line 62 of file SplineC2COMPTarget.h.

◆ PointType

using PointType = TinyVector<ST, 3>

Definition at line 47 of file SplineC2COMPTarget.h.

◆ SingleSplineType

using SingleSplineType = UBspline_3d_d

Definition at line 48 of file SplineC2COMPTarget.h.

◆ SplineType

using SplineType = typename bspline_traits<ST, 3>::SplineType

Definition at line 44 of file SplineC2COMPTarget.h.

◆ vContainer_type

Definition at line 56 of file SplineC2COMPTarget.h.

Constructor & Destructor Documentation

◆ SplineC2COMPTarget() [1/2]

SplineC2COMPTarget ( const std::string &  my_name)
inline

Definition at line 110 of file SplineC2COMPTarget.h.

111  : BsplineSet(my_name),
112  offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)),
113  GGt_offload(std::make_shared<OffloadVector<ST>>(9)),
114  PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9))
115  {}
BsplineSet(const std::string &my_name)
Definition: BsplineSet.h:59
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
NewTimer & offload_timer_
timer for offload portion

◆ SplineC2COMPTarget() [2/2]

SplineC2COMPTarget ( const SplineC2COMPTarget< ST > &  in)
default

Member Function Documentation

◆ acquireResource()

void acquireResource ( ResourceCollection collection,
const RefVectorWithLeader< SPOSet > &  spo_list 
) const
inlineoverridevirtual

acquire a shared resource from collection

Reimplemented from SPOSet.

Definition at line 129 of file SplineC2COMPTarget.h.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), ResourceCollection::lendResource(), and SplineC2COMPTarget< ST >::mw_mem_handle_.

130  {
131  assert(this == &spo_list.getLeader());
132  auto& phi_leader = spo_list.getCastedLeader<SplineC2COMPTarget<ST>>();
133  phi_leader.mw_mem_handle_ = collection.lendResource<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>();
134  }

◆ assign_v()

void assign_v ( const PointType r,
const vContainer_type myV,
ValueVector psi,
int  first,
int  last 
) const
inline

Definition at line 56 of file SplineC2COMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, and qmcplusplus::sincos().

61 {
62  // protect last
63  const size_t last_cplx = std::min(kPoints.size(), psi.size());
64  last = last > last_cplx ? last_cplx : last;
65 
66  const ST x = r[0], y = r[1], z = r[2];
67  const ST* restrict kx = myKcart->data(0);
68  const ST* restrict ky = myKcart->data(1);
69  const ST* restrict kz = myKcart->data(2);
70 #pragma omp simd
71  for (size_t j = first; j < last; ++j)
72  {
73  ST s, c;
74  const ST val_r = myV[2 * j];
75  const ST val_i = myV[2 * j + 1];
76  omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
77  psi[j + first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s);
78  }
79 }
T min(T a, T b)
typename BsplineSet::ValueType ComplexT
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vgh()

void assign_vgh ( const PointType r,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
int  first,
int  last 
) const

Definition at line 771 of file SplineC2COMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, qmcplusplus::sincos(), and qmcplusplus::v_m_v().

777 {
778  // protect last
779  const size_t last_cplx = std::min(kPoints.size(), psi.size());
780  last = last > last_cplx ? last_cplx : last;
781 
782  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
783  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
784  g22 = PrimLattice.G(8);
785  const ST x = r[0], y = r[1], z = r[2];
786 
787  const ST* restrict k0 = myKcart->data(0);
788  const ST* restrict k1 = myKcart->data(1);
789  const ST* restrict k2 = myKcart->data(2);
790 
791  const ST* restrict g0 = myG.data(0);
792  const ST* restrict g1 = myG.data(1);
793  const ST* restrict g2 = myG.data(2);
794  const ST* restrict h00 = myH.data(0);
795  const ST* restrict h01 = myH.data(1);
796  const ST* restrict h02 = myH.data(2);
797  const ST* restrict h11 = myH.data(3);
798  const ST* restrict h12 = myH.data(4);
799  const ST* restrict h22 = myH.data(5);
800 
801 #pragma omp simd
802  for (size_t j = first; j < last; ++j)
803  {
804  int jr = j << 1;
805  int ji = jr + 1;
806 
807  const ST kX = k0[j];
808  const ST kY = k1[j];
809  const ST kZ = k2[j];
810  const ST val_r = myV[jr];
811  const ST val_i = myV[ji];
812 
813  //phase
814  ST s, c;
815  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
816 
817  //dot(PrimLattice.G,myG[j])
818  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
819  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
820  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
821 
822  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
823  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
824  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
825 
826  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
827  const ST gX_r = dX_r + val_i * kX;
828  const ST gY_r = dY_r + val_i * kY;
829  const ST gZ_r = dZ_r + val_i * kZ;
830  const ST gX_i = dX_i - val_r * kX;
831  const ST gY_i = dY_i - val_r * kY;
832  const ST gZ_i = dZ_i - val_r * kZ;
833 
834  const size_t psiIndex = j + first_spo;
835  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
836  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
837  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
838  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
839 
840  const ST h_xx_r =
841  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
842  const ST h_xy_r =
843  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
844  const ST h_xz_r =
845  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
846  const ST h_yx_r =
847  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
848  const ST h_yy_r =
849  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
850  const ST h_yz_r =
851  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
852  const ST h_zx_r =
853  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
854  const ST h_zy_r =
855  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
856  const ST h_zz_r =
857  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
858 
859  const ST h_xx_i =
860  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
861  const ST h_xy_i =
862  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
863  const ST h_xz_i =
864  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
865  const ST h_yx_i =
866  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
867  const ST h_yy_i =
868  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
869  const ST h_yz_i =
870  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
871  const ST h_zx_i =
872  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
873  const ST h_zy_i =
874  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
875  const ST h_zz_i =
876  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
877 
878  grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
879  grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
880  grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
881  grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
882  grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
883  grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
884  grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
885  grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
886  grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
887  }
888 }
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
T min(T a, T b)
typename BsplineSet::ValueType ComplexT
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vghgh()

void assign_vghgh ( const PointType r,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
GGGVector grad_grad_grad_psi,
int  first = 0,
int  last = -1 
) const

Definition at line 912 of file SplineC2COMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, qmcplusplus::sincos(), qmcplusplus::t3_contract(), and qmcplusplus::v_m_v().

919 {
920  // protect last
921  const size_t last_cplx = std::min(kPoints.size(), psi.size());
922  last = last < 0 ? last_cplx : (last > last_cplx ? last_cplx : last);
923 
924  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
925  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
926  g22 = PrimLattice.G(8);
927  const ST x = r[0], y = r[1], z = r[2];
928 
929  const ST* restrict k0 = myKcart->data(0);
930  const ST* restrict k1 = myKcart->data(1);
931  const ST* restrict k2 = myKcart->data(2);
932 
933  const ST* restrict g0 = myG.data(0);
934  const ST* restrict g1 = myG.data(1);
935  const ST* restrict g2 = myG.data(2);
936  const ST* restrict h00 = myH.data(0);
937  const ST* restrict h01 = myH.data(1);
938  const ST* restrict h02 = myH.data(2);
939  const ST* restrict h11 = myH.data(3);
940  const ST* restrict h12 = myH.data(4);
941  const ST* restrict h22 = myH.data(5);
942 
943  const ST* restrict gh000 = mygH.data(0);
944  const ST* restrict gh001 = mygH.data(1);
945  const ST* restrict gh002 = mygH.data(2);
946  const ST* restrict gh011 = mygH.data(3);
947  const ST* restrict gh012 = mygH.data(4);
948  const ST* restrict gh022 = mygH.data(5);
949  const ST* restrict gh111 = mygH.data(6);
950  const ST* restrict gh112 = mygH.data(7);
951  const ST* restrict gh122 = mygH.data(8);
952  const ST* restrict gh222 = mygH.data(9);
953 
954 //SIMD doesn't work quite right yet. Comment out until further debugging.
955 #pragma omp simd
956  for (size_t j = first; j < last; ++j)
957  {
958  int jr = j << 1;
959  int ji = jr + 1;
960 
961  const ST kX = k0[j];
962  const ST kY = k1[j];
963  const ST kZ = k2[j];
964  const ST val_r = myV[jr];
965  const ST val_i = myV[ji];
966 
967  //phase
968  ST s, c;
969  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
970 
971  //dot(PrimLattice.G,myG[j])
972  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
973  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
974  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
975 
976  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
977  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
978  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
979 
980  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
981  const ST gX_r = dX_r + val_i * kX;
982  const ST gY_r = dY_r + val_i * kY;
983  const ST gZ_r = dZ_r + val_i * kZ;
984  const ST gX_i = dX_i - val_r * kX;
985  const ST gY_i = dY_i - val_r * kY;
986  const ST gZ_i = dZ_i - val_r * kZ;
987 
988  const size_t psiIndex = j + first_spo;
989  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
990  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
991  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
992  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
993 
994  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
995  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
996  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
997  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
998  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
999  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
1000  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
1001 
1002  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
1003  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
1004  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
1005  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
1006  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
1007  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
1008 
1009  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
1010  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
1011  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
1012  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
1013  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
1014  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
1015 
1016  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
1017  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
1018  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
1019  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
1020  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
1021  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
1022 
1023  grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
1024  grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
1025  grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
1026  grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
1027  grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
1028  grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
1029  grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
1030  grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
1031  grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
1032 
1033  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
1034  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
1035 
1036  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1037  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1038  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1039  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1040  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1041  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1042  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1043  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1044  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1045  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1046  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1047  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1048  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1049  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1050  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1051  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1052  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1053  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1054  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1055  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1056 
1057  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1058  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1059  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1060  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1061  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1062  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1063  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1064  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1065  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1066  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1067  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1068  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1069  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1070  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1071  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1072  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1073  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1074  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1075  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1076  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1077 
1078  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
1079  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
1080  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
1081  const ST gh_xxy_r =
1082  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
1083  const ST gh_xxy_i =
1084  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
1085  const ST gh_xxz_r =
1086  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
1087  const ST gh_xxz_i =
1088  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
1089  const ST gh_xyy_r =
1090  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
1091  const ST gh_xyy_i =
1092  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
1093  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
1094  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
1095  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
1096  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
1097  const ST gh_xzz_r =
1098  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
1099  const ST gh_xzz_i =
1100  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
1101  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
1102  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
1103  const ST gh_yyz_r =
1104  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
1105  const ST gh_yyz_i =
1106  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
1107  const ST gh_yzz_r =
1108  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
1109  const ST gh_yzz_i =
1110  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
1111  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
1112  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
1113 
1114  grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
1115  grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
1116  grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
1117  grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
1118  grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
1119  grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1120  grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
1121  grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1122  grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
1123 
1124  grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
1125  grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
1126  grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1127  grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
1128  grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
1129  grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
1130  grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1131  grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
1132  grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
1133 
1134 
1135  grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
1136  grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1137  grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
1138  grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
1139  grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
1140  grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
1141  grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
1142  grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
1143  grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
1144  }
1145 }
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
T t3_contract(T h000, T h001, T h002, T h011, T h012, T h022, T h111, T h112, T h122, T h222, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z, T g3x, T g3y, T g3z)
Coordinate transform for a 3rd rank symmetric tensor representing coordinate derivatives (hence t3_co...
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
T min(T a, T b)
typename BsplineSet::ValueType ComplexT
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vgl_from_l()

void assign_vgl_from_l ( const PointType r,
ValueVector psi,
GradVector dpsi,
ValueVector d2psi 
)
inline

assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian

Definition at line 320 of file SplineC2COMPTarget.cpp.

References TinyVector< T, D >::data(), qmcplusplus::Units::force::N, qmcplusplus::Units::time::s, and qmcplusplus::sincos().

324 {
325  constexpr ST two(2);
326  const ST x = r[0], y = r[1], z = r[2];
327 
328  const ST* restrict k0 = myKcart->data(0);
329  const ST* restrict k1 = myKcart->data(1);
330  const ST* restrict k2 = myKcart->data(2);
331 
332  const ST* restrict g0 = myG.data(0);
333  const ST* restrict g1 = myG.data(1);
334  const ST* restrict g2 = myG.data(2);
335 
336  const size_t last_cplx = last_spo > psi.size() ? psi.size() : last_spo;
337  const size_t N = last_cplx - first_spo;
338 #pragma omp simd
339  for (size_t j = 0; j < N; ++j)
340  {
341  const size_t jr = j << 1;
342  const size_t ji = jr + 1;
343 
344  const ST kX = k0[j];
345  const ST kY = k1[j];
346  const ST kZ = k2[j];
347  const ST val_r = myV[jr];
348  const ST val_i = myV[ji];
349 
350  //phase
351  ST s, c;
352  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
353 
354  //dot(PrimLattice.G,myG[j])
355  const ST dX_r = g0[jr];
356  const ST dY_r = g1[jr];
357  const ST dZ_r = g2[jr];
358 
359  const ST dX_i = g0[ji];
360  const ST dY_i = g1[ji];
361  const ST dZ_i = g2[ji];
362 
363  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
364  const ST gX_r = dX_r + val_i * kX;
365  const ST gY_r = dY_r + val_i * kY;
366  const ST gZ_r = dZ_r + val_i * kZ;
367  const ST gX_i = dX_i - val_r * kX;
368  const ST gY_i = dY_i - val_r * kY;
369  const ST gZ_i = dZ_i - val_r * kZ;
370 
371  const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
372  const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
373 
374  const size_t psiIndex = j + first_spo;
375  psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
376  dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
377  dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
378  dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
379  d2psi[psiIndex] = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
380  }
381 }
typename BsplineSet::ValueType ComplexT
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
size_t last_spo
last index of the SPOs this Spline handles
Definition: BsplineSet.h:43

◆ bcast_tables()

void bcast_tables ( Communicate comm)
inline

Definition at line 156 of file SplineC2COMPTarget.h.

References qmcplusplus::comm, and SplineC2COMPTarget< ST >::SplineInst.

156 { chunked_bcast(comm, SplineInst->getSplinePtr()); }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ create_spline()

void create_spline ( GT &  xyz_g,
BCT &  xyz_bc 
)
inline

Definition at line 173 of file SplineC2COMPTarget.h.

References qmcplusplus::app_log(), SplineC2COMPTarget< ST >::myV, SplineC2COMPTarget< ST >::resize_kpoints(), Vector< T, Alloc >::size(), and SplineC2COMPTarget< ST >::SplineInst.

174  {
175  resize_kpoints();
176  SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>();
177  SplineInst->create(xyz_g, xyz_bc, myV.size());
178 
179  app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
180  << "for the coefficients in 3D spline orbital representation" << std::endl;
181  }
std::ostream & app_log()
Definition: OutputManager.h:65
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
size_type size() const
return the current size
Definition: OhmmsVector.h:162
vContainer_type myV
intermediate result vectors
void resize_kpoints()
remap kPoints to pack the double copy

◆ createResource()

void createResource ( ResourceCollection collection) const
inlineoverridevirtual

initialize a shared resource and hand it to collection

Reimplemented from SPOSet.

Definition at line 124 of file SplineC2COMPTarget.h.

References ResourceCollection::addResource().

125  {
126  auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>());
127  }

◆ evaluate_notranspose()

void evaluate_notranspose ( const ParticleSet P,
int  first,
int  last,
ValueMatrix logdet,
GradMatrix dlogdet,
ValueMatrix d2logdet 
)
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles

Parameters
[in]Pcurrent ParticleSet
[in]firststarting index of the particles
[in]lastending index of the particles
[out]logdetdeterminant matrix to be inverted
[out]dlogdetgradients
[out]d2logdetlaplacians

Reimplemented from BsplineSet.

Definition at line 1168 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), and omptarget::min().

1174 {
1175  // chunk the [first, last) loop into blocks to save temporary memory usage
1176  const int block_size = 16;
1177 
1178  // reference vectors refer to the rows of matrices
1179  std::vector<ValueVector> multi_psi_v;
1180  std::vector<GradVector> multi_dpsi_v;
1181  std::vector<ValueVector> multi_d2psi_v;
1182  RefVector<ValueVector> psi_v_list;
1183  RefVector<GradVector> dpsi_v_list;
1184  RefVector<ValueVector> d2psi_v_list;
1185 
1186  multi_psi_v.reserve(block_size);
1187  multi_dpsi_v.reserve(block_size);
1188  multi_d2psi_v.reserve(block_size);
1189  psi_v_list.reserve(block_size);
1190  dpsi_v_list.reserve(block_size);
1191  d2psi_v_list.reserve(block_size);
1192 
1193  for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size)
1194  {
1195  const int actual_block_size = std::min(last - iat, block_size);
1196  multi_pos_copy.resize(actual_block_size * 6);
1197  multi_psi_v.clear();
1198  multi_dpsi_v.clear();
1199  multi_d2psi_v.clear();
1200  psi_v_list.clear();
1201  dpsi_v_list.clear();
1202  d2psi_v_list.clear();
1203 
1204  for (int ipos = 0; ipos < actual_block_size; ++ipos)
1205  {
1206  // pack particle positions
1207  const PointType& r = P.activeR(iat + ipos);
1209  multi_pos_copy[ipos * 6] = r[0];
1210  multi_pos_copy[ipos * 6 + 1] = r[1];
1211  multi_pos_copy[ipos * 6 + 2] = r[2];
1212  multi_pos_copy[ipos * 6 + 3] = ru[0];
1213  multi_pos_copy[ipos * 6 + 4] = ru[1];
1214  multi_pos_copy[ipos * 6 + 5] = ru[2];
1215 
1216  multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols());
1217  multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols());
1218  multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols());
1219 
1220  psi_v_list.push_back(multi_psi_v[ipos]);
1221  dpsi_v_list.push_back(multi_dpsi_v[ipos]);
1222  d2psi_v_list.push_back(multi_d2psi_v[ipos]);
1223  }
1224 
1225  evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
1226  }
1227 }
T min(T a, T b)
void evaluateVGLMultiPos(const Vector< ST, OffloadPinnedAllocator< ST >> &multi_pos_copy, Vector< ST, OffloadPinnedAllocator< ST >> &offload_scratch, Vector< ComplexT, OffloadPinnedAllocator< ComplexT >> &results_scratch, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
CrystalLattice< ST, 3 > PrimLattice
primitive cell
Vector< ST, OffloadPinnedAllocator< ST > > multi_pos_copy
position scratch space, used to avoid allocation on the fly and faster transfer
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > results_scratch
result scratch space, dynamically resized to the maximal need

◆ evaluateDetRatios()

void evaluateDetRatios ( const VirtualParticleSet VP,
ValueVector psi,
const ValueVector psiinv,
std::vector< ValueType > &  ratios 
)
overridevirtual

evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP

Parameters
VPvirtual particle set
psivalues of the SPO, used as a scratch space if needed
psiinvthe row of inverse slater matrix corresponding to the particle moved virtually
ratiosreturn determinant ratios

Reimplemented from SPOSet.

Definition at line 99 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2C::assign_v(), qmcplusplus::syclBLAS::copy_n(), VirtualParticleSet::getTotalNum(), omptarget::min(), and TinyVector< T, D >::size().

103 {
104  const int nVP = VP.getTotalNum();
105  psiinv_pos_copy.resize(psiinv.size() + nVP * 3);
106 
107  // stage psiinv to psiinv_pos_copy
108  std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
109 
110  // pack particle positions
111  auto* restrict pos_scratch = reinterpret_cast<RealType*>(psiinv_pos_copy.data() + psiinv.size());
112  for (int iat = 0; iat < nVP; ++iat)
113  {
114  const PointType& r = VP.activeR(iat);
116  pos_scratch[iat * 6] = r[0];
117  pos_scratch[iat * 6 + 1] = r[1];
118  pos_scratch[iat * 6 + 2] = r[2];
119  pos_scratch[iat * 6 + 3] = ru[0];
120  pos_scratch[iat * 6 + 4] = ru[1];
121  pos_scratch[iat * 6 + 5] = ru[2];
122  }
123 
124  const size_t ChunkSizePerTeam = 512;
125  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
126  ratios_private.resize(nVP, NumTeams);
127  const auto spline_padded_size = myV.size();
128  const auto sposet_padded_size = getAlignedSize<ValueType>(OrbitalSetSize);
129  offload_scratch.resize(spline_padded_size * nVP);
130  results_scratch.resize(sposet_padded_size * nVP);
131 
132  // Ye: need to extract sizes and pointers before entering target region
133  const auto* spline_ptr = SplineInst->getSplinePtr();
134  auto* offload_scratch_ptr = offload_scratch.data();
135  auto* results_scratch_ptr = results_scratch.data();
136  const auto myKcart_padded_size = myKcart->capacity();
137  auto* myKcart_ptr = myKcart->data();
138  auto* psiinv_ptr = psiinv_pos_copy.data();
139  auto* ratios_private_ptr = ratios_private.data();
140  const size_t first_spo_local = first_spo;
141  const auto orb_size = psiinv.size();
142 
143  {
144  ScopedTimer offload(offload_timer_);
145  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
146  map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \
147  map(always, from: ratios_private_ptr[0:NumTeams*nVP])")
148  for (int iat = 0; iat < nVP; iat++)
149  for (int team_id = 0; team_id < NumTeams; team_id++)
150  {
151  const size_t first = ChunkSizePerTeam * team_id;
152  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
153 
154  auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
155  auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat;
156  auto* restrict pos_scratch = reinterpret_cast<RealType*>(psiinv_ptr + orb_size);
157 
158  int ix, iy, iz;
159  ST a[4], b[4], c[4];
160  spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
161  ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
162 
163  PRAGMA_OFFLOAD("omp parallel for")
164  for (int index = 0; index < last - first; index++)
165  spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
166  offload_scratch_iat_ptr + first + index);
167  const size_t first_cplx = first / 2;
168  const size_t last_cplx = omptarget::min(last / 2, orb_size);
169  PRAGMA_OFFLOAD("omp parallel for")
170  for (int index = first_cplx; index < last_cplx; index++)
171  C2C::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
172  psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index);
173 
174  ComplexT sum(0);
175  PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
176  for (int i = first_cplx; i < last_cplx; i++)
177  sum += psi_iat_ptr[i] * psiinv_ptr[i];
178  ratios_private_ptr[iat * NumTeams + team_id] = sum;
179  }
180  }
181 
182  // do the reduction manually
183  for (int iat = 0; iat < nVP; ++iat)
184  {
185  ratios[iat] = ComplexT(0);
186  for (int tid = 0; tid < NumTeams; tid++)
187  ratios[iat] += ratios_private[iat][tid];
188  }
189 }
Matrix< ComplexT, OffloadPinnedAllocator< ComplexT > > ratios_private
team private ratios for reduction, numVP x numTeams
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > psiinv_pos_copy
psiinv and position scratch space, used to avoid allocation on the fly and faster transfer ...
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
typename BsplineSet::ValueType ComplexT
CrystalLattice< ST, 3 > PrimLattice
primitive cell
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors
QMCTraits::RealType RealType
std::shared_ptr< OffloadPosVector< ST > > myKcart
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
NewTimer & offload_timer_
timer for offload portion
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > results_scratch
result scratch space, dynamically resized to the maximal need

◆ evaluateValue()

void evaluateValue ( const ParticleSet P,
const int  iat,
ValueVector psi 
)
overridevirtual

evaluate the values of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO

Implements SPOSet.

Definition at line 82 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2C::assign_v(), FairDivideAligned(), omp_get_num_threads(), and omp_get_thread_num().

83 {
84  const PointType& r = P.activeR(iat);
86 
87 #pragma omp parallel
88  {
89  int first, last;
90  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
91  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
92 
93  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
94  assign_v(r, myV, psi, first / 2, last / 2);
95  }
96 }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
CrystalLattice< ST, 3 > PrimLattice
primitive cell
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors

◆ evaluateVGH()

void evaluateVGH ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi 
)
overridevirtual

evaluate the values, gradients and hessians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
grad_grad_psihessians of the SPO

Reimplemented from SPOSet.

Definition at line 891 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), FairDivideAligned(), omp_get_num_threads(), and omp_get_thread_num().

896 {
897  const PointType& r = P.activeR(iat);
899 
900 #pragma omp parallel
901  {
902  int first, last;
903  // Factor of 2 because psi is complex and the spline storage and evaluation uses a real type
904  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
905 
906  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
907  assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
908  }
909 }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors

◆ evaluateVGHGH()

void evaluateVGHGH ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
GGGVector grad_grad_grad_psi 
)
overridevirtual

evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
grad_grad_psihessians of the SPO
grad_grad_grad_psigrad hessians of the SPO

Reimplemented from SPOSet.

Definition at line 1148 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), FairDivideAligned(), omp_get_num_threads(), and omp_get_thread_num().

1154 {
1155  const PointType& r = P.activeR(iat);
1157 #pragma omp parallel
1158  {
1159  int first, last;
1160  FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
1161 
1162  spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
1163  assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
1164  }
1165 }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors

◆ evaluateVGL()

void evaluateVGL ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
ValueVector d2psi 
)
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
d2psilaplacians of the SPO

Implements SPOSet.

Definition at line 384 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2C::assign_vgl(), TinyVector< T, D >::data(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), qmcplusplus::NUM_FIELDS, TinyVector< T, D >::size(), and qmcplusplus::SymTrace().

389 {
390  const PointType& r = P.activeR(iat);
392 
393  const size_t ChunkSizePerTeam = 512;
394  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
395 
396  const auto spline_padded_size = myV.size();
397  const auto sposet_padded_size = getAlignedSize<ValueType>(OrbitalSetSize);
398  offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS);
399  // for V(1)G(3)L(1) final result
400  results_scratch.resize(sposet_padded_size * 5);
401 
402  // Ye: need to extract sizes and pointers before entering target region
403  const auto* spline_ptr = SplineInst->getSplinePtr();
404  auto* offload_scratch_ptr = offload_scratch.data();
405  auto* results_scratch_ptr = results_scratch.data();
406  const auto x = r[0], y = r[1], z = r[2];
407  const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
408  const auto myKcart_padded_size = myKcart->capacity();
409  auto* mKK_ptr = mKK->data();
410  auto* GGt_ptr = GGt_offload->data();
411  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
412  auto* myKcart_ptr = myKcart->data();
413  const size_t first_spo_local = first_spo;
414  const auto orb_size = psi.size();
415 
416  {
417  ScopedTimer offload(offload_timer_);
418  PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
419  map(always, from: results_scratch_ptr[0:sposet_padded_size*5])")
420  for (int team_id = 0; team_id < NumTeams; team_id++)
421  {
422  const size_t first = ChunkSizePerTeam * team_id;
423  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
424 
425  int ix, iy, iz;
426  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
427  spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
428 
429  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
430  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
431  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
432  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
433  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
434 
435  PRAGMA_OFFLOAD("omp parallel for")
436  for (int index = 0; index < last - first; index++)
437  {
438  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
439  offload_scratch_ptr + first + index, spline_padded_size);
440  const int output_index = first + index;
441  offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
442  SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
443  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
444  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
445  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
446  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
447  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
448  }
449 
450  const size_t first_cplx = first / 2;
451  const size_t last_cplx = omptarget::min(last / 2, orb_size);
452  PRAGMA_OFFLOAD("omp parallel for")
453  for (int index = first_cplx; index < last_cplx; index++)
454  C2C::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr,
455  spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, index);
456  }
457  }
458 
459  for (size_t i = 0; i < orb_size; i++)
460  {
461  psi[i] = results_scratch[i];
462  dpsi[i][0] = results_scratch[i + sposet_padded_size];
463  dpsi[i][1] = results_scratch[i + sposet_padded_size * 2];
464  dpsi[i][2] = results_scratch[i + sposet_padded_size * 3];
465  d2psi[i] = results_scratch[i + sposet_padded_size * 4];
466  }
467 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
std::shared_ptr< OffloadVector< ST > > GGt_offload
std::shared_ptr< OffloadVector< ST > > mKK
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
NewTimer & offload_timer_
timer for offload portion
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > results_scratch
result scratch space, dynamically resized to the maximal need

◆ evaluateVGLMultiPos()

void evaluateVGLMultiPos ( const Vector< ST, OffloadPinnedAllocator< ST >> &  multi_pos_copy,
Vector< ST, OffloadPinnedAllocator< ST >> &  offload_scratch,
Vector< ComplexT, OffloadPinnedAllocator< ComplexT >> &  results_scratch,
const RefVector< ValueVector > &  psi_v_list,
const RefVector< GradVector > &  dpsi_v_list,
const RefVector< ValueVector > &  d2psi_v_list 
) const
private

Definition at line 470 of file SplineC2COMPTarget.cpp.

References qmcplusplus::C2C::assign_vgl(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), qmcplusplus::NUM_FIELDS, and qmcplusplus::SymTrace().

476 {
477  const size_t num_pos = psi_v_list.size();
478  const size_t ChunkSizePerTeam = 512;
479  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
480  const auto spline_padded_size = myV.size();
481  const auto sposet_padded_size = getAlignedSize<ValueType>(OrbitalSetSize);
482  offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
483  const auto orb_size = psi_v_list[0].get().size();
484  // for V(1)G(3)L(1) final result
485  results_scratch.resize(sposet_padded_size * num_pos * 5);
486 
487  // Ye: need to extract sizes and pointers before entering target region
488  const auto* spline_ptr = SplineInst->getSplinePtr();
489  auto* pos_copy_ptr = multi_pos.data();
490  auto* offload_scratch_ptr = offload_scratch.data();
491  auto* results_scratch_ptr = results_scratch.data();
492  const auto myKcart_padded_size = myKcart->capacity();
493  auto* mKK_ptr = mKK->data();
494  auto* GGt_ptr = GGt_offload->data();
495  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
496  auto* myKcart_ptr = myKcart->data();
497  const size_t first_spo_local = first_spo;
498 
499  {
500  ScopedTimer offload(offload_timer_);
501  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
502  map(always, to: pos_copy_ptr[0:num_pos*6]) \
503  map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])")
504  for (int iw = 0; iw < num_pos; iw++)
505  for (int team_id = 0; team_id < NumTeams; team_id++)
506  {
507  const size_t first = ChunkSizePerTeam * team_id;
508  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
509 
510  auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
511  auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
512 
513  int ix, iy, iz;
514  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
515  spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
516  pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
517 
518  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
519  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
520  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
521  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
522  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
523 
524  PRAGMA_OFFLOAD("omp parallel for")
525  for (int index = 0; index < last - first; index++)
526  {
527  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
528  d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
529  const int output_index = first + index;
530  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
531  SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
532  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
533  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
534  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
535  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
536  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
537  }
538 
539  const size_t first_cplx = first / 2;
540  const size_t last_cplx = omptarget::min(last / 2, orb_size);
541  PRAGMA_OFFLOAD("omp parallel for")
542  for (int index = first_cplx; index < last_cplx; index++)
543  C2C::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr,
544  sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr,
545  myKcart_padded_size, first_spo_local, index);
546  }
547  }
548 
549  for (int iw = 0; iw < num_pos; ++iw)
550  {
551  auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
552  ValueVector& psi_v(psi_v_list[iw]);
553  GradVector& dpsi_v(dpsi_v_list[iw]);
554  ValueVector& d2psi_v(d2psi_v_list[iw]);
555  for (size_t i = 0; i < orb_size; i++)
556  {
557  psi_v[i] = results_iw_ptr[i];
558  dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size];
559  dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2];
560  dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3];
561  d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4];
562  }
563  }
564 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
size_type size() const
return the current size
Definition: OhmmsVector.h:162
std::shared_ptr< OffloadVector< ST > > GGt_offload
std::shared_ptr< OffloadVector< ST > > mKK
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
NewTimer & offload_timer_
timer for offload portion
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
Vector< ComplexT, OffloadPinnedAllocator< ComplexT > > results_scratch
result scratch space, dynamically resized to the maximal need

◆ finalizeConstruction()

void finalizeConstruction ( )
inlineoverridevirtual

this routine can not be called from threaded region

Reimplemented from SPOSet.

Definition at line 184 of file SplineC2COMPTarget.h.

References CrystalLattice< T, D >::G, SplineC2COMPTarget< ST >::GGt, SplineC2COMPTarget< ST >::GGt_offload, SplineC2COMPTarget< ST >::mKK, SplineC2COMPTarget< ST >::myKcart, SplineC2COMPTarget< ST >::PrimLattice, SplineC2COMPTarget< ST >::PrimLattice_G_offload, and SplineC2COMPTarget< ST >::SplineInst.

185  {
186  // map the SplineInst->getSplinePtr() structure to GPU
187  auto* MultiSpline = SplineInst->getSplinePtr();
188  auto* restrict coefs = MultiSpline->coefs;
189  // attach pointers on the device to achieve deep copy
190  PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])")
191  {
192  MultiSpline->coefs = coefs;
193  }
194 
195  // transfer static data to GPU
196  auto* mKK_ptr = mKK->data();
197  PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
198  auto* myKcart_ptr = myKcart->data();
199  PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
200  for (uint32_t i = 0; i < 9; i++)
201  {
202  (*GGt_offload)[i] = GGt[i];
203  (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
204  }
205  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
206  PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
207  auto* GGt_ptr = GGt_offload->data();
208  PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
209  }
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
std::shared_ptr< OffloadVector< ST > > mKK
CrystalLattice< ST, 3 > PrimLattice
primitive cell
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor< ST, 3 > GGt
, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian

◆ flush_zero()

void flush_zero ( )
inline

Definition at line 211 of file SplineC2COMPTarget.h.

References SplineC2COMPTarget< ST >::SplineInst.

211 { SplineInst->flush_zero(); }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ gather_tables()

void gather_tables ( Communicate comm)
inline

Definition at line 158 of file SplineC2COMPTarget.h.

References qmcplusplus::comm, FairDivideLow(), gatherv(), BsplineSet::kPoints, BsplineSet::offset, Communicate::size(), and SplineC2COMPTarget< ST >::SplineInst.

159  {
160  if (comm->size() == 1)
161  return;
162  const int Nbands = kPoints.size();
163  const int Nbandgroups = comm->size();
164  offset.resize(Nbandgroups + 1, 0);
165  FairDivideLow(Nbands, Nbandgroups, offset);
166 
167  for (size_t ib = 0; ib < offset.size(); ib++)
168  offset[ib] *= 2;
169  gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset);
170  }
int size() const
return the number of tasks
Definition: Communicate.h:118
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
void FairDivideLow(int ntot, int npart, IV &adist)
partition ntot elements among npart
Definition: FairDivide.h:114
std::vector< int > offset
band offsets used for communication
Definition: BsplineSet.h:56
void gatherv(T *sb, T *rb, int n, IT &counts, IT &displ, int dest)
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ getClassName()

virtual std::string getClassName ( ) const
inlineoverridevirtual

return class name

Implements SPOSet.

Definition at line 119 of file SplineC2COMPTarget.h.

119 { return "SplineC2COMPTarget"; }

◆ getKeyword()

virtual std::string getKeyword ( ) const
inlineoverridevirtual

Implements BsplineSet.

Definition at line 120 of file SplineC2COMPTarget.h.

120 { return "SplineC2C"; }

◆ isComplex()

bool isComplex ( ) const
inlineoverridevirtual

Implements BsplineSet.

Definition at line 121 of file SplineC2COMPTarget.h.

121 { return true; };

◆ isOMPoffload()

virtual bool isOMPoffload ( ) const
inlineoverridevirtual

Query if this SPOSet uses OpenMP offload.

Reimplemented from SPOSet.

Definition at line 122 of file SplineC2COMPTarget.h.

122 { return true; }

◆ makeClone()

std::unique_ptr<SPOSet> makeClone ( ) const
inlineoverridevirtual

make a clone of itself every derived class must implement this to have threading working correctly.

Implements BsplineSet.

Definition at line 143 of file SplineC2COMPTarget.h.

143 { return std::make_unique<SplineC2COMPTarget>(*this); }

◆ mw_evaluateDetRatios()

void mw_evaluateDetRatios ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< const VirtualParticleSet > &  vp_list,
const RefVector< ValueVector > &  psi_list,
const std::vector< const ValueType *> &  invRow_ptr_list,
std::vector< std::vector< ValueType >> &  ratios_list 
) const
overridevirtual

evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers

Parameters
spo_listthe list of SPOSet pointers in a walker batch
vp_lista list of virtual particle sets in a walker batch
psi_lista list of values of the SPO, used as a scratch space if needed
invRow_ptr_lista list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually
ratios_lista list of returning determinant ratios

Reimplemented from SPOSet.

Definition at line 192 of file SplineC2COMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2C::assign_v(), RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), VirtualParticleSet::getTotalNum(), omptarget::min(), SplineC2COMPTarget< ST >::mw_mem_handle_, and TinyVector< T, D >::size().

197 {
198  assert(this == &spo_list.getLeader());
199  auto& phi_leader = spo_list.getCastedLeader<SplineC2COMPTarget<ST>>();
200  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
201  auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D;
202  auto& mw_ratios_private = mw_mem.mw_ratios_private;
203  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
204  auto& mw_results_scratch = mw_mem.mw_results_scratch;
205  const size_t nw = spo_list.size();
206  const size_t orb_size = phi_leader.size();
207 
208  size_t mw_nVP = 0;
209  for (const VirtualParticleSet& VP : vp_list)
210  mw_nVP += VP.getTotalNum();
211 
212  const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int));
213  det_ratios_buffer_H2D.resize(packed_size);
214 
215  // pack invRow_ptr_list to det_ratios_buffer_H2D
216  Vector<const ValueType*> ptr_buffer(reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
217  for (size_t iw = 0; iw < nw; iw++)
218  ptr_buffer[iw] = invRow_ptr_list[iw];
219 
220  // pack particle positions
221  auto* pos_ptr = reinterpret_cast<ST*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
222  auto* ref_id_ptr =
223  reinterpret_cast<int*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
224  size_t iVP = 0;
225  for (size_t iw = 0; iw < nw; iw++)
226  {
227  const VirtualParticleSet& VP = vp_list[iw];
228  assert(ratios_list[iw].size() == VP.getTotalNum());
229  for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP)
230  {
231  ref_id_ptr[iVP] = iw;
232  const PointType& r = VP.activeR(iat);
234  pos_ptr[0] = r[0];
235  pos_ptr[1] = r[1];
236  pos_ptr[2] = r[2];
237  pos_ptr[3] = ru[0];
238  pos_ptr[4] = ru[1];
239  pos_ptr[5] = ru[2];
240  pos_ptr += 6;
241  }
242  }
243 
244  const size_t ChunkSizePerTeam = 512;
245  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
246  mw_ratios_private.resize(mw_nVP, NumTeams);
247  const auto spline_padded_size = myV.size();
248  const auto sposet_padded_size = getAlignedSize<ValueType>(OrbitalSetSize);
249  mw_offload_scratch.resize(spline_padded_size * mw_nVP);
250  mw_results_scratch.resize(sposet_padded_size * mw_nVP);
251 
252  // Ye: need to extract sizes and pointers before entering target region
253  const auto* spline_ptr = SplineInst->getSplinePtr();
254  auto* offload_scratch_ptr = mw_offload_scratch.data();
255  auto* results_scratch_ptr = mw_results_scratch.data();
256  const auto myKcart_padded_size = myKcart->capacity();
257  auto* myKcart_ptr = myKcart->data();
258  auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data();
259  auto* ratios_private_ptr = mw_ratios_private.data();
260  const size_t first_spo_local = first_spo;
261 
262  {
263  ScopedTimer offload(offload_timer_);
264  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
265  map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \
266  map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])")
267  for (int iat = 0; iat < mw_nVP; iat++)
268  for (int team_id = 0; team_id < NumTeams; team_id++)
269  {
270  const size_t first = ChunkSizePerTeam * team_id;
271  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
272 
273  auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
274  auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat;
275  auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
276  auto* restrict psiinv_ptr = reinterpret_cast<const ValueType**>(buffer_H2D_ptr)[ref_id_ptr[iat]];
277  auto* restrict pos_scratch = reinterpret_cast<ST*>(buffer_H2D_ptr + nw * sizeof(ValueType*));
278 
279  int ix, iy, iz;
280  ST a[4], b[4], c[4];
281  spline2::computeLocationAndFractional(spline_ptr, pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4],
282  pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c);
283 
284  PRAGMA_OFFLOAD("omp parallel for")
285  for (int index = 0; index < last - first; index++)
286  spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
287  offload_scratch_iat_ptr + first + index);
288  const size_t first_cplx = first / 2;
289  const size_t last_cplx = omptarget::min(last / 2, orb_size);
290  PRAGMA_OFFLOAD("omp parallel for")
291  for (int index = first_cplx; index < last_cplx; index++)
292  C2C::assign_v(pos_scratch[iat * 6], pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], psi_iat_ptr,
293  offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index);
294 
295  ComplexT sum(0);
296  PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
297  for (int i = first_cplx; i < last_cplx; i++)
298  sum += psi_iat_ptr[i] * psiinv_ptr[i];
299  ratios_private_ptr[iat * NumTeams + team_id] = sum;
300  }
301  }
302 
303  // do the reduction manually
304  iVP = 0;
305  for (size_t iw = 0; iw < nw; iw++)
306  {
307  auto& ratios = ratios_list[iw];
308  for (size_t iat = 0; iat < ratios.size(); iat++, iVP++)
309  {
310  ratios[iat] = ComplexT(0);
311  for (int tid = 0; tid < NumTeams; ++tid)
312  ratios[iat] += mw_ratios_private[iVP][tid];
313  }
314  }
315 }
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int size() const
return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); ...
Definition: SPOSet.h:75
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
size_type size() const
return the current size
Definition: OhmmsVector.h:162
QTBase::ValueType ValueType
Definition: Configuration.h:60
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
typename BsplineSet::ValueType ComplexT
CrystalLattice< ST, 3 > PrimLattice
primitive cell
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
NewTimer & offload_timer_
timer for offload portion
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41

◆ mw_evaluateVGL()

void mw_evaluateVGL ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< ParticleSet > &  P_list,
int  iat,
const RefVector< ValueVector > &  psi_v_list,
const RefVector< GradVector > &  dpsi_v_list,
const RefVector< ValueVector > &  d2psi_v_list 
) const
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers

Parameters
spo_listthe list of SPOSet pointers in a walker batch
P_listthe list of ParticleSet pointers in a walker batch
iatactive particle
psi_v_listthe list of value vector pointers in a walker batch
dpsi_v_listthe list of gradient vector pointers in a walker batch
d2psi_v_listthe list of laplacian vector pointers in a walker batch

Reimplemented from SPOSet.

Definition at line 567 of file SplineC2COMPTarget.cpp.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), and SplineC2COMPTarget< ST >::mw_mem_handle_.

573 {
574  assert(this == &sa_list.getLeader());
575  auto& phi_leader = sa_list.getCastedLeader<SplineC2COMPTarget<ST>>();
576  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
577  auto& mw_pos_copy = mw_mem.mw_pos_copy;
578  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
579  auto& mw_results_scratch = mw_mem.mw_results_scratch;
580  const int nwalkers = sa_list.size();
581  mw_pos_copy.resize(nwalkers * 6);
582 
583  // pack particle positions
584  for (int iw = 0; iw < nwalkers; ++iw)
585  {
586  const PointType& r = P_list[iw].activeR(iat);
588  mw_pos_copy[iw * 6] = r[0];
589  mw_pos_copy[iw * 6 + 1] = r[1];
590  mw_pos_copy[iw * 6 + 2] = r[2];
591  mw_pos_copy[iw * 6 + 3] = ru[0];
592  mw_pos_copy[iw * 6 + 4] = ru[1];
593  mw_pos_copy[iw * 6 + 5] = ru[2];
594  }
595 
596  phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list,
597  d2psi_v_list);
598 }
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
CrystalLattice< ST, 3 > PrimLattice
primitive cell

◆ mw_evaluateVGLandDetRatioGrads()

void mw_evaluateVGLandDetRatioGrads ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< ParticleSet > &  P_list,
int  iat,
const std::vector< const ValueType *> &  invRow_ptr_list,
OffloadMWVGLArray phi_vgl_v,
std::vector< ValueType > &  ratios,
std::vector< GradType > &  grads 
) const
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers.

Device data of phi_vgl_v must be up-to-date upon return

Parameters
spo_listthe list of SPOSet pointers in a walker batch
P_listthe list of ParticleSet pointers in a walker batch
iatactive particle
phi_vgl_vorbital values, gradients and laplacians of all the walkers
psi_ratio_grads_vdeterminant ratio and grads of all the walkers

Reimplemented from SPOSet.

Definition at line 601 of file SplineC2COMPTarget.cpp.

References qmcplusplus::C2C::assign_vgl(), Array< T, D, ALLOC >::data(), RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), SplineC2COMPTarget< ST >::mw_mem_handle_, qmcplusplus::NUM_FIELDS, Array< T, D, ALLOC >::size(), and qmcplusplus::SymTrace().

608 {
609  assert(this == &spo_list.getLeader());
610  auto& phi_leader = spo_list.getCastedLeader<SplineC2COMPTarget<ST>>();
611  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
612  auto& buffer_H2D = mw_mem.buffer_H2D;
613  auto& rg_private = mw_mem.rg_private;
614  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
615  auto& mw_results_scratch = mw_mem.mw_results_scratch;
616  const int nwalkers = spo_list.size();
617  buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
618 
619  // pack particle positions and invRow pointers.
620  for (int iw = 0; iw < nwalkers; ++iw)
621  {
622  const PointType& r = P_list[iw].activeR(iat);
624  Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
625 
626  pos_copy[0] = r[0];
627  pos_copy[1] = r[1];
628  pos_copy[2] = r[2];
629  pos_copy[3] = ru[0];
630  pos_copy[4] = ru[1];
631  pos_copy[5] = ru[2];
632 
633  auto& invRow_ptr = *reinterpret_cast<const ValueType**>(buffer_H2D[iw] + sizeof(ST) * 6);
634  invRow_ptr = invRow_ptr_list[iw];
635  }
636 
637  const size_t num_pos = nwalkers;
638  const auto orb_size = phi_vgl_v.size(2);
639  const auto spline_padded_size = myV.size();
640  const auto sposet_padded_size = getAlignedSize<ValueType>(OrbitalSetSize);
641  const size_t ChunkSizePerTeam = 512;
642  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
643  mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
644  // for V(1)G(3)L(1) final result
645  mw_results_scratch.resize(sposet_padded_size * num_pos * 5);
646  // per team ratio and grads
647  rg_private.resize(num_pos, NumTeams * 4);
648 
649  // Ye: need to extract sizes and pointers before entering target region
650  const auto* spline_ptr = SplineInst->getSplinePtr();
651  auto* buffer_H2D_ptr = buffer_H2D.data();
652  auto* offload_scratch_ptr = mw_offload_scratch.data();
653  auto* results_scratch_ptr = mw_results_scratch.data();
654  const auto myKcart_padded_size = myKcart->capacity();
655  auto* mKK_ptr = mKK->data();
656  auto* GGt_ptr = GGt_offload->data();
657  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
658  auto* myKcart_ptr = myKcart->data();
659  auto* phi_vgl_ptr = phi_vgl_v.data();
660  auto* rg_private_ptr = rg_private.data();
661  const size_t buffer_H2D_stride = buffer_H2D.cols();
662  const size_t first_spo_local = first_spo;
663  const size_t phi_vgl_stride = num_pos * orb_size;
664 
665  {
666  ScopedTimer offload(offload_timer_);
667  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
668  map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \
669  map(always, from: rg_private_ptr[0:rg_private.size()])")
670  for (int iw = 0; iw < num_pos; iw++)
671  for (int team_id = 0; team_id < NumTeams; team_id++)
672  {
673  const size_t first = ChunkSizePerTeam * team_id;
674  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
675 
676  auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
677  auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
678  const auto* restrict pos_iw_ptr = reinterpret_cast<ST*>(buffer_H2D_ptr + buffer_H2D_stride * iw);
679  const auto* restrict invRow_iw_ptr =
680  *reinterpret_cast<ValueType**>(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6);
681 
682  int ix, iy, iz;
683  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
684  spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b,
685  c, da, db, dc, d2a, d2b, d2c);
686 
687  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
688  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
689  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
690  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
691  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
692 
693  PRAGMA_OFFLOAD("omp parallel for")
694  for (int index = 0; index < last - first; index++)
695  {
696  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
697  d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
698  const int output_index = first + index;
699  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
700  SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
701  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
702  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
703  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
704  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
705  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
706  }
707 
708  const size_t first_cplx = first / 2;
709  const size_t last_cplx = omptarget::min(last / 2, orb_size);
710  PRAGMA_OFFLOAD("omp parallel for")
711  for (int index = first_cplx; index < last_cplx; index++)
712  C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr,
713  offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size,
714  first_spo_local, index);
715 
716  ValueType* restrict psi = psi_iw_ptr;
717  ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size;
718  ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2;
719  ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3;
720  ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4;
721 
722  ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size;
723  ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
724  ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
725  ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
726  ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride;
727 
728  ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
729  PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)")
730  for (int j = first_cplx; j < last_cplx; j++)
731  {
732  const size_t psiIndex = first_spo_local + j;
733 
734  out_phi[psiIndex] = psi[psiIndex];
735  out_dphi_x[psiIndex] = dpsi_x[psiIndex];
736  out_dphi_y[psiIndex] = dpsi_y[psiIndex];
737  out_dphi_z[psiIndex] = dpsi_z[psiIndex];
738  out_d2phi[psiIndex] = d2psi[psiIndex];
739 
740  ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex];
741  grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex];
742  grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex];
743  grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex];
744  }
745 
746  rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio;
747  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
748  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
749  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
750  }
751  }
752 
753  for (int iw = 0; iw < num_pos; iw++)
754  {
755  ValueType ratio(0);
756  for (int team_id = 0; team_id < NumTeams; team_id++)
757  ratio += rg_private[iw][team_id * 4];
758  ratios[iw] = ratio;
759 
760  ValueType grad_x(0), grad_y(0), grad_z(0);
761  for (int team_id = 0; team_id < NumTeams; team_id++)
762  {
763  grad_x += rg_private[iw][team_id * 4 + 1];
764  grad_y += rg_private[iw][team_id * 4 + 2];
765  grad_z += rg_private[iw][team_id * 4 + 3];
766  }
767  grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
768  }
769 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
QTBase::GradType GradType
Definition: Configuration.h:62
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
size_type size() const
return the current size
Definition: OhmmsVector.h:162
QTBase::ValueType ValueType
Definition: Configuration.h:60
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
std::shared_ptr< OffloadVector< ST > > GGt_offload
std::shared_ptr< OffloadVector< ST > > mKK
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
CrystalLattice< ST, 3 > PrimLattice
primitive cell
vContainer_type myV
intermediate result vectors
std::shared_ptr< OffloadPosVector< ST > > myKcart
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
NewTimer & offload_timer_
timer for offload portion
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41

◆ read_splines()

bool read_splines ( hdf_archive h5f)

Definition at line 38 of file SplineC2COMPTarget.cpp.

References hdf_archive::readEntry().

39 {
40  std::ostringstream o;
41  o << "spline_" << MyIndex;
42  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
43  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
44 }
size_t MyIndex
Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases.
Definition: BsplineSet.h:39
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ releaseResource()

void releaseResource ( ResourceCollection collection,
const RefVectorWithLeader< SPOSet > &  spo_list 
) const
inlineoverridevirtual

return a shared resource to collection

Reimplemented from SPOSet.

Definition at line 136 of file SplineC2COMPTarget.h.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), and ResourceCollection::takebackResource().

137  {
138  assert(this == &spo_list.getLeader());
139  auto& phi_leader = spo_list.getCastedLeader<SplineC2COMPTarget<ST>>();
140  collection.takebackResource(phi_leader.mw_mem_handle_);
141  }

◆ resize_kpoints()

void resize_kpoints ( )
inline

remap kPoints to pack the double copy

Definition at line 214 of file SplineC2COMPTarget.h.

References qmcplusplus::dot(), BsplineSet::kPoints, SplineC2COMPTarget< ST >::mKK, and SplineC2COMPTarget< ST >::myKcart.

Referenced by SplineC2COMPTarget< ST >::create_spline().

215  {
216  const size_t nk = kPoints.size();
217  mKK = std::make_shared<OffloadVector<ST>>(nk);
218  myKcart = std::make_shared<OffloadPosVector<ST>>(nk);
219  for (size_t i = 0; i < nk; ++i)
220  {
221  (*mKK)[i] = -dot(kPoints[i], kPoints[i]);
222  (*myKcart)(i) = kPoints[i];
223  }
224  }
std::shared_ptr< OffloadVector< ST > > mKK
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ resizeStorage()

void resizeStorage ( size_t  n,
size_t  nvals 
)
inline

Definition at line 145 of file SplineC2COMPTarget.h.

References BsplineSet::init_base(), SplineC2COMPTarget< ST >::myG, SplineC2COMPTarget< ST >::mygH, SplineC2COMPTarget< ST >::myH, SplineC2COMPTarget< ST >::myL, SplineC2COMPTarget< ST >::myV, qmcplusplus::n, VectorSoaContainer< T, D, Alloc >::resize(), and Vector< T, Alloc >::resize().

146  {
147  init_base(n);
148  size_t npad = getAlignedSize<ST>(2 * n);
149  myV.resize(npad);
150  myG.resize(npad);
151  myL.resize(npad);
152  myH.resize(npad);
153  mygH.resize(npad);
154  }
void resize(size_type n, Type_t val=Type_t())
Resize the container.
Definition: OhmmsVector.h:166
vContainer_type myV
intermediate result vectors
void init_base(int n)
Definition: BsplineSet.h:66
void resize(size_type n)
resize myData

◆ set_spline()

void set_spline ( SingleSplineType spline_r,
SingleSplineType spline_i,
int  twist,
int  ispline,
int  level 
)
inline

Definition at line 27 of file SplineC2COMPTarget.cpp.

32 {
33  SplineInst->copy_spline(spline_r, 2 * ispline);
34  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
35 }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ write_splines()

bool write_splines ( hdf_archive h5f)

Definition at line 47 of file SplineC2COMPTarget.cpp.

References hdf_archive::writeEntry().

48 {
49  std::ostringstream o;
50  o << "spline_" << MyIndex;
51  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
52  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
53 }
size_t MyIndex
Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases.
Definition: BsplineSet.h:39
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

Friends And Related Function Documentation

◆ BsplineReader

friend struct BsplineReader
friend

Definition at line 309 of file SplineC2COMPTarget.h.

◆ SplineSetReader

friend class SplineSetReader
friend

Definition at line 308 of file SplineC2COMPTarget.h.

Member Data Documentation

◆ GGt

Tensor<ST, 3> GGt
private

$GGt=G^t G $, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian

Definition at line 72 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::finalizeConstruction().

◆ GGt_offload

std::shared_ptr<OffloadVector<ST> > GGt_offload
private

Definition at line 78 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::finalizeConstruction().

◆ mKK

std::shared_ptr<OffloadVector<ST> > mKK
private

◆ multi_pos_copy

Vector<ST, OffloadPinnedAllocator<ST> > multi_pos_copy
private

position scratch space, used to avoid allocation on the fly and faster transfer

Definition at line 92 of file SplineC2COMPTarget.h.

◆ mw_mem_handle_

◆ myG

gContainer_type myG
protected

Definition at line 105 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::resizeStorage().

◆ mygH

ghContainer_type mygH
protected

Definition at line 107 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::resizeStorage().

◆ myH

hContainer_type myH
protected

Definition at line 106 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::resizeStorage().

◆ myKcart

std::shared_ptr<OffloadPosVector<ST> > myKcart
private

◆ myL

vContainer_type myL
protected

Definition at line 104 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::resizeStorage().

◆ myV

vContainer_type myV
protected

intermediate result vectors

Definition at line 103 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::create_spline(), and SplineC2COMPTarget< ST >::resizeStorage().

◆ offload_scratch

Vector<ST, OffloadPinnedAllocator<ST> > offload_scratch
private

offload scratch space, dynamically resized to the maximal need

Definition at line 86 of file SplineC2COMPTarget.h.

◆ offload_timer_

NewTimer& offload_timer_
private

timer for offload portion

Definition at line 68 of file SplineC2COMPTarget.h.

◆ PrimLattice

CrystalLattice<ST, 3> PrimLattice
private

primitive cell

Definition at line 70 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::finalizeConstruction().

◆ PrimLattice_G_offload

std::shared_ptr<OffloadVector<ST> > PrimLattice_G_offload
private

Definition at line 79 of file SplineC2COMPTarget.h.

Referenced by SplineC2COMPTarget< ST >::finalizeConstruction().

◆ psiinv_pos_copy

Vector<ComplexT, OffloadPinnedAllocator<ComplexT> > psiinv_pos_copy
private

psiinv and position scratch space, used to avoid allocation on the fly and faster transfer

Definition at line 90 of file SplineC2COMPTarget.h.

◆ ratios_private

Matrix<ComplexT, OffloadPinnedAllocator<ComplexT> > ratios_private
private

team private ratios for reduction, numVP x numTeams

Definition at line 84 of file SplineC2COMPTarget.h.

◆ results_scratch

Vector<ComplexT, OffloadPinnedAllocator<ComplexT> > results_scratch
private

result scratch space, dynamically resized to the maximal need

Definition at line 88 of file SplineC2COMPTarget.h.

◆ SplineInst


The documentation for this class was generated from the following files: