QMCPACK
SplineC2ROMPTarget< ST > Class Template Reference

class to match std::complex<ST> spline with BsplineSet::ValueType (real) SPOs with OpenMP offload More...

+ Inheritance diagram for SplineC2ROMPTarget< ST >:
+ Collaboration diagram for SplineC2ROMPTarget< ST >:

Public Types

using SplineType = typename bspline_traits< ST, 3 >::SplineType
 
using BCType = typename bspline_traits< ST, 3 >::BCType
 
using DataType = ST
 
using PointType = TinyVector< ST, 3 >
 
using SingleSplineType = UBspline_3d_d
 
using TT = typename BsplineSet::ValueType
 
using vContainer_type = Vector< ST, aligned_allocator< ST > >
 
using gContainer_type = VectorSoaContainer< ST, 3 >
 
using hContainer_type = VectorSoaContainer< ST, 6 >
 
using ghContainer_type = VectorSoaContainer< ST, 10 >
 
template<typename DT >
using OffloadVector = Vector< DT, OffloadAllocator< DT > >
 
template<typename DT >
using OffloadPosVector = VectorSoaContainer< DT, 3, OffloadAllocator< DT > >
 
- Public Types inherited from SPOSet
using ValueVector = OrbitalSetTraits< ValueType >::ValueVector
 
using ValueMatrix = OrbitalSetTraits< ValueType >::ValueMatrix
 
using GradVector = OrbitalSetTraits< ValueType >::GradVector
 
using GradMatrix = OrbitalSetTraits< ValueType >::GradMatrix
 
using HessVector = OrbitalSetTraits< ValueType >::HessVector
 
using HessMatrix = OrbitalSetTraits< ValueType >::HessMatrix
 
using GGGVector = OrbitalSetTraits< ValueType >::GradHessVector
 
using GGGMatrix = OrbitalSetTraits< ValueType >::GradHessMatrix
 
using SPOMap = std::map< std::string, const std::unique_ptr< const SPOSet > >
 
using OffloadMWVGLArray = Array< ValueType, 3, OffloadPinnedAllocator< ValueType > >
 
using OffloadMWVArray = Array< ValueType, 2, OffloadPinnedAllocator< ValueType > >
 
template<typename DT >
using OffloadMatrix = Matrix< DT, OffloadPinnedAllocator< DT > >
 
- Public Types inherited from QMCTraits
enum  { DIM = OHMMS_DIM, DIM_VGL = OHMMS_DIM + 2 }
 
using QTBase = QMCTypes< OHMMS_PRECISION, DIM >
 
using QTFull = QMCTypes< OHMMS_PRECISION_FULL, DIM >
 
using RealType = QTBase::RealType
 
using ComplexType = QTBase::ComplexType
 
using ValueType = QTBase::ValueType
 
using PosType = QTBase::PosType
 
using GradType = QTBase::GradType
 
using TensorType = QTBase::TensorType
 
using IndexType = OHMMS_INDEXTYPE
 define other types More...
 
using FullPrecRealType = QTFull::RealType
 
using FullPrecValueType = QTFull::ValueType
 
using PropertySetType = RecordNamedProperty< FullPrecRealType >
 define PropertyList_t More...
 
using PtclGrpIndexes = std::vector< std::pair< int, int > >
 

Public Member Functions

 SplineC2ROMPTarget (const std::string &my_name)
 
 SplineC2ROMPTarget (const SplineC2ROMPTarget &in)
 
virtual std::string getClassName () const override
 return class name More...
 
virtual std::string getKeyword () const override
 
bool isComplex () const override
 
virtual bool isOMPoffload () const override
 Query if this SPOSet uses OpenMP offload. More...
 
void createResource (ResourceCollection &collection) const override
 initialize a shared resource and hand it to collection More...
 
void acquireResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
 acquire a shared resource from collection More...
 
void releaseResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const override
 return a shared resource to collection More...
 
std::unique_ptr< SPOSetmakeClone () const override
 make a clone of itself every derived class must implement this to have threading working correctly. More...
 
void resizeStorage (size_t n, size_t nvals)
 
void bcast_tables (Communicate *comm)
 
void gather_tables (Communicate *comm)
 
template<typename GT , typename BCT >
void create_spline (GT &xyz_g, BCT &xyz_bc)
 
void finalizeConstruction () override
 this routine can not be called from threaded region More...
 
void flush_zero ()
 
void resize_kpoints ()
 remap kPoints to pack the double copy More...
 
void set_spline (SingleSplineType *spline_r, SingleSplineType *spline_i, int twist, int ispline, int level)
 
bool read_splines (hdf_archive &h5f)
 
bool write_splines (hdf_archive &h5f)
 
void assign_v (const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
 
virtual void evaluateValue (const ParticleSet &P, const int iat, ValueVector &psi) override
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateDetRatios (const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios) override
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP More...
 
virtual void mw_evaluateDetRatios (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType *> &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const override
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers More...
 
void assign_vgl_from_l (const PointType &r, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)
 assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian More...
 
virtual void evaluateVGL (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi) override
 evaluate the values, gradients and laplacians of this single-particle orbital set More...
 
virtual void mw_evaluateVGL (const RefVectorWithLeader< SPOSet > &sa_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const override
 evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGrads (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const override
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
void assign_vgh (const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
 
virtual void evaluateVGH (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi) override
 evaluate the values, gradients and hessians of this single-particle orbital set More...
 
void assign_vghgh (const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
 
virtual void evaluateVGHGH (const ParticleSet &P, const int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi) override
 evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set More...
 
virtual void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet) override
 evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles More...
 
- Public Member Functions inherited from BsplineSet
 BsplineSet (const std::string &my_name)
 
auto & getHalfG () const
 
void init_base (int n)
 
int remap_kpoints ()
 remap kpoints to group general kpoints & special kpoints More...
 
void setOrbitalSetSize (int norbs) override
 set the OrbitalSetSize More...
 
void mw_evaluate_notranspose (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int first, int last, const RefVector< ValueMatrix > &logdet_list, const RefVector< GradMatrix > &dlogdet_list, const RefVector< ValueMatrix > &d2logdet_list) const override
 
void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, HessMatrix &grad_grad_logdet) override
 evaluate the values, gradients and hessians of this single-particle orbital for [first,last) particles More...
 
void evaluate_notranspose (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, HessMatrix &grad_grad_logdet, GGGMatrix &grad_grad_grad_logdet) override
 evaluate the values, gradients, hessians and third derivatives of this single-particle orbital for [first,last) particles More...
 
void evaluateGradSource (const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &gradphi) override
 evaluate the gradients of this single-particle orbital for [first,last) target particles with respect to the given source particle More...
 
void evaluateGradSource (const ParticleSet &P, int first, int last, const ParticleSet &source, int iat_src, GradMatrix &grad_phi, HessMatrix &grad_grad_phi, GradMatrix &grad_lapl_phi) override
 evaluate the gradients of values, gradients, laplacians of this single-particle orbital for [first,last) target particles with respect to the given source particle More...
 
virtual void evaluateDetRatios (const VirtualParticleSet &VP, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios)
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP More...
 
virtual void evaluateValue (const ParticleSet &P, int iat, ValueVector &psi)=0
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateVGH (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi)
 evaluate the values, gradients and hessians of this single-particle orbital set More...
 
virtual void evaluateVGHGH (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi)
 evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set More...
 
virtual void evaluateVGL (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi)=0
 evaluate the values, gradients and laplacians of this single-particle orbital set More...
 
virtual void finalizeConstruction ()
 finalize the construction of SPOSet More...
 
virtual void mw_evaluateDetRatios (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, const RefVector< ValueVector > &psi_list, const std::vector< const ValueType * > &invRow_ptr_list, std::vector< std::vector< ValueType >> &ratios_list) const
 evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers More...
 
virtual void mw_evaluateVGL (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGrads (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType * > &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
virtual void acquireResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const
 acquire a shared resource from collection More...
 
virtual void createResource (ResourceCollection &collection) const
 initialize a shared resource and hand it to collection More...
 
virtual void releaseResource (ResourceCollection &collection, const RefVectorWithLeader< SPOSet > &spo_list) const
 return a shared resource to collection More...
 
- Public Member Functions inherited from SPOSet
 SPOSet (const std::string &my_name)
 constructor More...
 
virtual ~SPOSet ()=default
 destructor More...
 
int size () const
 return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); More...
 
void basic_report (const std::string &pad="") const
 print basic SPOSet information More...
 
virtual void report (const std::string &pad="") const
 print SPOSet information More...
 
int getOrbitalSetSize () const
 return the size of the orbitals More...
 
virtual bool isOptimizable () const
 Query if this SPOSet is optimizable. More...
 
virtual void extractOptimizableObjectRefs (UniqueOptObjRefs &opt_obj_refs)
 extract underlying OptimizableObject references More...
 
virtual void checkOutVariables (const opt_variables_type &active)
 check out variational optimizable variables More...
 
virtual bool hasIonDerivs () const
 Query if this SPOSet has an explicit ion dependence. More...
 
virtual void checkObject () const
 check a few key parameters before putting the SPO into a determinant More...
 
virtual bool isRotationSupported () const
 return true if this SPOSet can be wrappered by RotatedSPO More...
 
virtual void storeParamsBeforeRotation ()
 store parameters before getting destroyed by rotation. More...
 
virtual void applyRotation (const ValueMatrix &rot_mat, bool use_stored_copy=false)
 apply rotation to all the orbitals More...
 
virtual void evaluateDerivatives (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, Vector< ValueType > &dhpsioverpsi, const int &FirstIndex, const int &LastIndex)
 Parameter derivatives of the wavefunction and the Laplacian of the wavefunction. More...
 
virtual void evaluateDerivativesWF (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, int FirstIndex, int LastIndex)
 Parameter derivatives of the wavefunction. More...
 
virtual void evaluateDerivatives (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, Vector< ValueType > &dhpsioverpsi, const ValueType &psiCurrent, const std::vector< ValueType > &Coeff, const std::vector< size_t > &C2node_up, const std::vector< size_t > &C2node_dn, const ValueVector &detValues_up, const ValueVector &detValues_dn, const GradMatrix &grads_up, const GradMatrix &grads_dn, const ValueMatrix &lapls_up, const ValueMatrix &lapls_dn, const ValueMatrix &M_up, const ValueMatrix &M_dn, const ValueMatrix &Minv_up, const ValueMatrix &Minv_dn, const GradMatrix &B_grad, const ValueMatrix &B_lapl, const std::vector< int > &detData_up, const size_t N1, const size_t N2, const size_t NP1, const size_t NP2, const std::vector< std::vector< int >> &lookup_tbl)
 Evaluate the derivative of the optimized orbitals with respect to the parameters this is used only for MSD, to be refined for better serving both single and multi SD. More...
 
virtual void evaluateDerivativesWF (ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi, const QTFull::ValueType &psiCurrent, const std::vector< ValueType > &Coeff, const std::vector< size_t > &C2node_up, const std::vector< size_t > &C2node_dn, const ValueVector &detValues_up, const ValueVector &detValues_dn, const ValueMatrix &M_up, const ValueMatrix &M_dn, const ValueMatrix &Minv_up, const ValueMatrix &Minv_dn, const std::vector< int > &detData_up, const std::vector< std::vector< int >> &lookup_tbl)
 Evaluate the derivative of the optimized orbitals with respect to the parameters this is used only for MSD, to be refined for better serving both single and multi SD. More...
 
virtual void evaluateDetSpinorRatios (const VirtualParticleSet &VP, ValueVector &psi, const std::pair< ValueVector, ValueVector > &spinor_multiplier, const ValueVector &invrow, std::vector< ValueType > &ratios)
 evaluate determinant ratios for virtual moves, specifically for Spinor SPOSets More...
 
virtual void evaluateDerivRatios (const VirtualParticleSet &VP, const opt_variables_type &optvars, ValueVector &psi, const ValueVector &psiinv, std::vector< ValueType > &ratios, Matrix< ValueType > &dratios, int FirstIndex, int LastIndex)
 Determinant ratios and parameter derivatives of the wavefunction for virtual moves. More...
 
virtual void evaluateVGL_spin (const ParticleSet &P, int iat, ValueVector &psi, GradVector &dpsi, ValueVector &d2psi, ValueVector &dspin)
 evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital set More...
 
virtual void mw_evaluateValue (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list) const
 evaluate the values this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLWithSpin (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list, OffloadMatrix< ComplexType > &mw_dspin) const
 evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets of multiple walkers More...
 
virtual void mw_evaluateVGLandDetRatioGradsWithSpin (const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const std::vector< const ValueType *> &invRow_ptr_list, OffloadMWVGLArray &phi_vgl_v, std::vector< ValueType > &ratios, std::vector< GradType > &grads, std::vector< ValueType > &spingrads) const
 evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers. More...
 
virtual void evaluate_spin (const ParticleSet &P, int iat, ValueVector &psi, ValueVector &dpsi)
 evaluate the values of this single-particle orbital set More...
 
virtual void evaluateThirdDeriv (const ParticleSet &P, int first, int last, GGGMatrix &grad_grad_grad_logdet)
 evaluate the third derivatives of this single-particle orbital set More...
 
virtual void evaluate_notranspose_spin (const ParticleSet &P, int first, int last, ValueMatrix &logdet, GradMatrix &dlogdet, ValueMatrix &d2logdet, ValueMatrix &dspinlogdet)
 evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient More...
 
virtual void evaluateGradSourceRow (const ParticleSet &P, int iel, const ParticleSet &source, int iat_src, GradVector &gradphi)
 Returns a row of d/dR_iat phi_j(r) evaluated at position r. More...
 
virtual PosType get_k (int orb)
 access the k point related to the given orbital More...
 
virtual bool transformSPOSet ()
 Used only by cusp correction in AOS LCAO. More...
 
const std::string & getName () const
 return object name More...
 

Protected Attributes

vContainer_type myV
 intermediate result vectors More...
 
vContainer_type myL
 
gContainer_type myG
 
hContainer_type myH
 
ghContainer_type mygH
 
- Protected Attributes inherited from BsplineSet
size_t MyIndex
 Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases. More...
 
size_t first_spo
 first index of the SPOs this Spline handles More...
 
size_t last_spo
 last index of the SPOs this Spline handles More...
 
TinyVector< int, DHalfG
 sign bits at the G/2 boundaries More...
 
std::vector< bool > MakeTwoCopies
 flags to unpack sin/cos More...
 
std::vector< SPOSet::PosTypekPoints
 kpoints for each unique orbitals. More...
 
aligned_vector< int > BandIndexMap
 remap splines to orbitals More...
 
std::vector< int > offset
 band offsets used for communication More...
 
- Protected Attributes inherited from SPOSet
const std::string my_name_
 name of the object, unique identifier More...
 
IndexType OrbitalSetSize
 number of Single-particle orbitals More...
 
opt_variables_type myVars
 Optimizable variables. More...
 

Private Member Functions

void evaluateVGLMultiPos (const Vector< ST, OffloadPinnedAllocator< ST >> &multi_pos_copy, Vector< ST, OffloadPinnedAllocator< ST >> &offload_scratch, Vector< TT, OffloadPinnedAllocator< TT >> &results_scratch, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
 

Private Attributes

NewTimeroffload_timer_
 timer for offload portion More...
 
CrystalLattice< ST, 3 > PrimLattice
 primitive cell More...
 
Tensor< ST, 3 > GGt
 $GGt=G^t G $, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian More...
 
int nComplexBands
 number of complex bands More...
 
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
 multi bspline set More...
 
std::shared_ptr< OffloadVector< ST > > mKK
 
std::shared_ptr< OffloadPosVector< ST > > myKcart
 
std::shared_ptr< OffloadVector< ST > > GGt_offload
 
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
 
ResourceHandle< SplineOMPTargetMultiWalkerMem< ST, TT > > mw_mem_handle_
 
Matrix< TT, OffloadPinnedAllocator< TT > > ratios_private
 team private ratios for reduction, numVP x numTeams More...
 
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
 offload scratch space, dynamically resized to the maximal need More...
 
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
 result scratch space, dynamically resized to the maximal need More...
 
Vector< TT, OffloadPinnedAllocator< TT > > psiinv_pos_copy
 psiinv and position scratch space, used to avoid allocation on the fly and faster transfer More...
 
Vector< ST, OffloadPinnedAllocator< ST > > multi_pos_copy
 position scratch space, used to avoid allocation on the fly and faster transfer More...
 

Friends

template<class BSPLINESPO >
class SplineSetReader
 
struct BsplineReader
 

Additional Inherited Members

- Static Protected Attributes inherited from BsplineSet
static const int D = DIM
 

Detailed Description

template<typename ST>
class qmcplusplus::SplineC2ROMPTarget< ST >

class to match std::complex<ST> spline with BsplineSet::ValueType (real) SPOs with OpenMP offload

Template Parameters
STprecision of spline

Requires temporage storage and multiplication of phase vectors The internal storage of complex spline coefficients uses double sized real arrays of ST type, aligned and padded. Calling assign_v assign_vgl should be restricted to the actual number of complex splines (kPoints.size()). The first nComplexBands complex splines produce 2 real orbitals. The rest complex splines produce 1 real orbital. All the output orbitals are real (C2R). The maximal number of output orbitals is OrbitalSetSize.

Definition at line 44 of file SplineC2ROMPTarget.h.

Member Typedef Documentation

◆ BCType

using BCType = typename bspline_traits<ST, 3>::BCType

Definition at line 48 of file SplineC2ROMPTarget.h.

◆ DataType

using DataType = ST

Definition at line 49 of file SplineC2ROMPTarget.h.

◆ gContainer_type

Definition at line 60 of file SplineC2ROMPTarget.h.

◆ ghContainer_type

Definition at line 62 of file SplineC2ROMPTarget.h.

◆ hContainer_type

Definition at line 61 of file SplineC2ROMPTarget.h.

◆ OffloadPosVector

Definition at line 67 of file SplineC2ROMPTarget.h.

◆ OffloadVector

Definition at line 65 of file SplineC2ROMPTarget.h.

◆ PointType

using PointType = TinyVector<ST, 3>

Definition at line 50 of file SplineC2ROMPTarget.h.

◆ SingleSplineType

using SingleSplineType = UBspline_3d_d

Definition at line 51 of file SplineC2ROMPTarget.h.

◆ SplineType

using SplineType = typename bspline_traits<ST, 3>::SplineType

Definition at line 47 of file SplineC2ROMPTarget.h.

◆ TT

using TT = typename BsplineSet::ValueType

Definition at line 53 of file SplineC2ROMPTarget.h.

◆ vContainer_type

Definition at line 59 of file SplineC2ROMPTarget.h.

Constructor & Destructor Documentation

◆ SplineC2ROMPTarget() [1/2]

SplineC2ROMPTarget ( const std::string &  my_name)
inline

Definition at line 115 of file SplineC2ROMPTarget.h.

116  : BsplineSet(my_name),
117  offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)),
118  nComplexBands(0),
119  GGt_offload(std::make_shared<OffloadVector<ST>>(9)),
120  PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9))
121  {}
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
NewTimer & offload_timer_
timer for offload portion
BsplineSet(const std::string &my_name)
Definition: BsplineSet.h:59
int nComplexBands
number of complex bands
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)

◆ SplineC2ROMPTarget() [2/2]

SplineC2ROMPTarget ( const SplineC2ROMPTarget< ST > &  in)
default

Member Function Documentation

◆ acquireResource()

void acquireResource ( ResourceCollection collection,
const RefVectorWithLeader< SPOSet > &  spo_list 
) const
inlineoverridevirtual

acquire a shared resource from collection

Reimplemented from SPOSet.

Definition at line 135 of file SplineC2ROMPTarget.h.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), ResourceCollection::lendResource(), and SplineC2ROMPTarget< ST >::mw_mem_handle_.

136  {
137  assert(this == &spo_list.getLeader());
138  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
139  phi_leader.mw_mem_handle_ = collection.lendResource<SplineOMPTargetMultiWalkerMem<ST, TT>>();
140  }

◆ assign_v()

void assign_v ( const PointType r,
const vContainer_type myV,
ValueVector psi,
int  first,
int  last 
) const
inline

Definition at line 55 of file SplineC2ROMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, and qmcplusplus::sincos().

60 {
61  // protect last
62  last = last > kPoints.size() ? kPoints.size() : last;
63 
64  const ST x = r[0], y = r[1], z = r[2];
65  const ST* restrict kx = myKcart->data(0);
66  const ST* restrict ky = myKcart->data(1);
67  const ST* restrict kz = myKcart->data(2);
68 
69  TT* restrict psi_s = psi.data() + first_spo;
70  const size_t requested_orb_size = psi.size();
71 #pragma omp simd
72  for (size_t j = first; j < std::min(nComplexBands, last); j++)
73  {
74  ST s, c;
75  const size_t jr = j << 1;
76  const size_t ji = jr + 1;
77  const ST val_r = myV[jr];
78  const ST val_i = myV[ji];
79  omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
80  if (jr < requested_orb_size)
81  psi_s[jr] = val_r * c - val_i * s;
82  if (ji < requested_orb_size)
83  psi_s[ji] = val_i * c + val_r * s;
84  }
85 
86  psi_s += nComplexBands;
87 #pragma omp simd
88  for (size_t j = std::max(nComplexBands, first); j < last; j++)
89  {
90  ST s, c;
91  const ST val_r = myV[2 * j];
92  const ST val_i = myV[2 * j + 1];
93  omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
94  if (j + nComplexBands < requested_orb_size)
95  psi_s[j] = val_r * c - val_i * s;
96  }
97 }
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
vContainer_type myV
intermediate result vectors
typename BsplineSet::ValueType TT
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vgh()

void assign_vgh ( const PointType r,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
int  first,
int  last 
) const

Definition at line 928 of file SplineC2ROMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, qmcplusplus::sincos(), and qmcplusplus::v_m_v().

934 {
935  // protect last
936  last = last > kPoints.size() ? kPoints.size() : last;
937 
938  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
939  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
940  g22 = PrimLattice.G(8);
941  const ST x = r[0], y = r[1], z = r[2];
942 
943  const ST* restrict k0 = myKcart->data(0);
944  const ST* restrict k1 = myKcart->data(1);
945  const ST* restrict k2 = myKcart->data(2);
946 
947  const ST* restrict g0 = myG.data(0);
948  const ST* restrict g1 = myG.data(1);
949  const ST* restrict g2 = myG.data(2);
950  const ST* restrict h00 = myH.data(0);
951  const ST* restrict h01 = myH.data(1);
952  const ST* restrict h02 = myH.data(2);
953  const ST* restrict h11 = myH.data(3);
954  const ST* restrict h12 = myH.data(4);
955  const ST* restrict h22 = myH.data(5);
956 
957  const size_t requested_orb_size = psi.size();
958 #pragma omp simd
959  for (size_t j = first; j < std::min(nComplexBands, last); j++)
960  {
961  int jr = j << 1;
962  int ji = jr + 1;
963 
964  const ST kX = k0[j];
965  const ST kY = k1[j];
966  const ST kZ = k2[j];
967  const ST val_r = myV[jr];
968  const ST val_i = myV[ji];
969 
970  //phase
971  ST s, c;
972  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
973 
974  //dot(PrimLattice.G,myG[j])
975  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
976  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
977  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
978 
979  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
980  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
981  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
982 
983  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
984  const ST gX_r = dX_r + val_i * kX;
985  const ST gY_r = dY_r + val_i * kY;
986  const ST gZ_r = dZ_r + val_i * kZ;
987  const ST gX_i = dX_i - val_r * kX;
988  const ST gY_i = dY_i - val_r * kY;
989  const ST gZ_i = dZ_i - val_r * kZ;
990 
991  const size_t psiIndex = first_spo + jr;
992 
993  if (psiIndex < requested_orb_size)
994  {
995  psi[psiIndex] = c * val_r - s * val_i;
996  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
997  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
998  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
999  }
1000 
1001  if (psiIndex + 1 < requested_orb_size)
1002  {
1003  psi[psiIndex + 1] = c * val_i + s * val_r;
1004  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
1005  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
1006  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
1007  }
1008 
1009  const ST h_xx_r =
1010  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
1011  const ST h_xy_r =
1012  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
1013  const ST h_xz_r =
1014  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
1015  const ST h_yx_r =
1016  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
1017  const ST h_yy_r =
1018  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
1019  const ST h_yz_r =
1020  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
1021  const ST h_zx_r =
1022  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
1023  const ST h_zy_r =
1024  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
1025  const ST h_zz_r =
1026  v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
1027 
1028  const ST h_xx_i =
1029  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
1030  const ST h_xy_i =
1031  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
1032  const ST h_xz_i =
1033  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
1034  const ST h_yx_i =
1035  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
1036  const ST h_yy_i =
1037  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
1038  const ST h_yz_i =
1039  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
1040  const ST h_zx_i =
1041  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
1042  const ST h_zy_i =
1043  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
1044  const ST h_zz_i =
1045  v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
1046 
1047  if (psiIndex < requested_orb_size)
1048  {
1049  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
1050  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
1051  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
1052  grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
1053  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
1054  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
1055  grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
1056  grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
1057  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
1058  }
1059 
1060  if (psiIndex + 1 < requested_orb_size)
1061  {
1062  grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
1063  grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
1064  grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
1065  grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r;
1066  grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
1067  grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
1068  grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r;
1069  grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r;
1070  grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
1071  }
1072  }
1073 
1074 #pragma omp simd
1075  for (size_t j = std::max(nComplexBands, first); j < last; j++)
1076  {
1077  int jr = j << 1;
1078  int ji = jr + 1;
1079 
1080  const ST kX = k0[j];
1081  const ST kY = k1[j];
1082  const ST kZ = k2[j];
1083  const ST val_r = myV[jr];
1084  const ST val_i = myV[ji];
1085 
1086  //phase
1087  ST s, c;
1088  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
1089 
1090  //dot(PrimLattice.G,myG[j])
1091  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
1092  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
1093  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
1094 
1095  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
1096  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
1097  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
1098 
1099  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
1100  const ST gX_r = dX_r + val_i * kX;
1101  const ST gY_r = dY_r + val_i * kY;
1102  const ST gZ_r = dZ_r + val_i * kZ;
1103  const ST gX_i = dX_i - val_r * kX;
1104  const ST gY_i = dY_i - val_r * kY;
1105  const ST gZ_i = dZ_i - val_r * kZ;
1106 
1107  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
1108  {
1109  psi[psiIndex] = c * val_r - s * val_i;
1110  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
1111  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
1112  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
1113 
1114  const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) +
1115  kX * (gX_i + dX_i);
1116  const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) +
1117  kX * (gY_i + dY_i);
1118  const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) +
1119  kX * (gZ_i + dZ_i);
1120  const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) +
1121  kY * (gX_i + dX_i);
1122  const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) +
1123  kY * (gY_i + dY_i);
1124  const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) +
1125  kY * (gZ_i + dZ_i);
1126  const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) +
1127  kZ * (gX_i + dX_i);
1128  const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) +
1129  kZ * (gY_i + dY_i);
1130  const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) +
1131  kZ * (gZ_i + dZ_i);
1132 
1133  const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) -
1134  kX * (gX_r + dX_r);
1135  const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) -
1136  kX * (gY_r + dY_r);
1137  const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) -
1138  kX * (gZ_r + dZ_r);
1139  const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) -
1140  kY * (gX_r + dX_r);
1141  const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) -
1142  kY * (gY_r + dY_r);
1143  const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) -
1144  kY * (gZ_r + dZ_r);
1145  const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) -
1146  kZ * (gX_r + dX_r);
1147  const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) -
1148  kZ * (gY_r + dY_r);
1149  const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) -
1150  kZ * (gZ_r + dZ_r);
1151 
1152  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
1153  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
1154  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
1155  grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
1156  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
1157  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
1158  grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
1159  grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
1160  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
1161  }
1162  }
1163 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
T min(T a, T b)
vContainer_type myV
intermediate result vectors
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vghgh()

void assign_vghgh ( const PointType r,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
GGGVector grad_grad_grad_psi,
int  first = 0,
int  last = -1 
) const

Definition at line 1185 of file SplineC2ROMPTarget.cpp.

References TinyVector< T, D >::data(), omptarget::min(), qmcplusplus::Units::time::s, qmcplusplus::sincos(), qmcplusplus::t3_contract(), and qmcplusplus::v_m_v().

1192 {
1193  // protect last
1194  last = last < 0 ? kPoints.size() : (last > kPoints.size() ? kPoints.size() : last);
1195 
1196  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
1197  g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
1198  g22 = PrimLattice.G(8);
1199  const ST x = r[0], y = r[1], z = r[2];
1200 
1201  const ST* restrict k0 = myKcart->data(0);
1202  const ST* restrict k1 = myKcart->data(1);
1203  const ST* restrict k2 = myKcart->data(2);
1204 
1205  const ST* restrict g0 = myG.data(0);
1206  const ST* restrict g1 = myG.data(1);
1207  const ST* restrict g2 = myG.data(2);
1208  const ST* restrict h00 = myH.data(0);
1209  const ST* restrict h01 = myH.data(1);
1210  const ST* restrict h02 = myH.data(2);
1211  const ST* restrict h11 = myH.data(3);
1212  const ST* restrict h12 = myH.data(4);
1213  const ST* restrict h22 = myH.data(5);
1214 
1215  const ST* restrict gh000 = mygH.data(0);
1216  const ST* restrict gh001 = mygH.data(1);
1217  const ST* restrict gh002 = mygH.data(2);
1218  const ST* restrict gh011 = mygH.data(3);
1219  const ST* restrict gh012 = mygH.data(4);
1220  const ST* restrict gh022 = mygH.data(5);
1221  const ST* restrict gh111 = mygH.data(6);
1222  const ST* restrict gh112 = mygH.data(7);
1223  const ST* restrict gh122 = mygH.data(8);
1224  const ST* restrict gh222 = mygH.data(9);
1225 
1226  const size_t requested_orb_size = psi.size();
1227 //SIMD doesn't work quite right yet. Comment out until further debugging.
1228 #pragma omp simd
1229  for (size_t j = first; j < std::min(nComplexBands, last); j++)
1230  {
1231  int jr = j << 1;
1232  int ji = jr + 1;
1233 
1234  const ST kX = k0[j];
1235  const ST kY = k1[j];
1236  const ST kZ = k2[j];
1237  const ST val_r = myV[jr];
1238  const ST val_i = myV[ji];
1239 
1240  //phase
1241  ST s, c;
1242  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
1243 
1244  //dot(PrimLattice.G,myG[j])
1245  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
1246  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
1247  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
1248 
1249  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
1250  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
1251  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
1252 
1253  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
1254  const ST gX_r = dX_r + val_i * kX;
1255  const ST gY_r = dY_r + val_i * kY;
1256  const ST gZ_r = dZ_r + val_i * kZ;
1257  const ST gX_i = dX_i - val_r * kX;
1258  const ST gY_i = dY_i - val_r * kY;
1259  const ST gZ_i = dZ_i - val_r * kZ;
1260 
1261  const size_t psiIndex = first_spo + jr;
1262 
1263  if (psiIndex < requested_orb_size)
1264  {
1265  psi[psiIndex] = c * val_r - s * val_i;
1266  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
1267  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
1268  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
1269  }
1270 
1271  if (psiIndex + 1 < requested_orb_size)
1272  {
1273  psi[psiIndex + 1] = c * val_i + s * val_r;
1274  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
1275  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
1276  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
1277  }
1278 
1279  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
1280  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
1281  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
1282  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
1283  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
1284  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
1285  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
1286 
1287  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
1288  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
1289  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
1290  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
1291  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
1292  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
1293 
1294  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
1295  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
1296  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
1297  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
1298  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
1299  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
1300 
1301  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
1302  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
1303  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
1304  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
1305  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
1306  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
1307 
1308  if (psiIndex < requested_orb_size)
1309  {
1310  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
1311  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
1312  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
1313  grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
1314  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
1315  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
1316  grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
1317  grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
1318  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
1319  }
1320 
1321  if (psiIndex + 1 < requested_orb_size)
1322  {
1323  grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
1324  grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
1325  grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
1326  grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r;
1327  grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
1328  grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
1329  grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r;
1330  grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r;
1331  grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
1332  }
1333 
1334  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
1335  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
1336 
1337  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1338  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1339  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1340  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1341  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1342  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1343  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1344  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1345  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1346  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1347  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1348  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1349  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1350  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1351  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1352  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1353  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1354  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1355  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1356  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1357 
1358  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1359  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1360  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1361  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1362  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1363  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1364  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1365  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1366  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1367  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1368  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1369  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1370  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1371  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1372  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1373  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1374  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1375  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1376  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1377  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1378 
1379  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
1380  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
1381  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
1382  const ST gh_xxy_r =
1383  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
1384  const ST gh_xxy_i =
1385  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
1386  const ST gh_xxz_r =
1387  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
1388  const ST gh_xxz_i =
1389  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
1390  const ST gh_xyy_r =
1391  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
1392  const ST gh_xyy_i =
1393  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
1394  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
1395  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
1396  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
1397  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
1398  const ST gh_xzz_r =
1399  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
1400  const ST gh_xzz_i =
1401  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
1402  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
1403  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
1404  const ST gh_yyz_r =
1405  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
1406  const ST gh_yyz_i =
1407  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
1408  const ST gh_yzz_r =
1409  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
1410  const ST gh_yzz_i =
1411  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
1412  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
1413  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
1414 
1415  if (psiIndex < requested_orb_size)
1416  {
1417  grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
1418  grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
1419  grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
1420  grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
1421  grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
1422  grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
1423  grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
1424  grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
1425  grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
1426 
1427  grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
1428  grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
1429  grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
1430  grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
1431  grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
1432  grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
1433  grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
1434  grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
1435  grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
1436 
1437  grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
1438  grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
1439  grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
1440  grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
1441  grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
1442  grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
1443  grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
1444  grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
1445  grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
1446  }
1447 
1448  if (psiIndex + 1 < requested_orb_size)
1449  {
1450  grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r;
1451  grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r;
1452  grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r;
1453  grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r;
1454  grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r;
1455  grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r;
1456  grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r;
1457  grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r;
1458  grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r;
1459 
1460  grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r;
1461  grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r;
1462  grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r;
1463  grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r;
1464  grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r;
1465  grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r;
1466  grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r;
1467  grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r;
1468  grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r;
1469 
1470  grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r;
1471  grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r;
1472  grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r;
1473  grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r;
1474  grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r;
1475  grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r;
1476  grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r;
1477  grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r;
1478  grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r;
1479  }
1480  }
1481 #pragma omp simd
1482  for (size_t j = std::max(nComplexBands, first); j < last; j++)
1483  {
1484  int jr = j << 1;
1485  int ji = jr + 1;
1486 
1487  const ST kX = k0[j];
1488  const ST kY = k1[j];
1489  const ST kZ = k2[j];
1490  const ST val_r = myV[jr];
1491  const ST val_i = myV[ji];
1492 
1493  //phase
1494  ST s, c;
1495  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
1496 
1497  //dot(PrimLattice.G,myG[j])
1498  const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
1499  const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
1500  const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
1501 
1502  const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
1503  const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
1504  const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
1505 
1506  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
1507  const ST gX_r = dX_r + val_i * kX;
1508  const ST gY_r = dY_r + val_i * kY;
1509  const ST gZ_r = dZ_r + val_i * kZ;
1510  const ST gX_i = dX_i - val_r * kX;
1511  const ST gY_i = dY_i - val_r * kY;
1512  const ST gZ_i = dZ_i - val_r * kZ;
1513 
1514  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
1515  {
1516  psi[psiIndex] = c * val_r - s * val_i;
1517  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
1518  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
1519  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
1520 
1521  //intermediates for computation of hessian. \partial_i \partial_j phi in cartesian coordinates.
1522  const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
1523  const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
1524  const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
1525  const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
1526  const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
1527  const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
1528 
1529  const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
1530  const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
1531  const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
1532  const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
1533  const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
1534  const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
1535 
1536  const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
1537  const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
1538  const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
1539  const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
1540  const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
1541  const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
1542 
1543  const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
1544  const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
1545  const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
1546  const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
1547  const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
1548  const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
1549 
1550  grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
1551  grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
1552  grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
1553  grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
1554  grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
1555  grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
1556  grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
1557  grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
1558  grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
1559 
1560  //These are the real and imaginary components of the third SPO derivative. _xxx denotes
1561  // third derivative w.r.t. x, _xyz, a derivative with resepect to x,y, and z, and so on.
1562 
1563  const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1564  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1565  const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1566  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1567  const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1568  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1569  const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1570  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1571  const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1572  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1573  const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1574  gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1575  const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1576  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1577  const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1578  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1579  const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1580  gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1581  const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
1582  gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1583 
1584  const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1585  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
1586  const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1587  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
1588  const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1589  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
1590  const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1591  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
1592  const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1593  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
1594  const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1595  gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
1596  const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1597  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
1598  const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1599  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
1600  const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1601  gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
1602  const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
1603  gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
1604 
1605  //Here is where we build up the components of the physical hessian gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
1606  const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
1607  const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
1608  const ST gh_xxy_r =
1609  f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
1610  const ST gh_xxy_i =
1611  f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
1612  const ST gh_xxz_r =
1613  f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
1614  const ST gh_xxz_i =
1615  f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
1616  const ST gh_xyy_r =
1617  f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
1618  const ST gh_xyy_i =
1619  f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
1620  const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
1621  (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
1622  const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
1623  (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
1624  const ST gh_xzz_r =
1625  f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
1626  const ST gh_xzz_i =
1627  f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
1628  const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
1629  const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
1630  const ST gh_yyz_r =
1631  f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
1632  const ST gh_yyz_i =
1633  f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
1634  const ST gh_yzz_r =
1635  f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
1636  const ST gh_yzz_i =
1637  f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
1638  const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
1639  const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
1640  //[x][xx] //These are the unique entries
1641  grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
1642  grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
1643  grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
1644  grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
1645  grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
1646  grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
1647  grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
1648  grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
1649  grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
1650 
1651  grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
1652  grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
1653  grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
1654  grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
1655  grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
1656  grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
1657  grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
1658  grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
1659  grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
1660 
1661  grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
1662  grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
1663  grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
1664  grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
1665  grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
1666  grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
1667  grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
1668  grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
1669  grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
1670  }
1671  }
1672 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
T v_m_v(T h00, T h01, T h02, T h11, T h12, T h22, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z)
compute vector[3]^T x matrix[3][3] x vector[3]
T t3_contract(T h000, T h001, T h002, T h011, T h012, T h022, T h111, T h112, T h122, T h222, T g1x, T g1y, T g1z, T g2x, T g2y, T g2z, T g3x, T g3y, T g3z)
Coordinate transform for a 3rd rank symmetric tensor representing coordinate derivatives (hence t3_co...
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
T min(T a, T b)
vContainer_type myV
intermediate result vectors
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ assign_vgl_from_l()

void assign_vgl_from_l ( const PointType r,
ValueVector psi,
GradVector dpsi,
ValueVector d2psi 
)
inline

assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian

Definition at line 405 of file SplineC2ROMPTarget.cpp.

References ASSUME_ALIGNED, TinyVector< T, D >::data(), qmcplusplus::Units::force::N, qmcplusplus::Units::time::s, and qmcplusplus::sincos().

409 {
410  constexpr ST two(2);
411  const ST x = r[0], y = r[1], z = r[2];
412 
413  const ST* restrict k0 = myKcart->data(0);
414  ASSUME_ALIGNED(k0);
415  const ST* restrict k1 = myKcart->data(1);
416  ASSUME_ALIGNED(k1);
417  const ST* restrict k2 = myKcart->data(2);
418  ASSUME_ALIGNED(k2);
419 
420  const ST* restrict g0 = myG.data(0);
421  ASSUME_ALIGNED(g0);
422  const ST* restrict g1 = myG.data(1);
423  ASSUME_ALIGNED(g1);
424  const ST* restrict g2 = myG.data(2);
425  ASSUME_ALIGNED(g2);
426 
427  const size_t N = kPoints.size();
428 
429  const size_t requested_orb_size = psi.size();
430 #pragma omp simd
431  for (size_t j = 0; j < nComplexBands; j++)
432  {
433  const size_t jr = j << 1;
434  const size_t ji = jr + 1;
435 
436  const ST kX = k0[j];
437  const ST kY = k1[j];
438  const ST kZ = k2[j];
439  const ST val_r = myV[jr];
440  const ST val_i = myV[ji];
441 
442  //phase
443  ST s, c;
444  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
445 
446  //dot(PrimLattice.G,myG[j])
447  const ST dX_r = g0[jr];
448  const ST dY_r = g1[jr];
449  const ST dZ_r = g2[jr];
450 
451  const ST dX_i = g0[ji];
452  const ST dY_i = g1[ji];
453  const ST dZ_i = g2[ji];
454 
455  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
456  const ST gX_r = dX_r + val_i * kX;
457  const ST gY_r = dY_r + val_i * kY;
458  const ST gZ_r = dZ_r + val_i * kZ;
459  const ST gX_i = dX_i - val_r * kX;
460  const ST gY_i = dY_i - val_r * kY;
461  const ST gZ_i = dZ_i - val_r * kZ;
462 
463  const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
464  const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
465 
466  const size_t psiIndex = first_spo + jr;
467  if (psiIndex < requested_orb_size)
468  {
469  psi[psiIndex] = c * val_r - s * val_i;
470  d2psi[psiIndex] = c * lap_r - s * lap_i;
471  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
472  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
473  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
474  }
475  if (psiIndex + 1 < requested_orb_size)
476  {
477  psi[psiIndex + 1] = c * val_i + s * val_r;
478  d2psi[psiIndex + 1] = c * lap_i + s * lap_r;
479  dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
480  dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
481  dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
482  }
483  }
484 
485 #pragma omp simd
486  for (size_t j = nComplexBands; j < N; j++)
487  {
488  const size_t jr = j << 1;
489  const size_t ji = jr + 1;
490 
491  const ST kX = k0[j];
492  const ST kY = k1[j];
493  const ST kZ = k2[j];
494  const ST val_r = myV[jr];
495  const ST val_i = myV[ji];
496 
497  //phase
498  ST s, c;
499  omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
500 
501  //dot(PrimLattice.G,myG[j])
502  const ST dX_r = g0[jr];
503  const ST dY_r = g1[jr];
504  const ST dZ_r = g2[jr];
505 
506  const ST dX_i = g0[ji];
507  const ST dY_i = g1[ji];
508  const ST dZ_i = g2[ji];
509 
510  // \f$\nabla \psi_r + {\bf k}\psi_i\f$
511  const ST gX_r = dX_r + val_i * kX;
512  const ST gY_r = dY_r + val_i * kY;
513  const ST gZ_r = dZ_r + val_i * kZ;
514  const ST gX_i = dX_i - val_r * kX;
515  const ST gY_i = dY_i - val_r * kY;
516  const ST gZ_i = dZ_i - val_r * kZ;
517  if (const size_t psiIndex = first_spo + nComplexBands + j; psiIndex < requested_orb_size)
518  {
519  psi[psiIndex] = c * val_r - s * val_i;
520  dpsi[psiIndex][0] = c * gX_r - s * gX_i;
521  dpsi[psiIndex][1] = c * gY_r - s * gY_i;
522  dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
523 
524  const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
525  const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
526  d2psi[psiIndex] = c * lap_r - s * lap_i;
527  }
528  }
529 }
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
vContainer_type myV
intermediate result vectors
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
#define ASSUME_ALIGNED(x)
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ bcast_tables()

void bcast_tables ( Communicate comm)
inline

Definition at line 162 of file SplineC2ROMPTarget.h.

References qmcplusplus::comm, and SplineC2ROMPTarget< ST >::SplineInst.

162 { chunked_bcast(comm, SplineInst->getSplinePtr()); }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ create_spline()

void create_spline ( GT &  xyz_g,
BCT &  xyz_bc 
)
inline

Definition at line 179 of file SplineC2ROMPTarget.h.

References qmcplusplus::app_log(), SplineC2ROMPTarget< ST >::myV, SplineC2ROMPTarget< ST >::resize_kpoints(), Vector< T, Alloc >::size(), and SplineC2ROMPTarget< ST >::SplineInst.

180  {
181  resize_kpoints();
182  SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>();
183  SplineInst->create(xyz_g, xyz_bc, myV.size());
184 
185  app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
186  << "for the coefficients in 3D spline orbital representation" << std::endl;
187  }
std::ostream & app_log()
Definition: OutputManager.h:65
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
void resize_kpoints()
remap kPoints to pack the double copy
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ createResource()

void createResource ( ResourceCollection collection) const
inlineoverridevirtual

initialize a shared resource and hand it to collection

Reimplemented from SPOSet.

Definition at line 130 of file SplineC2ROMPTarget.h.

References ResourceCollection::addResource().

131  {
132  auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, TT>>());
133  }

◆ evaluate_notranspose()

void evaluate_notranspose ( const ParticleSet P,
int  first,
int  last,
ValueMatrix logdet,
GradMatrix dlogdet,
ValueMatrix d2logdet 
)
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles

Parameters
[in]Pcurrent ParticleSet
[in]firststarting index of the particles
[in]lastending index of the particles
[out]logdetdeterminant matrix to be inverted
[out]dlogdetgradients
[out]d2logdetlaplacians

Reimplemented from BsplineSet.

Definition at line 1695 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), and omptarget::min().

1701 {
1702  // chunk the [first, last) loop into blocks to save temporary memory usage
1703  const int block_size = 16;
1704 
1705  // reference vectors refer to the rows of matrices
1706  std::vector<ValueVector> multi_psi_v;
1707  std::vector<GradVector> multi_dpsi_v;
1708  std::vector<ValueVector> multi_d2psi_v;
1709  RefVector<ValueVector> psi_v_list;
1710  RefVector<GradVector> dpsi_v_list;
1711  RefVector<ValueVector> d2psi_v_list;
1712 
1713  multi_psi_v.reserve(block_size);
1714  multi_dpsi_v.reserve(block_size);
1715  multi_d2psi_v.reserve(block_size);
1716  psi_v_list.reserve(block_size);
1717  dpsi_v_list.reserve(block_size);
1718  d2psi_v_list.reserve(block_size);
1719 
1720  for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size)
1721  {
1722  const int actual_block_size = std::min(last - iat, block_size);
1723  multi_pos_copy.resize(actual_block_size * 6);
1724  multi_psi_v.clear();
1725  multi_dpsi_v.clear();
1726  multi_d2psi_v.clear();
1727  psi_v_list.clear();
1728  dpsi_v_list.clear();
1729  d2psi_v_list.clear();
1730 
1731  for (int ipos = 0; ipos < actual_block_size; ++ipos)
1732  {
1733  // pack particle positions
1734  const PointType& r = P.activeR(iat + ipos);
1736  multi_pos_copy[ipos * 6] = r[0];
1737  multi_pos_copy[ipos * 6 + 1] = r[1];
1738  multi_pos_copy[ipos * 6 + 2] = r[2];
1739  multi_pos_copy[ipos * 6 + 3] = ru[0];
1740  multi_pos_copy[ipos * 6 + 4] = ru[1];
1741  multi_pos_copy[ipos * 6 + 5] = ru[2];
1742 
1743  multi_psi_v.emplace_back(logdet[i + ipos], OrbitalSetSize);
1744  multi_dpsi_v.emplace_back(dlogdet[i + ipos], OrbitalSetSize);
1745  multi_d2psi_v.emplace_back(d2logdet[i + ipos], OrbitalSetSize);
1746 
1747  psi_v_list.push_back(multi_psi_v[ipos]);
1748  dpsi_v_list.push_back(multi_dpsi_v[ipos]);
1749  d2psi_v_list.push_back(multi_d2psi_v[ipos]);
1750  }
1751 
1752  evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
1753  }
1754 }
Vector< ST, OffloadPinnedAllocator< ST > > multi_pos_copy
position scratch space, used to avoid allocation on the fly and faster transfer
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
CrystalLattice< ST, 3 > PrimLattice
primitive cell
void evaluateVGLMultiPos(const Vector< ST, OffloadPinnedAllocator< ST >> &multi_pos_copy, Vector< ST, OffloadPinnedAllocator< ST >> &offload_scratch, Vector< TT, OffloadPinnedAllocator< TT >> &results_scratch, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list) const
T min(T a, T b)
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need

◆ evaluateDetRatios()

void evaluateDetRatios ( const VirtualParticleSet VP,
ValueVector psi,
const ValueVector psiinv,
std::vector< ValueType > &  ratios 
)
overridevirtual

evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP

Parameters
VPvirtual particle set
psivalues of the SPO, used as a scratch space if needed
psiinvthe row of inverse slater matrix corresponding to the particle moved virtually
ratiosreturn determinant ratios

Reimplemented from SPOSet.

Definition at line 172 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2R::assign_v(), qmcplusplus::syclBLAS::copy_n(), VirtualParticleSet::getTotalNum(), omptarget::min(), and TinyVector< T, D >::size().

176 {
177  const int nVP = VP.getTotalNum();
178  psiinv_pos_copy.resize(psiinv.size() + nVP * 6);
179 
180  // stage psiinv to psiinv_pos_copy
181  std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
182 
183  // pack particle positions
184  auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size();
185  for (int iat = 0; iat < nVP; ++iat)
186  {
187  const PointType& r = VP.activeR(iat);
189  pos_scratch[iat * 6] = r[0];
190  pos_scratch[iat * 6 + 1] = r[1];
191  pos_scratch[iat * 6 + 2] = r[2];
192  pos_scratch[iat * 6 + 3] = ru[0];
193  pos_scratch[iat * 6 + 4] = ru[1];
194  pos_scratch[iat * 6 + 5] = ru[2];
195  }
196 
197  const size_t ChunkSizePerTeam = 512;
198  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
199  ratios_private.resize(nVP, NumTeams);
200  const auto spline_padded_size = myV.size();
201  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
202  offload_scratch.resize(spline_padded_size * nVP);
203  results_scratch.resize(sposet_padded_size * nVP);
204 
205  // Ye: need to extract sizes and pointers before entering target region
206  const auto* spline_ptr = SplineInst->getSplinePtr();
207  auto* offload_scratch_ptr = offload_scratch.data();
208  auto* results_scratch_ptr = results_scratch.data();
209  const auto myKcart_padded_size = myKcart->capacity();
210  auto* myKcart_ptr = myKcart->data();
211  auto* psiinv_ptr = psiinv_pos_copy.data();
212  auto* ratios_private_ptr = ratios_private.data();
213  const size_t first_spo_local = first_spo;
214  const size_t nComplexBands_local = nComplexBands;
215  const auto requested_orb_size = psiinv.size();
216  const auto num_complex_splines = kPoints.size();
217 
218  {
219  ScopedTimer offload(offload_timer_);
220  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
221  map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \
222  map(always, from: ratios_private_ptr[0:NumTeams*nVP])")
223  for (int iat = 0; iat < nVP; iat++)
224  for (int team_id = 0; team_id < NumTeams; team_id++)
225  {
226  const size_t first = ChunkSizePerTeam * team_id;
227  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
228 
229  auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
230  auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat;
231  auto* restrict pos_scratch = psiinv_ptr + requested_orb_size;
232 
233  int ix, iy, iz;
234  ST a[4], b[4], c[4];
235  spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
236  ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
237 
238  PRAGMA_OFFLOAD("omp parallel for")
239  for (int index = 0; index < last - first; index++)
240  spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
241  offload_scratch_iat_ptr + first + index);
242  const size_t first_cplx = first / 2;
243  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
244  PRAGMA_OFFLOAD("omp parallel for")
245  for (int index = first_cplx; index < last_cplx; index++)
246  C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
247  psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local,
248  nComplexBands_local, index);
249 
250  const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
251  const size_t last_real =
252  omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
253  TT sum(0);
254  PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
255  for (int i = first_real; i < last_real; i++)
256  sum += psi_iat_ptr[i] * psiinv_ptr[i];
257  ratios_private_ptr[iat * NumTeams + team_id] = sum;
258  }
259  }
260 
261  // do the reduction manually
262  for (int iat = 0; iat < nVP; ++iat)
263  {
264  ratios[iat] = TT(0);
265  for (int tid = 0; tid < NumTeams; tid++)
266  ratios[iat] += ratios_private[iat][tid];
267  }
268 }
Matrix< TT, OffloadPinnedAllocator< TT > > ratios_private
team private ratios for reduction, numVP x numTeams
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
CrystalLattice< ST, 3 > PrimLattice
primitive cell
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
Vector< TT, OffloadPinnedAllocator< TT > > psiinv_pos_copy
psiinv and position scratch space, used to avoid allocation on the fly and faster transfer ...
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
typename BsplineSet::ValueType TT
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ evaluateValue()

void evaluateValue ( const ParticleSet P,
const int  iat,
ValueVector psi 
)
overridevirtual

evaluate the values of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO

Implements SPOSet.

Definition at line 100 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2C::assign_v(), qmcplusplus::C2R::assign_v(), TinyVector< T, D >::data(), FairDivideAligned(), omptarget::min(), omp_get_num_threads(), and omp_get_thread_num().

101 {
102  const PointType& r = P.activeR(iat);
104 
105  if (true)
106  {
107 #pragma omp parallel
108  {
109  int first, last;
110  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
111 
112  spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
113  assign_v(r, myV, psi, first / 2, last / 2);
114  }
115  }
116  else
117  {
118  const size_t ChunkSizePerTeam = 512;
119  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
120 
121  const auto spline_padded_size = myV.size();
122  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
123  offload_scratch.resize(spline_padded_size);
124  results_scratch.resize(sposet_padded_size);
125 
126  // Ye: need to extract sizes and pointers before entering target region
127  const auto* spline_ptr = SplineInst->getSplinePtr();
128  auto* offload_scratch_ptr = offload_scratch.data();
129  auto* results_scratch_ptr = results_scratch.data();
130  auto* psi_ptr = psi.data();
131  const auto x = r[0], y = r[1], z = r[2];
132  const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
133  const auto myKcart_padded_size = myKcart->capacity();
134  auto* myKcart_ptr = myKcart->data();
135  const size_t first_spo_local = first_spo;
136  const size_t nComplexBands_local = nComplexBands;
137  const auto requested_orb_size = psi.size();
138  const auto num_complex_splines = kPoints.size();
139 
140  {
141  ScopedTimer offload(offload_timer_);
142  PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
143  map(always, from: results_scratch_ptr[0:sposet_padded_size])")
144  for (int team_id = 0; team_id < NumTeams; team_id++)
145  {
146  const size_t first = ChunkSizePerTeam * team_id;
147  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
148 
149  int ix, iy, iz;
150  ST a[4], b[4], c[4];
151  spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c);
152 
153  PRAGMA_OFFLOAD("omp parallel for")
154  for (int index = 0; index < last - first; index++)
155  spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
156  offload_scratch_ptr + first + index);
157  const size_t first_cplx = first / 2;
158  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
159  PRAGMA_OFFLOAD("omp parallel for")
160  for (int index = first_cplx; index < last_cplx; index++)
161  C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size,
162  first_spo_local, nComplexBands_local, index);
163  }
164 
165  for (size_t i = 0; i < requested_orb_size; i++)
166  psi[i] = results_scratch[i];
167  }
168  }
169 }
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
CrystalLattice< ST, 3 > PrimLattice
primitive cell
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ evaluateVGH()

void evaluateVGH ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi 
)
overridevirtual

evaluate the values, gradients and hessians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
grad_grad_psihessians of the SPO

Reimplemented from SPOSet.

Definition at line 1166 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), FairDivideAligned(), omp_get_num_threads(), and omp_get_thread_num().

1171 {
1172  const PointType& r = P.activeR(iat);
1174 #pragma omp parallel
1175  {
1176  int first, last;
1177  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
1178 
1179  spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
1180  assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
1181  }
1182 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
void assign_vgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, int first, int last) const
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ evaluateVGHGH()

void evaluateVGHGH ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
HessVector grad_grad_psi,
GGGVector grad_grad_grad_psi 
)
overridevirtual

evaluate the values, gradients, hessians, and grad hessians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
grad_grad_psihessians of the SPO
grad_grad_grad_psigrad hessians of the SPO

Reimplemented from SPOSet.

Definition at line 1675 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), FairDivideAligned(), omp_get_num_threads(), and omp_get_thread_num().

1681 {
1682  const PointType& r = P.activeR(iat);
1684 #pragma omp parallel
1685  {
1686  int first, last;
1687  FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
1688 
1689  spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
1690  assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
1691  }
1692 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
omp_int_t omp_get_thread_num()
Definition: OpenMP.h:25
void assign_vghgh(const PointType &r, ValueVector &psi, GradVector &dpsi, HessVector &grad_grad_psi, GGGVector &grad_grad_grad_psi, int first=0, int last=-1) const
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
void FairDivideAligned(const int ntot, const int base, const int npart, const int me, int &first, int &last)
Partition ntot over npart and the size of each partition is a multiple of base size.
Definition: FairDivide.h:96
omp_int_t omp_get_num_threads()
Definition: OpenMP.h:27
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ evaluateVGL()

void evaluateVGL ( const ParticleSet P,
const int  iat,
ValueVector psi,
GradVector dpsi,
ValueVector d2psi 
)
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital set

Parameters
Pcurrent ParticleSet
iatactive particle
psivalues of the SPO
dpsigradients of the SPO
d2psilaplacians of the SPO

Implements SPOSet.

Definition at line 532 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2R::assign_vgl(), TinyVector< T, D >::data(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), qmcplusplus::NUM_FIELDS, TinyVector< T, D >::size(), and qmcplusplus::SymTrace().

537 {
538  const PointType& r = P.activeR(iat);
540 
541  const size_t ChunkSizePerTeam = 512;
542  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
543 
544  const auto spline_padded_size = myV.size();
545  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
546  // for V(1)G(3)H(6) intermediate result
547  offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS);
548  // for V(1)G(3)L(1) final result
549  results_scratch.resize(sposet_padded_size * 5);
550 
551  // Ye: need to extract sizes and pointers before entering target region
552  const auto* spline_ptr = SplineInst->getSplinePtr();
553  auto* offload_scratch_ptr = offload_scratch.data();
554  auto* results_scratch_ptr = results_scratch.data();
555  const auto x = r[0], y = r[1], z = r[2];
556  const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
557  const auto myKcart_padded_size = myKcart->capacity();
558  auto* mKK_ptr = mKK->data();
559  auto* GGt_ptr = GGt_offload->data();
560  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
561  auto* myKcart_ptr = myKcart->data();
562  const size_t first_spo_local = first_spo;
563  const size_t nComplexBands_local = nComplexBands;
564  const auto requested_orb_size = psi.size();
565  const auto num_complex_splines = kPoints.size();
566 
567  {
568  ScopedTimer offload(offload_timer_);
569  PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
570  map(always, from: results_scratch_ptr[0:sposet_padded_size*5])")
571  for (int team_id = 0; team_id < NumTeams; team_id++)
572  {
573  const size_t first = ChunkSizePerTeam * team_id;
574  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
575 
576  int ix, iy, iz;
577  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
578  spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
579 
580  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
581  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
582  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
583  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
584  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
585 
586  PRAGMA_OFFLOAD("omp parallel for")
587  for (int index = 0; index < last - first; index++)
588  {
589  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
590  offload_scratch_ptr + first + index, spline_padded_size);
591  const int output_index = first + index;
592  offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
593  SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
594  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
595  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
596  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
597  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
598  offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
599  }
600  const size_t first_cplx = first / 2;
601  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
602  PRAGMA_OFFLOAD("omp parallel for")
603  for (int index = first_cplx; index < last_cplx; index++)
604  C2R::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr,
605  spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local,
606  index);
607  }
608  }
609 
610  for (size_t i = 0; i < requested_orb_size; i++)
611  {
612  psi[i] = results_scratch[i];
613  dpsi[i][0] = results_scratch[i + sposet_padded_size * 1];
614  dpsi[i][1] = results_scratch[i + sposet_padded_size * 2];
615  dpsi[i][2] = results_scratch[i + sposet_padded_size * 3];
616  d2psi[i] = results_scratch[i + sposet_padded_size * 4];
617  }
618 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
CrystalLattice< ST, 3 > PrimLattice
primitive cell
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
std::shared_ptr< OffloadVector< ST > > mKK
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ evaluateVGLMultiPos()

void evaluateVGLMultiPos ( const Vector< ST, OffloadPinnedAllocator< ST >> &  multi_pos_copy,
Vector< ST, OffloadPinnedAllocator< ST >> &  offload_scratch,
Vector< TT, OffloadPinnedAllocator< TT >> &  results_scratch,
const RefVector< ValueVector > &  psi_v_list,
const RefVector< GradVector > &  dpsi_v_list,
const RefVector< ValueVector > &  d2psi_v_list 
) const
private

Definition at line 621 of file SplineC2ROMPTarget.cpp.

References qmcplusplus::C2R::assign_vgl(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), qmcplusplus::NUM_FIELDS, and qmcplusplus::SymTrace().

627 {
628  const size_t num_pos = psi_v_list.size();
629  const size_t ChunkSizePerTeam = 512;
630  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
631  const auto spline_padded_size = myV.size();
632  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
633  // for V(1)G(3)H(6) intermediate result
634  offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
635  // for V(1)G(3)L(1) final result
636  results_scratch.resize(sposet_padded_size * num_pos * 5);
637 
638  // Ye: need to extract sizes and pointers before entering target region
639  const auto* spline_ptr = SplineInst->getSplinePtr();
640  auto* pos_copy_ptr = multi_pos.data();
641  auto* offload_scratch_ptr = offload_scratch.data();
642  auto* results_scratch_ptr = results_scratch.data();
643  const auto myKcart_padded_size = myKcart->capacity();
644  auto* mKK_ptr = mKK->data();
645  auto* GGt_ptr = GGt_offload->data();
646  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
647  auto* myKcart_ptr = myKcart->data();
648  const size_t first_spo_local = first_spo;
649  const size_t nComplexBands_local = nComplexBands;
650  const auto requested_orb_size = psi_v_list[0].get().size();
651  const auto num_complex_splines = kPoints.size();
652 
653  {
654  ScopedTimer offload(offload_timer_);
655  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
656  map(always, to: pos_copy_ptr[0:num_pos*6]) \
657  map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])")
658  for (int iw = 0; iw < num_pos; iw++)
659  for (int team_id = 0; team_id < NumTeams; team_id++)
660  {
661  const size_t first = ChunkSizePerTeam * team_id;
662  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
663 
664  auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
665  auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
666 
667  int ix, iy, iz;
668  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
669  spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
670  pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
671 
672  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
673  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
674  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
675  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
676  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
677 
678  PRAGMA_OFFLOAD("omp parallel for")
679  for (int index = 0; index < last - first; index++)
680  {
681  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
682  d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
683  const int output_index = first + index;
684  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
685  SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
686  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
687  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
688  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
689  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
690  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
691  }
692  const size_t first_cplx = first / 2;
693  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
694  PRAGMA_OFFLOAD("omp parallel for")
695  for (int index = first_cplx; index < last_cplx; index++)
696  C2R::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr,
697  sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr,
698  myKcart_padded_size, first_spo_local, nComplexBands_local, index);
699  }
700  }
701 
702  for (int iw = 0; iw < num_pos; ++iw)
703  {
704  auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
705  ValueVector& psi_v(psi_v_list[iw]);
706  GradVector& dpsi_v(dpsi_v_list[iw]);
707  ValueVector& d2psi_v(d2psi_v_list[iw]);
708  for (size_t i = 0; i < requested_orb_size; i++)
709  {
710  psi_v[i] = results_iw_ptr[i];
711  dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size];
712  dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2];
713  dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3];
714  d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4];
715  }
716  }
717 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
Vector< TT, OffloadPinnedAllocator< TT > > results_scratch
result scratch space, dynamically resized to the maximal need
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
OrbitalSetTraits< ValueType >::ValueVector ValueVector
Definition: SPOSet.h:49
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
std::shared_ptr< OffloadVector< ST > > mKK
Vector< ST, OffloadPinnedAllocator< ST > > offload_scratch
offload scratch space, dynamically resized to the maximal need
OrbitalSetTraits< ValueType >::GradVector GradVector
Definition: SPOSet.h:51
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ finalizeConstruction()

void finalizeConstruction ( )
inlineoverridevirtual

this routine can not be called from threaded region

Reimplemented from SPOSet.

Definition at line 190 of file SplineC2ROMPTarget.h.

References CrystalLattice< T, D >::G, SplineC2ROMPTarget< ST >::GGt, SplineC2ROMPTarget< ST >::GGt_offload, SplineC2ROMPTarget< ST >::mKK, SplineC2ROMPTarget< ST >::myKcart, SplineC2ROMPTarget< ST >::PrimLattice, SplineC2ROMPTarget< ST >::PrimLattice_G_offload, and SplineC2ROMPTarget< ST >::SplineInst.

191  {
192  // map the SplineInst->getSplinePtr() structure to GPU
193  auto* MultiSpline = SplineInst->getSplinePtr();
194  auto* restrict coefs = MultiSpline->coefs;
195  // attach pointers on the device to achieve deep copy
196  PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])")
197  {
198  MultiSpline->coefs = coefs;
199  }
200 
201  // transfer static data to GPU
202  auto* mKK_ptr = mKK->data();
203  PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
204  auto* myKcart_ptr = myKcart->data();
205  PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
206  for (uint32_t i = 0; i < 9; i++)
207  {
208  (*GGt_offload)[i] = GGt[i];
209  (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
210  }
211  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
212  PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
213  auto* GGt_ptr = GGt_offload->data();
214  PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
215  }
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
CrystalLattice< ST, 3 > PrimLattice
primitive cell
std::shared_ptr< OffloadPosVector< ST > > myKcart
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
Tensor< ST, 3 > GGt
, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
std::shared_ptr< OffloadVector< ST > > mKK
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ flush_zero()

void flush_zero ( )
inline

Definition at line 217 of file SplineC2ROMPTarget.h.

References SplineC2ROMPTarget< ST >::SplineInst.

217 { SplineInst->flush_zero(); }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ gather_tables()

void gather_tables ( Communicate comm)
inline

Definition at line 164 of file SplineC2ROMPTarget.h.

References qmcplusplus::comm, FairDivideLow(), gatherv(), BsplineSet::kPoints, BsplineSet::offset, Communicate::size(), and SplineC2ROMPTarget< ST >::SplineInst.

165  {
166  if (comm->size() == 1)
167  return;
168  const int Nbands = kPoints.size();
169  const int Nbandgroups = comm->size();
170  offset.resize(Nbandgroups + 1, 0);
171  FairDivideLow(Nbands, Nbandgroups, offset);
172 
173  for (size_t ib = 0; ib < offset.size(); ib++)
174  offset[ib] = offset[ib] * 2;
175  gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset);
176  }
int size() const
return the number of tasks
Definition: Communicate.h:118
void FairDivideLow(int ntot, int npart, IV &adist)
partition ntot elements among npart
Definition: FairDivide.h:114
std::vector< int > offset
band offsets used for communication
Definition: BsplineSet.h:56
void gatherv(T *sb, T *rb, int n, IT &counts, IT &displ, int dest)
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ getClassName()

virtual std::string getClassName ( ) const
inlineoverridevirtual

return class name

Implements SPOSet.

Definition at line 125 of file SplineC2ROMPTarget.h.

125 { return "SplineC2ROMPTarget"; }

◆ getKeyword()

virtual std::string getKeyword ( ) const
inlineoverridevirtual

Implements BsplineSet.

Definition at line 126 of file SplineC2ROMPTarget.h.

126 { return "SplineC2R"; }

◆ isComplex()

bool isComplex ( ) const
inlineoverridevirtual

Implements BsplineSet.

Definition at line 127 of file SplineC2ROMPTarget.h.

127 { return true; };

◆ isOMPoffload()

virtual bool isOMPoffload ( ) const
inlineoverridevirtual

Query if this SPOSet uses OpenMP offload.

Reimplemented from SPOSet.

Definition at line 128 of file SplineC2ROMPTarget.h.

128 { return true; }

◆ makeClone()

std::unique_ptr<SPOSet> makeClone ( ) const
inlineoverridevirtual

make a clone of itself every derived class must implement this to have threading working correctly.

Implements BsplineSet.

Definition at line 149 of file SplineC2ROMPTarget.h.

149 { return std::make_unique<SplineC2ROMPTarget>(*this); }

◆ mw_evaluateDetRatios()

void mw_evaluateDetRatios ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< const VirtualParticleSet > &  vp_list,
const RefVector< ValueVector > &  psi_list,
const std::vector< const ValueType *> &  invRow_ptr_list,
std::vector< std::vector< ValueType >> &  ratios_list 
) const
overridevirtual

evaluate determinant ratios for virtual moves, e.g., sphere move for nonlocalPP, of multiple walkers

Parameters
spo_listthe list of SPOSet pointers in a walker batch
vp_lista list of virtual particle sets in a walker batch
psi_lista list of values of the SPO, used as a scratch space if needed
invRow_ptr_lista list of pointers to the rows of inverse slater matrix corresponding to the particles moved virtually
ratios_lista list of returning determinant ratios

Reimplemented from SPOSet.

Definition at line 271 of file SplineC2ROMPTarget.cpp.

References ParticleSet::activeR(), qmcplusplus::C2R::assign_v(), RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), VirtualParticleSet::getTotalNum(), omptarget::min(), SplineC2ROMPTarget< ST >::mw_mem_handle_, and TinyVector< T, D >::size().

276 {
277  assert(this == &spo_list.getLeader());
278  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
279  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
280  auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D;
281  auto& mw_ratios_private = mw_mem.mw_ratios_private;
282  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
283  auto& mw_results_scratch = mw_mem.mw_results_scratch;
284  const size_t nw = spo_list.size();
285  const size_t requested_orb_size = phi_leader.size();
286 
287  size_t mw_nVP = 0;
288  for (const VirtualParticleSet& VP : vp_list)
289  mw_nVP += VP.getTotalNum();
290 
291  const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int));
292  det_ratios_buffer_H2D.resize(packed_size);
293 
294  // pack invRow_ptr_list to det_ratios_buffer_H2D
295  Vector<const ValueType*> ptr_buffer(reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
296  for (size_t iw = 0; iw < nw; iw++)
297  ptr_buffer[iw] = invRow_ptr_list[iw];
298 
299  // pack particle positions
300  auto* pos_ptr = reinterpret_cast<TT*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
301  auto* ref_id_ptr =
302  reinterpret_cast<int*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
303  size_t iVP = 0;
304  for (size_t iw = 0; iw < nw; iw++)
305  {
306  const VirtualParticleSet& VP = vp_list[iw];
307  assert(ratios_list[iw].size() == VP.getTotalNum());
308  for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP)
309  {
310  ref_id_ptr[iVP] = iw;
311  const PointType& r = VP.activeR(iat);
313  pos_ptr[0] = r[0];
314  pos_ptr[1] = r[1];
315  pos_ptr[2] = r[2];
316  pos_ptr[3] = ru[0];
317  pos_ptr[4] = ru[1];
318  pos_ptr[5] = ru[2];
319  pos_ptr += 6;
320  }
321  }
322 
323  const size_t ChunkSizePerTeam = 512;
324  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
325  mw_ratios_private.resize(mw_nVP, NumTeams);
326  const auto spline_padded_size = myV.size();
327  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
328  mw_offload_scratch.resize(spline_padded_size * mw_nVP);
329  mw_results_scratch.resize(sposet_padded_size * mw_nVP);
330 
331  // Ye: need to extract sizes and pointers before entering target region
332  const auto* spline_ptr = SplineInst->getSplinePtr();
333  auto* offload_scratch_ptr = mw_offload_scratch.data();
334  auto* results_scratch_ptr = mw_results_scratch.data();
335  const auto myKcart_padded_size = myKcart->capacity();
336  auto* myKcart_ptr = myKcart->data();
337  auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data();
338  auto* ratios_private_ptr = mw_ratios_private.data();
339  const size_t first_spo_local = first_spo;
340  const size_t nComplexBands_local = nComplexBands;
341  const auto num_complex_splines = kPoints.size();
342 
343  {
344  ScopedTimer offload(offload_timer_);
345  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
346  map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \
347  map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])")
348  for (int iat = 0; iat < mw_nVP; iat++)
349  for (int team_id = 0; team_id < NumTeams; team_id++)
350  {
351  const size_t first = ChunkSizePerTeam * team_id;
352  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
353 
354  auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
355  auto* restrict psi_iat_ptr = results_scratch_ptr + sposet_padded_size * iat;
356  auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
357  auto* restrict psiinv_ptr = reinterpret_cast<const ValueType**>(buffer_H2D_ptr)[ref_id_ptr[iat]];
358  auto* restrict pos_scratch = reinterpret_cast<TT*>(buffer_H2D_ptr + nw * sizeof(ValueType*));
359 
360  int ix, iy, iz;
361  ST a[4], b[4], c[4];
362  spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
363  ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
364 
365  PRAGMA_OFFLOAD("omp parallel for")
366  for (int index = 0; index < last - first; index++)
367  spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
368  offload_scratch_iat_ptr + first + index);
369  const size_t first_cplx = first / 2;
370  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
371  PRAGMA_OFFLOAD("omp parallel for")
372  for (int index = first_cplx; index < last_cplx; index++)
373  C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
374  psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local,
375  nComplexBands_local, index);
376 
377  const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
378  const size_t last_real =
379  omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
380  TT sum(0);
381  PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
382  for (int i = first_real; i < last_real; i++)
383  sum += psi_iat_ptr[i] * psiinv_ptr[i];
384  ratios_private_ptr[iat * NumTeams + team_id] = sum;
385  }
386  }
387 
388  // do the reduction manually
389  iVP = 0;
390  for (size_t iw = 0; iw < nw; iw++)
391  {
392  auto& ratios = ratios_list[iw];
393  for (size_t iat = 0; iat < ratios.size(); iat++, iVP++)
394  {
395  ratios[iat] = TT(0);
396  for (int tid = 0; tid < NumTeams; ++tid)
397  ratios[iat] += mw_ratios_private[iVP][tid];
398  }
399  }
400 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int size() const
return the size of the orbital set Ye: this needs to be replaced by getOrbitalSetSize(); ...
Definition: SPOSet.h:75
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
void assign_v(const PointType &r, const vContainer_type &myV, ValueVector &psi, int first, int last) const
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
QTBase::ValueType ValueType
Definition: Configuration.h:60
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
typename BsplineSet::ValueType TT
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ mw_evaluateVGL()

void mw_evaluateVGL ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< ParticleSet > &  P_list,
int  iat,
const RefVector< ValueVector > &  psi_v_list,
const RefVector< GradVector > &  dpsi_v_list,
const RefVector< ValueVector > &  d2psi_v_list 
) const
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers

Parameters
spo_listthe list of SPOSet pointers in a walker batch
P_listthe list of ParticleSet pointers in a walker batch
iatactive particle
psi_v_listthe list of value vector pointers in a walker batch
dpsi_v_listthe list of gradient vector pointers in a walker batch
d2psi_v_listthe list of laplacian vector pointers in a walker batch

Reimplemented from SPOSet.

Definition at line 720 of file SplineC2ROMPTarget.cpp.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), and SplineC2ROMPTarget< ST >::mw_mem_handle_.

726 {
727  assert(this == &sa_list.getLeader());
728  auto& phi_leader = sa_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
729  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
730  auto& mw_pos_copy = mw_mem.mw_pos_copy;
731  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
732  auto& mw_results_scratch = mw_mem.mw_results_scratch;
733  const int nwalkers = sa_list.size();
734  mw_pos_copy.resize(nwalkers * 6);
735 
736  // pack particle positions
737  for (int iw = 0; iw < nwalkers; ++iw)
738  {
739  const PointType& r = P_list[iw].activeR(iat);
741  mw_pos_copy[iw * 6] = r[0];
742  mw_pos_copy[iw * 6 + 1] = r[1];
743  mw_pos_copy[iw * 6 + 2] = r[2];
744  mw_pos_copy[iw * 6 + 3] = ru[0];
745  mw_pos_copy[iw * 6 + 4] = ru[1];
746  mw_pos_copy[iw * 6 + 5] = ru[2];
747  }
748 
749  phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list,
750  d2psi_v_list);
751 }
CrystalLattice< ST, 3 > PrimLattice
primitive cell
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const

◆ mw_evaluateVGLandDetRatioGrads()

void mw_evaluateVGLandDetRatioGrads ( const RefVectorWithLeader< SPOSet > &  spo_list,
const RefVectorWithLeader< ParticleSet > &  P_list,
int  iat,
const std::vector< const ValueType *> &  invRow_ptr_list,
OffloadMWVGLArray phi_vgl_v,
std::vector< ValueType > &  ratios,
std::vector< GradType > &  grads 
) const
overridevirtual

evaluate the values, gradients and laplacians of this single-particle orbital sets and determinant ratio and grads of multiple walkers.

Device data of phi_vgl_v must be up-to-date upon return

Parameters
spo_listthe list of SPOSet pointers in a walker batch
P_listthe list of ParticleSet pointers in a walker batch
iatactive particle
phi_vgl_vorbital values, gradients and laplacians of all the walkers
psi_ratio_grads_vdeterminant ratio and grads of all the walkers

Reimplemented from SPOSet.

Definition at line 754 of file SplineC2ROMPTarget.cpp.

References qmcplusplus::C2R::assign_vgl(), Array< T, D, ALLOC >::data(), RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), qmcplusplus::HESS00, qmcplusplus::HESS01, qmcplusplus::HESS02, qmcplusplus::HESS11, qmcplusplus::HESS12, qmcplusplus::HESS22, qmcplusplus::LAPL, omptarget::min(), SplineC2ROMPTarget< ST >::mw_mem_handle_, qmcplusplus::NUM_FIELDS, Array< T, D, ALLOC >::size(), TinyVector< T, D >::size(), and qmcplusplus::SymTrace().

761 {
762  assert(this == &spo_list.getLeader());
763  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
764  auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
765  auto& buffer_H2D = mw_mem.buffer_H2D;
766  auto& rg_private = mw_mem.rg_private;
767  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
768  auto& mw_results_scratch = mw_mem.mw_results_scratch;
769  const int nwalkers = spo_list.size();
770  buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
771 
772  // pack particle positions and invRow pointers.
773  for (int iw = 0; iw < nwalkers; ++iw)
774  {
775  const PointType& r = P_list[iw].activeR(iat);
777  Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
778 
779  pos_copy[0] = r[0];
780  pos_copy[1] = r[1];
781  pos_copy[2] = r[2];
782  pos_copy[3] = ru[0];
783  pos_copy[4] = ru[1];
784  pos_copy[5] = ru[2];
785 
786  auto& invRow_ptr = *reinterpret_cast<const ValueType**>(buffer_H2D[iw] + sizeof(ST) * 6);
787  invRow_ptr = invRow_ptr_list[iw];
788  }
789 
790  const size_t num_pos = nwalkers;
791  const auto spline_padded_size = myV.size();
792  const auto sposet_padded_size = getAlignedSize<TT>(OrbitalSetSize);
793  const size_t ChunkSizePerTeam = 512;
794  const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
795 
796  mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
797  // for V(1)G(3)L(1) final result
798  mw_results_scratch.resize(sposet_padded_size * num_pos * 5);
799  // per team ratio and grads
800  rg_private.resize(num_pos, NumTeams * 4);
801 
802  // Ye: need to extract sizes and pointers before entering target region
803  const auto* spline_ptr = SplineInst->getSplinePtr();
804  auto* buffer_H2D_ptr = buffer_H2D.data();
805  auto* offload_scratch_ptr = mw_offload_scratch.data();
806  auto* results_scratch_ptr = mw_results_scratch.data();
807  const auto myKcart_padded_size = myKcart->capacity();
808  auto* mKK_ptr = mKK->data();
809  auto* GGt_ptr = GGt_offload->data();
810  auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
811  auto* myKcart_ptr = myKcart->data();
812  auto* phi_vgl_ptr = phi_vgl_v.data();
813  auto* rg_private_ptr = rg_private.data();
814  const size_t buffer_H2D_stride = buffer_H2D.cols();
815  const size_t first_spo_local = first_spo;
816  const auto requested_orb_size = phi_vgl_v.size(2);
817  const size_t phi_vgl_stride = num_pos * requested_orb_size;
818  const size_t nComplexBands_local = nComplexBands;
819  const auto num_complex_splines = kPoints.size();
820 
821  {
822  ScopedTimer offload(offload_timer_);
823  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
824  map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \
825  map(always, from: rg_private_ptr[0:rg_private.size()])")
826  for (int iw = 0; iw < num_pos; iw++)
827  for (int team_id = 0; team_id < NumTeams; team_id++)
828  {
829  const size_t first = ChunkSizePerTeam * team_id;
830  const size_t last = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
831 
832  auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
833  auto* restrict psi_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5;
834  const auto* restrict pos_iw_ptr = reinterpret_cast<ST*>(buffer_H2D_ptr + buffer_H2D_stride * iw);
835  const auto* restrict invRow_iw_ptr =
836  *reinterpret_cast<ValueType**>(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6);
837 
838  int ix, iy, iz;
839  ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
840  spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b,
841  c, da, db, dc, d2a, d2b, d2c);
842 
843  const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
844  PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
845  PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
846  const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
847  GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
848 
849  PRAGMA_OFFLOAD("omp parallel for")
850  for (int index = 0; index < last - first; index++)
851  {
852  spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
853  d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
854  const int output_index = first + index;
855  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
856  SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
857  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
858  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
859  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
860  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
861  offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
862  }
863  const size_t first_cplx = first / 2;
864  const size_t last_cplx = omptarget::min(last / 2, num_complex_splines);
865  PRAGMA_OFFLOAD("omp parallel for")
866  for (int index = first_cplx; index < last_cplx; index++)
867  C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr,
868  offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size,
869  first_spo_local, nComplexBands_local, index);
870 
871  ValueType* restrict psi = psi_iw_ptr;
872  ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size;
873  ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2;
874  ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3;
875  ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4;
876 
877  ValueType* restrict out_phi = phi_vgl_ptr + iw * requested_orb_size;
878  ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
879  ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
880  ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
881  ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride;
882 
883  const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
884  const size_t last_real =
885  omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
886  ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
887  PRAGMA_OFFLOAD("omp parallel for reduction(+: ratio, grad_x, grad_y, grad_z)")
888  for (int j = first_real; j < last_real; j++)
889  {
890  out_phi[j] = psi[j];
891  out_dphi_x[j] = dpsi_x[j];
892  out_dphi_y[j] = dpsi_y[j];
893  out_dphi_z[j] = dpsi_z[j];
894  out_d2phi[j] = d2psi[j];
895 
896  ratio += psi[j] * invRow_iw_ptr[j];
897  grad_x += dpsi_x[j] * invRow_iw_ptr[j];
898  grad_y += dpsi_y[j] * invRow_iw_ptr[j];
899  grad_z += dpsi_z[j] * invRow_iw_ptr[j];
900  }
901 
902  rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio;
903  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
904  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
905  rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
906  }
907  }
908 
909  for (int iw = 0; iw < num_pos; iw++)
910  {
911  ValueType ratio(0);
912  for (int team_id = 0; team_id < NumTeams; team_id++)
913  ratio += rg_private[iw][team_id * 4];
914  ratios[iw] = ratio;
915 
916  ValueType grad_x(0), grad_y(0), grad_z(0);
917  for (int team_id = 0; team_id < NumTeams; team_id++)
918  {
919  grad_x += rg_private[iw][team_id * 4 + 1];
920  grad_y += rg_private[iw][team_id * 4 + 2];
921  grad_z += rg_private[iw][team_id * 4 + 3];
922  }
923  grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
924  }
925 }
T SymTrace(T h00, T h01, T h02, T h11, T h12, T h22, const T gg[6])
compute Trace(H*G)
QTBase::GradType GradType
Definition: Configuration.h:62
std::shared_ptr< OffloadVector< ST > > PrimLattice_G_offload
std::shared_ptr< OffloadVector< ST > > GGt_offload
CrystalLattice< ST, 3 > PrimLattice
primitive cell
NewTimer & offload_timer_
timer for offload portion
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
vContainer_type myV
intermediate result vectors
size_type size() const
return the current size
Definition: OhmmsVector.h:162
QTBase::ValueType ValueType
Definition: Configuration.h:60
SingleParticlePos toUnit_floor(const TinyVector< T1, D > &r) const
IndexType OrbitalSetSize
number of Single-particle orbitals
Definition: SPOSet.h:566
std::shared_ptr< OffloadVector< ST > > mKK
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int index)
assign_vgl
size_t first_spo
first index of the SPOs this Spline handles
Definition: BsplineSet.h:41
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ read_splines()

bool read_splines ( hdf_archive h5f)

Definition at line 37 of file SplineC2ROMPTarget.cpp.

References hdf_archive::readEntry().

38 {
39  std::ostringstream o;
40  o << "spline_" << MyIndex;
41  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
42  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
43 }
size_t MyIndex
Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases.
Definition: BsplineSet.h:39
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ releaseResource()

void releaseResource ( ResourceCollection collection,
const RefVectorWithLeader< SPOSet > &  spo_list 
) const
inlineoverridevirtual

return a shared resource to collection

Reimplemented from SPOSet.

Definition at line 142 of file SplineC2ROMPTarget.h.

References RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), and ResourceCollection::takebackResource().

143  {
144  assert(this == &spo_list.getLeader());
145  auto& phi_leader = spo_list.getCastedLeader<SplineC2ROMPTarget<ST>>();
146  collection.takebackResource(phi_leader.mw_mem_handle_);
147  }

◆ resize_kpoints()

void resize_kpoints ( )
inline

remap kPoints to pack the double copy

Definition at line 220 of file SplineC2ROMPTarget.h.

References qmcplusplus::dot(), BsplineSet::kPoints, SplineC2ROMPTarget< ST >::mKK, SplineC2ROMPTarget< ST >::myKcart, SplineC2ROMPTarget< ST >::nComplexBands, and BsplineSet::remap_kpoints().

Referenced by SplineC2ROMPTarget< ST >::create_spline().

221  {
222  nComplexBands = this->remap_kpoints();
223  const int nk = kPoints.size();
224  mKK = std::make_shared<OffloadVector<ST>>(nk);
225  myKcart = std::make_shared<OffloadPosVector<ST>>(nk);
226  for (size_t i = 0; i < nk; ++i)
227  {
228  (*mKK)[i] = -dot(kPoints[i], kPoints[i]);
229  (*myKcart)(i) = kPoints[i];
230  }
231  }
int nComplexBands
number of complex bands
std::shared_ptr< OffloadPosVector< ST > > myKcart
std::shared_ptr< OffloadVector< ST > > mKK
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
int remap_kpoints()
remap kpoints to group general kpoints & special kpoints
Definition: BsplineSet.h:76
std::vector< SPOSet::PosType > kPoints
kpoints for each unique orbitals.
Definition: BsplineSet.h:52

◆ resizeStorage()

void resizeStorage ( size_t  n,
size_t  nvals 
)
inline

Definition at line 151 of file SplineC2ROMPTarget.h.

References BsplineSet::init_base(), SplineC2ROMPTarget< ST >::myG, SplineC2ROMPTarget< ST >::mygH, SplineC2ROMPTarget< ST >::myH, SplineC2ROMPTarget< ST >::myL, SplineC2ROMPTarget< ST >::myV, qmcplusplus::n, VectorSoaContainer< T, D, Alloc >::resize(), and Vector< T, Alloc >::resize().

152  {
153  init_base(n);
154  size_t npad = getAlignedSize<ST>(2 * n);
155  myV.resize(npad);
156  myG.resize(npad);
157  myL.resize(npad);
158  myH.resize(npad);
159  mygH.resize(npad);
160  }
void resize(size_type n, Type_t val=Type_t())
Resize the container.
Definition: OhmmsVector.h:166
vContainer_type myV
intermediate result vectors
void init_base(int n)
Definition: BsplineSet.h:66
void resize(size_type n)
resize myData

◆ set_spline()

void set_spline ( SingleSplineType spline_r,
SingleSplineType spline_i,
int  twist,
int  ispline,
int  level 
)
inline

Definition at line 26 of file SplineC2ROMPTarget.cpp.

31 {
32  SplineInst->copy_spline(spline_r, 2 * ispline);
33  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
34 }
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

◆ write_splines()

bool write_splines ( hdf_archive h5f)

Definition at line 46 of file SplineC2ROMPTarget.cpp.

References hdf_archive::writeEntry().

47 {
48  std::ostringstream o;
49  o << "spline_" << MyIndex;
50  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
51  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
52 }
size_t MyIndex
Index of this adoptor, when multiple adoptors are used for NUMA or distributed cases.
Definition: BsplineSet.h:39
std::shared_ptr< MultiBspline< ST, OffloadAllocator< ST >, OffloadAllocator< SplineType > > > SplineInst
multi bspline set

Friends And Related Function Documentation

◆ BsplineReader

friend struct BsplineReader
friend

Definition at line 316 of file SplineC2ROMPTarget.h.

◆ SplineSetReader

friend class SplineSetReader
friend

Definition at line 315 of file SplineC2ROMPTarget.h.

Member Data Documentation

◆ GGt

Tensor<ST, 3> GGt
private

$GGt=G^t G $, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian

Definition at line 75 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::finalizeConstruction().

◆ GGt_offload

std::shared_ptr<OffloadVector<ST> > GGt_offload
private

Definition at line 83 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::finalizeConstruction().

◆ mKK

std::shared_ptr<OffloadVector<ST> > mKK
private

◆ multi_pos_copy

Vector<ST, OffloadPinnedAllocator<ST> > multi_pos_copy
private

position scratch space, used to avoid allocation on the fly and faster transfer

Definition at line 97 of file SplineC2ROMPTarget.h.

◆ mw_mem_handle_

◆ myG

gContainer_type myG
protected

Definition at line 110 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::resizeStorage().

◆ mygH

ghContainer_type mygH
protected

Definition at line 112 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::resizeStorage().

◆ myH

hContainer_type myH
protected

Definition at line 111 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::resizeStorage().

◆ myKcart

std::shared_ptr<OffloadPosVector<ST> > myKcart
private

◆ myL

vContainer_type myL
protected

Definition at line 109 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::resizeStorage().

◆ myV

vContainer_type myV
protected

intermediate result vectors

Definition at line 108 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::create_spline(), and SplineC2ROMPTarget< ST >::resizeStorage().

◆ nComplexBands

int nComplexBands
private

number of complex bands

Definition at line 77 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::resize_kpoints().

◆ offload_scratch

Vector<ST, OffloadPinnedAllocator<ST> > offload_scratch
private

offload scratch space, dynamically resized to the maximal need

Definition at line 91 of file SplineC2ROMPTarget.h.

◆ offload_timer_

NewTimer& offload_timer_
private

timer for offload portion

Definition at line 71 of file SplineC2ROMPTarget.h.

◆ PrimLattice

CrystalLattice<ST, 3> PrimLattice
private

primitive cell

Definition at line 73 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::finalizeConstruction().

◆ PrimLattice_G_offload

std::shared_ptr<OffloadVector<ST> > PrimLattice_G_offload
private

Definition at line 84 of file SplineC2ROMPTarget.h.

Referenced by SplineC2ROMPTarget< ST >::finalizeConstruction().

◆ psiinv_pos_copy

Vector<TT, OffloadPinnedAllocator<TT> > psiinv_pos_copy
private

psiinv and position scratch space, used to avoid allocation on the fly and faster transfer

Definition at line 95 of file SplineC2ROMPTarget.h.

◆ ratios_private

Matrix<TT, OffloadPinnedAllocator<TT> > ratios_private
private

team private ratios for reduction, numVP x numTeams

Definition at line 89 of file SplineC2ROMPTarget.h.

◆ results_scratch

Vector<TT, OffloadPinnedAllocator<TT> > results_scratch
private

result scratch space, dynamically resized to the maximal need

Definition at line 93 of file SplineC2ROMPTarget.h.

◆ SplineInst


The documentation for this class was generated from the following files: