QMCPACK
SoaDistanceTableABOMPTarget< T, D, SC > Class Template Reference

A derived class from DistanceTableData, specialized for AB using a transposed form. More...

+ Inheritance diagram for SoaDistanceTableABOMPTarget< T, D, SC >:
+ Collaboration diagram for SoaDistanceTableABOMPTarget< T, D, SC >:

Classes

struct  DTABMultiWalkerMem
 multi walker shared memory buffer More...
 

Public Member Functions

 SoaDistanceTableABOMPTarget (const ParticleSet &source, ParticleSet &target)
 
 SoaDistanceTableABOMPTarget ()=delete
 
 SoaDistanceTableABOMPTarget (const SoaDistanceTableABOMPTarget &)=delete
 
 ~SoaDistanceTableABOMPTarget ()
 
void createResource (ResourceCollection &collection) const override
 initialize a shared resource and hand it to a collection More...
 
void acquireResource (ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
 acquire a shared resource from a collection More...
 
void releaseResource (ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
 return a shared resource to a collection More...
 
const T * getMultiWalkerDataPtr () const override
 return multi-walker full (all pairs) distance table data pointer More...
 
size_t getPerTargetPctlStrideSize () const override
 return stride of per target pctl data. full table data = stride * num of target particles More...
 
void evaluate (ParticleSet &P) override
 evaluate the full table More...
 
void mw_evaluate (const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const override
 
void mw_recompute (const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list, const std::vector< bool > &recompute) const override
 recompute multi walker internal data, recompute More...
 
void move (const ParticleSet &P, const PosType &rnew, const IndexType iat, bool prepare_old) override
 evaluate the temporary pair relations More...
 
void update (IndexType iat) override
 update the stripe for iat-th particle More...
 
int get_first_neighbor (IndexType iat, RealType &r, PosType &dr, bool newpos) const override
 
- Public Member Functions inherited from DTD_BConds< T, D, SC >
 DTD_BConds (const CrystalLattice< T, D > &lat)
 constructor: doing nothing More...
 
T apply_bc (TinyVector< T, D > &displ) const
 apply BC on displ and return |displ|^2 More...
 
void apply_bc (std::vector< TinyVector< T, D >> &dr, std::vector< T > &r, std::vector< T > &rinv) const
 apply BC on dr and evaluate r and rinv More...
 
void apply_bc (std::vector< TinyVector< T, D >> &dr, std::vector< T > &r) const
 
void evaluate_rsquared (TinyVector< T, D > *restrict dr, T *restrict rr, int n)
 
- Public Member Functions inherited from DistanceTableAB
 DistanceTableAB (const ParticleSet &source, const ParticleSet &target, DTModes modes)
 constructor using source and target ParticleSet More...
 
const std::vector< DistRow > & getDistances () const
 return full table distances More...
 
const std::vector< DisplRow > & getDisplacements () const
 return full table displacements More...
 
const DistRow & getDistRow (int iel) const
 return a row of distances for a given target particle More...
 
const DisplRow & getDisplRow (int iel) const
 return a row of displacements for a given target particle More...
 
const DistRow & getTempDists () const
 return the temporary distances when a move is proposed More...
 
const DisplRow & getTempDispls () const
 return the temporary displacements when a move is proposed More...
 
- Public Member Functions inherited from DistanceTable
 DistanceTable (const ParticleSet &source, const ParticleSet &target, DTModes modes)
 constructor using source and target ParticleSet More...
 
 DistanceTable (const DistanceTable &)=delete
 copy constructor. deleted More...
 
virtual ~DistanceTable ()=default
 virtual destructor More...
 
DTModes getModes () const
 get modes More...
 
void setModes (DTModes modes)
 set modes More...
 
const std::string & getName () const
 return the name of table More...
 
const ParticleSet & get_origin () const
 returns the reference to the origin particle set More...
 
size_t centers () const
 returns the number of centers More...
 
size_t targets () const
 returns the number of target particles More...
 
size_t sources () const
 returns the number of source particles More...
 
virtual void mw_move (const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list, const std::vector< PosType > &rnew_list, const IndexType iat, bool prepare_old=true) const
 walker batched version of move. More...
 
virtual void updatePartial (IndexType jat, bool from_temp)
 fill partially the distance table by the pair relations from the temporary or old particle position. More...
 
virtual void mw_updatePartial (const RefVectorWithLeader< DistanceTable > &dt_list, IndexType jat, const std::vector< bool > &from_temp)
 walker batched version of updatePartial. More...
 
virtual void finalizePbyP (const ParticleSet &P)
 finalize distance table calculation after particle-by-particle moves. If update() doesn't make the table up-to-date during p-by-p moves, finalizePbyP takes action to bring the table up-to-date. More...
 
virtual void mw_finalizePbyP (const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const
 walker batched version of finalizePbyP. If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not updated at all during p-by-p. Thus, a recompute is necessary to update the whole host distance table for consumers like the Coulomb potential. More...
 
virtual int get_first_neighbor (IndexType iat, RealType &r, PosType &dr, bool newpos) const =0
 find the first nearest neighbor More...
 
void print (std::ostream &os)
 

Private Types

template<typename DT >
using OffloadPinnedVector = Vector< DT, OMPallocator< DT, PinnedAlignedAllocator< DT > >>
 

Private Member Functions

void resize ()
 

Static Private Member Functions

static void associateResource (const RefVectorWithLeader< DistanceTable > &dt_list)
 

Private Attributes

OffloadPinnedVector< RealType > r_dr_memorypool_
 accelerator output buffer for r and dr More...
 
OffloadPinnedVector< T > target_pos
 accelerator input array for a list of target particle positions, num_targets_ x D More...
 
ResourceHandle< DTABMultiWalkerMem > mw_mem_handle_
 
NewTimer & offload_timer_
 timer for offload portion More...
 
NewTimer & evaluate_timer_
 timer for evaluate() More...
 
NewTimer & move_timer_
 timer for move() More...
 
NewTimer & update_timer_
 timer for update() More...
 

Additional Inherited Members

- Public Types inherited from DistanceTable
using IndexType = QMCTraits::IndexType
 
using RealType = QMCTraits::RealType
 
using PosType = QMCTraits::PosType
 
using DistRow = Vector< RealType, aligned_allocator< RealType > >
 
using DisplRow = VectorSoaContainer< RealType, DIM >
 
- Static Public Attributes inherited from DistanceTable
static constexpr unsigned DIM = OHMMS_DIM
 
- Protected Attributes inherited from DistanceTableAB
std::vector< DistRow > distances_
 distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|. Note: Derived classes decide if it is a memory view or the actual storage More...
 
std::vector< DisplRow > displacements_
 displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i]. Note: Derived classes decide if it is a memory view or the actual storage More...
 
DistRow temp_r_
 temp_r More...
 
DisplRow temp_dr_
 temp_dr More...
 
- Protected Attributes inherited from DistanceTable
const ParticleSet & origin_
 
const size_t num_sources_
 
const size_t num_targets_
 
const std::string name_
 name of the table More...
 
DTModes modes_
 operation modes defined by DTModes More...
 

Detailed Description

template<typename T, unsigned D, int SC>
class qmcplusplus::SoaDistanceTableABOMPTarget< T, D, SC >

A derived class from DistanceTableData, specialized for AB using a transposed form.

Definition at line 31 of file SoaDistanceTableABOMPTarget.h.

Member Typedef Documentation

◆ OffloadPinnedVector

Definition at line 35 of file SoaDistanceTableABOMPTarget.h.

Constructor & Destructor Documentation

◆ SoaDistanceTableABOMPTarget() [1/3]

SoaDistanceTableABOMPTarget ( const ParticleSet & source,
ParticleSet & target 
)
inline

Definition at line 121 of file SoaDistanceTableABOMPTarget.h.

References ParticleSet::getCoordinates(), DistanceTable::num_sources_, VectorSoaContainer< T, D, Alloc >::resize(), Vector< T, Alloc >::resize(), DistanceTableAB::temp_dr_, and DistanceTableAB::temp_r_.

122  : DTD_BConds<T, D, SC>(source.getLattice()),
123  DistanceTableAB(source, target, DTModes::ALL_OFF),
124  offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + name_, timer_level_fine)),
125  evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + name_, timer_level_fine)),
126  move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + name_, timer_level_fine)),
127  update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + name_, timer_level_fine))
128 
129  {
130  auto* coordinates_soa = dynamic_cast<const RealSpacePositionsOMPTarget*>(&source.getCoordinates());
131  if (!coordinates_soa)
132  throw std::runtime_error("Source particle set doesn't have OpenMP offload. Contact developers!");
133  PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
134 
135  // The padding of temp_r_ and temp_dr_ is necessary for the memory copy in the update function
136  // temp_r_ is padded explicitly while temp_dr_ is padded internally
137  const int num_padded = getAlignedSize<T>(num_sources_);
138  temp_r_.resize(num_padded);
140  }
size_t getAlignedSize(size_t n)
return size in T's of allocated aligned memory
NewTimer & offload_timer_
timer for offload portion
const std::string name_
name of the table
Definition: DistanceTable.h:57
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
DistanceTableAB(const ParticleSet &source, const ParticleSet &target, DTModes modes)
constructor using source and target ParticleSet

◆ SoaDistanceTableABOMPTarget() [2/3]

◆ SoaDistanceTableABOMPTarget() [3/3]

SoaDistanceTableABOMPTarget ( const SoaDistanceTableABOMPTarget< T, D, SC > &  )
delete

◆ ~SoaDistanceTableABOMPTarget()

Definition at line 145 of file SoaDistanceTableABOMPTarget.h.

145 { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") }

Member Function Documentation

◆ acquireResource()

void acquireResource ( ResourceCollection & collection,
const RefVectorWithLeader< DistanceTable > &  dt_list 
) const
inline override virtual

acquire a shared resource from a collection

Reimplemented from DistanceTable.

Definition at line 152 of file SoaDistanceTableABOMPTarget.h.

References SoaDistanceTableABOMPTarget< T, D, SC >::associateResource(), RefVectorWithLeader< T >::getCastedLeader(), ResourceCollection::lendResource(), and SoaDistanceTableABOMPTarget< T, D, SC >::mw_mem_handle_.

153  {
154  auto& dt_leader = dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>();
155  dt_leader.mw_mem_handle_ = collection.lendResource<DTABMultiWalkerMem>();
156  associateResource(dt_list);
157  }
static void associateResource(const RefVectorWithLeader< DistanceTable > &dt_list)

◆ associateResource()

static void associateResource ( const RefVectorWithLeader< DistanceTable > &  dt_list)
inline static private

Definition at line 81 of file SoaDistanceTableABOMPTarget.h.

References RefVectorWithLeader< T >::getCastedElement(), RefVectorWithLeader< T >::getCastedLeader(), and DistanceTable::targets().

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::acquireResource().

82  {
83  auto& dt_leader = dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>();
84 
85  // initialize memory containers and views
86  size_t count_targets = 0;
87  for (size_t iw = 0; iw < dt_list.size(); iw++)
88  {
89  auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
90  count_targets += dt.targets();
91  dt.r_dr_memorypool_.free();
92  }
93 
94  const size_t num_sources = dt_leader.num_sources_;
95  const size_t num_padded = getAlignedSize<T>(dt_leader.num_sources_);
96  const size_t stride_size = num_padded * (D + 1);
97  const size_t total_targets = count_targets;
98  auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr;
99  mw_r_dr.resize(total_targets * stride_size);
100 
101  count_targets = 0;
102  for (size_t iw = 0; iw < dt_list.size(); iw++)
103  {
104  auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
105  assert(num_sources == dt.num_sources_);
106 
107  dt.distances_.resize(dt.targets());
108  dt.displacements_.resize(dt.targets());
109 
110  for (int i = 0; i < dt.targets(); ++i)
111  {
112  dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources);
113  dt.displacements_[i].attachReference(num_sources, num_padded,
114  mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
115  }
116  count_targets += dt.targets();
117  }
118  }

◆ createResource()

void createResource ( ResourceCollection & collection) const
inline override virtual

initialize a shared resource and hand it to a collection

Reimplemented from DistanceTable.

Definition at line 147 of file SoaDistanceTableABOMPTarget.h.

References ResourceCollection::addResource().

148  {
149  auto resource_index = collection.addResource(std::make_unique<DTABMultiWalkerMem>());
150  }

◆ evaluate()

void evaluate ( ParticleSet & P)
inline override virtual

evaluate the full table

Implements DistanceTable.

Definition at line 175 of file SoaDistanceTableABOMPTarget.h.

References VectorSoaContainer< T, D, Alloc >::data(), DistanceTableAB::displacements_, DistanceTableAB::distances_, SoaDistanceTableABOMPTarget< T, D, SC >::evaluate_timer_, DynamicCoordinates::getAllParticlePos(), ParticleSet::getCoordinates(), SoaDistanceTableABOMPTarget< T, D, SC >::getPerTargetPctlStrideSize(), omptarget::min(), DistanceTable::num_sources_, DistanceTable::num_targets_, SoaDistanceTableABOMPTarget< T, D, SC >::offload_timer_, DistanceTable::origin_, ParticleSet::R, SoaDistanceTableABOMPTarget< T, D, SC >::resize(), and SoaDistanceTableABOMPTarget< T, D, SC >::target_pos.

176  {
177  resize();
178 
179  ScopedTimer local_timer(evaluate_timer_);
180  // be aware of the sign of Displacement
181  const int num_targets_local = num_targets_;
182  const int num_sources_local = num_sources_;
183  const int num_padded = getAlignedSize<T>(num_sources_);
184 
185  target_pos.resize(num_targets_ * D);
186  for (size_t iat = 0; iat < num_targets_; iat++)
187  for (size_t idim = 0; idim < D; idim++)
188  target_pos[iat * D + idim] = P.R[iat][idim];
189 
190  auto* target_pos_ptr = target_pos.data();
191  auto* source_pos_ptr = origin_.getCoordinates().getAllParticlePos().data();
192  auto* r_dr_ptr = distances_[0].data();
193  assert(distances_[0].data() + num_padded == displacements_[0].data());
194 
195  // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team.
196  const int ChunkSizePerTeam = 512;
197  const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
198  const size_t stride_size = getPerTargetPctlStrideSize();
199 
200  {
201  ScopedTimer offload(offload_timer_);
202  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(num_targets_*num_teams) \
203  map(to: source_pos_ptr[:num_padded*D]) \
204  map(always, to: target_pos_ptr[:num_targets_*D]) \
205  map(always, from: r_dr_ptr[:num_targets_*stride_size])")
206  for (int iat = 0; iat < num_targets_local; ++iat)
207  for (int team_id = 0; team_id < num_teams; team_id++)
208  {
209  const int first = ChunkSizePerTeam * team_id;
210  const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
211 
212  T pos[D];
213  for (int idim = 0; idim < D; idim++)
214  pos[idim] = target_pos_ptr[iat * D + idim];
215 
216  auto* r_iat_ptr = r_dr_ptr + iat * stride_size;
217  auto* dr_iat_ptr = r_iat_ptr + num_padded;
218 
219  PRAGMA_OFFLOAD("omp parallel for")
220  for (int iel = first; iel < last; iel++)
221  DTD_BConds<T, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr,
222  num_padded, iel);
223  }
224  }
225  }
DTD_BConds(const CrystalLattice< T, D > &lat)
constructor: doing nothing
NewTimer & offload_timer_
timer for offload portion
virtual const PosVectorSoa & getAllParticlePos() const =0
all particle position accessor
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
const DynamicCoordinates & getCoordinates() const
Definition: ParticleSet.h:246
size_t getPerTargetPctlStrideSize() const override
return stride of per target pctl data. full table data = stride * num of target particles ...
OffloadPinnedVector< T > target_pos
accelerator input array for a list of target particle positions, num_targets_ x D ...
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
const ParticleSet & origin_
Definition: DistanceTable.h:51
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...

◆ get_first_neighbor()

int get_first_neighbor ( IndexType  iat,
RealType & r,
PosType & dr,
bool  newpos 
) const
inline override

Definition at line 369 of file SoaDistanceTableABOMPTarget.h.

References DistanceTableAB::displacements_, DistanceTableAB::distances_, DistanceTable::num_sources_, DistanceTableAB::temp_dr_, and DistanceTableAB::temp_r_.

370  {
371  RealType min_dist = std::numeric_limits<RealType>::max();
372  int index = -1;
373  if (newpos)
374  {
375  for (int jat = 0; jat < num_sources_; ++jat)
376  if (temp_r_[jat] < min_dist)
377  {
378  min_dist = temp_r_[jat];
379  index = jat;
380  }
381  if (index >= 0)
382  {
383  r = min_dist;
384  dr = temp_dr_[index];
385  }
386  }
387  else
388  {
389  for (int jat = 0; jat < num_sources_; ++jat)
390  if (distances_[iat][jat] < min_dist)
391  {
392  min_dist = distances_[iat][jat];
393  index = jat;
394  }
395  if (index >= 0)
396  {
397  r = min_dist;
398  dr = displacements_[iat][index];
399  }
400  }
401  assert(index >= 0 && index < num_sources_);
402  return index;
403  }
QMCTraits::RealType RealType
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...

◆ getMultiWalkerDataPtr()

const T* getMultiWalkerDataPtr ( ) const
inline override virtual

return multi-walker full (all pairs) distance table data pointer

Reimplemented from DistanceTableAB.

Definition at line 170 of file SoaDistanceTableABOMPTarget.h.

References SoaDistanceTableABOMPTarget< T, D, SC >::mw_mem_handle_.

170 { return mw_mem_handle_.getResource().mw_r_dr.data(); }
ResourceHandle< DTABMultiWalkerMem > mw_mem_handle_

◆ getPerTargetPctlStrideSize()

size_t getPerTargetPctlStrideSize ( ) const
inline override virtual

return stride of per target pctl data. full table data = stride * num of target particles

Reimplemented from DistanceTableAB.

Definition at line 172 of file SoaDistanceTableABOMPTarget.h.

References DistanceTable::num_sources_.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::evaluate(), SoaDistanceTableABOMPTarget< T, D, SC >::mw_evaluate(), and SoaDistanceTableABOMPTarget< T, D, SC >::resize().

172 { return getAlignedSize<T>(num_sources_) * (D + 1); }

◆ move()

void move ( const ParticleSet & P,
const PosType & rnew,
const IndexType  iat,
bool  prepare_old 
)
inline override virtual

evaluate the temporary pair relations

Implements DistanceTable.

Definition at line 348 of file SoaDistanceTableABOMPTarget.h.

References Vector< T, Alloc >::data(), DistanceTableAB::displacements_, DistanceTableAB::distances_, DynamicCoordinates::getAllParticlePos(), ParticleSet::getCoordinates(), DistanceTable::modes_, SoaDistanceTableABOMPTarget< T, D, SC >::move_timer_, qmcplusplus::NEED_FULL_TABLE_ANYTIME, DistanceTable::num_sources_, DistanceTable::origin_, ParticleSet::R, DistanceTableAB::temp_dr_, and DistanceTableAB::temp_r_.

349  {
350  ScopedTimer local_timer(move_timer_);
351  DTD_BConds<T, D, SC>::computeDistances(rnew, origin_.getCoordinates().getAllParticlePos(), temp_r_.data(), temp_dr_,
352  0, num_sources_);
353  // If the full table is not ready all the time, overwrite the current value.
354  // If this step is missing, DT values can be undefined in case a move is rejected.
355  if (!(modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old)
356  DTD_BConds<T, D, SC>::computeDistances(P.R[iat], origin_.getCoordinates().getAllParticlePos(),
357  distances_[iat].data(), displacements_[iat], 0, num_sources_);
358  }
virtual const PosVectorSoa & getAllParticlePos() const =0
all particle position accessor
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
const DynamicCoordinates & getCoordinates() const
Definition: ParticleSet.h:246
whether full table needs to be ready at anytime or not during PbyP Optimization can be implemented du...
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
const ParticleSet & origin_
Definition: DistanceTable.h:51
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...
DTModes modes_
operation modes defined by DTModes
Definition: DistanceTable.h:60

◆ mw_evaluate()

void mw_evaluate ( const RefVectorWithLeader< DistanceTable > &  dt_list,
const RefVectorWithLeader< ParticleSet > &  p_list 
) const
inline override virtual

Reimplemented from DistanceTable.

Definition at line 227 of file SoaDistanceTableABOMPTarget.h.

References SoaDistanceTableABOMPTarget< T, D, SC >::evaluate_timer_, RefVectorWithLeader< T >::getCastedElement(), RefVectorWithLeader< T >::getCastedLeader(), RefVectorWithLeader< T >::getLeader(), SoaDistanceTableABOMPTarget< T, D, SC >::getPerTargetPctlStrideSize(), omptarget::min(), DistanceTable::modes_, qmcplusplus::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST, SoaDistanceTableABOMPTarget< T, D, SC >::DTABMultiWalkerMem::mw_r_dr, DistanceTable::num_sources_, SoaDistanceTableABOMPTarget< T, D, SC >::DTABMultiWalkerMem::offload_input, and qmcplusplus::pset.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::mw_recompute().

229  {
230  assert(this == &dt_list.getLeader());
231  auto& dt_leader = dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>();
232 
233  ScopedTimer local_timer(evaluate_timer_);
234 
235  const size_t nw = dt_list.size();
236  DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
237  auto& mw_r_dr = mw_mem.mw_r_dr;
238 
239  size_t count_targets = 0;
240  for (ParticleSet& p : p_list)
241  count_targets += p.getTotalNum();
242  const size_t total_targets = count_targets;
243 
244  const int num_padded = getAlignedSize<T>(num_sources_);
245 
246 #ifndef NDEBUG
247  const int stride_size = getPerTargetPctlStrideSize();
248  count_targets = 0;
249  for (size_t iw = 0; iw < dt_list.size(); iw++)
250  {
251  auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
252 
253  for (int i = 0; i < dt.targets(); ++i)
254  {
255  assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size);
256  assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
257  }
258  count_targets += dt.targets();
259  }
260 #endif
261 
262  // This is horrible optimization putting different data types in a single buffer but allows a single H2D transfer
263  const size_t realtype_size = sizeof(RealType);
264  const size_t int_size = sizeof(int);
265  const size_t ptr_size = sizeof(RealType*);
266  auto& offload_input = mw_mem.offload_input;
267  offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size);
268  auto source_ptrs = reinterpret_cast<RealType**>(offload_input.data());
269  auto target_positions = reinterpret_cast<RealType*>(offload_input.data() + ptr_size * nw);
270  auto walker_id_ptr =
271  reinterpret_cast<int*>(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size);
272 
273  count_targets = 0;
274  for (size_t iw = 0; iw < nw; iw++)
275  {
276  auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
277  ParticleSet& pset(p_list[iw]);
278 
279  assert(dt.targets() == pset.getTotalNum());
280  assert(num_sources_ == dt.num_sources_);
281 
282  auto& RSoA_OMPTarget = static_cast<const RealSpacePositionsOMPTarget&>(dt.origin_.getCoordinates());
283  source_ptrs[iw] = const_cast<RealType*>(RSoA_OMPTarget.getDevicePtr());
284 
285  for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets)
286  {
287  walker_id_ptr[count_targets] = iw;
288  for (size_t idim = 0; idim < D; idim++)
289  target_positions[count_targets * D + idim] = pset.R[iat][idim];
290  }
291  }
292 
293  // To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team.
294  const int ChunkSizePerTeam = 512;
295  const size_t num_teams = (num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
296 
297  auto* r_dr_ptr = mw_r_dr.data();
298  auto* input_ptr = offload_input.data();
299  const int num_sources_local = num_sources_;
300 
301  {
302  ScopedTimer offload(dt_leader.offload_timer_);
303  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \
304  map(always, to: input_ptr[:offload_input.size()]) \
305  depend(out:r_dr_ptr[:mw_r_dr.size()])")
306  for (int iat = 0; iat < total_targets; ++iat)
307  for (int team_id = 0; team_id < num_teams; team_id++)
308  {
309  auto* target_pos_ptr = reinterpret_cast<RealType*>(input_ptr + ptr_size * nw);
310  const int walker_id =
311  reinterpret_cast<int*>(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat];
312  auto* source_pos_ptr = reinterpret_cast<RealType**>(input_ptr)[walker_id];
313  auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1);
314  auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded;
315 
316  const int first = ChunkSizePerTeam * team_id;
317  const int last = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
318 
319  T pos[D];
320  for (int idim = 0; idim < D; idim++)
321  pos[idim] = target_pos_ptr[iat * D + idim];
322 
323  PRAGMA_OFFLOAD("omp parallel for")
324  for (int iel = first; iel < last; iel++)
325  DTD_BConds<T, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr,
326  num_padded, iel);
327  }
328 
330  {
331  PRAGMA_OFFLOAD(
332  "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
333  }
334  // wait for computing and (optional) transferring back to host.
335  // It can potentially be moved to ParticleSet to fuse multiple similar taskwait
336  PRAGMA_OFFLOAD("omp taskwait")
337  }
338  }
DTD_BConds(const CrystalLattice< T, D > &lat)
constructor: doing nothing
QMCTraits::RealType RealType
Definition: DistanceTable.h:44
if(c->rank()==0)
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
T min(T a, T b)
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
QMCTraits::RealType RealType
size_t getPerTargetPctlStrideSize() const override
return stride of per target pctl data. full table data = stride * num of target particles ...
DTModes modes_
operation modes defined by DTModes
Definition: DistanceTable.h:60
skip data transfer back to host after mw_evaluate full distance table.

◆ mw_recompute()

void mw_recompute ( const RefVectorWithLeader< DistanceTable > &  dt_list,
const RefVectorWithLeader< ParticleSet > &  p_list,
const std::vector< bool > &  recompute 
) const
inline override virtual

recompute multi walker internal data, recompute

Parameters
dt_list: the distance table batch
p_list: the target particle set batch
recompute: if true, must recompute. Otherwise, implementation dependent.

Reimplemented from DistanceTable.

Definition at line 340 of file SoaDistanceTableABOMPTarget.h.

References SoaDistanceTableABOMPTarget< T, D, SC >::mw_evaluate().

343  {
344  mw_evaluate(dt_list, p_list);
345  }
void mw_evaluate(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const override

◆ releaseResource()

void releaseResource ( ResourceCollection & collection,
const RefVectorWithLeader< DistanceTable > &  dt_list 
) const
inline override virtual

return a shared resource to a collection

Reimplemented from DistanceTable.

Definition at line 159 of file SoaDistanceTableABOMPTarget.h.

References DistanceTableAB::distances_, RefVectorWithLeader< T >::getCastedElement(), RefVectorWithLeader< T >::getCastedLeader(), SoaDistanceTableABOMPTarget< T, D, SC >::mw_mem_handle_, and ResourceCollection::takebackResource().

160  {
161  collection.takebackResource(dt_list.getCastedLeader<SoaDistanceTableABOMPTarget>().mw_mem_handle_);
162  for (size_t iw = 0; iw < dt_list.size(); iw++)
163  {
164  auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
165  dt.distances_.clear();
166  dt.displacements_.clear();
167  }
168  }

◆ resize()

void resize ( )
inline private

Definition at line 59 of file SoaDistanceTableABOMPTarget.h.

References DistanceTableAB::displacements_, DistanceTableAB::distances_, SoaDistanceTableABOMPTarget< T, D, SC >::getPerTargetPctlStrideSize(), DistanceTable::num_sources_, DistanceTable::num_targets_, and SoaDistanceTableABOMPTarget< T, D, SC >::r_dr_memorypool_.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::evaluate().

60  {
61  if (num_sources_ * num_targets_ == 0)
62  return;
63  if (distances_.size())
64  return;
65 
66  // initialize memory containers and views
67  const size_t num_padded = getAlignedSize<T>(num_sources_);
68  const size_t stride_size = getPerTargetPctlStrideSize();
69  r_dr_memorypool_.resize(stride_size * num_targets_);
70 
71  distances_.resize(num_targets_);
73  for (int i = 0; i < num_targets_; ++i)
74  {
75  distances_[i].attachReference(r_dr_memorypool_.data() + i * stride_size, num_sources_);
76  displacements_[i].attachReference(num_sources_, num_padded,
77  r_dr_memorypool_.data() + i * stride_size + num_padded);
78  }
79  }
OffloadPinnedVector< RealType > r_dr_memorypool_
accelerator output buffer for r and dr
size_t getPerTargetPctlStrideSize() const override
return stride of per target pctl data. full table data = stride * num of target particles ...
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...

◆ update()

void update ( IndexType  iat)
inline override virtual

update the stripe for iat-th particle

Implements DistanceTable.

Definition at line 361 of file SoaDistanceTableABOMPTarget.h.

References qmcplusplus::syclBLAS::copy_n(), Vector< T, Alloc >::data(), VectorSoaContainer< T, D, Alloc >::data(), DistanceTableAB::displacements_, DistanceTableAB::distances_, DistanceTable::num_sources_, DistanceTableAB::temp_dr_, DistanceTableAB::temp_r_, and SoaDistanceTableABOMPTarget< T, D, SC >::update_timer_.

362  {
363  ScopedTimer local_timer(update_timer_);
365  for (int idim = 0; idim < D; ++idim)
366  std::copy_n(temp_dr_.data(idim), num_sources_, displacements_[iat].data(idim));
367  }
ScopeGuard< NewTimer > ScopedTimer
Definition: NewTimer.h:257
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...

Member Data Documentation

◆ evaluate_timer_

◆ move_timer_

NewTimer& move_timer_
private

timer for move()

Definition at line 411 of file SoaDistanceTableABOMPTarget.h.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::move().

◆ mw_mem_handle_

◆ offload_timer_

NewTimer& offload_timer_
private

timer for offload portion

Definition at line 407 of file SoaDistanceTableABOMPTarget.h.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::evaluate().

◆ r_dr_memorypool_

OffloadPinnedVector<RealType> r_dr_memorypool_
private

accelerator output buffer for r and dr

Definition at line 38 of file SoaDistanceTableABOMPTarget.h.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::resize().

◆ target_pos

OffloadPinnedVector<T> target_pos
private

accelerator input array for a list of target particle positions, num_targets_ x D

Definition at line 40 of file SoaDistanceTableABOMPTarget.h.

Referenced by SoaDistanceTableABOMPTarget< T, D, SC >::evaluate().

◆ update_timer_

NewTimer& update_timer_
private

The documentation for this class was generated from the following file: