14 #ifndef QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H 15 #define QMCPLUSPLUS_DTDIMPL_AA_OMPTARGET_H 30 template<
typename T,
unsigned D,
int SC>
62 std::unique_ptr<Resource>
makeClone()
const override {
return std::make_unique<DTAAMultiWalkerMem>(*this); }
82 throw std::runtime_error(
"Source particle set doesn't have OpenMP offload. Contact developers!");
84 PRAGMA_OFFLOAD(
"omp target enter data map(to : this[:1])")
93 const size_t num_padded = getAlignedSize<T>(
N);
94 const size_t Alignment = getAlignment<T>();
95 return (num_padded * (2 *
N - num_padded + 1) + (Alignment - 1) * num_padded) / 2;
124 auto resource_index = collection.
addResource(std::make_unique<DTAAMultiWalkerMem>());
132 const size_t nw = dt_list.size();
135 for (
int iw = 0; iw < nw; iw++)
144 auto& mw_new_old_dist_displ = dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ;
145 mw_new_old_dist_displ.resize(nw * 2 * stride_size);
146 for (
int iw = 0; iw < nw; iw++)
152 dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw),
num_targets_padded_);
161 const size_t nw = dt_list.size();
162 for (
int iw = 0; iw < nw; iw++)
176 constexpr T BigR = std::numeric_limits<T>::max();
190 size_t range_end)
const override 193 const size_t subset_size = range_end - range_begin;
194 if (subset_size > dt_leader.num_particls_stored)
195 throw std::runtime_error(
"not enough internal buffer");
197 ScopedTimer local_timer(dt_leader.evaluate_timer_);
202 const size_t nw = dt_list.size();
203 const auto num_sources_local = dt_leader.num_targets_;
204 const auto num_padded = dt_leader.num_targets_padded_;
207 const int ChunkSizePerTeam = 512;
208 const size_t num_teams = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
216 PRAGMA_OFFLOAD(
"omp target teams distribute collapse(2) num_teams(nw * num_teams)")
217 for (
int iw = 0; iw < nw; ++iw)
218 for (
int team_id = 0; team_id < num_teams; team_id++)
220 auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
221 const size_t first = ChunkSizePerTeam * team_id;
222 const size_t last =
omptarget::min(first + ChunkSizePerTeam, num_sources_local);
224 PRAGMA_OFFLOAD(
"omp parallel for")
225 for (
int iel = first; iel < last; iel++)
227 for (
int irow = 0; irow < subset_size; irow++)
229 T* dist = dist_ranged + (irow + subset_size * iw) * num_padded;
230 size_t id_target = irow + range_begin;
235 dx = source_pos_ptr[id_target] - source_pos_ptr[iel];
236 dy = source_pos_ptr[id_target + num_padded] - source_pos_ptr[iel + num_padded];
237 dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2];
241 const size_t id_target_reverse = num_sources_local - 1 - id_target;
242 const size_t iel_reverse = num_sources_local - 1 - iel;
243 dx = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse];
244 dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded];
245 dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2];
267 assert((prepare_old && iat >= 0 && iat <
num_targets_) || !prepare_old);
278 old_r_[iat] = std::numeric_limits<T>::max();
290 const std::vector<PosType>& rnew_list,
292 bool prepare_old =
true)
const override 300 const size_t nw = dt_list.size();
303 auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ;
305 for (
int iw = 0; iw < nw; iw++)
314 const int ChunkSizePerTeam = 512;
315 const size_t num_teams = (
num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
321 auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
322 auto* r_dr_ptr = mw_new_old_dist_displ.data();
323 auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data();
324 const size_t new_pos_stride = coordinates_leader.getFusedNewPosBuffer().capacity();
328 PRAGMA_OFFLOAD(
"omp target teams distribute collapse(2) num_teams(nw * num_teams) \ 329 depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])")
330 for (
int iw = 0; iw < nw; ++iw)
331 for (
int team_id = 0; team_id < num_teams; team_id++)
333 auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
334 const size_t first = ChunkSizePerTeam * team_id;
335 const size_t last =
omptarget::min(first + ChunkSizePerTeam, num_sources_local);
338 auto* r_iw_ptr = r_dr_ptr + iw * stride_size;
339 auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded;
342 for (
int idim = 0; idim < D; idim++)
343 pos[idim] = new_pos_ptr[idim * new_pos_stride + iw];
345 PRAGMA_OFFLOAD(
"omp parallel for")
346 for (
int iel = first; iel < last; iel++)
348 num_padded, iel, iat);
353 auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size;
354 auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded;
357 for (
int idim = 0; idim < D; idim++)
358 pos[idim] = source_pos_ptr[idim * num_padded + iat];
360 PRAGMA_OFFLOAD(
"omp parallel for")
361 for (
int iel = first; iel < last; iel++)
363 num_padded, iel, iat);
364 r_iw_ptr[iat] = std::numeric_limits<T>::max();
371 PRAGMA_OFFLOAD(
"omp target update nowait depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \ 372 from(r_dr_ptr[:mw_new_old_dist_displ.size()])")
380 RealType min_dist = std::numeric_limits<RealType>::max();
385 if (
temp_r_[jat] < min_dist && jat != iat)
395 for (
int jat = 0; jat < iat; ++jat)
407 assert(index != iat && index >= 0);
424 const int nupdate = iat;
428 for (
int idim = 0; idim < D; ++idim)
443 const int nupdate = jat;
449 for (
int idim = 0; idim < D; ++idim)
458 for (
int idim = 0; idim < D; ++idim)
465 const std::vector<bool>& from_temp)
override 471 for (
int iw = 0; iw < dt_list.size(); iw++)
void resize(size_type n, Type_t val=Type_t())
Resize the container.
DisplRow temp_dr_mem_
actual memory for temp_dr_
const size_t num_targets_
const RealType * getMultiWalkerTempDataPtr() const override
return multi walker temporary pair distance table data pointer
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
size_t getAlignedSize(size_t n)
return size in T's of allocated aligned memory
size_type capacity() const
return the physical size
std::vector< T, aligned_allocator< T > > aligned_vector
int get_first_neighbor(IndexType iat, RealType &r, PosType &dr, bool newpos) const override
NewTimer & update_timer_
timer for update()
void takebackResource(ResourceHandle< RS > &res_handle)
helper functions for EinsplineSetBuilder
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
acquire a shared resource from a collection
virtual const PosVectorSoa & getAllParticlePos() const =0
all particle position accessor
const size_t num_targets_padded_
number of targets with padding
void evaluate(ParticleSet &P) override
evaluate the full Distance Table
ResourceHandle manages the temporary resource referenced from a collection.
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
return a shared resource to a collection
SoA adaptor class for Vector<TinyVector<T,D> >
Timer accumulates time and call counts.
ResourceHandle< DTAAMultiWalkerMem > mw_mem_handle_
const auto & getMultiWalkerRSoADevicePtrs() const
aligned_vector< RealType > memory_pool_
actual memory for dist and displacements_
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
A derived class from DistanceTableData, specialized for the dense case.
const std::string name_
name of the table
NewTimer & move_timer_
timer for move()
void createResource(ResourceCollection &collection) const override
initialize a shared resource and hand it to a collection
int old_prepared_elec_id_
set to particle id after move() with prepare_old = true.
DisplRow old_dr_
old displacements
DistRow old_r_
old distances
void attachReference(T *ref, size_type n)
Specialized particle class for atomistic simulations.
DistRow temp_r_mem_
actual memory for temp_r_
size_type size() const
return the current size
Vector< RealType, OMPallocator< RealType, PinnedAlignedAllocator< RealType > > > mw_distances_subset
distances from a range of indices to the source.
const RealType * mw_evalDistsInRange(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list, size_t range_begin, size_t range_end) const override
compute distances from particles in [range_begin, range_end) to all the particles.
size_t get_num_particls_stored() const override
std::unique_ptr< Resource > makeClone() const override
Introduced to handle virtual moves and ratio computations, e.g.
void mw_updatePartial(const RefVectorWithLeader< DistanceTable > &dt_list, IndexType jat, const std::vector< bool > &from_temp) override
walker batched version of updatePartial.
~SoaDistanceTableAAOMPTarget()
void mw_finalizePbyP(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const override
walker batched version of finalizePbyP. If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table da...
CASTTYPE & getCastedElement(size_t i) const
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
void move(const ParticleSet &P, const PosType &rnew, const IndexType iat, bool prepare_old) override
evaluate the temporary pair relations
DisplRow old_dr_mem_
actual memory for old_dr_
SoaDistanceTableAAOMPTarget(ParticleSet &target)
AA type of DistanceTable containing storage.
T * data()
return the base
const DynamicCoordinates & getCoordinates() const
Vector< RealType, OMPallocator< RealType, PinnedAlignedAllocator< RealType > > > mw_new_old_dist_displ
dist displ for temporary and old pairs
NewTimer & evaluate_timer_
timer for evaluate()
CASTTYPE & getCastedLeader() const
SoaDistanceTableAAOMPTarget()=delete
QMCTraits::IndexType IndexType
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
multi walker shared memory buffer
DistRow old_r_mem_
actual memory for old_r_
virtual void mw_evaluate(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const
size_t compute_size(int N) const
handle math function mapping inside OpenMP offload regions.
void update(IndexType iat) override
After accepting the iat-th particle, update the iat-th row of distances_ and displacements_.
DTAAMultiWalkerMem(const DTAAMultiWalkerMem &)
ResourceHandle< RS > lendResource()
void updatePartial(IndexType jat, bool from_temp) override
fill partially the distance table by the pair relations from the temporary or old particle position...
void resize(size_type n)
resize myData
void mw_move(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list, const std::vector< PosType > &rnew_list, const IndexType iat, bool prepare_old=true) const override
evaluate the temporary pair relations when a move is proposed. This implementation is asynchronous and...
size_type size() const
return the physical size
void attachReference(size_type n, size_type n_padded, T *ptr)
attach to pre-allocated data
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...
whether full table needs to be ready at anytime or not after donePbyP Optimization can be implemented...
whether temporary data set on the host is updated or not when a move is proposed. ...
NewTimer & offload_timer_
timer for offload portion
DTModes modes_
operation modes defined by DTModes
const size_t num_particls_stored
the particle count of the internal stored distances.