14 #ifndef QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H 15 #define QMCPLUSPLUS_DTDIMPL_AB_OMPTARGET_H 30 template<
typename T,
unsigned D,
int SC>
/// Produce an independent copy of this resource: a new DTABMultiWalkerMem is
/// copy-constructed from *this and returned as an owning pointer typed as Resource.
54 std::unique_ptr<Resource>
makeClone()
const override {
return std::make_unique<DTABMultiWalkerMem>(*this); }
67 const size_t num_padded = getAlignedSize<T>(
num_sources_);
86 size_t count_targets = 0;
87 for (
size_t iw = 0; iw < dt_list.size(); iw++)
91 dt.r_dr_memorypool_.free();
94 const size_t num_sources = dt_leader.num_sources_;
95 const size_t num_padded = getAlignedSize<T>(dt_leader.num_sources_);
96 const size_t stride_size = num_padded * (D + 1);
97 const size_t total_targets = count_targets;
98 auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr;
99 mw_r_dr.resize(total_targets * stride_size);
102 for (
size_t iw = 0; iw < dt_list.size(); iw++)
105 assert(num_sources == dt.num_sources_);
107 dt.distances_.resize(dt.targets());
108 dt.displacements_.resize(dt.targets());
110 for (
int i = 0; i < dt.targets(); ++i)
112 dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources);
113 dt.displacements_[i].attachReference(num_sources, num_padded,
114 mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
116 count_targets += dt.targets();
131 if (!coordinates_soa)
132 throw std::runtime_error(
"Source particle set doesn't have OpenMP offload. Contact developers!");
133 PRAGMA_OFFLOAD(
"omp target enter data map(to : this[:1])")
149 auto resource_index = collection.
addResource(std::make_unique<DTABMultiWalkerMem>());
162 for (
size_t iw = 0; iw < dt_list.size(); iw++)
166 dt.displacements_.clear();
187 for (
size_t idim = 0; idim < D; idim++)
196 const int ChunkSizePerTeam = 512;
197 const size_t num_teams = (
num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
202 PRAGMA_OFFLOAD(
"omp target teams distribute collapse(2) num_teams(num_targets_*num_teams) \ 203 map(to: source_pos_ptr[:num_padded*D]) \ 204 map(always, to: target_pos_ptr[:num_targets_*D]) \ 205 map(always, from: r_dr_ptr[:num_targets_*stride_size])")
206 for (
int iat = 0; iat < num_targets_local; ++iat)
207 for (
int team_id = 0; team_id < num_teams; team_id++)
209 const int first = ChunkSizePerTeam * team_id;
210 const int last =
omptarget::min(first + ChunkSizePerTeam, num_sources_local);
213 for (
int idim = 0; idim < D; idim++)
214 pos[idim] = target_pos_ptr[iat * D + idim];
216 auto* r_iat_ptr = r_dr_ptr + iat * stride_size;
217 auto* dr_iat_ptr = r_iat_ptr + num_padded;
219 PRAGMA_OFFLOAD(
"omp parallel for")
220 for (
int iel = first; iel < last; iel++)
235 const size_t nw = dt_list.size();
237 auto& mw_r_dr = mw_mem.
mw_r_dr;
239 size_t count_targets = 0;
241 count_targets += p.getTotalNum();
242 const size_t total_targets = count_targets;
249 for (
size_t iw = 0; iw < dt_list.size(); iw++)
253 for (
int i = 0; i < dt.targets(); ++i)
255 assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size);
256 assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
258 count_targets += dt.targets();
263 const size_t realtype_size =
sizeof(
RealType);
264 const size_t int_size =
sizeof(int);
265 const size_t ptr_size =
sizeof(
RealType*);
267 offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size);
268 auto source_ptrs =
reinterpret_cast<RealType**
>(offload_input.data());
269 auto target_positions =
reinterpret_cast<RealType*
>(offload_input.data() + ptr_size * nw);
271 reinterpret_cast<int*
>(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size);
274 for (
size_t iw = 0; iw < nw; iw++)
279 assert(dt.targets() ==
pset.getTotalNum());
283 source_ptrs[iw] =
const_cast<RealType*
>(RSoA_OMPTarget.getDevicePtr());
285 for (
size_t iat = 0; iat <
pset.getTotalNum(); ++iat, ++count_targets)
287 walker_id_ptr[count_targets] = iw;
288 for (
size_t idim = 0; idim < D; idim++)
289 target_positions[count_targets * D + idim] =
pset.R[iat][idim];
294 const int ChunkSizePerTeam = 512;
295 const size_t num_teams = (
num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
297 auto* r_dr_ptr = mw_r_dr.data();
298 auto* input_ptr = offload_input.data();
303 PRAGMA_OFFLOAD(
"omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \ 304 map(always, to: input_ptr[:offload_input.size()]) \ 305 depend(out:r_dr_ptr[:mw_r_dr.size()])")
306 for (
int iat = 0; iat < total_targets; ++iat)
307 for (
int team_id = 0; team_id < num_teams; team_id++)
309 auto* target_pos_ptr =
reinterpret_cast<RealType*
>(input_ptr + ptr_size * nw);
310 const int walker_id =
311 reinterpret_cast<int*
>(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat];
312 auto* source_pos_ptr =
reinterpret_cast<RealType**
>(input_ptr)[walker_id];
313 auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1);
314 auto* dr_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1) + num_padded;
316 const int first = ChunkSizePerTeam * team_id;
317 const int last =
omptarget::min(first + ChunkSizePerTeam, num_sources_local);
320 for (
int idim = 0; idim < D; idim++)
321 pos[idim] = target_pos_ptr[iat * D + idim];
323 PRAGMA_OFFLOAD(
"omp parallel for")
324 for (
int iel = first; iel < last; iel++)
332 "omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
336 PRAGMA_OFFLOAD(
"omp taskwait")
342 const std::vector<bool>& recompute)
const override 365 for (
int idim = 0; idim < D; ++idim)
371 RealType min_dist = std::numeric_limits<RealType>::max();
void resize(size_type n, Type_t val=Type_t())
Resize the container.
static void associateResource(const RefVectorWithLeader< DistanceTable > &dt_list)
QMCTraits::RealType RealType
const size_t num_targets_
NewTimer & evaluate_timer_
timer for evaluate()
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
NewTimer & offload_timer_
timer for offload portion
void takebackResource(ResourceHandle< RS > &res_handle)
helper functions for EinsplineSetBuilder
void evaluate(ParticleSet &P) override
evaluate the full table
multi walker shared memory buffer
DTABMultiWalkerMem(const DTABMultiWalkerMem &)
virtual const PosVectorSoa & getAllParticlePos() const =0
all particle position accessor
ResourceHandle manages the temporary resource referenced from a collection.
~SoaDistanceTableABOMPTarget()
OffloadPinnedVector< T > mw_r_dr
accelerator output array for multiple walkers, [num_targets_][1+D][num_padded] (distances, displacements)
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
return a shared resource to a collection
void createResource(ResourceCollection &collection) const override
initialize a shared resource and hand it to a collection
Timer accumulates time and call counts.
OffloadPinnedVector< RealType > r_dr_memorypool_
accelerator output buffer for r and dr
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< DistanceTable > &dt_list) const override
acquire a shared resource from a collection
void update(IndexType iat) override
update the stripe for iat-th particle
const std::string name_
name of the table
Specialized particle class for atomistic simulations.
const T * getMultiWalkerDataPtr() const override
return multi-walker full (all pairs) distance table data pointer
int get_first_neighbor(IndexType iat, RealType &r, PosType &dr, bool newpos) const override
AB type of DistanceTable containing storage.
size_t targets() const
returns the number of centers
Introduced to handle virtual moves and ratio computations, e.g.
CASTTYPE & getCastedElement(size_t i) const
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
T * data()
return the base
std::unique_ptr< Resource > makeClone() const override
const DynamicCoordinates & getCoordinates() const
CASTTYPE & getCastedLeader() const
whether full table needs to be ready at anytime or not during PbyP Optimization can be implemented du...
size_t getPerTargetPctlStrideSize() const override
return stride of per target pctl data. full table data = stride * num of target particles ...
QMCTraits::IndexType IndexType
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
A derived class from DistanceTableData, specialized for AB using a transposed form.
ResourceHandle< DTABMultiWalkerMem > mw_mem_handle_
OffloadPinnedVector< T > target_pos
accelerator input array for a list of target particle positions, num_targets_ x D ...
void mw_recompute(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list, const std::vector< bool > &recompute) const override
recompute multi walker internal data, recompute
SoaDistanceTableABOMPTarget()=delete
SoaDistanceTableABOMPTarget(const ParticleSet &source, ParticleSet &target)
handle math function mapping inside OpenMP offload regions.
ResourceHandle< RS > lendResource()
std::vector< DisplRow > displacements_
displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] - r_A1[i] Note: Derived classes de...
OffloadPinnedVector< char > offload_input
accelerator input buffer for multiple data set
void resize(size_type n)
resize myData
const ParticleSet & origin_
void mw_evaluate(const RefVectorWithLeader< DistanceTable > &dt_list, const RefVectorWithLeader< ParticleSet > &p_list) const override
NewTimer & move_timer_
timer for move()
NewTimer & update_timer_
timer for update()
void move(const ParticleSet &P, const PosType &rnew, const IndexType iat, bool prepare_old) override
evaluate the temporary pair relations
std::vector< DistRow > distances_
distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]| Note: Derived classes decide ...
DTModes modes_
operation modes defined by DTModes
skip data transfer back to host after mw_evaluate of the full distance table.
const size_t num_sources_