32 StorePerParticle(false),
37 app_log() <<
" Setting StructFact::SuperCellEnum=SUPERCELL_SLAB " << std::endl;
71 for (
int iw = 0; iw < sk_list.size(); iw++)
75 const size_t nw = p_list.size();
76 const size_t num_species = p_leader.groups();
77 const auto& kpts_cart = sk_leader.k_lists_.get_kpts_cart_soa();
78 const size_t nk = sk_leader.k_lists_.numk;
79 const size_t nk_padded = kpts_cart.capacity();
83 const size_t np_padded = p_leader.getCoordinates().getAllParticlePos().capacity();
85 constexpr
size_t cplx_stride = 2;
86 mw_mem.
nw_rhok.resize(nw * num_species * cplx_stride, nk_padded);
89 constexpr
size_t kblock_size = 512;
90 const size_t num_kblocks = (nk + kblock_size) / kblock_size;
92 auto* mw_rsoa_ptr = mw_rsoa_dev_ptrs.data();
93 auto* kpts_cart_ptr = kpts_cart.data();
94 auto* mw_rhok_ptr = mw_mem.
nw_rhok.data();
95 auto* group_offsets = p_leader.get_group_offsets().data();
97 PRAGMA_OFFLOAD(
"omp target teams distribute collapse(2) map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])")
98 for (
int iw = 0; iw < nw; iw++)
99 for (
int ib = 0; ib < num_kblocks; ib++)
101 const size_t offset = ib * kblock_size;
102 const size_t this_block_size =
omptarget::min(kblock_size, nk - offset);
103 const auto* rsoa_ptr = mw_rsoa_ptr[iw];
105 PRAGMA_OFFLOAD(
"omp parallel for")
106 for (
int ik = 0; ik < this_block_size; ik++)
107 for (
int is = 0; is < num_species; is++)
111 for (
int ip = group_offsets[is]; ip < group_offsets[is + 1]; ip++)
114 for (
int idim = 0; idim <
DIM; idim++)
115 phase += kpts_cart_ptr[ik + offset + nk_padded * idim] * rsoa_ptr[ip + idim * np_padded];
121 mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + offset + ik] =
rhok_r;
122 mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + nk_padded + offset + ik] =
rhok_i;
126 for (
int iw = 0; iw < nw; iw++)
127 for (
int is = 0; is < num_species; is++)
129 std::copy_n(mw_mem.
nw_rhok[(iw * num_species + is) * cplx_stride], nk, sk_list[iw].rhok_r[is]);
130 std::copy_n(mw_mem.
nw_rhok[(iw * num_species + is) * cplx_stride + 1], nk, sk_list[iw].rhok_i[is]);
141 const size_t num_species = P.
groups();
143 resize(nk, num_species, num_ptcls);
150 for (
int i = 0; i < num_ptcls; ++i)
152 const auto& pos = P.
R[i];
153 auto* restrict eikr_r_ptr =
eikr_r[i];
154 auto* restrict eikr_i_ptr =
eikr_i[i];
158 for (
int ki = 0; ki < nk; ki++)
161 rhok_r_ptr[ki] += eikr_r_ptr[ki];
162 rhok_i_ptr[ki] += eikr_i_ptr[ki];
169 for (
int i = 0; i < num_ptcls; ++i)
171 const auto& pos = P.
R[i];
174 #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) 176 for (
int ki = 0; ki < nk; ki++)
185 constexpr
size_t kblock_size = 512;
186 const size_t num_kblocks = (nk + kblock_size) / kblock_size;
187 RealType phiV[kblock_size], eikr_r_temp[kblock_size], eikr_i_temp[kblock_size];
189 for (
int ib = 0; ib < num_kblocks; ib++)
191 const size_t offset = ib * kblock_size;
192 const size_t this_block_size =
std::min(kblock_size, nk - offset);
193 for (
int ki = 0; ki < this_block_size; ki++)
195 eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp);
196 for (
int ki = 0; ki < this_block_size; ki++)
198 rhok_r_ptr[ki + offset] += eikr_r_temp[ki];
199 rhok_i_ptr[ki + offset] += eikr_i_temp[ki];
multi walker shared memory buffer
A class that defines a supercell in D-dimensional Euclidean space.
std::vector< PosType > kpts_cart
K-vector in Cartesian coordinates.
Matrix< RealType > rhok_r
2-D container for the phase
helper functions for EinsplineSetBuilder
Matrix< RealType > eikr_i
const KContainer & k_lists_
K-Vector List.
size_t getTotalNum() const
Matrix< RealType > rhok_i
void resize(size_type n, size_type m)
Resize the container.
const auto & getMultiWalkerRSoADevicePtrs() const
static void mw_updateAllPart(const RefVectorWithLeader< StructFact > &sk_list, const RefVectorWithLeader< ParticleSet > &p_list, SKMultiWalkerMem &mw_mem)
Update RhoK for all particles of multiple walkers.
int groups() const
return the number of groups
StructFact(const ParticleLayout &lattice, const KContainer &k_lists)
Constructor - copy ParticleSet and init.
CrystalLattice< OHMMS_PRECISION, OHMMS_DIM > lattice
Specialized particle class for atomistic simulations.
void computeRhok(const ParticleSet &P)
Compute all rhok elements from the start.
Introduced to handle virtual moves and ratio computations, e.g.
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
bool StorePerParticle
Whether intermediate data is stored per particle.
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Matrix< RealType, OffloadPinnedAllocator< RealType > > nw_rhok
dist displ for temporary and old pairs
int SuperCellEnum
enumeration for the methods to handle mixed bconds
Matrix< RealType > eikr_r
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
Define a LRHandler with two template parameters.
handle math function mapping inside OpenMP offload regions.
void updateAllPart(const ParticleSet &P)
Update Rhok if all particles moved.
NewTimer & update_all_timer_
timer for updateAllPart
void turnOnStorePerParticle(const ParticleSet &P)
switch on the storage per particle if StorePerParticle was false, this function allocates memory and ...
void eval_e2iphi(int n, const T *restrict phi, T *restrict phase_r, T *restrict phase_i)
void resize(int nkpts, int num_species, int num_ptcls)
resize the internal data
int numk
number of k-points
static bool isQuasi2D()
return true if quasi 2D is selected
int getGroupID(int iat) const
return the group id of a given particle in the particle set.