QMCPACK
ParticleBConds3DSoa.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
8 // Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
9 //
10 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
11 //////////////////////////////////////////////////////////////////////////////////////
12 // -*- C++ -*-
13 #ifndef QMCPLUSPLUS_PARTICLE_BCONDS_3D_SOA_H
14 #define QMCPLUSPLUS_PARTICLE_BCONDS_3D_SOA_H
15 
16 #include <config.h>
17 #include <algorithm>
18 #include "CrystalLattice.h"
19 #include "ParticleBConds.h"
20 
21 namespace qmcplusplus
22 {
23 /** specialization for an open 3D
24 */
25 template<class T>
27 {
28  /** constructor: doing nothing */
29  inline DTD_BConds(const CrystalLattice<T, 3>& lat) {}
30 
31  template<typename PT, typename RSOA, typename DISPLSOA>
32  void computeDistances(const PT& pos,
33  const RSOA& R0,
34  T* restrict temp_r,
35  DISPLSOA& temp_dr,
36  int first,
37  int last,
38  int flip_ind = 0) const
39  {
40  const T x0 = pos[0];
41  const T y0 = pos[1];
42  const T z0 = pos[2];
43  const T* restrict px = R0.data(0);
44  const T* restrict py = R0.data(1);
45  const T* restrict pz = R0.data(2);
46  T* restrict dx = temp_dr.data(0);
47  T* restrict dy = temp_dr.data(1);
48  T* restrict dz = temp_dr.data(2);
49 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
50  for (int iat = first; iat < last; ++iat)
51  {
52  dx[iat] = px[iat] - x0;
53  dy[iat] = py[iat] - y0;
54  dz[iat] = pz[iat] - z0;
55  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
56  }
57  }
58 
59  void computeDistancesOffload(const T pos[3],
60  const T* restrict R0,
61  int r0_stride,
62  T* restrict temp_r,
63  T* restrict temp_dr,
64  int padded_size,
65  int iat,
66  int flip_ind = 0) const
67  {
68  const T x0 = pos[0];
69  const T y0 = pos[1];
70  const T z0 = pos[2];
71 
72  const T* restrict px = R0;
73  const T* restrict py = R0 + r0_stride;
74  const T* restrict pz = R0 + r0_stride * 2;
75 
76  T* restrict dx = temp_dr;
77  T* restrict dy = temp_dr + padded_size;
78  T* restrict dz = temp_dr + padded_size * 2;
79 
80  dx[iat] = px[iat] - x0;
81  dy[iat] = py[iat] - y0;
82  dz[iat] = pz[iat] - z0;
83  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
84  }
85 
86  T computeDist(T dx, T dy, T dz) const
87  {
88  return std::sqrt(dx * dx + dy * dy + dz * dz);
89  }
90 };
91 
92 /** specialization for a periodic 3D, orthorombic cell
93 */
94 template<class T>
95 struct DTD_BConds<T, 3, PPPO + SOA_OFFSET>
96 {
97  T Linv0, L0, Linv1, L1, Linv2, L2, r2max, dummy;
98 
99  inline DTD_BConds(const CrystalLattice<T, 3>& lat)
100  : Linv0(lat.OneOverLength[0]),
101  L0(lat.Length[0]),
102  Linv1(lat.OneOverLength[1]),
103  L1(lat.Length[1]),
104  Linv2(lat.OneOverLength[2]),
105  L2(lat.Length[2]),
106  r2max(lat.CellRadiusSq),
107  dummy(T())
108  {}
109 
110  template<typename PT, typename RSOA, typename DISPLSOA>
111  void computeDistances(const PT& pos,
112  const RSOA& R0,
113  T* restrict temp_r,
114  DISPLSOA& temp_dr,
115  int first,
116  int last,
117  int flip_ind = 0) const
118  {
119  const T x0 = pos[0];
120  const T y0 = pos[1];
121  const T z0 = pos[2];
122  const T* restrict px = R0.data(0);
123  const T* restrict py = R0.data(1);
124  const T* restrict pz = R0.data(2);
125  T* restrict dx = temp_dr.data(0);
126  T* restrict dy = temp_dr.data(1);
127  T* restrict dz = temp_dr.data(2);
128 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
129  for (int iat = first; iat < last; ++iat)
130  {
131  const T x = (px[iat] - x0) * Linv0;
132  const T y = (py[iat] - y0) * Linv1;
133  const T z = (pz[iat] - z0) * Linv2;
134  dx[iat] = L0 * (x - round(x));
135  dy[iat] = L1 * (y - round(y));
136  dz[iat] = L2 * (z - round(z));
137  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
138  }
139  }
140 
141  void computeDistancesOffload(const T pos[3],
142  const T* restrict R0,
143  int r0_stride,
144  T* restrict temp_r,
145  T* restrict temp_dr,
146  int padded_size,
147  int iat,
148  int flip_ind = 0) const
149  {
150  const T x0 = pos[0];
151  const T y0 = pos[1];
152  const T z0 = pos[2];
153 
154  const T* restrict px = R0;
155  const T* restrict py = R0 + r0_stride;
156  const T* restrict pz = R0 + r0_stride * 2;
157 
158  T* restrict dx = temp_dr;
159  T* restrict dy = temp_dr + padded_size;
160  T* restrict dz = temp_dr + padded_size * 2;
161 
162  const T x = (px[iat] - x0) * Linv0;
163  const T y = (py[iat] - y0) * Linv1;
164  const T z = (pz[iat] - z0) * Linv2;
165  dx[iat] = L0 * (x - round(x));
166  dy[iat] = L1 * (y - round(y));
167  dz[iat] = L2 * (z - round(z));
168  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
169  }
170 
171  T computeDist(T dx, T dy, T dz) const
172  {
173  T x = dx * Linv0;
174  T y = dy * Linv1;
175  T z = dz * Linv2;
176  dx = L0 * (x - round(x));
177  dy = L1 * (y - round(y));
178  dz = L2 * (z - round(z));
179  return std::sqrt(dx * dx + dy * dy + dz * dz);
180  }
181 };
182 
183 /** specialization for a periodic 3D general cell with wigner-seitz==simulation cell
184  *
185  * Skip image cells.
186  */
187 template<class T>
188 struct DTD_BConds<T, 3, PPPS + SOA_OFFSET>
189 {
190  T r00, r10, r20, r01, r11, r21, r02, r12, r22;
191  T g00, g10, g20, g01, g11, g21, g02, g12, g22;
193  : r00(lat.R(0)),
194  r10(lat.R(3)),
195  r20(lat.R(6)),
196  r01(lat.R(1)),
197  r11(lat.R(4)),
198  r21(lat.R(7)),
199  r02(lat.R(2)),
200  r12(lat.R(5)),
201  r22(lat.R(8)),
202  g00(lat.G(0)),
203  g10(lat.G(3)),
204  g20(lat.G(6)),
205  g01(lat.G(1)),
206  g11(lat.G(4)),
207  g21(lat.G(7)),
208  g02(lat.G(2)),
209  g12(lat.G(5)),
210  g22(lat.G(8))
211  {}
212 
213  template<typename PT, typename RSOA, typename DISPLSOA>
214  void computeDistances(const PT& pos,
215  const RSOA& R0,
216  T* restrict temp_r,
217  DISPLSOA& temp_dr,
218  int first,
219  int last,
220  int flip_ind = 0) const
221  {
222  const T x0 = pos[0];
223  const T y0 = pos[1];
224  const T z0 = pos[2];
225 
226  const T* restrict px = R0.data(0);
227  const T* restrict py = R0.data(1);
228  const T* restrict pz = R0.data(2);
229 
230  T* restrict dx = temp_dr.data(0);
231  T* restrict dy = temp_dr.data(1);
232  T* restrict dz = temp_dr.data(2);
233 
234 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
235  for (int iat = first; iat < last; ++iat)
236  {
237  T displ_0 = px[iat] - x0;
238  T displ_1 = py[iat] - y0;
239  T displ_2 = pz[iat] - z0;
240 
241  T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20;
242  T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21;
243  T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22;
244 
245  //put them in the box
246  ar_0 -= round(ar_0);
247  ar_1 -= round(ar_1);
248  ar_2 -= round(ar_2);
249 
250  //unit2cart
251  dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
252  dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
253  dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
254 
255  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
256  }
257  }
258 
259  void computeDistancesOffload(const T pos[3],
260  const T* restrict R0,
261  int r0_stride,
262  T* restrict temp_r,
263  T* restrict temp_dr,
264  int padded_size,
265  int iat,
266  int flip_ind = 0) const
267  {
268  const T x0 = pos[0];
269  const T y0 = pos[1];
270  const T z0 = pos[2];
271 
272  const T* restrict px = R0;
273  const T* restrict py = R0 + r0_stride;
274  const T* restrict pz = R0 + r0_stride * 2;
275 
276  T* restrict dx = temp_dr;
277  T* restrict dy = temp_dr + padded_size;
278  T* restrict dz = temp_dr + padded_size * 2;
279 
280  T displ_0 = px[iat] - x0;
281  T displ_1 = py[iat] - y0;
282  T displ_2 = pz[iat] - z0;
283 
284  T ar_0 = displ_0 * g00 + displ_1 * g10 + displ_2 * g20;
285  T ar_1 = displ_0 * g01 + displ_1 * g11 + displ_2 * g21;
286  T ar_2 = displ_0 * g02 + displ_1 * g12 + displ_2 * g22;
287 
288  //put them in the box
289  ar_0 -= round(ar_0);
290  ar_1 -= round(ar_1);
291  ar_2 -= round(ar_2);
292 
293  //unit2cart
294  dx[iat] = ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
295  dy[iat] = ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
296  dz[iat] = ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
297 
298  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
299  }
300 
301  T computeDist(T dx, T dy, T dz) const
302  {
303  T ar_0 = dx * g00 + dy * g10 + dz * g20;
304  T ar_1 = dx * g01 + dy * g11 + dz * g21;
305  T ar_2 = dx * g02 + dy * g12 + dz * g22;
306 
307  //put them in the box
308  ar_0 -= round(ar_0);
309  ar_1 -= round(ar_1);
310  ar_2 -= round(ar_2);
311 
312  //unit2cart
313  dx = ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
314  dy = ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
315  dz = ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
316 
317  return std::sqrt(dx * dx + dy * dy + dz * dz);
318  }
319 };
320 
321 
322 /** specialization for a periodic 3D general cell
323  *
324  * Wigner-Seitz cell radius > simulation cell radius
325  * Need to check image cells
326 */
327 template<class T>
328 struct DTD_BConds<T, 3, PPPG + SOA_OFFSET>
329 {
330  T g00, g10, g20, g01, g11, g21, g02, g12, g22;
331  T r00, r10, r20, r01, r11, r21, r02, r12, r22;
333 
335  {
337  rb[0] = lat.a(0);
338  rb[1] = lat.a(1);
339  rb[2] = lat.a(2);
340  find_reduced_basis(rb);
341 
342  r00 = rb[0][0];
343  r10 = rb[1][0];
344  r20 = rb[2][0];
345  r01 = rb[0][1];
346  r11 = rb[1][1];
347  r21 = rb[2][1];
348  r02 = rb[0][2];
349  r12 = rb[1][2];
350  r22 = rb[2][2];
351 
352  Tensor<T, 3> rbt;
353  for (int i = 0; i < 3; ++i)
354  for (int j = 0; j < 3; ++j)
355  rbt(i, j) = rb[i][j];
356  Tensor<T, 3> g = inverse(rbt);
357 
358  g00 = g(0);
359  g10 = g(3);
360  g20 = g(6);
361  g01 = g(1);
362  g11 = g(4);
363  g21 = g(7);
364  g02 = g(2);
365  g12 = g(5);
366  g22 = g(8);
367 
368  constexpr T minusone(-1);
369  constexpr T zero(0);
370 
371  for (int idim = 0; idim < 3; idim++)
372  {
373  auto& corners_dim = corners[idim];
374 
375  corners_dim[0] = zero;
376  corners_dim[1] = minusone * (rb[0][idim]);
377  corners_dim[2] = minusone * (rb[1][idim]);
378  corners_dim[3] = minusone * (rb[2][idim]);
379  corners_dim[4] = minusone * (rb[0][idim] + rb[1][idim]);
380  corners_dim[5] = minusone * (rb[0][idim] + rb[2][idim]);
381  corners_dim[6] = minusone * (rb[1][idim] + rb[2][idim]);
382  corners_dim[7] = minusone * (rb[0][idim] + rb[1][idim] + rb[2][idim]);
383  }
384  }
385 
386  template<typename PT, typename RSOA, typename DISPLSOA>
387  void computeDistances(const PT& pos,
388  const RSOA& R0,
389  T* restrict temp_r,
390  DISPLSOA& temp_dr,
391  int first,
392  int last,
393  int flip_ind = 0) const
394  {
395  const T x0 = pos[0];
396  const T y0 = pos[1];
397  const T z0 = pos[2];
398 
399  const T* restrict px = R0.data(0);
400  const T* restrict py = R0.data(1);
401  const T* restrict pz = R0.data(2);
402 
403  T* restrict dx = temp_dr.data(0);
404  T* restrict dy = temp_dr.data(1);
405  T* restrict dz = temp_dr.data(2);
406 
407  const auto& cellx = corners[0];
408  const auto& celly = corners[1];
409  const auto& cellz = corners[2];
410 
411  constexpr T minusone(-1);
412  constexpr T one(1);
413 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
414  for (int iat = first; iat < last; ++iat)
415  {
416  const T flip = iat < flip_ind ? one : minusone;
417  const T displ_0 = (px[iat] - x0) * flip;
418  const T displ_1 = (py[iat] - y0) * flip;
419  const T displ_2 = (pz[iat] - z0) * flip;
420 
421  const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20);
422  const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21);
423  const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22);
424 
425  const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
426  const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
427  const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
428 
429  T rmin = delx * delx + dely * dely + delz * delz;
430  int ic = 0;
431 #pragma unroll(7)
432  for (int c = 1; c < 8; ++c)
433  {
434  const T x = delx + cellx[c];
435  const T y = dely + celly[c];
436  const T z = delz + cellz[c];
437  const T r2 = x * x + y * y + z * z;
438  ic = (r2 < rmin) ? c : ic;
439  rmin = (r2 < rmin) ? r2 : rmin;
440  }
441 
442  temp_r[iat] = std::sqrt(rmin);
443  dx[iat] = flip * (delx + cellx[ic]);
444  dy[iat] = flip * (dely + celly[ic]);
445  dz[iat] = flip * (delz + cellz[ic]);
446  }
447  }
448 
449  void computeDistancesOffload(const T pos[3],
450  const T* restrict R0,
451  int r0_stride,
452  T* restrict temp_r,
453  T* restrict temp_dr,
454  int padded_size,
455  int iat,
456  int flip_ind = 0) const
457  {
458  const T x0 = pos[0];
459  const T y0 = pos[1];
460  const T z0 = pos[2];
461 
462  const T* restrict px = R0;
463  const T* restrict py = R0 + r0_stride;
464  const T* restrict pz = R0 + r0_stride * 2;
465 
466  T* restrict dx = temp_dr;
467  T* restrict dy = temp_dr + padded_size;
468  T* restrict dz = temp_dr + padded_size * 2;
469 
470  const auto& cellx = corners[0];
471  const auto& celly = corners[1];
472  const auto& cellz = corners[2];
473 
474  constexpr T minusone(-1);
475  constexpr T one(1);
476 
477  const T flip = iat < flip_ind ? one : minusone;
478  const T displ_0 = (px[iat] - x0) * flip;
479  const T displ_1 = (py[iat] - y0) * flip;
480  const T displ_2 = (pz[iat] - z0) * flip;
481 
482  const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10 + displ_2 * g20);
483  const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11 + displ_2 * g21);
484  const T ar_2 = -std::floor(displ_0 * g02 + displ_1 * g12 + displ_2 * g22);
485 
486  const T delx = displ_0 + ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
487  const T dely = displ_1 + ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
488  const T delz = displ_2 + ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
489 
490  T rmin = delx * delx + dely * dely + delz * delz;
491  int ic = 0;
492 #pragma unroll(7)
493  for (int c = 1; c < 8; ++c)
494  {
495  const T x = delx + cellx[c];
496  const T y = dely + celly[c];
497  const T z = delz + cellz[c];
498  const T r2 = x * x + y * y + z * z;
499  ic = (r2 < rmin) ? c : ic;
500  rmin = (r2 < rmin) ? r2 : rmin;
501  }
502 
503  temp_r[iat] = std::sqrt(rmin);
504  dx[iat] = flip * (delx + cellx[ic]);
505  dy[iat] = flip * (dely + celly[ic]);
506  dz[iat] = flip * (delz + cellz[ic]);
507  }
508 
509  T computeDist(T dx, T dy, T dz) const
510  {
511  const auto& cellx = corners[0];
512  const auto& celly = corners[1];
513  const auto& cellz = corners[2];
514 
515  const T ar_0 = -std::floor(dx * g00 + dy * g10 + dz * g20);
516  const T ar_1 = -std::floor(dx * g01 + dy * g11 + dz * g21);
517  const T ar_2 = -std::floor(dx * g02 + dy * g12 + dz * g22);
518 
519  const T delx = dx + ar_0 * r00 + ar_1 * r10 + ar_2 * r20;
520  const T dely = dy + ar_0 * r01 + ar_1 * r11 + ar_2 * r21;
521  const T delz = dz + ar_0 * r02 + ar_1 * r12 + ar_2 * r22;
522 
523  T rmin = delx * delx + dely * dely + delz * delz;
524 #pragma unroll(7)
525  for (int c = 1; c < 8; ++c)
526  {
527  const T x = delx + cellx[c];
528  const T y = dely + celly[c];
529  const T z = delz + cellz[c];
530  const T r2 = x * x + y * y + z * z;
531  rmin = (r2 < rmin) ? r2 : rmin;
532  }
533 
534  return std::sqrt(rmin);
535  }
536 };
537 
538 
539 /** specialization for a slab, general cell
540 */
541 template<class T>
542 struct DTD_BConds<T, 3, PPNG + SOA_OFFSET>
543 {
544  T g00, g10, g01, g11;
545  T r00, r10, r01, r11;
548 
550  {
551  rb[0] = lat.a(0);
552  rb[1] = lat.a(1);
553  rb[2] = lat.a(2); //rb[2]=0.0;
554  r00 = rb[0][0];
555  r10 = rb[1][0];
556  r01 = rb[0][1];
557  r11 = rb[1][1];
558  g00 = lat.G(0);
559  g10 = lat.G(3);
560  g01 = lat.G(1);
561  g11 = lat.G(4);
562 
563  T minusone = -1.0;
564  for (int idim = 0; idim < 2; idim++)
565  {
566  auto& corners_dim = corners[idim];
567 
568  corners_dim[0] = T(0);
569  corners_dim[1] = minusone * (rb[0][idim]);
570  corners_dim[2] = minusone * (rb[1][idim]);
571  corners_dim[3] = minusone * (rb[0][idim] + rb[1][idim]);
572  }
573  }
574 
575  template<typename PT, typename RSOA, typename DISPLSOA>
576  void computeDistances(const PT& pos,
577  const RSOA& R0,
578  T* restrict temp_r,
579  DISPLSOA& temp_dr,
580  int first,
581  int last,
582  int flip_ind = 0) const
583  {
584  const T x0 = pos[0];
585  const T y0 = pos[1];
586  const T z0 = pos[2];
587 
588  const T* restrict px = R0.data(0);
589  const T* restrict py = R0.data(1);
590  const T* restrict pz = R0.data(2);
591 
592  T* restrict dx = temp_dr.data(0);
593  T* restrict dy = temp_dr.data(1);
594  T* restrict dz = temp_dr.data(2);
595 
596  const auto& cellx = corners[0];
597  const auto& celly = corners[1];
598 
599  constexpr T minusone(-1);
600  constexpr T one(1);
601 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
602  for (int iat = first; iat < last; ++iat)
603  {
604  const T flip = iat < flip_ind ? one : minusone;
605  const T displ_0 = (px[iat] - x0) * flip;
606  const T displ_1 = (py[iat] - y0) * flip;
607  const T delz = pz[iat] - z0;
608 
609  const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10);
610  const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11);
611 
612  const T delx = displ_0 + ar_0 * r00 + ar_1 * r10;
613  const T dely = displ_1 + ar_0 * r01 + ar_1 * r11;
614 
615  T rmin = delx * delx + dely * dely;
616  int ic = 0;
617 #pragma unroll(3)
618  for (int c = 1; c < 4; ++c)
619  {
620  const T x = delx + cellx[c];
621  const T y = dely + celly[c];
622  const T r2 = x * x + y * y;
623  ic = (r2 < rmin) ? c : ic;
624  rmin = (r2 < rmin) ? r2 : rmin;
625  }
626 
627  temp_r[iat] = std::sqrt(rmin + delz * delz);
628  dx[iat] = flip * (delx + cellx[ic]);
629  dy[iat] = flip * (dely + celly[ic]);
630  dz[iat] = delz;
631  }
632  }
633 
634  void computeDistancesOffload(const T pos[3],
635  const T* restrict R0,
636  int r0_stride,
637  T* restrict temp_r,
638  T* restrict temp_dr,
639  int padded_size,
640  int iat,
641  int flip_ind = 0) const
642  {
643  const T x0 = pos[0];
644  const T y0 = pos[1];
645  const T z0 = pos[2];
646 
647  const T* restrict px = R0;
648  const T* restrict py = R0 + r0_stride;
649  const T* restrict pz = R0 + r0_stride * 2;
650 
651  T* restrict dx = temp_dr;
652  T* restrict dy = temp_dr + padded_size;
653  T* restrict dz = temp_dr + padded_size * 2;
654 
655  const auto& cellx = corners[0];
656  const auto& celly = corners[1];
657 
658  constexpr T minusone(-1);
659  constexpr T one(1);
660 
661  const T flip = iat < flip_ind ? one : minusone;
662  const T displ_0 = (px[iat] - x0) * flip;
663  const T displ_1 = (py[iat] - y0) * flip;
664  const T delz = pz[iat] - z0;
665 
666  const T ar_0 = -std::floor(displ_0 * g00 + displ_1 * g10);
667  const T ar_1 = -std::floor(displ_0 * g01 + displ_1 * g11);
668 
669  const T delx = displ_0 + ar_0 * r00 + ar_1 * r10;
670  const T dely = displ_1 + ar_0 * r01 + ar_1 * r11;
671 
672  T rmin = delx * delx + dely * dely;
673  int ic = 0;
674 #pragma unroll(3)
675  for (int c = 1; c < 4; ++c)
676  {
677  const T x = delx + cellx[c];
678  const T y = dely + celly[c];
679  const T r2 = x * x + y * y;
680  ic = (r2 < rmin) ? c : ic;
681  rmin = (r2 < rmin) ? r2 : rmin;
682  }
683 
684  temp_r[iat] = std::sqrt(rmin + delz * delz);
685  dx[iat] = flip * (delx + cellx[ic]);
686  dy[iat] = flip * (dely + celly[ic]);
687  dz[iat] = delz;
688  }
689 
690  T computeDist(T dx, T dy, T dz) const
691  {
692  const auto& cellx = corners[0];
693  const auto& celly = corners[1];
694 
695  const T ar_0 = -std::floor(dx * g00 + dy * g10);
696  const T ar_1 = -std::floor(dx * g01 + dy * g11);
697 
698  const T delx = dx + ar_0 * r00 + ar_1 * r10;
699  const T dely = dy + ar_0 * r01 + ar_1 * r11;
700 
701  T rmin = delx * delx + dely * dely;
702 #pragma unroll(3)
703  for (int c = 1; c < 4; ++c)
704  {
705  const T x = delx + cellx[c];
706  const T y = dely + celly[c];
707  const T r2 = x * x + y * y;
708  rmin = (r2 < rmin) ? r2 : rmin;
709  }
710 
711  return std::sqrt(rmin + dz * dz);
712  }
713 };
714 
715 /** specialization for a slab, orthorombic cell
716 */
717 template<class T>
718 struct DTD_BConds<T, 3, PPNO + SOA_OFFSET>
719 {
720  T Linv0, L0, Linv1, L1;
721 
722  inline DTD_BConds(const CrystalLattice<T, 3>& lat)
723  : Linv0(lat.OneOverLength[0]), L0(lat.Length[0]), Linv1(lat.OneOverLength[1]), L1(lat.Length[1])
724  {}
725 
726  template<typename PT, typename RSOA, typename DISPLSOA>
727  void computeDistances(const PT& pos,
728  const RSOA& R0,
729  T* restrict temp_r,
730  DISPLSOA& temp_dr,
731  int first,
732  int last,
733  int flip_ind = 0) const
734  {
735  const T x0 = pos[0];
736  const T y0 = pos[1];
737  const T z0 = pos[2];
738  const T* restrict px = R0.data(0);
739  const T* restrict py = R0.data(1);
740  const T* restrict pz = R0.data(2);
741  T* restrict dx = temp_dr.data(0);
742  T* restrict dy = temp_dr.data(1);
743  T* restrict dz = temp_dr.data(2);
744 
745 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
746  for (int iat = first; iat < last; ++iat)
747  {
748  T x = (px[iat] - x0) * Linv0;
749  dx[iat] = L0 * (x - round(x));
750  T y = (py[iat] - y0) * Linv1;
751  dy[iat] = L1 * (y - round(y));
752  dz[iat] = pz[iat] - z0;
753  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
754  }
755  }
756 
757  void computeDistancesOffload(const T pos[3],
758  const T* restrict R0,
759  int r0_stride,
760  T* restrict temp_r,
761  T* restrict temp_dr,
762  int padded_size,
763  int iat,
764  int flip_ind = 0) const
765  {
766  const T x0 = pos[0];
767  const T y0 = pos[1];
768  const T z0 = pos[2];
769 
770  const T* restrict px = R0;
771  const T* restrict py = R0 + r0_stride;
772  const T* restrict pz = R0 + r0_stride * 2;
773 
774  T* restrict dx = temp_dr;
775  T* restrict dy = temp_dr + padded_size;
776  T* restrict dz = temp_dr + padded_size * 2;
777 
778  T x = (px[iat] - x0) * Linv0;
779  dx[iat] = L0 * (x - round(x));
780  T y = (py[iat] - y0) * Linv1;
781  dy[iat] = L1 * (y - round(y));
782  dz[iat] = pz[iat] - z0;
783  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
784  }
785 
786  T computeDist(T dx, T dy, T dz) const
787  {
788  T x = dx * Linv0;
789  T y = dy * Linv1;
790  dx = L0 * (x - round(x));
791  dy = L1 * (y - round(y));
792  return std::sqrt(dx * dx + dy * dy + dz * dz);
793  }
794 };
795 
796 /** specialization for a slab, general cell
797 */
798 template<class T>
799 struct DTD_BConds<T, 3, PPNS + SOA_OFFSET>
800 {
801  T r00, r10, r01, r11;
802  T g00, g10, g01, g11;
804  : r00(lat.R(0)),
805  r10(lat.R(3)),
806  r01(lat.R(1)),
807  r11(lat.R(4)),
808  g00(lat.G(0)),
809  g10(lat.G(3)),
810  g01(lat.G(1)),
811  g11(lat.G(4))
812  {}
813 
814  template<typename PT, typename RSOA, typename DISPLSOA>
815  void computeDistances(const PT& pos,
816  const RSOA& R0,
817  T* restrict temp_r,
818  DISPLSOA& temp_dr,
819  int first,
820  int last,
821  int flip_ind = 0) const
822  {
823  const T x0 = pos[0];
824  const T y0 = pos[1];
825  const T z0 = pos[2];
826 
827  const T* restrict px = R0.data(0);
828  const T* restrict py = R0.data(1);
829  const T* restrict pz = R0.data(2);
830 
831  T* restrict dx = temp_dr.data(0);
832  T* restrict dy = temp_dr.data(1);
833  T* restrict dz = temp_dr.data(2);
834 
835 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
836  for (int iat = first; iat < last; ++iat)
837  {
838  T displ_0 = px[iat] - x0;
839  T displ_1 = py[iat] - y0;
840 
841  T ar_0 = displ_0 * g00 + displ_1 * g10;
842  T ar_1 = displ_0 * g01 + displ_1 * g11;
843 
844  //put them in the box
845  ar_0 -= round(ar_0);
846  ar_1 -= round(ar_1);
847 
848  //unit2cart
849  dx[iat] = ar_0 * r00 + ar_1 * r10;
850  dy[iat] = ar_0 * r01 + ar_1 * r11;
851  dz[iat] = pz[iat] - z0;
852 
853  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
854  }
855  }
856 
857  void computeDistancesOffload(const T pos[3],
858  const T* restrict R0,
859  int r0_stride,
860  T* restrict temp_r,
861  T* restrict temp_dr,
862  int padded_size,
863  int iat,
864  int flip_ind = 0) const
865  {
866  const T x0 = pos[0];
867  const T y0 = pos[1];
868  const T z0 = pos[2];
869 
870  const T* restrict px = R0;
871  const T* restrict py = R0 + r0_stride;
872  const T* restrict pz = R0 + r0_stride * 2;
873 
874  T* restrict dx = temp_dr;
875  T* restrict dy = temp_dr + padded_size;
876  T* restrict dz = temp_dr + padded_size * 2;
877 
878  T displ_0 = px[iat] - x0;
879  T displ_1 = py[iat] - y0;
880 
881  T ar_0 = displ_0 * g00 + displ_1 * g10;
882  T ar_1 = displ_0 * g01 + displ_1 * g11;
883 
884  //put them in the box
885  ar_0 -= round(ar_0);
886  ar_1 -= round(ar_1);
887 
888  //unit2cart
889  dx[iat] = ar_0 * r00 + ar_1 * r10;
890  dy[iat] = ar_0 * r01 + ar_1 * r11;
891  dz[iat] = pz[iat] - z0;
892 
893  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
894  }
895 
896  T computeDist(T dx, T dy, T dz) const
897  {
898  T ar_0 = dx * g00 + dy * g10;
899  T ar_1 = dx * g01 + dy * g11;
900 
901  //put them in the box
902  ar_0 -= round(ar_0);
903  ar_1 -= round(ar_1);
904 
905  //unit2cart
906  dx = ar_0 * r00 + ar_1 * r10;
907  dy = ar_0 * r01 + ar_1 * r11;
908 
909  return std::sqrt(dx * dx + dy * dy + dz * dz);
910  }
911 };
912 
913 
914 /** specialization for a wire
915 */
916 template<class T>
918 {
919  T Linv0, L0;
920 
921  inline DTD_BConds(const CrystalLattice<T, 3>& lat) : Linv0(lat.OneOverLength[0]), L0(lat.Length[0]) {}
922  template<typename PT, typename RSOA, typename DISPLSOA>
923  void computeDistances(const PT& pos,
924  const RSOA& R0,
925  T* restrict temp_r,
926  DISPLSOA& temp_dr,
927  int first,
928  int last,
929  int flip_ind = 0) const
930  {
931  const T x0 = pos[0];
932  const T y0 = pos[1];
933  const T z0 = pos[2];
934 
935  const T* restrict px = R0.data(0);
936  const T* restrict py = R0.data(1);
937  const T* restrict pz = R0.data(2);
938 
939  T* restrict dx = temp_dr.data(0);
940  T* restrict dy = temp_dr.data(1);
941  T* restrict dz = temp_dr.data(2);
942 
943 #pragma omp simd aligned(temp_r, px, py, pz, dx, dy, dz: QMC_SIMD_ALIGNMENT)
944  for (int iat = first; iat < last; ++iat)
945  {
946  T x = (px[iat] - x0) * Linv0;
947  dx[iat] = L0 * (x - round(x));
948  dy[iat] = py[iat] - y0;
949  dz[iat] = pz[iat] - z0;
950  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
951  }
952  }
953 
954  void computeDistancesOffload(const T pos[3],
955  const T* restrict R0,
956  int r0_stride,
957  T* restrict temp_r,
958  T* restrict temp_dr,
959  int padded_size,
960  int iat,
961  int flip_ind = 0) const
962  {
963  const T x0 = pos[0];
964  const T y0 = pos[1];
965  const T z0 = pos[2];
966 
967  const T* restrict px = R0;
968  const T* restrict py = R0 + r0_stride;
969  const T* restrict pz = R0 + r0_stride * 2;
970 
971  T* restrict dx = temp_dr;
972  T* restrict dy = temp_dr + padded_size;
973  T* restrict dz = temp_dr + padded_size * 2;
974 
975  T x = (px[iat] - x0) * Linv0;
976  dx[iat] = L0 * (x - round(x));
977  dy[iat] = py[iat] - y0;
978  dz[iat] = pz[iat] - z0;
979  temp_r[iat] = std::sqrt(dx[iat] * dx[iat] + dy[iat] * dy[iat] + dz[iat] * dz[iat]);
980  }
981 
982  T computeDist(T dx, T dy, T dz) const
983  {
984  T x = dx * Linv0;
985  dx = L0 * (x - round(x));
986  return std::sqrt(dx * dx + dy * dy + dz * dz);
987  }
988 };
989 
990 /** specialization for a periodic 3D general cell
991  *
992  * Slow method and not used unless one needs to check if faster methods fail
993  */
994 template<class T>
995 struct DTD_BConds<T, 3, PPPX + SOA_OFFSET>
996 {
997  T r00, r10, r20, r01, r11, r21, r02, r12, r22;
998  T g00, g10, g20, g01, g11, g21, g02, g12, g22;
999  T r2max;
1001 
1003  : r00(lat.R(0)),
1004  r10(lat.R(3)),
1005  r20(lat.R(6)),
1006  r01(lat.R(1)),
1007  r11(lat.R(4)),
1008  r21(lat.R(7)),
1009  r02(lat.R(2)),
1010  r12(lat.R(5)),
1011  r22(lat.R(8)),
1012  g00(lat.G(0)),
1013  g10(lat.G(3)),
1014  g20(lat.G(6)),
1015  g01(lat.G(1)),
1016  g11(lat.G(4)),
1017  g21(lat.G(7)),
1018  g02(lat.G(2)),
1019  g12(lat.G(5)),
1020  g22(lat.G(8)),
1021  r2max(lat.CellRadiusSq)
1022  {
1023  const auto& cellx = nextcells[0];
1024  const auto& celly = nextcells[1];
1025  const auto& cellz = nextcells[2];
1026 
1027  int ic = 0;
1028  for (int i = -1; i <= 1; ++i)
1029  for (int j = -1; j <= 1; ++j)
1030  for (int k = -1; k <= 1; ++k)
1031  {
1032  if (i == 0 && j == 0 && j == 0)
1033  continue; //exclude zero
1034  cellx[ic] = i * r00 + j * r10 + k * r20;
1035  celly[ic] = i * r01 + j * r11 + k * r21;
1036  cellz[ic] = i * r02 + j * r12 + k * r22;
1037  ++ic;
1038  }
1039  }
1040 
1041  template<typename PT, typename RSOA, typename DISPLSOA>
1042  void computeDistances(const PT& pos,
1043  const RSOA& R0,
1044  T* restrict temp_r,
1045  DISPLSOA& temp_dr,
1046  int first,
1047  int last,
1048  int flip_ind = 0) const
1049  {
1050  APP_ABORT("DTD_BConds<T,3,PPPX> not implemented");
1051  }
1052 
1053  void computeDistancesOffload(const T pos[3],
1054  const T* restrict R0,
1055  int r0_stride,
1056  T* restrict temp_r,
1057  T* restrict temp_dr,
1058  int padded_size,
1059  int iat,
1060  int flip_ind = 0) const
1061  {
1062  //APP_ABORT("DTD_BConds<T, 3, PPPX + SOA_OFFSET>::computeDistancesOffload not implemented");
1063  }
1064 
1065  T computeDist(T dx, T dy, T dz) const
1066  {
1067  //APP_ABORT("DTD_BConds<T, 3, PPPX + SOA_OFFSET>::computeDist not implemented");
1068  }
1069 };
1070 
1071 /** specialization for a slab, general cell
1072 */
1073 template<class T>
1074 struct DTD_BConds<T, 3, PPNX + SOA_OFFSET>
1075 {
1076  T r00, r10, r01, r11;
1077  T g00, g10, g01, g11;
1080 
1082  : r00(lat.R(0)),
1083  r10(lat.R(3)),
1084  r01(lat.R(1)),
1085  r11(lat.R(4)),
1086  g00(lat.G(0)),
1087  g10(lat.G(3)),
1088  g01(lat.G(1)),
1089  g11(lat.G(4)),
1090  r2max(lat.CellRadiusSq)
1091  {
1092  const auto& cellx = nextcells[0];
1093  const auto& celly = nextcells[1];
1094  const auto& cellz = nextcells[2];
1095 
1096  int ic = 0;
1097  for (int i = -1; i <= 1; ++i)
1098  for (int j = -1; j <= 1; ++j)
1099  {
1100  if (i == 0 && j == 0)
1101  continue; //exclude zero
1102  cellx[ic] = i * r00 + j * r10;
1103  celly[ic] = i * r01 + j * r11;
1104  cellz[ic] = T();
1105  ++ic;
1106  }
1107  }
1108 
1109  template<typename PT, typename RSOA, typename DISPLSOA>
1110  void computeDistances(const PT& pos,
1111  const RSOA& R0,
1112  T* restrict temp_r,
1113  DISPLSOA& temp_dr,
1114  int first,
1115  int last,
1116  int flip_ind = 0) const
1117  {
1118  APP_ABORT("DTD_BConds<T,3,PPNX> not implemented");
1119  }
1120 
1121  void computeDistancesOffload(const T pos[3],
1122  const T* restrict R0,
1123  int r0_stride,
1124  T* restrict temp_r,
1125  T* restrict temp_dr,
1126  int padded_size,
1127  int iat,
1128  int flip_ind = 0) const
1129  {
1130  //APP_ABORT("DTD_BConds<T, 3, PPNX + SOA_OFFSET>::computeDistancesOffload not implemented");
1131  }
1132 
1133  T computeDist(T dx, T dy, T dz) const
1134  {
1135  //APP_ABORT("DTD_BConds<T, 3, PPNX + SOA_OFFSET>::computeDist not implemented");
1136  }
1137 };
1138 
1139 } // namespace qmcplusplus
1140 
1141 #endif // OHMMS_PARTICLE_BCONDS_3D_H
a class that defines a supercell in D-dimensional Euclean space.
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
TinyVector< TinyVector< T, 8 >, 3 > nextcells
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
TinyVector< TinyVector< T, 26 >, 3 > nextcells
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
TinyVector< TinyVector< T, 4 >, 2 > corners
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
Tensor_t G
Reciprocal unit vectors. G(j,i) i=vector and j=x,y,z.
Declaration of CrystalLattice<T,D>
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
TinyVector< TinyVector< T, 8 >, 3 > corners
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
DTD_BConds(const CrystalLattice< T, 3 > &lat)
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
Tensor<T,D> class for D by D tensor.
Definition: OhmmsTinyMeta.h:32
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
void computeDistances(const PT &pos, const RSOA &R0, T *restrict temp_r, DISPLSOA &temp_dr, int first, int last, int flip_ind=0) const
DTD_BConds(const CrystalLattice< T, 3 > &lat)
constructor: doing nothing
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
SingleParticlePos a(int i) const
Provide interfaces familiar to fotran users.
#define APP_ABORT(msg)
Widely used but deprecated fatal error macros from legacy code.
Definition: AppAbort.h:27
DTD_BConds(const CrystalLattice< T, 3 > &lat)
void computeDistancesOffload(const T pos[3], const T *restrict R0, int r0_stride, T *restrict temp_r, T *restrict temp_dr, int padded_size, int iat, int flip_ind=0) const
void find_reduced_basis(TinyVector< TinyVector< T, 3 >, 3 > &rb)
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
Tensor< T, D > inverse(const Tensor< T, D > &a)
Definition: TensorOps.h:879
DTD_BConds(const CrystalLattice< T, 3 > &lat)
DTD_BConds(const CrystalLattice< T, 3 > &lat)
DTD_BConds(const CrystalLattice< T, 3 > &lat)
MakeReturn< UnaryNode< FnFloor, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t floor(const Vector< T1, C1 > &l)
DTD_BConds(const CrystalLattice< T, 3 > &lat)