QMCPACK
einspline_helper.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////
2 // (c) Copyright 2003- by Ken Esler and Jeongnim Kim //
3 //////////////////////////////////////////////////////////////////
4 // National Center for Supercomputing Applications & //
5 // Materials Computation Center //
6 // University of Illinois, Urbana-Champaign //
7 // Urbana, IL 61801 //
8 // e-mail: jnkim@ncsa.uiuc.edu //
9 // //
10 // Supported by //
11 // National Center for Supercomputing Applications, UIUC //
12 // Materials Computation Center, UIUC //
13 //////////////////////////////////////////////////////////////////
14 /** helper functions for EinsplineSetBuilder
15  */
16 #ifndef QMCPLUSPLUS_EINSPLINEBUILDER_HELPER_H
17 #define QMCPLUSPLUS_EINSPLINEBUILDER_HELPER_H
18 #include <complex>
19 #include "OhmmsPETE/TinyVector.h"
20 #include "OhmmsPETE/OhmmsVector.h"
21 #include "OhmmsPETE/OhmmsArray.h"
22 #include "CPU/BLAS.hpp"
23 #include "CPU/math.hpp"
24 
25 namespace qmcplusplus
26 {
27 /** unpack packed cG to fftbox
28  * @param cG packed vector
29  * @param gvecs g-coordinate for cG[i]
30  * @param maxg fft grid
31  * @param fftbox unpacked data to be transformed
32  */
33 template<typename T>
34 inline void unpack4fftw(const Vector<std::complex<T>>& cG,
35  const std::vector<TinyVector<int, 3>>& gvecs,
36  const TinyVector<int, 3>& maxg,
37  Array<std::complex<T>, 3>& fftbox)
38 {
39  fftbox = std::complex<T>();
40  const int upper_bound[3] = {(maxg[0] - 1) / 2, (maxg[1] - 1) / 2, (maxg[2] - 1) / 2};
41  const int lower_bound[3] = {upper_bound[0] - maxg[0] + 1, upper_bound[1] - maxg[1] + 1, upper_bound[2] - maxg[2] + 1};
42  //only coefficient indices between [lower_bound,upper_bound] are taken for FFT.
43  //this is rather unsafe
44  //#pragma omp parallel for
45  for (int iG = 0; iG < cG.size(); iG++)
46  {
47  if (gvecs[iG][0] > upper_bound[0] || gvecs[iG][0] < lower_bound[0] || gvecs[iG][1] > upper_bound[1] ||
48  gvecs[iG][1] < lower_bound[1] || gvecs[iG][2] > upper_bound[2] || gvecs[iG][2] < lower_bound[2])
49  {
50  //std::cout << "Warning: cG out of bound "
51  // << "x " << gvecs[iG][0] << " y " << gvecs[iG][1] << " z " << gvecs[iG][2] << std::endl
52  // << "xu " << upper_bound[0] << " yu " << upper_bound[1] << " zu " << upper_bound[2] << std::endl
53  // << "xd " << lower_bound[0] << " yd " << lower_bound[1] << " zd " << lower_bound[2] << std::endl;
54  continue;
55  }
56  fftbox((gvecs[iG][0] + maxg[0]) % maxg[0], (gvecs[iG][1] + maxg[1]) % maxg[1], (gvecs[iG][2] + maxg[2]) % maxg[2]) =
57  cG[iG];
58  }
59 }
60 
61 /** rotate the state after 3dfft
62  *
63  * First, add the eikr phase factor.
64  * Then, rotate the phase of the orbitals so that neither
65  * the real part nor the imaginary part are very near
66  * zero. This sometimes happens in crystals with high
67  * symmetry at special k-points.
68  */
69 template<typename T, typename T1, typename T2>
70 inline void fix_phase_rotate_c2r(Array<std::complex<T>, 3>& in,
71  Array<T1, 3>& out,
72  const TinyVector<T2, 3>& twist,
73  T& phase_r,
74  T& phase_i)
75 {
76  const T two_pi = -2.0 * M_PI;
77  const int nx = in.size(0);
78  const int ny = in.size(1);
79  const int nz = in.size(2);
80  T nx_i = 1.0 / static_cast<T>(nx);
81  T ny_i = 1.0 / static_cast<T>(ny);
82  T nz_i = 1.0 / static_cast<T>(nz);
83 
84  T rNorm = 0.0, iNorm = 0.0, riNorm = 0.0;
85 #pragma omp parallel for reduction(+ : rNorm, iNorm, riNorm)
86  for (int ix = 0; ix < nx; ix++)
87  {
88  T s, c;
89  std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
90  T rux = static_cast<T>(ix) * nx_i * twist[0];
91  for (int iy = 0; iy < ny; iy++)
92  {
93  T ruy = static_cast<T>(iy) * ny_i * twist[1];
94  for (int iz = 0; iz < nz; iz++)
95  {
96  T ruz = static_cast<T>(iz) * nz_i * twist[2];
97  qmcplusplus::sincos(-two_pi * (rux + ruy + ruz), &s, &c);
98  std::complex<T> eikr(c, s);
99  *in_ptr *= eikr;
100  rNorm += in_ptr->real() * in_ptr->real();
101  iNorm += in_ptr->imag() * in_ptr->imag();
102  riNorm += in_ptr->real() * in_ptr->imag();
103  ++in_ptr;
104  }
105  }
106  }
107 
108  const T x = (rNorm - iNorm) / riNorm;
109  const T y = 1.0 / std::sqrt(x * x + 4.0);
110  const T phs = std::sqrt(0.5 - y);
111  phase_r = phs;
112  phase_i = (x < 0) ? std::sqrt(1.0 - phs * phs) : -std::sqrt(1.0 - phs * phs);
113 
114 #pragma omp parallel for
115  for (int ix = 0; ix < nx; ix++)
116  {
117  const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
118  T1* restrict out_ptr = out.data() + ix * ny * nz;
119  for (int iy = 0; iy < ny; iy++)
120  for (int iz = 0; iz < nz; iz++)
121  {
122  *out_ptr = static_cast<T1>(phase_r * in_ptr->real() - phase_i * in_ptr->imag());
123  ++in_ptr;
124  ++out_ptr;
125  }
126  }
127 }
128 
129 template<typename T, typename T1, typename T2>
130 inline void fix_phase_rotate_c2c(const Array<std::complex<T>, 3>& in,
131  Array<std::complex<T1>, 3>& out,
132  const TinyVector<T2, 3>& twist)
133 {
134  const int nx = in.size(0);
135  const int ny = in.size(1);
136  const int nz = in.size(2);
137  T phase_r, phase_i;
138 
139  compute_phase(in, twist, phase_r, phase_i);
140 
141 #pragma omp parallel for
142  for (int ix = 0; ix < nx; ++ix)
143  {
144  const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
145  std::complex<T1>* restrict out_ptr = out.data() + ix * ny * nz;
146  for (int iy = 0; iy < ny; ++iy)
147  for (int iz = 0; iz < nz; ++iz)
148  {
149  *out_ptr = std::complex<T1>(static_cast<T1>(phase_r * in_ptr->real() - phase_i * in_ptr->imag()),
150  static_cast<T1>(phase_i * in_ptr->real() + phase_r * in_ptr->imag()));
151  ++out_ptr;
152  ++in_ptr;
153  }
154  }
155 }
156 
157 template<typename T, typename T1, typename T2>
158 inline void fix_phase_rotate_c2c(const Array<std::complex<T>, 3>& in,
159  Array<T1, 3>& out_r,
160  Array<T1, 3>& out_i,
161  const TinyVector<T2, 3>& twist,
162  T& phase_r,
163  T& phase_i)
164 {
165  const int nx = in.size(0);
166  const int ny = in.size(1);
167  const int nz = in.size(2);
168 
169  compute_phase(in, twist, phase_r, phase_i);
170 
171 #pragma omp parallel for
172  for (size_t ix = 0; ix < nx; ++ix)
173  for (size_t iy = 0; iy < ny; ++iy)
174  {
175  const size_t offset = ix * ny * nz + iy * nz;
176  const std::complex<T>* restrict in_ptr = in.data() + offset;
177  T1* restrict r_ptr = out_r.data() + offset;
178  T1* restrict i_ptr = out_i.data() + offset;
179  for (size_t iz = 0; iz < nz; ++iz)
180  {
181  r_ptr[iz] = static_cast<T1>(phase_r * in_ptr[iz].real() - phase_i * in_ptr[iz].imag());
182  i_ptr[iz] = static_cast<T1>(phase_i * in_ptr[iz].real() + phase_r * in_ptr[iz].imag());
183  }
184  }
185 }
186 
187 /** Split FFTs into real/imaginary components.
188  * @param in ffts
189  * @param out_r real component
190  * @param out_i imaginary components
191  */
192 template<typename T, typename T1>
193 inline void split_real_components_c2c(const Array<std::complex<T>, 3>& in, Array<T1, 3>& out_r, Array<T1, 3>& out_i)
194 {
195  const int nx = in.size(0);
196  const int ny = in.size(1);
197  const int nz = in.size(2);
198 
199 #pragma omp parallel for
200  for (size_t ix = 0; ix < nx; ++ix)
201  for (size_t iy = 0; iy < ny; ++iy)
202  {
203  const size_t offset = ix * ny * nz + iy * nz;
204  const std::complex<T>* restrict in_ptr = in.data() + offset;
205  T1* restrict r_ptr = out_r.data() + offset;
206  T1* restrict i_ptr = out_i.data() + offset;
207  for (size_t iz = 0; iz < nz; ++iz)
208  {
209  r_ptr[iz] = static_cast<T1>(in_ptr[iz].real());
210  i_ptr[iz] = static_cast<T1>(in_ptr[iz].imag());
211  }
212  }
213 }
214 
215 /** Compute the norm of an orbital.
216  * @param cG the plane wave coefficients
217  * @return norm of the orbital
218  */
219 template<typename T>
220 inline T compute_norm(const Vector<std::complex<T>>& cG)
221 {
222  T total_norm2(0);
223 #pragma omp parallel for reduction(+ : total_norm2)
224  for (size_t ig = 0; ig < cG.size(); ++ig)
225  total_norm2 += cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag();
226  return std::sqrt(total_norm2);
227 }
228 
229 /** Compute the phase factor for rotation. The algorithm aims at balanced real and imaginary parts.
230  * @param in the real space orbital value on a 3D grid
231  * @param twist k-point in reduced coordinates
232  * @param phase_r output real part of the phase
233  * @param phase_i output imaginary part of the phase
234  */
235 template<typename T, typename T2>
236 inline void compute_phase(const Array<std::complex<T>, 3>& in, const TinyVector<T2, 3>& twist, T& phase_r, T& phase_i)
237 {
238  const T two_pi = -2.0 * M_PI;
239  const size_t nx = in.size(0);
240  const size_t ny = in.size(1);
241  const size_t nz = in.size(2);
242 
243  const T nx_i = 1.0 / static_cast<T>(nx);
244  const T ny_i = 1.0 / static_cast<T>(ny);
245  const T nz_i = 1.0 / static_cast<T>(nz);
246 
247  T rNorm = 0.0, iNorm = 0.0, riNorm = 0.0;
248 #pragma omp parallel for reduction(+ : rNorm, iNorm, riNorm)
249  for (size_t ix = 0; ix < nx; ++ix)
250  {
251  for (size_t iy = 0; iy < ny; ++iy)
252  {
253  const T rux = static_cast<T>(ix) * nx_i * twist[0];
254  T s, c;
255  T rsum = 0, isum = 0, risum = 0.0;
256  const T ruy = static_cast<T>(iy) * ny_i * twist[1];
257  const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz + iy * nz;
258  for (size_t iz = 0; iz < nz; ++iz)
259  {
260  const T ruz = static_cast<T>(iz) * nz_i * twist[2];
261  qmcplusplus::sincos(-two_pi * (rux + ruy + ruz), &s, &c);
262  const T re = c * in_ptr[iz].real() - s * in_ptr[iz].imag();
263  const T im = s * in_ptr[iz].real() + c * in_ptr[iz].imag();
264  rsum += re * re;
265  isum += im * im;
266  risum += re * im;
267  }
268  rNorm += rsum;
269  iNorm += isum;
270  riNorm += risum;
271  }
272  }
273 
274  const T x = (rNorm - iNorm) / riNorm;
275  const T y = 1.0 / std::sqrt(x * x + 4.0);
276  const T phs = std::sqrt(0.5 - y);
277  phase_r = phs;
278  phase_i = (x < 0) ? std::sqrt(1.0 - phs * phs) : -std::sqrt(1.0 - phs * phs);
279 }
280 
281 /** rotate the state after 3dfft
282  *
283  */
284 template<typename T>
285 inline void fix_phase_rotate(const Array<std::complex<T>, 3>& e2pi, Array<std::complex<T>, 3>& in, Array<T, 3>& out)
286 {
287  const int nx = e2pi.size(0);
288  const int ny = e2pi.size(1);
289  const int nz = e2pi.size(2);
290  T rNorm = 0.0, iNorm = 0.0;
291  //#pragma omp parallel for reduction(+:rNorm,iNorm)
292  for (int ix = 0; ix < nx; ix++)
293  {
294  T rpart = 0.0, ipart = 0.0;
295  const std::complex<T>* restrict p_ptr = e2pi.data() + ix * ny * nz;
296  std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
297  for (int iyz = 0; iyz < ny * nz; ++iyz)
298  {
299  in_ptr[iyz] *= p_ptr[iyz];
300  rpart += in_ptr[iyz].real() * in_ptr[iyz].real();
301  ipart += in_ptr[iyz].imag() * in_ptr[iyz].imag();
302  }
303  rNorm += rpart;
304  iNorm += ipart;
305  }
306 
307  //#pragma omp parallel
308  {
309  T arg = std::atan2(iNorm, rNorm);
310  T phase_i, phase_r;
311  qmcplusplus::sincos(0.125 * M_PI - 0.5 * arg, &phase_i, &phase_r);
312  //#pragma omp for
313  for (int ix = 0; ix < nx; ix++)
314  {
315  const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
316  T* restrict out_ptr = out.data() + ix * ny * nz;
317  for (int iyz = 0; iyz < ny * nz; iyz++)
318  out_ptr[iyz] = phase_r * in_ptr[iyz].real() - phase_i * in_ptr[iyz].imag();
319  }
320  }
321 }
322 
323 } // namespace qmcplusplus
324 
325 #endif
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
Type_t * data()
Definition: OhmmsArray.h:87
void fix_phase_rotate_c2c(const Array< std::complex< T >, 3 > &in, Array< std::complex< T1 >, 3 > &out, const TinyVector< T2, 3 > &twist)
float real(const float &c)
real part of a scalar. Cannot be replaced by std::real due to AFQMC specific needs.
void fix_phase_rotate(const Array< std::complex< T >, 3 > &e2pi, Array< std::complex< T >, 3 > &in, Array< T, 3 > &out)
rotate the state after 3dfft
TinyVector< T, 3 > lower_bound(const TinyVector< T, 3 > &a, const TinyVector< T, 3 > &b)
helper function to determine the lower bound of a domain (need to move up)
T compute_norm(const Vector< std::complex< T >> &cG)
Compute the norm of an orbital.
MakeReturn< BinaryNode< FnArcTan2, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t, typename CreateLeaf< Vector< T2, C2 > >::Leaf_t > >::Expression_t atan2(const Vector< T1, C1 > &l, const Vector< T2, C2 > &r)
TinyVector< T, 3 > upper_bound(const TinyVector< T, 3 > &a, const TinyVector< T, 3 > &b)
helper function to determine the upper bound of a domain (need to move up)
void fix_phase_rotate_c2r(Array< std::complex< T >, 3 > &in, Array< T1, 3 > &out, const TinyVector< T2, 3 > &twist, T &phase_r, T &phase_i)
rotate the state after 3dfft
void compute_phase(const Array< std::complex< T >, 3 > &in, const TinyVector< T2, 3 > &twist, T &phase_r, T &phase_i)
Compute the phase factor for rotation.
void split_real_components_c2c(const Array< std::complex< T >, 3 > &in, Array< T1, 3 > &out_r, Array< T1, 3 > &out_i)
Split FFTs into real/imaginary components.
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
Declaraton of Vector<T,Alloc> Manage memory through Alloc directly and allow referencing an existing ...
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Definition: math.hpp:62
A D-dimensional Array class based on PETE.
Definition: OhmmsArray.h:25
void unpack4fftw(const Vector< std::complex< T >> &cG, const std::vector< TinyVector< int, 3 >> &gvecs, const TinyVector< int, 3 > &maxg, Array< std::complex< T >, 3 > &fftbox)
unpack packed cG to fftbox