QMCPACK
algorithm.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by:
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /** @file algorithm.hpp
13  *
14  * SIMD version of functions in algorithm
15  */
16 #ifndef QMCPLUSPLUS_SIMD_ALGORITHM_HPP
17 #define QMCPLUSPLUS_SIMD_ALGORITHM_HPP
18 
#include <complex>
#include <cstddef>
#include <cstring>
20 
21 namespace qmcplusplus
22 {
23 namespace simd
24 {
/** add the first n elements of an array onto an initial value
 * @param in starting address of the input
 * @param n number of elements to accumulate
 * @param res initial value of the running sum
 * @return res plus the sum of in[0], ..., in[n-1]; res itself when n is 0
 *
 * The loop is annotated as an OpenMP simd reduction over res.
 */
template<typename T1, typename T2>
inline T2 accumulate_n(const T1* restrict in, size_t n, T2 res)
{
  // The index has the same unsigned type as n: an int index would be compared
  // against an unsigned bound and overflow for n larger than INT_MAX.
#pragma omp simd reduction(+ : res)
  for (size_t i = 0; i < n; ++i)
    res += in[i];
  return res;
}
33 
/** element-wise copy between arrays of different value types
 * @param target starting address of the target
 * @param source starting address of the source
 * @param n number of elements to copy
 *
 * Every element is converted with static_cast<T1> before it is stored;
 * no memcpy is involved in this overload.
 */
template<typename T1, typename T2>
inline void copy(T1* restrict target, const T2* restrict source, size_t n)
{
  size_t idx = 0;
  while (idx < n)
  {
    target[idx] = static_cast<T1>(source[idx]);
    ++idx;
  }
}
45 
/** copy n elements between arrays of the same type with a single memcpy
 * @param target starting address of the target
 * @param source starting address of the source
 * @param n number of elements (not bytes) to copy
 *
 * sizeof(T) * n bytes are transferred as raw memory.
 * NOTE(review): presumably only instantiated with types that are safe to
 * copy bytewise -- confirm against callers.
 */
template<typename T>
inline void copy(T* restrict target, const T* restrict source, size_t n)
{
  // memcpy requires valid pointers even for a zero-byte request, so an empty
  // range is handled without touching the pointers at all.
  if (n == 0)
    return;
  std::memcpy(target, source, sizeof(T) * n);
}
56 
/** split an array of complex numbers into separate real and imaginary arrays
 * @param target_r starting address receiving the real parts
 * @param target_i starting address receiving the imaginary parts
 * @param source starting address of the complex source
 * @param n number of complex elements to copy
 *
 * Each component is converted to T1 with static_cast.
 */
template<typename T1, typename T2>
inline void copy(T1* restrict target_r, T1* restrict target_i, const std::complex<T2>* restrict source, size_t n)
{
  // size_t index matches the type of n (an int index mixes signedness and
  // overflows for n larger than INT_MAX).
  for (size_t i = 0; i < n; ++i)
  {
    target_r[i] = static_cast<T1>(source[i].real());
    target_i[i] = static_cast<T1>(source[i].imag());
  }
}
67 
/** accumulate second moments of the element-wise complex products x[i] * y[i]
 * @param n number of elements read from x and y
 * @param x starting address of the first complex array
 * @param y starting address of the second complex array
 * @param rN incremented by the sum of squared real parts of the products
 * @param iN incremented by the sum of squared imaginary parts of the products
 * @param riN incremented by the sum of (real part * imaginary part) of the products
 *
 * The three outputs are added to, not reset, so the caller chooses their
 * starting values.
 */
template<typename T>
inline void accumulate_phases(const int& n,
                              const std::complex<T>* restrict x,
                              const std::complex<T>* restrict y,
                              T& rN,
                              T& iN,
                              T& riN)
{
  for (int k = 0; k < n; ++k)
  {
    const std::complex<T>& a = x[k];
    const std::complex<T>& b = y[k];
    // real and imaginary parts of a * b, written out explicitly
    const T prod_re = a.real() * b.real() - a.imag() * b.imag();
    const T prod_im = a.real() * b.imag() + a.imag() * b.real();
    rN += prod_re * prod_re;
    iN += prod_im * prod_im;
    riN += prod_re * prod_im;
  }
}
85 
/** out-of-place transpose: B(i,j) = A(j,i)
 * @param A starting address of the source, A(m,lda)
 * @param m number of A rows (and of B columns)
 * @param lda stride of A's row
 * @param B starting address of the target, B(n,ldb)
 * @param n number of B rows (and of A columns)
 * @param ldb stride of B's row
 *
 * Elements are converted from T to TO by assignment.
 * Blas-like interface
 */
template<typename T, typename TO>
inline void transpose(const T* restrict A, size_t m, size_t lda, TO* restrict B, size_t n, size_t ldb)
{
  for (size_t b_row = 0; b_row < n; ++b_row)
  {
    // offset of the first element of the current row of B
    const size_t b_offset = b_row * ldb;
    for (size_t a_row = 0; a_row < m; ++a_row)
      B[b_offset + a_row] = A[a_row * lda + b_row];
  }
}
103 
/** strided copy of the m x n matrix A into B, row by row
 * @param m number of rows of A and B
 * @param n number of elements copied from each row
 * @param A starting address of the source, A(m,lda)
 * @param lda stride of A's row
 * @param B starting address of the target, B(m,ldb)
 * @param ldb stride of B's row
 *
 * Elements are converted from T to TO by assignment.
 * Blas-like interface
 */
template<typename T, typename TO>
inline void remapCopy(size_t m, size_t n, const T* restrict A, size_t lda, TO* restrict B, size_t ldb)
{
  for (size_t row = 0; row < m; ++row)
  {
    // the two arrays may use different row strides
    const size_t src_offset = row * lda;
    const size_t dst_offset = row * ldb;
    for (size_t col = 0; col < n; ++col)
      B[dst_offset + col] = A[src_offset + col];
  }
}
121 } // namespace simd
122 } // namespace qmcplusplus
123 #endif
T2 accumulate_n(const T1 *restrict in, size_t n, T2 res)
Definition: algorithm.hpp:26
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
void accumulate_phases(const int &n, const std::complex< T > *restrict x, const std::complex< T > *restrict y, T &rN, T &iN, T &riN)
Definition: algorithm.hpp:69
void transpose(const T *restrict A, size_t m, size_t lda, TO *restrict B, size_t n, size_t ldb)
transpose of A(m,n) to B(n,m)
Definition: algorithm.hpp:97
void copy(T1 *restrict target, const T2 *restrict source, size_t n)
copy function using memcpy
Definition: algorithm.hpp:40
void remapCopy(size_t m, size_t n, const T *restrict A, size_t lda, TO *restrict B, size_t ldb)
copy of A(m,n) to B(m,n)
Definition: algorithm.hpp:115
double B(double x, int k, int i, const std::vector< double > &t)