QMCPACK
PosTransformer.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /** @file VectorOperators.h
13  * @brief Support funtions to handle position type data manged by soa
14  */
15 #ifndef QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
16 #define QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H
17 
18 namespace qmcplusplus
19 {
20 //Need to reorg
21 #if 0
22  /** Dummy template class to be specialized
23  *
24  * - T1 the datatype to be transformed
25  * - D dimension
26  */
27  template<class T1, unsigned D> struct PosTransformer { };
28 
29  /** Specialized PosTransformer<T,3,true> using only the diagonal elements
30  */
31  template<class T>
32  struct PosTransformer<T,3>
33  {
34  using Array_t=VectorSoaContainer<T,3>;
35  using Transformer_t=Tensor<T,3>;
36 
37  inline static void
38  apply(const Array_t& pin, const Transformer_t& X, Array_t& pout, int first, int last)
39  {
40  const int n=last-first;
41  T x00=X[0],x01=X[1],x02=X[2],
42  x10=X[3],x11=X[4],x12=X[5],
43  x20=X[6],x21=X[7],x22=X[8];
44  const T* restrict x_in=pin.data(0)+first; ASSUME_ALIGNED(x_in);
45  const T* restrict y_in=pin.data(1)+first; ASSUME_ALIGNED(y_in);
46  const T* restrict z_in=pin.data(2)+first; ASSUME_ALIGNED(z_in);
47  T* restrict x_out=pout.data(0)+first; ASSUME_ALIGNED(x_out);
48  T* restrict y_out=pout.data(1)+first; ASSUME_ALIGNED(y_out);
49  T* restrict z_out=pout.data(2)+first; ASSUME_ALIGNED(z_out);
50 #pragma ivdep
51  for(int i=0; i<n; i++)
52  {
53  x_out[i]=x_in[i]*x00+y_in[i]*x10+z_in[i]*x20;
54  y_out[i]=x_in[i]*x01+y_in[i]*x11+z_in[i]*x21;
55  z_out[i]=x_in[i]*x02+y_in[i]*x12+z_in[i]*x22;
56  }
57  }
58 
59  inline static void
60  apply(const Transformer_t& X, const Array_t& pin, Array_t& pout, int first, int last)
61  {
62  ::apply(pin,X,pout,first,last);
63  }
64 
65  inline static void
66  apply(Array_t& pinout, const Transformer_t& X,int first, int last)
67  {
68  const int n=last-first;
69  T x00=X[0],x01=X[1],x02=X[2],
70  x10=X[3],x11=X[4],x12=X[5],
71  x20=X[6],x21=X[7],x22=X[8];
72  T* restrict x_inout=pinout.data(0)+first; ASSUME_ALIGNED(x_inout);
73  T* restrict y_inout=pinout.data(1)+first; ASSUME_ALIGNED(y_inout);
74  T* restrict z_inout=pinout.data(2)+first; ASSUME_ALIGNED(z_inout);
75 #pragma ivdep
76  for(int i=0; i<n; i++)
77  {
78  T x=x_inout[i]*x00+y_inout[i]*x10+z_inout[i]*x20;
79  T y=x_inout[i]*x01+y_inout[i]*x11+z_inout[i]*x21;
80  T z=x_inout[i]*x02+y_inout[i]*x12+z_inout[i]*x22;
81  x_inout[i]=x;
82  y_inout[i]=y;
83  z_inout[i]=z;
84  }
85  }
86 
87  inline static void
88  apply(const Transformer_t& X, Array_t& pinout, int first, int last)
89  {
90  ::apply(X,pinout,first,last);
91  }
92  };
93 #endif
94 
95 /** General conversion function from AoS[nrows][ncols] to SoA[ncols][ldb]
96  * @param nrows the first dimension
97  * @param ncols the second dimension
98  * @param iptr input pointer
99  * @param lda stride of iptr
100  * @param out output pointer
101  * @param lda strided of out
102  *
103  * Modeled after blas/lapack for lda/ldb
104  */
105 template<typename T>
106 void PosAoS2SoA(int nrows, int ncols, const T* restrict iptr, int lda, T* restrict out, int ldb)
107 {
108  T* restrict x = out;
109  T* restrict y = out + ldb;
110  T* restrict z = out + 2 * ldb;
111 #if !defined(__ibmxl__)
112 #pragma omp simd aligned(x, y, z: QMC_SIMD_ALIGNMENT)
113 #endif
114  for (int i = 0; i < nrows; ++i)
115  {
116  x[i] = iptr[i * ncols]; //x[i]=in[i][0];
117  y[i] = iptr[i * ncols + 1]; //y[i]=in[i][1];
118  z[i] = iptr[i * ncols + 2]; //z[i]=in[i][2];
119  }
120 }
121 
122 /** General conversion function from SoA[ncols][ldb] to AoS[nrows][ncols]
123  * @param nrows the first dimension
124  * @param ncols the second dimension
125  * @param iptr input pointer
126  * @param lda stride of iptr
127  * @param out output pointer
128  * @param lda strided of out
129  *
130  * Modeled after blas/lapack for lda/ldb
131  */
132 template<typename T>
133 void PosSoA2AoS(int nrows, int ncols, const T* restrict iptr, int lda, T* restrict out, int ldb)
134 {
135  const T* restrict x = iptr;
136  const T* restrict y = iptr + lda;
137  const T* restrict z = iptr + 2 * lda;
138 #if !defined(__ibmxl__)
139 #pragma omp simd aligned(x, y, z: QMC_SIMD_ALIGNMENT)
140 #endif
141  for (int i = 0; i < nrows; ++i)
142  {
143  out[i * ldb] = x[i]; //out[i][0]=x[i];
144  out[i * ldb + 1] = y[i]; //out[i][1]=y[i];
145  out[i * ldb + 2] = z[i]; //out[i][2]=z[i];
146  }
147 }
148 
149 #if 0
150 //#if defined(HAVE_MKL)
151  ///specialization for double AoS2SoA
152  template<>
153  void PosAoS2SoA(int nrows, int ncols, const double* restrict in, int lda, double* restrict out, int ldb)
154  {
155  const double zone={1.0};
156  mkl_domatcopy('R','T',nrows,ncols,zone,in,lda,out,ldb);
157  }
158 
159  ///specialization for float AoS2SoA
160  template<>
161  void PosAoS2SoA(int nrows, int ncols, const float* restrict in, int lda, float* restrict out, int ldb)
162  {
163  const float zone={1.0f};
164  mkl_somatcopy('R','T',nrows,ncols,zone,in,lda,out,ldb);
165  }
166 
167  ///specialization for double SoA2AoS
168  template<>
169  void PosSoA2AoS(int nrows, int ncols, const double* restrict in, int lda, double* restrict out, int ldb)
170  {
171  const double zone={1.0};
172  mkl_domatcopy('R','T',nrows,ncols,zone,in,lda,out,ldb);
173  }
174 
175  ///specialization for float SoA2AoS
176  template<>
177  void PosSoA2AoS(int nrows, int ncols, const float* restrict in, int lda, float* restrict out, int ldb)
178  {
179  const float zone={1.0f};
180  mkl_somatcopy('R','T',nrows,ncols,zone,in,lda,out,ldb);
181  }
182 #endif
183 
184 
185 } // namespace qmcplusplus
186 #endif
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
constexpr std::complex< double > zone
Definition: BLAS.hpp:52
void PosSoA2AoS(int nrows, int ncols, const T *restrict iptr, int lda, T *restrict out, int ldb)
General conversion function from SoA[ncols][ldb] to AoS[nrows][ncols].
void PosAoS2SoA(int nrows, int ncols, const T *restrict iptr, int lda, T *restrict out, int ldb)
General conversion function from AoS[nrows][ncols] to SoA[ncols][ldb].
#define ASSUME_ALIGNED(x)