15 #ifndef QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H 16 #define QMCPLUSPLUS_SOA_FAST_PARTICLE_OPERATORS_H 27 template<
class T1,
unsigned D>
struct PosTransformer { };
32 struct PosTransformer<T,3>
34 using Array_t=VectorSoaContainer<T,3>;
35 using Transformer_t=Tensor<T,3>;
38 apply(
const Array_t& pin,
const Transformer_t& X, Array_t& pout,
int first,
int last)
40 const int n=last-first;
41 T x00=X[0],x01=X[1],x02=X[2],
42 x10=X[3],x11=X[4],x12=X[5],
43 x20=X[6],x21=X[7],x22=X[8];
51 for(
int i=0; i<
n; i++)
53 x_out[i]=x_in[i]*x00+y_in[i]*x10+z_in[i]*x20;
54 y_out[i]=x_in[i]*x01+y_in[i]*x11+z_in[i]*x21;
55 z_out[i]=x_in[i]*x02+y_in[i]*x12+z_in[i]*x22;
60 apply(
const Transformer_t& X,
const Array_t& pin, Array_t& pout,
int first,
int last)
62 ::apply(pin,X,pout,first,last);
66 apply(Array_t& pinout,
const Transformer_t& X,
int first,
int last)
68 const int n=last-first;
69 T x00=X[0],x01=X[1],x02=X[2],
70 x10=X[3],x11=X[4],x12=X[5],
71 x20=X[6],x21=X[7],x22=X[8];
76 for(
int i=0; i<
n; i++)
78 T x=x_inout[i]*x00+y_inout[i]*x10+z_inout[i]*x20;
79 T y=x_inout[i]*x01+y_inout[i]*x11+z_inout[i]*x21;
80 T z=x_inout[i]*x02+y_inout[i]*x12+z_inout[i]*x22;
88 apply(
const Transformer_t& X, Array_t& pinout,
int first,
int last)
90 ::apply(X,pinout,first,last);
106 void PosAoS2SoA(
int nrows,
int ncols,
const T* restrict iptr,
int lda, T* restrict out,
int ldb)
109 T* restrict y = out + ldb;
110 T* restrict z = out + 2 * ldb;
111 #if !defined(__ibmxl__) 112 #pragma omp simd aligned(x, y, z: QMC_SIMD_ALIGNMENT) 114 for (
int i = 0; i < nrows; ++i)
116 x[i] = iptr[i * ncols];
117 y[i] = iptr[i * ncols + 1];
118 z[i] = iptr[i * ncols + 2];
133 void PosSoA2AoS(
int nrows,
int ncols,
const T* restrict iptr,
int lda, T* restrict out,
int ldb)
135 const T* restrict x = iptr;
136 const T* restrict y = iptr +
lda;
137 const T* restrict z = iptr + 2 *
lda;
138 #if !defined(__ibmxl__) 139 #pragma omp simd aligned(x, y, z: QMC_SIMD_ALIGNMENT) 141 for (
int i = 0; i < nrows; ++i)
144 out[i * ldb + 1] = y[i];
145 out[i * ldb + 2] = z[i];
153 void PosAoS2SoA(
int nrows,
int ncols,
const double* restrict in,
int lda,
double* restrict out,
int ldb)
155 const double zone={1.0};
156 mkl_domatcopy(
'R',
'T',nrows,ncols,
zone,in,
lda,out,ldb);
161 void PosAoS2SoA(
int nrows,
int ncols,
const float* restrict in,
int lda,
float* restrict out,
int ldb)
163 const float zone={1.0f};
164 mkl_somatcopy(
'R',
'T',nrows,ncols,
zone,in,
lda,out,ldb);
169 void PosSoA2AoS(
int nrows,
int ncols,
const double* restrict in,
int lda,
double* restrict out,
int ldb)
171 const double zone={1.0};
172 mkl_domatcopy(
'R',
'T',nrows,ncols,
zone,in,
lda,out,ldb);
177 void PosSoA2AoS(
int nrows,
int ncols,
const float* restrict in,
int lda,
float* restrict out,
int ldb)
179 const float zone={1.0f};
180 mkl_somatcopy(
'R',
'T',nrows,ncols,
zone,in,
lda,out,ldb);
helper functions for EinsplineSetBuilder
constexpr std::complex< double > zone
void PosSoA2AoS(int nrows, int ncols, const T *restrict iptr, int lda, T *restrict out, int ldb)
General conversion function from SoA[ncols][ldb] to AoS[nrows][ncols].
void PosAoS2SoA(int nrows, int ncols, const T *restrict iptr, int lda, T *restrict out, int ldb)
General conversion function from AoS[nrows][ncols] to SoA[ncols][ldb].
#define ASSUME_ALIGNED(x)