// assign_v (signature as listed at the end of this file):
//   void assign_v(ST x, ST y, ST z, TT* results_scratch_ptr,
//                 const ST* offload_scratch_ptr, const ST* myKcart_ptr,
//                 size_t myKcart_padded_size, size_t first_spo,
//                 int nComplexBands, int index)
//
// For the single orbital `index`, reads one interleaved pair of values from
// offload_scratch_ptr, multiplies it by (c + i*s), and stores the result in
// results_scratch_ptr. One value is always stored; a second one is stored only
// when index < nComplexBands.
//
// ST is the element type of the inputs, TT the element type of the outputs.
//
// NOTE(review): this listing omits some source lines (the function name line,
// the braces, and whatever fills `s` and `c`). Comments below describe only
// the statements that are visible.
19 template<
typename ST,
typename TT>
// Output buffer; written at psiIndex (and psiIndex + 1 for complex bands).
23 TT* restrict results_scratch_ptr,
// Input buffer holding interleaved pairs: element 2*index and 2*index + 1.
24 const ST* restrict offload_scratch_ptr,
// Three consecutive segments of length myKcart_padded_size, read as kx/ky/kz.
25 const ST* restrict myKcart_ptr,
26 size_t myKcart_padded_size,
// Split the packed k-vector storage into its three component arrays.
31 const ST* restrict kx = myKcart_ptr;
32 const ST* restrict ky = myKcart_ptr + myKcart_padded_size;
33 const ST* restrict kz = myKcart_ptr + myKcart_padded_size * 2;
35 const ST* restrict val = offload_scratch_ptr;
36 TT* restrict psi_s = results_scratch_ptr;
// jr / ji index the two members of the interleaved pair for this orbital
// (named as real and imaginary parts).
38 const size_t jr = index << 1;
39 const size_t ji = jr + 1;
// p = -(r . k) for this orbital. `s` and `c` are declared uninitialized here;
// presumably they are filled with sin(p) and cos(p) by a sincos call on a line
// missing from this listing -- TODO confirm.
41 ST
s, c, p = -(x * kx[index] + y * ky[index] + z * kz[index]);
44 const ST val_r = val[jr];
45 const ST val_i = val[ji];
// Output slot: every index below nComplexBands occupies two slots, all later
// indices occupy one, so the offset grows by min(index, nComplexBands).
46 const size_t psiIndex = first_spo + index +
omptarget::min(index, nComplexBands);
// Real part of (val_r + i*val_i) * (c + i*s).
47 psi_s[psiIndex] = val_r * c - val_i *
s;
// Imaginary part of the same product, stored only for the complex bands.
48 if (index < nComplexBands)
49 psi_s[psiIndex + 1] = val_i * c + val_r *
s;
// assign_vgl (signature as listed at the end of this file):
//   void assign_vgl(ST x, ST y, ST z, TT* results_scratch_ptr,
//                   size_t orb_padded_size, const ST* mKK_ptr,
//                   const ST* offload_scratch_ptr, size_t spline_padded_size,
//                   const ST G[9], const ST* myKcart_ptr,
//                   size_t myKcart_padded_size, size_t first_spo,
//                   int nComplexBands, int index)
//
// For the single orbital `index`, combines the value, gradient and Laplacian
// inputs with the k-vector, multiplies each by (c + i*s), and stores value,
// three gradient components and Laplacian into five consecutive segments of
// results_scratch_ptr.
//
// ST is the element type of the inputs, TT the element type of the outputs.
//
// NOTE(review): this listing omits some source lines (the function name line,
// the braces, the declarations of g22, g0/g1/g2 and `two`, and whatever fills
// `s` and `c`). Comments below describe only the statements that are visible.
54 template<
typename ST,
typename TT>
// Output buffer: five segments of orb_padded_size elements each.
58 TT* restrict results_scratch_ptr,
59 size_t orb_padded_size,
// Input buffer: segments of spline_padded_size elements selected by
// SoAFields3D enumerators (VAL and LAPL are the ones visible here).
61 const ST* restrict offload_scratch_ptr,
62 size_t spline_padded_size,
// Three consecutive segments of length myKcart_padded_size, read as k0/k1/k2.
64 const ST* myKcart_ptr,
65 size_t myKcart_padded_size,
// Name the entries of G as a 3x3 matrix stored row by row (gRC = G[3*R + C]).
// g22 is used below; its binding is presumably on a continuation line not
// shown here -- TODO confirm.
71 const ST &g00 = G[0], &g01 = G[1], &g02 = G[2], &g10 = G[3], &g11 = G[4], &g12 = G[5], &g20 = G[6], &g21 = G[7],
74 const ST* restrict k0 = myKcart_ptr;
75 const ST* restrict k1 = myKcart_ptr + myKcart_padded_size;
76 const ST* restrict k2 = myKcart_ptr + myKcart_padded_size * 2;
78 const ST* restrict val = offload_scratch_ptr + spline_padded_size *
SoAFields3D::VAL;
82 const ST* restrict lcart = offload_scratch_ptr + spline_padded_size *
SoAFields3D::LAPL;
// jr / ji index the two members of the interleaved pair for this orbital
// (named as real and imaginary parts).
84 const size_t jr = index << 1;
85 const size_t ji = jr + 1;
87 const ST kX = k0[index];
88 const ST kY = k1[index];
89 const ST kZ = k2[index];
90 const ST val_r = val[jr];
91 const ST val_i = val[ji];
// p = -(r . k) for this orbital. `s` and `c` are declared uninitialized here;
// presumably they are filled with sin(p) and cos(p) by a sincos call on a line
// missing from this listing -- TODO confirm.
94 ST
s, c, p = -(x * kX + y * kY + z * kZ);
// Matrix-vector product G * (g0, g1, g2), once for the jr member ...
98 const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
99 const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
100 const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
// ... and once for the ji member.
102 const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
103 const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
104 const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
// Add the k-vector contribution: the _r terms gain +val_i * k, the _i terms
// gain -val_r * k.
107 const ST gX_r = dX_r + val_i * kX;
108 const ST gY_r = dY_r + val_i * kY;
109 const ST gZ_r = dZ_r + val_i * kZ;
110 const ST gX_i = dX_i - val_r * kX;
111 const ST gY_i = dY_i - val_r * kY;
112 const ST gZ_i = dZ_i - val_r * kZ;
// Laplacian: input Laplacian + mKK_ptr[index] * value +/- two * (k . d).
// `two` is declared on a line not shown; presumably the constant 2.
114 const ST lap_r = lcart[jr] + mKK_ptr[index] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
115 const ST lap_i = lcart[ji] + mKK_ptr[index] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
// Carve the output buffer into its five result arrays.
117 TT* restrict psi = results_scratch_ptr;
118 TT* restrict dpsi_x = results_scratch_ptr + orb_padded_size;
119 TT* restrict dpsi_y = results_scratch_ptr + orb_padded_size * 2;
120 TT* restrict dpsi_z = results_scratch_ptr + orb_padded_size * 3;
121 TT* restrict d2psi = results_scratch_ptr + orb_padded_size * 4;
// Output slot: every index below nComplexBands occupies two slots, all later
// indices occupy one, so the offset grows by min(index, nComplexBands).
123 const size_t psiIndex = first_spo + index +
omptarget::min(index, nComplexBands);
// Real part of (X_r + i*X_i) * (c + i*s) for each of the five quantities.
125 psi[psiIndex] = c * val_r -
s * val_i;
126 d2psi[psiIndex] = c * lap_r -
s * lap_i;
127 dpsi_x[psiIndex] = c * gX_r -
s * gX_i;
128 dpsi_y[psiIndex] = c * gY_r -
s * gY_i;
129 dpsi_z[psiIndex] = c * gZ_r -
s * gZ_i;
// Imaginary part of the same products, stored in the following slot for the
// complex bands. NOTE(review): the brace lines are not shown; presumably all
// five stores below are guarded by this condition -- confirm.
131 if (index < nComplexBands)
133 psi[psiIndex + 1] = c * val_i +
s * val_r;
134 d2psi[psiIndex + 1] = c * lap_i +
s * lap_r;
135 dpsi_x[psiIndex + 1] = c * gX_i +
s * gX_r;
136 dpsi_y[psiIndex + 1] = c * gY_i +
s * gY_r;
137 dpsi_z[psiIndex + 1] = c * gZ_i +
s * gZ_r;
helper functions for EinsplineSetBuilder
void assign_vgl(ST x, ST y, ST z, TT *restrict results_scratch_ptr, size_t orb_padded_size, const ST *mKK_ptr, const ST *restrict offload_scratch_ptr, size_t spline_padded_size, const ST G[9], const ST *myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int nComplexBands, int index)
assign_vgl
void assign_v(ST x, ST y, ST z, TT *restrict results_scratch_ptr, const ST *restrict offload_scratch_ptr, const ST *restrict myKcart_ptr, size_t myKcart_padded_size, size_t first_spo, int nComplexBands, int index)
void sincos(T a, T *restrict s, T *restrict c)
sincos function wrapper
Handles math function mapping inside OpenMP offload regions.