template<typename T>
void test_gemv(const int M_b, const int N_b, const char trans)
{
  const int M = trans == 'T' ? M_b : N_b;
  const int N = trans == 'T' ? N_b : M_b;

  using vec_t = Vector<T, PinnedDualAllocator<T>>;
  using mat_t = Matrix<T, PinnedDualAllocator<T>>;

  // ... set up the SYCL queue handle, the input vector A (length N), the input
  //     matrix B (M_b x N_b), the device result C and the host reference D (length M),
  //     and the scalars alpha and beta ...

  // fill the input vector A
  for (int i = 0; i < N; i++)
  { /* ... */ }

  // fill the input matrix B
  for (int j = 0; j < M_b; j++)
    for (int i = 0; i < N_b; i++)
    { /* ... */ }

  // initialize the device result C and the host reference D
  for (int i = 0; i < M; i++)
  { /* ... */ }

  // ... copy A and B to the device ...

  syclBLAS::gemv(handle, trans, N_b, M_b, alpha,
                 B.device_data(), N_b,
                 A.device_data(), 1, beta,
                 C.device_data(), 1)
      .wait();

  // ... copy C back to the host and fill D with the host reference BLAS ...

  for (int index = 0; index < M; index++)
    CHECK(C[index] == D[index]);
}
template<typename T>
void test_gemv_batched(const int M_b, const int N_b, const char trans, const int batch_count)
{
  const int M = trans == 'T' ? M_b : N_b;
  const int N = trans == 'T' ? N_b : M_b;

  using vec_t = Vector<T, PinnedDualAllocator<T>>;
  using mat_t = Matrix<T, PinnedDualAllocator<T>>;

  // ...

  // input vectors and their device-pointer array
  std::vector<vec_t> As;
  Vector<const T*, PinnedDualAllocator<const T*>> Aptrs;

  // input matrices and their device-pointer array
  std::vector<mat_t> Bs;
  Vector<const T*, PinnedDualAllocator<const T*>> Bptrs;

  // device results
  std::vector<vec_t> Cs;
  Vector<T*, PinnedDualAllocator<T*>> Cptrs;

  // host references
  std::vector<vec_t> Ds;
  Vector<T*, PinnedDualAllocator<T*>> Dptrs;

  // resize the pointer arrays
  Aptrs.resize(batch_count);
  Bptrs.resize(batch_count);
  Cptrs.resize(batch_count);
  Dptrs.resize(batch_count);

  // resize the data arrays
  As.resize(batch_count);
  Bs.resize(batch_count);
  Cs.resize(batch_count);
  Ds.resize(batch_count);

  // fill each batch entry and record its pointer
  for (int batch = 0; batch < batch_count; batch++)
  {
    // ...
    Aptrs[batch] = As[batch].device_data();

    Bs[batch].resize(M_b, N_b);
    Bptrs[batch] = Bs[batch].device_data();

    // ...
    Cptrs[batch] = Cs[batch].device_data();

    // ...
    Dptrs[batch] = Ds[batch].data();

    for (int i = 0; i < N; i++)
    { /* fill As[batch] */ }

    for (int j = 0; j < M_b; j++)
      for (int i = 0; i < N_b; i++)
        Bs[batch][j][i] = i + j * 2;

    for (int i = 0; i < M; i++)
      Cs[batch][i] = Ds[batch][i] = T(0);

    As[batch].updateTo();
    Bs[batch].updateTo();
  }

  // ...

  Vector<T, PinnedDualAllocator<T>> alpha(batch_count);
  Vector<T, PinnedDualAllocator<T>> beta(batch_count);
  Vector<T, PinnedDualAllocator<T>> beta1(batch_count);

  for (int batch = 0; batch < batch_count; batch++)
  {
    alpha[batch] = T(0.5);
    // ...
  }

  // two batched calls: one with beta, a second with beta1
  syclBLAS::gemv_batched(handle, trans, N_b, M_b, alpha.device_data(), Bptrs.device_data(), N_b,
                         Aptrs.device_data(), 1, beta.device_data(), Cptrs.device_data(), 1, batch_count)
      .wait();
  syclBLAS::gemv_batched(handle, trans, N_b, M_b, alpha.device_data(), Bptrs.device_data(), N_b,
                         Aptrs.device_data(), 1, beta1.device_data(), Cptrs.device_data(), 1, batch_count)
      .wait();

  for (int batch = 0; batch < batch_count; batch++)
  {
    Cs[batch].updateFrom();
    if (trans == 'T')
      BLAS::gemv_trans(M_b, N_b, Bs[batch].data(), As[batch].data(), Ds[batch].data());
    else
      BLAS::gemv(M_b, N_b, Bs[batch].data(), As[batch].data(), Ds[batch].data());

    for (int index = 0; index < M; index++)
      CHECK(Cs[batch][index] == Ds[batch][index]);
  }
}
  // ...
  const int batch_count = 23;

  std::cout << "Testing TRANS gemv" << std::endl;
  test_gemv<float>(M, N, 'T');
  test_gemv<double>(M, N, 'T');
#if defined(QMC_COMPLEX)
  test_gemv<std::complex<float>>(N, M, 'T');
  test_gemv<std::complex<double>>(N, M, 'T');
#endif

  std::cout << "Testing TRANS gemv_batched" << std::endl;
  test_gemv_batched<float>(M, N, 'T', batch_count);
  test_gemv_batched<double>(M, N, 'T', batch_count);
#if defined(QMC_COMPLEX)
  test_gemv_batched<std::complex<float>>(N, M, 'T', batch_count);
  test_gemv_batched<std::complex<double>>(N, M, 'T', batch_count);
#endif
  // ...
  const int batch_count = 23;

  std::cout << "Testing NOTRANS gemv" << std::endl;
  test_gemv<float>(M, N, 'N');
  test_gemv<double>(M, N, 'N');
#if defined(QMC_COMPLEX)
  test_gemv<std::complex<float>>(N, M, 'N');
  test_gemv<std::complex<double>>(N, M, 'N');
#endif

  std::cout << "Testing NOTRANS gemv_batched" << std::endl;
  test_gemv_batched<float>(M, N, 'N', batch_count);
  test_gemv_batched<double>(M, N, 'N', batch_count);
#if defined(QMC_COMPLEX)
  test_gemv_batched<std::complex<float>>(N, M, 'N', batch_count);
  test_gemv_batched<std::complex<double>>(N, M, 'N', batch_count);
#endif
  // ...

template<typename T>
void test_ger_batched(const int M, const int N, const int batch_count)
{
  using vec_t = Vector<T, PinnedDualAllocator<T>>;
  using mat_t = Matrix<T, PinnedDualAllocator<T>>;

  // ...

  // input vectors and their device-pointer arrays
  std::vector<vec_t> Xs;
  Vector<const T*, PinnedDualAllocator<const T*>> Xptrs;
  std::vector<vec_t> Ys;
  Vector<const T*, PinnedDualAllocator<const T*>> Yptrs;

  // matrices: Ah on the host (reference), Ad on the device
  std::vector<mat_t> Ahs;
  Vector<T*, PinnedDualAllocator<T*>> Ahptrs;
  std::vector<mat_t> Ads;
  Vector<T*, PinnedDualAllocator<T*>> Adptrs;

  // resize the pointer arrays
  Xptrs.resize(batch_count);
  Yptrs.resize(batch_count);
  Ahptrs.resize(batch_count);
  Adptrs.resize(batch_count);

  // resize the data arrays
  Xs.resize(batch_count);
  Ys.resize(batch_count);
  Ahs.resize(batch_count);
  Ads.resize(batch_count);

  // fill each batch entry and record its pointer
  for (int batch = 0; batch < batch_count; batch++)
  {
    // ...
    Xptrs[batch] = Xs[batch].device_data();

    // ...
    Yptrs[batch] = Ys[batch].device_data();

    Ads[batch].resize(M, N);
    Adptrs[batch] = Ads[batch].device_data();

    Ahs[batch].resize(M, N);
    Ahptrs[batch] = Ahs[batch].data();

    for (int i = 0; i < M; i++)
    { /* fill Xs[batch] */ }

    for (int i = 0; i < N; i++)
      Ys[batch][i] = N - i;

    for (int j = 0; j < M; j++)
      for (int i = 0; i < N; i++)
      {
        Ads[batch][j][i] = i + j * 2;
        Ahs[batch][j][i] = i + j * 2;
      }

    Xs[batch].updateTo();
    Ys[batch].updateTo();
    Ads[batch].updateTo();
  }

  // ...

  Vector<T, PinnedDualAllocator<T>> alpha(batch_count);

  for (int batch = 0; batch < batch_count; batch++)
  { /* set alpha[batch] */ }

  syclBLAS::ger_batched(handle, M, N, alpha.device_data(), Xptrs.device_data(), 1, Yptrs.device_data(), 1,
                        Adptrs.device_data(), M, batch_count)
      .wait();

  for (int batch = 0; batch < batch_count; batch++)
  {
    Ads[batch].updateFrom();
    BLAS::ger(M, N, alpha[batch], Xs[batch].data(), 1, Ys[batch].data(), 1, Ahs[batch].data(), M);

    for (int j = 0; j < M; j++)
      for (int i = 0; i < N; i++)
        CHECK(Ads[batch][j][i] == Ahs[batch][j][i]);
  }
}
  // ...
  const int batch_count = 23;

  std::cout << "Testing ger_batched" << std::endl;
  test_ger_batched<float>(M, N, batch_count);
  test_ger_batched<double>(M, N, batch_count);
#if defined(QMC_COMPLEX)
  test_ger_batched<std::complex<float>>(N, M, batch_count);
  test_ger_batched<std::complex<double>>(N, M, batch_count);
#endif
Referenced functions and types:

void test_ger_batched(const int M, const int N, const int batch_count)
sycl::queue & getSYCLDefaultDeviceDefaultQueue()
  Returns a reference to the per-device default queue (its use is sketched after this list).
static void gemv_trans(int n, int m, const double *restrict amat, const double *restrict x, double *restrict y)
sycl::event ger_batched(sycl::queue &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const size_t batch_count, const std::vector< sycl::event > &events={})
  In-house ger_batched implemented in SYCL; can be dropped if a vendor-optimized version becomes available (a host reference for the per-batch operation is sketched after this list).
static void gemv(int n, int m, const double *restrict amat, const double *restrict x, double *restrict y)
sycl::event gemv_batched(sycl::queue &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const size_t batch_count, const std::vector< sycl::event > &events={})
  In-house gemv_batched implemented in SYCL; can be dropped if a vendor-optimized version becomes available (a host reference for the per-batch operation is sketched after this list).
PinnedDualAllocator<T>: these allocators are meant to make code that should be generic with respect to the accelerator code flavor ... (the host/device round trip they enable is sketched after this list).
static void ger(int m, int n, double alpha, const double *x, int incx, const double *y, int incy, double *a, int lda)
sycl::event gemv(sycl::queue &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy, const std::vector< sycl::event > &events)
Vector<T, Alloc>: manages memory through Alloc directly and allows referencing an existing ...
void test_gemv_batched(const int M_b, const int N_b, const char trans, const int batch_count)
void test_gemv(const int M_b, const int N_b, const char trans)
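The getSYCLDefaultDeviceDefaultQueue, PinnedDualAllocator, and Vector entries above combine into the host/device round trip the tests rely on. The following is a minimal sketch of that pattern using only member functions that appear in the listing (element access, updateTo, device_data, updateFrom); double stands in for the test's template parameter T, the size m and the fill value are placeholders, and the device work itself is left abstract:

  sycl::queue& handle = getSYCLDefaultDeviceDefaultQueue(); // shared per-device queue, taken by reference

  Vector<double, PinnedDualAllocator<double>> y(m); // one host buffer plus a device mirror
  for (int i = 0; i < m; i++)
    y[i] = 0.0;        // fill through the host side (y[i] / y.data())
  y.updateTo();        // copy host -> device

  // ... device work reads/writes y.device_data(), e.g. a syclBLAS call taking handle;
  //     the returned sycl::event is waited on before touching y on the host again ...

  y.updateFrom();      // copy device -> host so the result can be checked via y[i]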
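The gemv_batched entry gives the signature but not the operation. As a reading aid, here is a plain host-side reference of what each batch entry is expected to compute, assuming the usual BLAS convention of a column-major m-by-n matrix with leading dimension lda: y := alpha * op(A) * x + beta * y, with op(A) = A for trans 'N' and A^T for trans 'T'. This illustrates the semantics only; it is not the SYCL implementation:

  // Host reference for one batch entry of gemv (column-major A, leading dimension lda).
  template<typename T>
  void gemv_reference(char trans, int m, int n, T alpha, const T* A, int lda,
                      const T* x, int incx, T beta, T* y, int incy)
  {
    const int y_len = (trans == 'T') ? n : m; // length of y depends on op(A)
    const int x_len = (trans == 'T') ? m : n; // length of x likewise
    for (int r = 0; r < y_len; r++)
    {
      T sum{};
      for (int c = 0; c < x_len; c++)
      {
        // element op(A)(r, c): A(r, c) for 'N', A(c, r) for 'T'
        const T a = (trans == 'T') ? A[c + r * lda] : A[r + c * lda];
        sum += a * x[c * incx];
      }
      y[r * incy] = alpha * sum + beta * y[r * incy];
    }
  }

The batched entry point applies this with per-batch alpha[b] and beta[b] to each (A[b], x[b], y[b]) triple, which is what the tests above check against BLAS::gemv and BLAS::gemv_trans batch by batch.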
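Similarly, each batch entry of ger_batched performs the rank-1 update A := alpha * x * y^T + A. A host-side reference under the same column-major assumption (an illustration of the operation, not the in-house kernel):

  #include <cstddef>

  // Host reference: rank-1 update of a column-major m-by-n matrix A with leading dimension lda.
  template<typename T>
  void ger_reference(int m, int n, T alpha, const T* x, int incx, const T* y, int incy, T* A, int lda)
  {
    for (int j = 0; j < n; j++)
      for (int i = 0; i < m; i++)
        A[i + j * lda] += alpha * x[i * incx] * y[j * incy];
  }

  // The batched form applies the update to every (x[b], y[b], A[b]) triple.
  template<typename T>
  void ger_batched_reference(int m, int n, const T* alpha, const T* const x[], int incx,
                             const T* const y[], int incy, T* const A[], int lda, std::size_t batch_count)
  {
    for (std::size_t b = 0; b < batch_count; b++)
      ger_reference(m, n, alpha[b], x[b], incx, y[b], incy, A[b], lda);
  }

This is what the test compares against via BLAS::ger applied to the host copies Ahs[batch].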
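One detail worth spelling out: the *_batched entry points take arrays of per-batch device pointers, and the tests build those arrays with the same dual allocator, then hand over their device copies (Aptrs.device_data() and so on). A sketch of the pattern, reusing names from test_gemv_batched with double standing in for T; the updateTo() call on the pointer array is an assumption here, since the extracted listing does not show that step explicitly:

  // Collect each batch's device buffer pointer in a dual-space pointer array.
  Vector<const double*, PinnedDualAllocator<const double*>> ptrs(batch_count);
  for (int b = 0; b < batch_count; b++)
    ptrs[b] = As[b].device_data();   // host-side entry pointing at batch b's device buffer
  ptrs.updateTo();                   // assumed: the pointer array itself must also reach the device
  // ptrs.device_data() is then what a *_batched call receives as its x[] / A[] argument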