QMCPACK
ompBLAS.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2020 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 #ifndef QMCPLUSPLUS_OMPBLAS_H
14 #define QMCPLUSPLUS_OMPBLAS_H
15 
16 #include <complex>
17 
18 namespace qmcplusplus
19 {
20 /** Implement selected batched and non-batched BLAS2 calls using OpenMP offload for different data types S/C/D/Z
21  * 1) column major like the BLAS fortran API
22  * 2) all the functions are synchronous, expected to be changed to asynchronous in the future.
23  * 3) all the pointer arguments are expected as device pointers.
24  * 4) in batched APIs, alpha and beta are **not** scalars but pointers to array of batch size.
25  */
26 namespace ompBLAS
27 {
28 
29 using ompBLAS_status = int;
30 using ompBLAS_handle = int;
31 
32 template<typename T>
34  const char transa,
35  const char transb,
36  const int M,
37  const int N,
38  const int K,
39  const T& alpha,
40  const T* const A,
41  const int lda,
42  const T* const B,
43  const int ldb,
44  const T& beta,
45  T* const C,
46  const int ldc);
47 
48 template<typename T>
50  const char transa,
51  const char transb,
52  const int M,
53  const int N,
54  const int K,
55  const T& alpha,
56  const T* const A[],
57  const int lda,
58  const T* const B[],
59  const int ldb,
60  const T& beta,
61  T* const C[],
62  const int ldc,
63  const int batch_count);
64 
65 template<typename T>
67  const char trans,
68  const int m,
69  const int n,
70  const T alpha,
71  const T* const A,
72  const int lda,
73  const T* const x,
74  const int incx,
75  const T beta,
76  T* const y,
77  const int incy);
78 
79 template<typename T>
81  const char trans,
82  const int m,
83  const int n,
84  const T* alpha,
85  const T* const A[],
86  const int lda,
87  const T* const x[],
88  const int incx,
89  const T* beta,
90  T* const y[],
91  const int incy,
92  const int batch_count);
93 
94 template<typename T>
96  const int m,
97  const int n,
98  const T alpha,
99  const T* const x,
100  const int incx,
101  const T* const y,
102  const int incy,
103  T* const A,
104  const int lda);
105 
106 template<typename T>
108  const int m,
109  const int n,
110  const T* alpha,
111  const T* const x[],
112  const int incx,
113  const T* const y[],
114  const int incy,
115  T* const A[],
116  const int lda,
117  const int batch_count);
118 
119 /**
120  * @brief copy device data from x to y
121  *
122  * for b_i in [0,batch_count)
123  * for i in [0,n)
124  * y[b_i][i*incy] = x[b_i][i*incx]
125  *
126  * @param n number of elements to copy for each group in the batch
127  * @param x,y arrays with length `batch_count`; device pointers to start of data to be copied from(x)/to(y)
128  * @param incx,incy storage spacing between elements of x/y to be copied from/to
129  * @param batch_count number of batches to process
130  */
131 template<typename T>
133  const int n,
134  const T* const x[],
135  const int incx,
136  T* const y[],
137  const int incy,
138  const int batch_count);
139 
140 /**
141  * @brief copy device data from x to y with additional offset applied to array of device pointers
142  *
143  * for b_i in [0,batch_count)
144  * for i in [0,n)
145  * y[b_i][y_offset + i*incy] = x[b_i][x_offset + i*incx]
146  *
147  * useful for copying from/to a single row/column of a batch of matrices when a list of device pointers
148  * to the start of the matrices is already available
149  *
150  * @param n number of elements to copy for each group in the batch
151  * @param x,y arrays with length `batch_count`; device pointers to start of data to be copied from(x)/to(y)
152  * @param x_offset,y_offset distance (in number of elements) from pointer given in x/y to location of first element to be copied
153  * @param incx,incy storage spacing between elements of x/y to be copied from/to
154  * @param batch_count number of batches to process
155  */
156 template<typename T>
158  const int n,
159  const T* const x[],
160  const int x_offset,
161  const int incx,
162  T* const y[],
163  const int y_offset,
164  const int incy,
165  const int batch_count);
166 
167 template<typename T>
168 ompBLAS_status copy(ompBLAS_handle& handle, const int n, const T* const x, const int incx, T* const y, const int incy);
169 
170 } // namespace ompBLAS
171 
172 } // namespace qmcplusplus
173 #endif // QMCPLUSPLUS_OMPBLAS_H
ompBLAS_status gemm(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
ompBLAS_status gemv(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
ompBLAS_status gemv_batched(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
ompBLAS_status gemm_batched(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A[], const int lda, const T *const B[], const int ldb, const T &beta, T *const C[], const int ldc, const int batch_count)
ompBLAS_status copy_batched_offset(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
copy device data from x to y with additional offset applied to array of device pointers ...
ompBLAS_status ger_batched(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
ompBLAS_status ger(ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
ompBLAS_status copy(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
double B(double x, int k, int i, const std::vector< double > &t)
ompBLAS_status copy_batched(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
copy device data from x to y