QMCPACK
qmcplusplus::compute::BLAS Namespace Reference

Functions

void gemm (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const float &alpha, const float *A, int lda, const float *B, int ldb, const float &beta, float *C, int ldc)
 
void gemm (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const double &alpha, const double *A, int lda, const double *B, int ldb, const double &beta, double *C, int ldc)
 
void gemm (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const std::complex< float > &alpha, const std::complex< float > *A, int lda, const std::complex< float > *B, int ldb, const std::complex< float > &beta, std::complex< float > *C, int ldc)
 
void gemm (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const std::complex< double > &alpha, const std::complex< double > *A, int lda, const std::complex< double > *B, int ldb, const std::complex< double > &beta, std::complex< double > *C, int ldc)
 
void gemv (BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const float &alpha, const float *const A, const int lda, const float *const x, const int incx, const float &beta, float *const y, const int incy)
 
void gemv (BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const double &alpha, const double *const A, const int lda, const double *const x, const int incx, const double &beta, double *const y, const int incy)
 
void gemv (BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const std::complex< float > &alpha, const std::complex< float > *A, const int lda, const std::complex< float > *x, const int incx, const std::complex< float > &beta, std::complex< float > *y, const int incy)
 
void gemv (BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const std::complex< double > &alpha, const std::complex< double > *A, const int lda, const std::complex< double > *x, const int incx, const std::complex< double > &beta, std::complex< double > *y, const int incy)
 
template<typename T >
void gemv_batched (BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
 
void ger (BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const float &alpha, const float *const x, const int incx, const float *const y, const int incy, float *const A, const int lda)
 
void ger (BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const double &alpha, const double *const x, const int incx, const double *const y, const int incy, double *const A, const int lda)
 
void ger (BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const std::complex< float > &alpha, const std::complex< float > *x, const int incx, const std::complex< float > *y, const int incy, std::complex< float > *A, const int lda)
 
void ger (BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const std::complex< double > &alpha, const std::complex< double > *x, const int incx, const std::complex< double > *y, const int incy, std::complex< double > *A, const int lda)
 
template<typename T >
void ger_batched (BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
 
template<typename T >
void copy_batched (BLASHandle< PlatformKind::CUDA > &handle, const int n, const T *const in[], const int incx, T *const out[], const int incy, const int batch_count)
 
void gemm_batched (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const float &alpha, const float *const A[], int lda, const float *const B[], int ldb, const float &beta, float *const C[], int ldc, int batchCount)
 
void gemm_batched (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const std::complex< float > &alpha, const std::complex< float > *const A[], int lda, const std::complex< float > *const B[], int ldb, const std::complex< float > &beta, std::complex< float > *const C[], int ldc, int batchCount)
 
void gemm_batched (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const double &alpha, const double *const A[], int lda, const double *const B[], int ldb, const double &beta, double *const C[], int ldc, int batchCount)
 
void gemm_batched (BLASHandle< PlatformKind::CUDA > &handle, const char transa, const char transb, int m, int n, int k, const std::complex< double > &alpha, const std::complex< double > *const A[], int lda, const std::complex< double > *const B[], int ldb, const std::complex< double > &beta, std::complex< double > *const C[], int ldc, int batchCount)
 
template<typename T >
void gemm (BLASHandle< PlatformKind::OMPTARGET > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *A, int lda, const T *B, int ldb, const T &beta, T *C, int ldc)
 
template<typename T >
void gemm_batched (BLASHandle< PlatformKind::OMPTARGET > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *const A[], int lda, const T *const B[], int ldb, const T &beta, T *const C[], int ldc, int batchCount)
 
template<typename T >
void gemv (BLASHandle< PlatformKind::OMPTARGET > &handle, const char trans, const int m, const int n, const T &alpha, const T *const A, const int lda, const T *const x, const int incx, const T &beta, T *const y, const int incy)
 
template<typename T >
void gemv_batched (BLASHandle< PlatformKind::OMPTARGET > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
 
template<typename T >
void ger (BLASHandle< PlatformKind::OMPTARGET > &handle, const int m, const int n, const T &alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
 
template<typename T >
void ger_batched (BLASHandle< PlatformKind::OMPTARGET > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
 
template<typename T >
void copy_batched (BLASHandle< PlatformKind::OMPTARGET > &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
 
template<typename T >
void gemm (BLASHandle< PlatformKind::SYCL > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *A, int lda, const T *B, int ldb, const T &beta, T *C, int ldc)
 
template<typename T >
void gemv (BLASHandle< PlatformKind::SYCL > &handle, const char trans, const int m, const int n, const T &alpha, const T *const A, const int lda, const T *const x, const int incx, const T &beta, T *const y, const int incy)
 
template<typename T >
void gemv_batched (BLASHandle< PlatformKind::SYCL > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const size_t batch_count)
 
template<typename T >
void ger (BLASHandle< PlatformKind::SYCL > &handle, const int m, const int n, const T &alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
 
template<typename T >
void ger_batched (BLASHandle< PlatformKind::SYCL > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const size_t batch_count)
 
template<typename T >
void copy_batched (BLASHandle< PlatformKind::SYCL > &handle, syclBLAS::syclBLAS_int n, const T *const in[], syclBLAS::syclBLAS_int incx, T *const out[], syclBLAS::syclBLAS_int incy, const size_t batch_count)
 
template<typename T >
void gemm_batched (BLASHandle< PlatformKind::SYCL > &handle, const char transa, const char transb, syclBLAS::syclBLAS_int m, syclBLAS::syclBLAS_int n, syclBLAS::syclBLAS_int k, const T &alpha, const T *const A[], syclBLAS::syclBLAS_int lda, const T *const B[], syclBLAS::syclBLAS_int ldb, const T &beta, T *const C[], syclBLAS::syclBLAS_int ldc, const size_t batch_count)
 

Function Documentation

◆ copy_batched() [1/3]

void qmcplusplus::compute::BLAS::copy_batched ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const int  n,
const T *const  x[],
const int  incx,
T *const  y[],
const int  incy,
const int  batch_count 
)
inline

Definition at line 147 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::ompBLAS::copy_batched(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, and qmcplusplus::n.

154 {
155  if (ompBLAS::copy_batched(handle.h_ompblas, n, x, incx, y, incy, batch_count) != 0)
156  throw std::runtime_error("ompBLAS::copy_batched failed!");
157 }
void copy_batched(BLASHandle< PlatformKind::OMPTARGET > &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)

◆ copy_batched() [2/3]

void qmcplusplus::compute::BLAS::copy_batched ( BLASHandle< PlatformKind::SYCL > &  handle,
syclBLAS::syclBLAS_int  n,
const T *const  in[],
syclBLAS::syclBLAS_int  incx,
T *const  out[],
syclBLAS::syclBLAS_int  incy,
const size_t  batch_count 
)
inline

Definition at line 155 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::charge::e, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

162 {
163  try
164  {
165  syclBLAS::syclBLAS_int bc = batch_count;
166  oneapi::mkl::blas::copy_batch(handle.queue_, &n, const_cast<const T**>(in), &incx, const_cast<T**>(out), &incy, 1,
167  &bc);
168  }
169  catch (oneapi::mkl::exception& e)
170  {
171  throw std::runtime_error(std::string("AccelBLAS::copy_batch exception: ") + e.what());
172  }
173 }
std::int64_t syclBLAS_int
Definition: syclBLAS.hpp:24

◆ copy_batched() [3/3]

void qmcplusplus::compute::BLAS::copy_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  n,
const T *const  in[],
const int  incx,
T *const  out[],
const int  incy,
const int  batch_count 
)
inline

Definition at line 303 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::cuBLAS_MFs::copy_batched(), qmcplusplus::cudaErrorCheck(), BLASHandle< PlatformKind::CUDA >::h_stream, and qmcplusplus::n.

Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), and DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow().

310 {
311  cudaErrorCheck(cuBLAS_MFs::copy_batched(handle.h_stream, n, in, incx, out, incy, batch_count),
312  "cuBLAS_MFs::copy_batched failed!");
313 }
#define cudaErrorCheck(ans, cause)
Definition: CUDAerror.h:21
void copy_batched(BLASHandle< PlatformKind::CUDA > &handle, const int n, const T *const in[], const int incx, T *const out[], const int incy, const int batch_count)

◆ gemm() [1/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::SYCL > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const T &  alpha,
const T *  A,
int  lda,
const T *  B,
int  ldb,
const T &  beta,
T *  C,
int  ldc 
)
inline

Definition at line 33 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::syclBLAS::convertTransEnum(), qmcplusplus::Units::charge::e, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

47 {
48  try
49  {
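50  oneapi::mkl::blas::gemm(handle.queue_, syclBLAS::convertTransEnum(transa), syclBLAS::convertTransEnum(transb), m, n,
51  k, alpha, A, lda, B, ldb, beta, C, ldc);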
52  }
53  catch (oneapi::mkl::exception& e)
54  {
55  throw std::runtime_error(std::string("AccelBLAS::gemm exception: ") + e.what());
56  }
57 }
oneapi::mkl::transpose convertTransEnum(char trans)
Definition: syclBLAS.hpp:28
void gemm(BLASHandle< PlatformKind::SYCL > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *A, int lda, const T *B, int ldb, const T &beta, T *C, int ldc)
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm() [2/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const T &  alpha,
const T *  A,
int  lda,
const T *  B,
int  ldb,
const T &  beta,
T *  C,
int  ldc 
)
inline

Definition at line 34 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::ompBLAS::gemm(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

48 {
49  if (ompBLAS::gemm(handle.h_ompblas, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) != 0)
50  throw std::runtime_error("ompBLAS::gemm failed!");
51 }
void gemm(BLASHandle< PlatformKind::OMPTARGET > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *A, int lda, const T *B, int ldb, const T &beta, T *C, int ldc)
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm() [3/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const float &  alpha,
const float *  A,
int  lda,
const float *  B,
int  ldb,
const float &  beta,
float *  C,
int  ldc 
)
inline

◆ gemm() [4/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const double &  alpha,
const double *  A,
int  lda,
const double *  B,
int  ldb,
const double &  beta,
double *  C,
int  ldc 
)
inline

Definition at line 69 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::cuBLAS::convertOperation(), cublasDgemm, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

83 {
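84  cublasErrorCheck(cublasDgemm(handle.h_cublas, cuBLAS::convertOperation(transa), cuBLAS::convertOperation(transb), m,
85  n, k, &alpha, A, lda, B, ldb, &beta, C, ldc),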
86  "cublasDgemm failed!");
87 }
#define cublasDgemm
Definition: cuda2hip.h:52
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemm() [5/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const std::complex< float > &  alpha,
const std::complex< float > *  A,
int  lda,
const std::complex< float > *  B,
int  ldb,
const std::complex< float > &  beta,
std::complex< float > *  C,
int  ldc 
)
inline

Definition at line 89 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasCgemm, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

103 {
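104  cublasErrorCheck(cublasCgemm(handle.h_cublas, cuBLAS::convertOperation(transa), cuBLAS::convertOperation(transb), m,
105  n, k, castNativeType(&alpha), castNativeType(A), lda, castNativeType(B), ldb,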
106  castNativeType(&beta), castNativeType(C), ldc),
107  "cublasCgemm failed!");
108 }
#define cublasCgemm
Definition: cuda2hip.h:45
#define castNativeType
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemm() [6/6]

void qmcplusplus::compute::BLAS::gemm ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const std::complex< double > &  alpha,
const std::complex< double > *  A,
int  lda,
const std::complex< double > *  B,
int  ldb,
const std::complex< double > &  beta,
std::complex< double > *  C,
int  ldc 
)
inline

Definition at line 110 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasErrorCheck, cublasZgemm, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

124 {
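125  cublasErrorCheck(cublasZgemm(handle.h_cublas, cuBLAS::convertOperation(transa), cuBLAS::convertOperation(transb), m,
126  n, k, castNativeType(&alpha), castNativeType(A), lda, castNativeType(B), ldb,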
127  castNativeType(&beta), castNativeType(C), ldc),
128  "cublasZgemm failed!");
129 }
#define castNativeType
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cublasZgemm
Definition: cuda2hip.h:66

◆ gemm_batched() [1/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const T &  alpha,
const T *const  A[],
int  lda,
const T *const  B[],
int  ldb,
const T &  beta,
T *const  C[],
int  ldc,
int  batchCount 
)
inline

Definition at line 54 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::ompBLAS::gemm_batched(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

69 {
70  if (ompBLAS::gemm_batched(handle.h_ompblas, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc,
71  batchCount) != 0)
72  throw std::runtime_error("ompBLAS::gemm_batched failed!");
73 }
void gemm_batched(BLASHandle< PlatformKind::OMPTARGET > &handle, const char transa, const char transb, int m, int n, int k, const T &alpha, const T *const A[], int lda, const T *const B[], int ldb, const T &beta, T *const C[], int ldc, int batchCount)
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched() [2/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::SYCL > &  handle,
const char  transa,
const char  transb,
syclBLAS::syclBLAS_int  m,
syclBLAS::syclBLAS_int  n,
syclBLAS::syclBLAS_int  k,
const T &  alpha,
const T *const  A[],
syclBLAS::syclBLAS_int  lda,
const T *const  B[],
syclBLAS::syclBLAS_int  ldb,
const T &  beta,
T *const  C[],
syclBLAS::syclBLAS_int  ldc,
const size_t  batch_count 
)
inline

Definition at line 176 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::syclBLAS::convertTransEnum(), qmcplusplus::Units::charge::e, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

191 {
192  auto trans_a = syclBLAS::convertTransEnum(transa);
193  auto trans_b = syclBLAS::convertTransEnum(transb);
194  try
195  {
196 #if defined(GEMM_BATCH_SPAN)
197  sycl::span alpha_span(sycl::malloc_shared<T>(1, handle.queue_), 1);
198  alpha_span[0] = alpha;
199  sycl::span beta_span(sycl::malloc_shared<T>(1, handle.queue_), 1);
200  beta_span[0] = beta;
201 
202  oneapi::mkl::blas::gemm_batch(handle.queue_, sycl::span{&trans_a, 1}, sycl::span{&trans_b, 1}, sycl::span{&m, 1},
203  sycl::span{&n, 1}, sycl::span{&k, 1}, alpha_span,
204  sycl::span{const_cast<const T**>(A), batch_count}, sycl::span{&lda, 1},
205  sycl::span{const_cast<const T**>(B), batch_count}, sycl::span{&ldb, 1}, beta_span,
206  sycl::span{const_cast<T**>(C), batch_count}, sycl::span{&ldc, 1}, 1,
207  sycl::span{const_cast<size_t*>(&batch_count), 1});
208  sycl::free(alpha_span.data(), handle.queue_);
209  sycl::free(beta_span.data(), handle.queue_);
210 #else
211  syclBLAS::syclBLAS_int bc = batch_count;
212  oneapi::mkl::blas::gemm_batch(handle.queue_, &trans_a, &trans_b, &m, &n, &k, const_cast<const T*>(&alpha),
213  const_cast<const T**>(A), &lda, const_cast<const T**>(B), &ldb,
214  const_cast<const T*>(&beta), const_cast<T**>(C), &ldc, 1, &bc);
215 #endif
216  }
217  catch (oneapi::mkl::exception& e)
218  {
219  throw std::runtime_error(std::string("AccelBLAS::gemm_batched exception: ") + e.what());
220  }
221 }
oneapi::mkl::transpose convertTransEnum(char trans)
Definition: syclBLAS.hpp:28
double B(double x, int k, int i, const std::vector< double > &t)
std::int64_t syclBLAS_int
Definition: syclBLAS.hpp:24

◆ gemm_batched() [3/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const float &  alpha,
const float *const  A[],
int  lda,
const float *const  B[],
int  ldb,
const float &  beta,
float *const  C[],
int  ldc,
int  batchCount 
)
inline

Definition at line 315 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::cuBLAS::convertOperation(), cublasErrorCheck, cublasSgemmBatched, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and qmcplusplus::test_one_gemm().

330 {
331  cublasErrorCheck(cublasSgemmBatched(handle.h_cublas, cuBLAS::convertOperation(transa),
332  cuBLAS::convertOperation(transb), m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
333  batchCount),
334  "cublasSgemmBatched failed!");
335 }
#define cublasSgemmBatched
Definition: cuda2hip.h:60
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemm_batched() [4/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const std::complex< float > &  alpha,
const std::complex< float > *const  A[],
int  lda,
const std::complex< float > *const  B[],
int  ldb,
const std::complex< float > &  beta,
std::complex< float > *const  C[],
int  ldc,
int  batchCount 
)
inline

Definition at line 337 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasCgemmBatched, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

352 {
353  // This is necessary to not break the complex CUDA type mapping semantics while
354  // dealing with the const cuComplex * A[] style API of cuBLAS
355  // C++ makes you jump through some hoops to remove the bottom const on a double pointer.
356  // see typetraits/type_manipulation.hpp
357  auto non_const_A = const_cast<BottomConstRemoved<decltype(A)>::type>(A);
358  auto non_const_B = const_cast<BottomConstRemoved<decltype(B)>::type>(B);
359  auto non_const_C = const_cast<BottomConstRemoved<decltype(C)>::type>(C);
360 
361  cublasErrorCheck(cublasCgemmBatched(handle.h_cublas, cuBLAS::convertOperation(transa),
362  cuBLAS::convertOperation(transb), m, n, k, castNativeType(&alpha),
363  castNativeType(non_const_A), lda, castNativeType(non_const_B), ldb,
364  castNativeType(&beta), castNativeType(non_const_C), ldc, batchCount),
365  "cublasCgemmBatched failed!");
366 }
#define castNativeType
typename std::add_pointer< typename std::remove_const< typename std::remove_pointer< CT >::type >::type >::type type
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cublasCgemmBatched
Definition: cuda2hip.h:46

◆ gemm_batched() [5/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const double &  alpha,
const double *const  A[],
int  lda,
const double *const  B[],
int  ldb,
const double &  beta,
double *const  C[],
int  ldc,
int  batchCount 
)
inline

Definition at line 368 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::cuBLAS::convertOperation(), cublasDgemmBatched, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

383 {
384  cublasErrorCheck(cublasDgemmBatched(handle.h_cublas, cuBLAS::convertOperation(transa),
385  cuBLAS::convertOperation(transb), m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc,
386  batchCount),
387  "cublasDgemmBatched failed!");
388 }
#define cublasDgemmBatched
Definition: cuda2hip.h:53
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemm_batched() [6/6]

void qmcplusplus::compute::BLAS::gemm_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  transa,
const char  transb,
int  m,
int  n,
int  k,
const std::complex< double > &  alpha,
const std::complex< double > *const  A[],
int  lda,
const std::complex< double > *const  B[],
int  ldb,
const std::complex< double > &  beta,
std::complex< double > *const  C[],
int  ldc,
int  batchCount 
)
inline

Definition at line 390 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasErrorCheck, cublasZgemmBatched, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

405 {
406  auto non_const_A = const_cast<BottomConstRemoved<decltype(A)>::type>(A);
407  auto non_const_B = const_cast<BottomConstRemoved<decltype(B)>::type>(B);
408  auto non_const_C = const_cast<BottomConstRemoved<decltype(C)>::type>(C);
409 
410  cublasErrorCheck(cublasZgemmBatched(handle.h_cublas, cuBLAS::convertOperation(transa),
411  cuBLAS::convertOperation(transb), m, n, k, castNativeType(&alpha),
412  castNativeType(non_const_A), lda, castNativeType(non_const_B), ldb,
413  castNativeType(&beta), castNativeType(non_const_C), ldc, batchCount),
414  "cublasZgemmBatched failed!");
415 }
#define castNativeType
typename std::add_pointer< typename std::remove_const< typename std::remove_pointer< CT >::type >::type >::type type
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
double B(double x, int k, int i, const std::vector< double > &t)
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cublasZgemmBatched
Definition: cuda2hip.h:67

◆ gemv() [1/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::SYCL > &  handle,
const char  trans,
const int  m,
const int  n,
const T &  alpha,
const T *const  A,
const int  lda,
const T *const  x,
const int  incx,
const T &  beta,
T *const  y,
const int  incy 
)
inline

Definition at line 60 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::syclBLAS::convertTransEnum(), qmcplusplus::Units::charge::e, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

72 {
73  try
74  {
75  oneapi::mkl::blas::gemv(handle.queue_, syclBLAS::convertTransEnum(trans), m, n, alpha, A, lda, x, incx, beta, y,
76  incy);
77  }
78  catch (oneapi::mkl::exception& e)
79  {
80  throw std::runtime_error(std::string("AccelBLAS::gemv exception: ") + e.what());
81  }
82 }
oneapi::mkl::transpose convertTransEnum(char trans)
Definition: syclBLAS.hpp:28
void gemv(BLASHandle< PlatformKind::SYCL > &handle, const char trans, const int m, const int n, const T &alpha, const T *const A, const int lda, const T *const x, const int incx, const T &beta, T *const y, const int incy)

◆ gemv() [2/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const char  trans,
const int  m,
const int  n,
const T &  alpha,
const T *const  A,
const int  lda,
const T *const  x,
const int  incx,
const T &  beta,
T *const  y,
const int  incy 
)
inline

Definition at line 77 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::ompBLAS::gemv(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

89 {
90  if (ompBLAS::gemv(handle.h_ompblas, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) != 0)
91  throw std::runtime_error("ompBLAS::gemv_batched failed!");
92 }
void gemv(BLASHandle< PlatformKind::OMPTARGET > &handle, const char trans, const int m, const int n, const T &alpha, const T *const A, const int lda, const T *const x, const int incx, const T &beta, T *const y, const int incy)

◆ gemv() [3/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  trans,
const int  m,
const int  n,
const float &  alpha,
const float *const  A,
const int  lda,
const float *const  x,
const int  incx,
const float &  beta,
float *const  y,
const int  incy 
)
inline

Definition at line 131 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::cuBLAS::convertOperation(), cublasErrorCheck, cublasSgemv, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by qmcplusplus::test_one_gemv().

143 {
144  cublasErrorCheck(cublasSgemv(handle.h_cublas, cuBLAS::convertOperation(trans), m, n, &alpha, A, lda, x, incx, &beta,
145  y, incy),
146  "cublasSgemv failed!");
147 }
#define cublasSgemv
Definition: cuda2hip.h:57
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemv() [4/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  trans,
const int  m,
const int  n,
const double &  alpha,
const double *const  A,
const int  lda,
const double *const  x,
const int  incx,
const double &  beta,
double *const  y,
const int  incy 
)
inline

Definition at line 149 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::cuBLAS::convertOperation(), cublasDgemv, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

161 {
162  cublasErrorCheck(cublasDgemv(handle.h_cublas, cuBLAS::convertOperation(trans), m, n, &alpha, A, lda, x, incx, &beta,
163  y, incy),
164  "cublasDgemv failed!");
165 }
#define cublasDgemv
Definition: cuda2hip.h:50
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemv() [5/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  trans,
const int  m,
const int  n,
const std::complex< float > &  alpha,
const std::complex< float > *  A,
const int  lda,
const std::complex< float > *  x,
const int  incx,
const std::complex< float > &  beta,
std::complex< float > *  y,
const int  incy 
)
inline

Definition at line 167 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasCgemv, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

179 {
180  cublasErrorCheck(cublasCgemv(handle.h_cublas, cuBLAS::convertOperation(trans), m, n, castNativeType(&alpha),
181  castNativeType(A), lda, castNativeType(x), incx, castNativeType(&beta),
182  castNativeType(y), incy),
183  "cublasCgemv failed!");
184 }
#define cublasCgemv
Definition: cuda2hip.h:43
#define castNativeType
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemv() [6/6]

void qmcplusplus::compute::BLAS::gemv ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  trans,
const int  m,
const int  n,
const std::complex< double > &  alpha,
const std::complex< double > *  A,
const int  lda,
const std::complex< double > *  x,
const int  incx,
const std::complex< double > &  beta,
std::complex< double > *  y,
const int  incy 
)
inline

Definition at line 186 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, castNativeType, qmcplusplus::cuBLAS::convertOperation(), cublasErrorCheck, cublasZgemv, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

198 {
199  cublasErrorCheck(cublasZgemv(handle.h_cublas, cuBLAS::convertOperation(trans), m, n, castNativeType(&alpha),
200  castNativeType(A), lda, castNativeType(x), incx, castNativeType(&beta),
201  castNativeType(y), incy),
202  "cublasZgemv failed!");
203 }
#define cublasZgemv
Definition: cuda2hip.h:64
#define castNativeType
cublasOperation_t convertOperation(const char trans)
Definition: cuBLAS.hpp:96
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ gemv_batched() [1/3]

void qmcplusplus::compute::BLAS::gemv_batched ( BLASHandle< PlatformKind::SYCL > &  handle,
const char  trans,
const int  m,
const int  n,
const T *  alpha,
const T *const  A[],
const int  lda,
const T *const  x[],
const int  incx,
const T *  beta,
T *const  y[],
const int  incy,
const size_t  batch_count 
)
inline

Definition at line 85 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::Units::charge::e, qmcplusplus::syclBLAS::gemv_batched(), qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

98 {
99  try
100  { // calling makeshift version for now due to the lack of vendor optimized versions
101  syclBLAS::gemv_batched(handle.queue_, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count);
102  }
103  catch (sycl::exception& e)
104  {
105  throw std::runtime_error(std::string("AccelBLAS::gemv_batched exception: ") + e.what());
106  }
107 }
void gemv_batched(BLASHandle< PlatformKind::SYCL > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const size_t batch_count)

◆ gemv_batched() [2/3]

void qmcplusplus::compute::BLAS::gemv_batched ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const char  trans,
const int  m,
const int  n,
const T *  alpha,
const T *const  A[],
const int  lda,
const T *const  x[],
const int  incx,
const T *  beta,
T *const  y[],
const int  incy,
const int  batch_count 
)
inline

Definition at line 95 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::ompBLAS::gemv_batched(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

108 {
109  if (ompBLAS::gemv_batched(handle.h_ompblas, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count) != 0)
110  throw std::runtime_error("ompBLAS::gemv_batched failed!");
111 }
void gemv_batched(BLASHandle< PlatformKind::OMPTARGET > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)

◆ gemv_batched() [3/3]

void qmcplusplus::compute::BLAS::gemv_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const char  trans,
const int  m,
const int  n,
const T *  alpha,
const T *const  A[],
const int  lda,
const T *const  x[],
const int  incx,
const T *  beta,
T *const  y[],
const int  incy,
const int  batch_count 
)
inline

Definition at line 206 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::cudaErrorCheck(), qmcplusplus::cuBLAS_MFs::gemv_batched(), BLASHandle< PlatformKind::CUDA >::h_stream, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateRow(), and qmcplusplus::test_one_gemv().

219 {
220  cudaErrorCheck(cuBLAS_MFs::gemv_batched(handle.h_stream, trans, m, n, alpha, A, lda, x, incx, beta, y, incy,
221  batch_count),
222  "cuBLAS_MFs::gemv_batched failed!");
223 }
#define cudaErrorCheck(ans, cause)
Definition: CUDAerror.h:21
void gemv_batched(BLASHandle< PlatformKind::CUDA > &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)

◆ ger() [1/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::SYCL > &  handle,
const int  m,
const int  n,
const T &  alpha,
const T *const  x,
const int  incx,
const T *const  y,
const int  incy,
T *const  A,
const int  lda 
)
inline

Definition at line 110 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::Units::charge::e, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

120 {
121  try
122  {
123  oneapi::mkl::blas::ger(handle.queue_, m, n, alpha, x, incx, y, incy, A, lda);
124  }
125  catch (oneapi::mkl::exception& e)
126  {
127  throw std::runtime_error(std::string("AccelBLAS::ger exception: ") + e.what());
128  }
129 }
void ger(BLASHandle< PlatformKind::SYCL > &handle, const int m, const int n, const T &alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)

◆ ger() [2/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const int  m,
const int  n,
const T &  alpha,
const T *const  x,
const int  incx,
const T *const  y,
const int  incy,
T *const  A,
const int  lda 
)
inline

Definition at line 114 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::ompBLAS::ger(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

124 {
125  if (ompBLAS::ger(handle.h_ompblas, m, n, alpha, x, incx, y, incy, A, lda) != 0)
126  throw std::runtime_error("ompBLAS::ger failed!");
127 }
void ger(BLASHandle< PlatformKind::OMPTARGET > &handle, const int m, const int n, const T &alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)

◆ ger() [3/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  m,
const int  n,
const float &  alpha,
const float *const  x,
const int  incx,
const float *const  y,
const int  incy,
float *const  A,
const int  lda 
)
inline

Definition at line 225 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, cublasErrorCheck, cublasSger, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by qmcplusplus::test_one_ger().

235 {
236  cublasErrorCheck(cublasSger(handle.h_cublas, m, n, &alpha, x, incx, y, incy, A, lda), "cublasSger failed!");
237 }
#define cublasSger
Definition: cuda2hip.h:58
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ ger() [4/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  m,
const int  n,
const double &  alpha,
const double *const  x,
const int  incx,
const double *const  y,
const int  incy,
double *const  A,
const int  lda 
)
inline

Definition at line 239 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, cublasDger, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

249 {
250  cublasErrorCheck(cublasDger(handle.h_cublas, m, n, &alpha, x, incx, y, incy, A, lda), "cublasDger failed!");
251 }
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cublasDger
Definition: cuda2hip.h:51

◆ ger() [5/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  m,
const int  n,
const std::complex< float > &  alpha,
const std::complex< float > *  x,
const int  incx,
const std::complex< float > *  y,
const int  incy,
std::complex< float > *  A,
const int  lda 
)
inline

Definition at line 253 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, castNativeType, cublasCgeru, cublasErrorCheck, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

263 {
264  cublasErrorCheck(cublasCgeru(handle.h_cublas, m, n, castNativeType(&alpha), castNativeType(x), incx,
265  castNativeType(y), incy, castNativeType(A), lda),
266  "cublasCgeru failed!");
267 }
#define castNativeType
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cublasCgeru
Definition: cuda2hip.h:44

◆ ger() [6/6]

void qmcplusplus::compute::BLAS::ger ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  m,
const int  n,
const std::complex< double > &  alpha,
const std::complex< double > *  x,
const int  incx,
const std::complex< double > *  y,
const int  incy,
std::complex< double > *  A,
const int  lda 
)
inline

Definition at line 269 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, castNativeType, cublasErrorCheck, cublasZgeru, BLASHandle< PlatformKind::CUDA >::h_cublas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

279 {
280  cublasErrorCheck(cublasZgeru(handle.h_cublas, m, n, castNativeType(&alpha), castNativeType(x), incx,
281  castNativeType(y), incy, castNativeType(A), lda),
282  "cublasZgeru failed!");
283 }
#define cublasZgeru
Definition: cuda2hip.h:65
#define castNativeType
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34

◆ ger_batched() [1/3]

void qmcplusplus::compute::BLAS::ger_batched ( BLASHandle< PlatformKind::OMPTARGET > &  handle,
const int  m,
const int  n,
const T *  alpha,
const T *const  x[],
const int  incx,
const T *const  y[],
const int  incy,
T *const  A[],
const int  lda,
const int  batch_count 
)
inline

Definition at line 130 of file AccelBLAS_OMPTarget.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::ompBLAS::ger_batched(), BLASHandle< PlatformKind::OMPTARGET >::h_ompblas, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

141 {
142  if (ompBLAS::ger_batched(handle.h_ompblas, m, n, alpha, x, incx, y, incy, A, lda, batch_count) != 0)
143  throw std::runtime_error("ompBLAS::ger_batched failed!");
144 }
void ger_batched(BLASHandle< PlatformKind::OMPTARGET > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)

◆ ger_batched() [2/3]

void qmcplusplus::compute::BLAS::ger_batched ( BLASHandle< PlatformKind::SYCL > &  handle,
const int  m,
const int  n,
const T *  alpha,
const T *const  x[],
const int  incx,
const T *const  y[],
const int  incy,
T *const  A[],
const int  lda,
const size_t  batch_count 
)
inline

Definition at line 132 of file AccelBLAS_SYCL.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::Units::charge::e, qmcplusplus::syclBLAS::ger_batched(), qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::n, and BLASHandle< PlatformKind::SYCL >::queue_.

143 {
144  try
145  { // calling makeshift version for now due to the lack of vendor optimized versions
146  syclBLAS::ger_batched(handle.queue_, m, n, alpha, x, incx, y, incy, A, lda, batch_count);
147  }
148  catch (sycl::exception& e)
149  {
150  throw std::runtime_error(std::string("AccelBLAS::ger_batched exception: ") + e.what());
151  }
152 }
void ger_batched(BLASHandle< PlatformKind::SYCL > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const size_t batch_count)

◆ ger_batched() [3/3]

void qmcplusplus::compute::BLAS::ger_batched ( BLASHandle< PlatformKind::CUDA > &  handle,
const int  m,
const int  n,
const T *  alpha,
const T *const  x[],
const int  incx,
const T *const  y[],
const int  incy,
T *const  A[],
const int  lda,
const int  batch_count 
)
inline

Definition at line 286 of file AccelBLAS_CUDA.hpp.

References qmcplusplus::Units::distance::A, qmcplusplus::cudaErrorCheck(), qmcplusplus::cuBLAS_MFs::ger_batched(), BLASHandle< PlatformKind::CUDA >::h_stream, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateRow(), and qmcplusplus::test_one_ger().

297 {
298  cudaErrorCheck(cuBLAS_MFs::ger_batched(handle.h_stream, m, n, alpha, x, incx, y, incy, A, lda, batch_count),
299  "cuBLAS_MFs::ger_batched failed!");
300 }
#define cudaErrorCheck(ans, cause)
Definition: CUDAerror.h:21
void ger_batched(BLASHandle< PlatformKind::CUDA > &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)