QMCPACK
qmcplusplus::ompBLAS Namespace Reference

Implements selected batched and non-batched BLAS2 calls using OpenMP offload for the S/C/D/Z data types: 1) matrices are column-major, as in the Fortran BLAS API; 2) all functions are currently synchronous and are expected to become asynchronous in the future. More...

Typedefs

using ompBLAS_status = int
 
using ompBLAS_handle = int
 

Functions

template<typename T >
ompBLAS_status gemm_impl (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
 
template<>
ompBLAS_status gemm< float > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const float &alpha, const float *const A, const int lda, const float *const B, const int ldb, const float &beta, float *const C, const int ldc)
 
template<>
ompBLAS_status gemm< double > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const double &alpha, const double *const A, const int lda, const double *const B, const int ldb, const double &beta, double *const C, const int ldc)
 
template<>
ompBLAS_status gemm< std::complex< float > > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const std::complex< float > &alpha, const std::complex< float > *const A, const int lda, const std::complex< float > *const B, const int ldb, const std::complex< float > &beta, std::complex< float > *const C, const int ldc)
 
template<>
ompBLAS_status gemm< std::complex< double > > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const std::complex< double > &alpha, const std::complex< double > *const A, const int lda, const std::complex< double > *const B, const int ldb, const std::complex< double > &beta, std::complex< double > *const C, const int ldc)
 
template<typename T >
ompBLAS_status gemm_batched_impl (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T alpha, const T *const Aarray[], const int lda, const T *const Barray[], const int ldb, const T beta, T *const Carray[], const int ldc, const int batch_count)
 
template<>
ompBLAS_status gemm_batched< float > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const float &alpha, const float *const A[], const int lda, const float *const B[], const int ldb, const float &beta, float *const C[], const int ldc, const int batch_count)
 
template<>
ompBLAS_status gemm_batched< double > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const double &alpha, const double *const A[], const int lda, const double *const B[], const int ldb, const double &beta, double *const C[], const int ldc, const int batch_count)
 
template<>
ompBLAS_status gemm_batched< std::complex< float > > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const std::complex< float > &alpha, const std::complex< float > *const A[], const int lda, const std::complex< float > *const B[], const int ldb, const std::complex< float > &beta, std::complex< float > *const C[], const int ldc, const int batch_count)
 
template<>
ompBLAS_status gemm_batched< std::complex< double > > (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const std::complex< double > &alpha, const std::complex< double > *const A[], const int lda, const std::complex< double > *const B[], const int ldb, const std::complex< double > &beta, std::complex< double > *const C[], const int ldc, const int batch_count)
 
template<typename T >
ompBLAS_status gemv_impl (ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
 
template<>
ompBLAS_status gemv< float > (ompBLAS_handle &handle, const char trans, const int m, const int n, const float alpha, const float *const A, const int lda, const float *const x, const int incx, const float beta, float *const y, const int incy)
 
template<>
ompBLAS_status gemv< double > (ompBLAS_handle &handle, const char trans, const int m, const int n, const double alpha, const double *const A, const int lda, const double *const x, const int incx, const double beta, double *const y, const int incy)
 
template<>
ompBLAS_status gemv< std::complex< float > > (ompBLAS_handle &handle, const char trans, const int m, const int n, const std::complex< float > alpha, const std::complex< float > *const A, const int lda, const std::complex< float > *const x, const int incx, const std::complex< float > beta, std::complex< float > *const y, const int incy)
 
template<>
ompBLAS_status gemv< std::complex< double > > (ompBLAS_handle &handle, const char trans, const int m, const int n, const std::complex< double > alpha, const std::complex< double > *const A, const int lda, const std::complex< double > *const x, const int incx, const std::complex< double > beta, std::complex< double > *const y, const int incy)
 
template<typename T >
ompBLAS_status gemv_batched_impl (ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status gemv_batched< float > (ompBLAS_handle &handle, const char trans, const int m, const int n, const float *alpha, const float *const A[], const int lda, const float *const x[], const int incx, const float *beta, float *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status gemv_batched< double > (ompBLAS_handle &handle, const char trans, const int m, const int n, const double *alpha, const double *const A[], const int lda, const double *const x[], const int incx, const double *beta, double *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status gemv_batched< std::complex< float > > (ompBLAS_handle &handle, const char trans, const int m, const int n, const std::complex< float > *alpha, const std::complex< float > *const A[], const int lda, const std::complex< float > *const x[], const int incx, const std::complex< float > *beta, std::complex< float > *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status gemv_batched< std::complex< double > > (ompBLAS_handle &handle, const char trans, const int m, const int n, const std::complex< double > *alpha, const std::complex< double > *const A[], const int lda, const std::complex< double > *const x[], const int incx, const std::complex< double > *beta, std::complex< double > *const y[], const int incy, const int batch_count)
 
template<typename T >
ompBLAS_status ger_impl (ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
 
template<>
ompBLAS_status ger< float > (ompBLAS_handle &handle, const int m, const int n, const float alpha, const float *const x, const int incx, const float *const y, const int incy, float *const A, const int lda)
 
template<>
ompBLAS_status ger< double > (ompBLAS_handle &handle, const int m, const int n, const double alpha, const double *const x, const int incx, const double *const y, const int incy, double *const A, const int lda)
 
template<>
ompBLAS_status ger< std::complex< float > > (ompBLAS_handle &handle, const int m, const int n, const std::complex< float > alpha, const std::complex< float > *const x, const int incx, const std::complex< float > *const y, const int incy, std::complex< float > *const A, const int lda)
 
template<>
ompBLAS_status ger< std::complex< double > > (ompBLAS_handle &handle, const int m, const int n, const std::complex< double > alpha, const std::complex< double > *const x, const int incx, const std::complex< double > *const y, const int incy, std::complex< double > *const A, const int lda)
 
template<typename T >
ompBLAS_status ger_batched_impl (ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
 
template<>
ompBLAS_status ger_batched< float > (ompBLAS_handle &handle, const int m, const int n, const float *alpha, const float *const x[], const int incx, const float *const y[], const int incy, float *const A[], const int lda, const int batch_count)
 
template<>
ompBLAS_status ger_batched< double > (ompBLAS_handle &handle, const int m, const int n, const double *alpha, const double *const x[], const int incx, const double *const y[], const int incy, double *const A[], const int lda, const int batch_count)
 
template<>
ompBLAS_status ger_batched< std::complex< float > > (ompBLAS_handle &handle, const int m, const int n, const std::complex< float > *alpha, const std::complex< float > *const x[], const int incx, const std::complex< float > *const y[], const int incy, std::complex< float > *const A[], const int lda, const int batch_count)
 
template<>
ompBLAS_status ger_batched< std::complex< double > > (ompBLAS_handle &handle, const int m, const int n, const std::complex< double > *alpha, const std::complex< double > *const x[], const int incx, const std::complex< double > *const y[], const int incy, std::complex< double > *const A[], const int lda, const int batch_count)
 
template<typename T >
ompBLAS_status copy_batched_impl (ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched< float > (ompBLAS_handle &handle, const int n, const float *const x[], const int incx, float *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched< double > (ompBLAS_handle &handle, const int n, const double *const x[], const int incx, double *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched< std::complex< float > > (ompBLAS_handle &handle, const int n, const std::complex< float > *const x[], const int incx, std::complex< float > *const y[], const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched< std::complex< double > > (ompBLAS_handle &handle, const int n, const std::complex< double > *const x[], const int incx, std::complex< double > *const y[], const int incy, const int batch_count)
 
template<typename T >
ompBLAS_status copy_batched_offset_impl (ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched_offset< float > (ompBLAS_handle &handle, const int n, const float *const x[], const int x_offset, const int incx, float *const y[], const int y_offset, const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched_offset< double > (ompBLAS_handle &handle, const int n, const double *const x[], const int x_offset, const int incx, double *const y[], const int y_offset, const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched_offset< std::complex< float > > (ompBLAS_handle &handle, const int n, const std::complex< float > *const x[], const int x_offset, const int incx, std::complex< float > *const y[], const int y_offset, const int incy, const int batch_count)
 
template<>
ompBLAS_status copy_batched_offset< std::complex< double > > (ompBLAS_handle &handle, const int n, const std::complex< double > *const x[], const int x_offset, const int incx, std::complex< double > *const y[], const int y_offset, const int incy, const int batch_count)
 
template<typename T >
ompBLAS_status copy_impl (ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
 
template<>
ompBLAS_status copy< float > (ompBLAS_handle &handle, const int n, const float *const x, const int incx, float *const y, const int incy)
 
template<>
ompBLAS_status copy< double > (ompBLAS_handle &handle, const int n, const double *const x, const int incx, double *const y, const int incy)
 
template<>
ompBLAS_status copy< std::complex< float > > (ompBLAS_handle &handle, const int n, const std::complex< float > *const x, const int incx, std::complex< float > *const y, const int incy)
 
template<>
ompBLAS_status copy< std::complex< double > > (ompBLAS_handle &handle, const int n, const std::complex< double > *const x, const int incx, std::complex< double > *const y, const int incy)
 
template<typename T >
ompBLAS_status gemm (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
 
template<typename T >
ompBLAS_status gemm_batched (ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A[], const int lda, const T *const B[], const int ldb, const T &beta, T *const C[], const int ldc, const int batch_count)
 
template<typename T >
ompBLAS_status gemv (ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
 
template<typename T >
ompBLAS_status gemv_batched (ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
 
template<typename T >
ompBLAS_status ger (ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
 
template<typename T >
ompBLAS_status ger_batched (ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
 
template<typename T >
ompBLAS_status copy_batched (ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
 copy device data from x to y More...
 
template<typename T >
ompBLAS_status copy_batched_offset (ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
 copy device data from x to y with additional offset applied to array of device pointers More...
 
template<typename T >
ompBLAS_status copy (ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
 

Detailed Description

Implements selected batched and non-batched BLAS2 calls using OpenMP offload for the S/C/D/Z data types.

1) Matrices are column-major, as in the Fortran BLAS API.
2) All functions are currently synchronous; they are expected to become asynchronous in the future.
3) All pointer arguments are expected to be device pointers.
4) In the batched APIs gemv_batched and ger_batched, alpha (and beta, where present) are not scalars but pointers to arrays of length batch_count. Note that gemm_batched, as declared here, instead takes scalar alpha and beta by const reference.

Typedef Documentation

◆ ompBLAS_handle

using ompBLAS_handle = int

Definition at line 30 of file ompBLAS.hpp.

◆ ompBLAS_status

using ompBLAS_status = int

Definition at line 29 of file ompBLAS.hpp.

Function Documentation

◆ copy()

ompBLAS_status qmcplusplus::ompBLAS::copy ( ompBLAS_handle handle,
const int  n,
const T *const  x,
const int  incx,
T *const  y,
const int  incy 
)

◆ copy< double >()

ompBLAS_status qmcplusplus::ompBLAS::copy< double > ( ompBLAS_handle handle,
const int  n,
const double *const  x,
const int  incx,
double *const  y,
const int  incy 
)

Definition at line 990 of file ompBLAS.cpp.

References copy_impl(), and qmcplusplus::n.

996 {
997  return copy_impl(handle, n, x, incx, y, incy);
998 }
ompBLAS_status copy_impl(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
Definition: ompBLAS.cpp:963

◆ copy< float >()

ompBLAS_status qmcplusplus::ompBLAS::copy< float > ( ompBLAS_handle handle,
const int  n,
const float *const  x,
const int  incx,
float *const  y,
const int  incy 
)

Definition at line 979 of file ompBLAS.cpp.

References copy_impl(), and qmcplusplus::n.

985 {
986  return copy_impl(handle, n, x, incx, y, incy);
987 }
ompBLAS_status copy_impl(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
Definition: ompBLAS.cpp:963

◆ copy< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::copy< std::complex< double > > ( ompBLAS_handle handle,
const int  n,
const std::complex< double > *const  x,
const int  incx,
std::complex< double > *const  y,
const int  incy 
)

Definition at line 1012 of file ompBLAS.cpp.

References copy_impl(), and qmcplusplus::n.

1018 {
1019  return copy_impl(handle, n, x, incx, y, incy);
1020 }
ompBLAS_status copy_impl(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
Definition: ompBLAS.cpp:963

◆ copy< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::copy< std::complex< float > > ( ompBLAS_handle handle,
const int  n,
const std::complex< float > *const  x,
const int  incx,
std::complex< float > *const  y,
const int  incy 
)

Definition at line 1001 of file ompBLAS.cpp.

References copy_impl(), and qmcplusplus::n.

1007 {
1008  return copy_impl(handle, n, x, incx, y, incy);
1009 }
ompBLAS_status copy_impl(ompBLAS_handle &handle, const int n, const T *const x, const int incx, T *const y, const int incy)
Definition: ompBLAS.cpp:963

◆ copy_batched()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched ( ompBLAS_handle handle,
const int  n,
const T *const  x[],
const int  incx,
T *const  y[],
const int  incy,
const int  batch_count 
)

copy device data from x to y

for b_i in [0, batch_count)
  for i in [0, n)
    y[b_i][i*incy] = x[b_i][i*incx]

Parameters
n — number of elements to copy for each group in the batch
x, y — arrays of length batch_count; device pointers to the start of the data to be copied from (x) / to (y)
incx, incy — storage spacing between the elements of x/y to be copied from/to
batch_count — number of batches to process

Referenced by qmcplusplus::compute::BLAS::copy_batched(), MultiDiracDeterminant::mw_evaluateDetsAndGradsForPtclMove(), MultiDiracDeterminant::mw_evaluateDetsForPtclMove(), and MultiDiracDeterminant::mw_evaluateGrads().

◆ copy_batched< double >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched< double > ( ompBLAS_handle handle,
const int  n,
const double *const  x[],
const int  incx,
double *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 846 of file ompBLAS.cpp.

References copy_batched_impl(), and qmcplusplus::n.

853 {
854  return copy_batched_impl(handle, n, x, incx, y, incy, batch_count);
855 }
ompBLAS_status copy_batched_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:815

◆ copy_batched< float >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched< float > ( ompBLAS_handle handle,
const int  n,
const float *const  x[],
const int  incx,
float *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 834 of file ompBLAS.cpp.

References copy_batched_impl(), and qmcplusplus::n.

841 {
842  return copy_batched_impl(handle, n, x, incx, y, incy, batch_count);
843 }
ompBLAS_status copy_batched_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:815

◆ copy_batched< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched< std::complex< double > > ( ompBLAS_handle handle,
const int  n,
const std::complex< double > *const  x[],
const int  incx,
std::complex< double > *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 871 of file ompBLAS.cpp.

References copy_batched_impl(), and qmcplusplus::n.

878 {
879  return copy_batched_impl(handle, n, x, incx, y, incy, batch_count);
880 }
ompBLAS_status copy_batched_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:815

◆ copy_batched< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched< std::complex< float > > ( ompBLAS_handle handle,
const int  n,
const std::complex< float > *const  x[],
const int  incx,
std::complex< float > *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 859 of file ompBLAS.cpp.

References copy_batched_impl(), and qmcplusplus::n.

866 {
867  return copy_batched_impl(handle, n, x, incx, y, incy, batch_count);
868 }
ompBLAS_status copy_batched_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int incx, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:815

◆ copy_batched_impl()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_impl ( ompBLAS_handle handle,
const int  n,
const T *const  x[],
const int  incx,
T *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 815 of file ompBLAS.cpp.

References qmcplusplus::n.

Referenced by copy_batched< double >(), copy_batched< float >(), copy_batched< std::complex< double > >(), and copy_batched< std::complex< float > >().

822 {
823  if (n == 0 || batch_count == 0)
824  return 0;
825 
826  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(x, y)")
827  for (uint32_t ib = 0; ib < batch_count; ib++)
828  for (uint32_t i = 0; i < n; i++)
829  y[ib][i * incy] = x[ib][i * incx];
830  return 0;
831 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ copy_batched_offset()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset ( ompBLAS_handle handle,
const int  n,
const T *const  x[],
const int  x_offset,
const int  incx,
T *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

copy device data from x to y with additional offset applied to array of device pointers

for b_i in [0, batch_count)
  for i in [0, n)
    y[b_i][y_offset + i*incy] = x[b_i][x_offset + i*incx]

Useful for copying from/to a single row or column of each matrix in a batch when a list of device pointers to the start of the matrices is already available.

Parameters
n — number of elements to copy for each group in the batch
x, y — arrays of length batch_count; device pointers to the start of the data to be copied from (x) / to (y)
x_offset, y_offset — distance (in number of elements) from the pointer given in x/y to the location of the first element to be copied
incx, incy — storage spacing between the elements of x/y to be copied from/to
batch_count — number of batches to process

Referenced by MultiDiracDeterminant::mw_evaluateDetsAndGradsForPtclMove(), MultiDiracDeterminant::mw_evaluateDetsForPtclMove(), and MultiDiracDeterminant::mw_InverseUpdateByColumn().

◆ copy_batched_offset< double >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset< double > ( ompBLAS_handle handle,
const int  n,
const double *const  x[],
const int  x_offset,
const int  incx,
double *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

Definition at line 919 of file ompBLAS.cpp.

References copy_batched_offset_impl(), and qmcplusplus::n.

928 {
929  return copy_batched_offset_impl(handle, n, x, x_offset, incx, y, y_offset, incy, batch_count);
930 }
ompBLAS_status copy_batched_offset_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
Definition: ompBLAS.cpp:884

◆ copy_batched_offset< float >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset< float > ( ompBLAS_handle handle,
const int  n,
const float *const  x[],
const int  x_offset,
const int  incx,
float *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

Definition at line 905 of file ompBLAS.cpp.

References copy_batched_offset_impl(), and qmcplusplus::n.

914 {
915  return copy_batched_offset_impl(handle, n, x, x_offset, incx, y, y_offset, incy, batch_count);
916 }
ompBLAS_status copy_batched_offset_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
Definition: ompBLAS.cpp:884

◆ copy_batched_offset< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset< std::complex< double > > ( ompBLAS_handle handle,
const int  n,
const std::complex< double > *const  x[],
const int  x_offset,
const int  incx,
std::complex< double > *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

Definition at line 948 of file ompBLAS.cpp.

References copy_batched_offset_impl(), and qmcplusplus::n.

957 {
958  return copy_batched_offset_impl(handle, n, x, x_offset, incx, y, y_offset, incy, batch_count);
959 }
ompBLAS_status copy_batched_offset_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
Definition: ompBLAS.cpp:884

◆ copy_batched_offset< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset< std::complex< float > > ( ompBLAS_handle handle,
const int  n,
const std::complex< float > *const  x[],
const int  x_offset,
const int  incx,
std::complex< float > *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

Definition at line 934 of file ompBLAS.cpp.

References copy_batched_offset_impl(), and qmcplusplus::n.

943 {
944  return copy_batched_offset_impl(handle, n, x, x_offset, incx, y, y_offset, incy, batch_count);
945 }
ompBLAS_status copy_batched_offset_impl(ompBLAS_handle &handle, const int n, const T *const x[], const int x_offset, const int incx, T *const y[], const int y_offset, const int incy, const int batch_count)
Definition: ompBLAS.cpp:884

◆ copy_batched_offset_impl()

ompBLAS_status qmcplusplus::ompBLAS::copy_batched_offset_impl ( ompBLAS_handle handle,
const int  n,
const T *const  x[],
const int  x_offset,
const int  incx,
T *const  y[],
const int  y_offset,
const int  incy,
const int  batch_count 
)

Definition at line 884 of file ompBLAS.cpp.

References qmcplusplus::n.

Referenced by copy_batched_offset< double >(), copy_batched_offset< float >(), copy_batched_offset< std::complex< double > >(), and copy_batched_offset< std::complex< float > >().

893 {
894  if (n == 0 || batch_count == 0)
895  return 0;
896 
897  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(x, y)")
898  for (uint32_t ib = 0; ib < batch_count; ib++)
899  for (uint32_t i = 0; i < n; i++)
900  y[ib][y_offset + i * incy] = x[ib][x_offset + i * incx];
901  return 0;
902 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ copy_impl()

ompBLAS_status qmcplusplus::ompBLAS::copy_impl ( ompBLAS_handle handle,
const int  n,
const T *const  x,
const int  incx,
T *const  y,
const int  incy 
)

Definition at line 963 of file ompBLAS.cpp.

References qmcplusplus::n.

Referenced by copy< double >(), copy< float >(), copy< std::complex< double > >(), and copy< std::complex< float > >().

969 {
970  if (n == 0)
971  return 0;
972  PRAGMA_OFFLOAD("omp target teams distribute parallel for is_device_ptr(x, y)")
973  for (size_t i = 0; i < n; i++)
974  y[i * incy] = x[i * incx];
975  return 0;
976 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ gemm()

ompBLAS_status qmcplusplus::ompBLAS::gemm ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const T &  alpha,
const T *const  A,
const int  lda,
const T *const  B,
const int  ldb,
const T &  beta,
T *const  C,
const int  ldc 
)

◆ gemm< double >()

ompBLAS_status qmcplusplus::ompBLAS::gemm< double > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const double &  alpha,
const double *const  A,
const int  lda,
const double *const  B,
const int  ldb,
const double &  beta,
double *const  C,
const int  ldc 
)

Definition at line 119 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

133 {
134  return gemm_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
135 }
ompBLAS_status gemm_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
Definition: ompBLAS.cpp:27
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm< float >()

ompBLAS_status qmcplusplus::ompBLAS::gemm< float > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const float &  alpha,
const float *const  A,
const int  lda,
const float *const  B,
const int  ldb,
const float &  beta,
float *const  C,
const int  ldc 
)

Definition at line 100 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

114 {
115  return gemm_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
116 }
ompBLAS_status gemm_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
Definition: ompBLAS.cpp:27
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::gemm< std::complex< double > > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const std::complex< double > &  alpha,
const std::complex< double > *const  A,
const int  lda,
const std::complex< double > *const  B,
const int  ldb,
const std::complex< double > &  beta,
std::complex< double > *const  C,
const int  ldc 
)

Definition at line 158 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

172 {
173  return gemm_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
174 }
ompBLAS_status gemm_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
Definition: ompBLAS.cpp:27
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::gemm< std::complex< float > > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const std::complex< float > &  alpha,
const std::complex< float > *const  A,
const int  lda,
const std::complex< float > *const  B,
const int  ldb,
const std::complex< float > &  beta,
std::complex< float > *const  C,
const int  ldc 
)

Definition at line 139 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

153 {
154  return gemm_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
155 }
ompBLAS_status gemm_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T &alpha, const T *const A, const int lda, const T *const B, const int ldb, const T &beta, T *const C, const int ldc)
Definition: ompBLAS.cpp:27
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const T &  alpha,
const T *const  A[],
const int  lda,
const T *const  B[],
const int  ldb,
const T &  beta,
T *const  C[],
const int  ldc,
const int  batch_count 
)

◆ gemm_batched< double >()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched< double > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const double &  alpha,
const double *const  A[],
const int  lda,
const double *const  B[],
const int  ldb,
const double &  beta,
double *const  C[],
const int  ldc,
const int  batch_count 
)

Definition at line 300 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_batched_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

315 {
316  return gemm_batched_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, batch_count);
317 }
ompBLAS_status gemm_batched_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T alpha, const T *const Aarray[], const int lda, const T *const Barray[], const int ldb, const T beta, T *const Carray[], const int ldc, const int batch_count)
Definition: ompBLAS.cpp:178
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched< float >()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched< float > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const float &  alpha,
const float *const  A[],
const int  lda,
const float *const  B[],
const int  ldb,
const float &  beta,
float *const  C[],
const int  ldc,
const int  batch_count 
)

Definition at line 280 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_batched_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

295 {
296  return gemm_batched_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, batch_count);
297 }
ompBLAS_status gemm_batched_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T alpha, const T *const Aarray[], const int lda, const T *const Barray[], const int ldb, const T beta, T *const Carray[], const int ldc, const int batch_count)
Definition: ompBLAS.cpp:178
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched< std::complex< double > > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const std::complex< double > &  alpha,
const std::complex< double > *const  A[],
const int  lda,
const std::complex< double > *const  B[],
const int  ldb,
const std::complex< double > &  beta,
std::complex< double > *const  C[],
const int  ldc,
const int  batch_count 
)

Definition at line 341 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_batched_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

356 {
357  return gemm_batched_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, batch_count);
358 }
ompBLAS_status gemm_batched_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T alpha, const T *const Aarray[], const int lda, const T *const Barray[], const int ldb, const T beta, T *const Carray[], const int ldc, const int batch_count)
Definition: ompBLAS.cpp:178
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched< std::complex< float > > ( ompBLAS_handle handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const std::complex< float > &  alpha,
const std::complex< float > *const  A[],
const int  lda,
const std::complex< float > *const  B[],
const int  ldb,
const std::complex< float > &  beta,
std::complex< float > *const  C[],
const int  ldc,
const int  batch_count 
)

Definition at line 321 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, gemm_batched_impl(), qmcplusplus::Units::energy::K, qmcplusplus::lda, and qmcplusplus::Units::force::N.

336 {
337  return gemm_batched_impl(handle, transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, batch_count);
338 }
ompBLAS_status gemm_batched_impl(ompBLAS_handle &handle, const char transa, const char transb, const int M, const int N, const int K, const T alpha, const T *const Aarray[], const int lda, const T *const Barray[], const int ldb, const T beta, T *const Carray[], const int ldc, const int batch_count)
Definition: ompBLAS.cpp:178
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_batched_impl()

ompBLAS_status qmcplusplus::ompBLAS::gemm_batched_impl ( ompBLAS_handle &  handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const T  alpha,
const T *const  Aarray[],
const int  lda,
const T *const  Barray[],
const int  ldb,
const T  beta,
T *const  Carray[],
const int  ldc,
const int  batch_count 
)

Definition at line 178 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::Units::energy::K, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::Units::force::N, and qmcplusplus::n.

Referenced by gemm_batched< double >(), gemm_batched< float >(), gemm_batched< std::complex< double > >(), and gemm_batched< std::complex< float > >().

193 {
194  if (M == 0 || N == 0 || K == 0 || batch_count == 0)
195  return 0;
196 
197  if (transa == 'T' && transb == 'N') //A(ji) * B(jk) -> C(ik)
198  {
199  PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(Aarray, Barray, Carray)")
200  for (size_t iw = 0; iw < batch_count; iw++)
201  {
202  auto A = Aarray[iw];
203  auto B = Barray[iw];
204  auto C = Carray[iw];
205  PRAGMA_OFFLOAD("omp parallel for collapse(2)")
206  for (size_t m = 0; m < M; m++)
207  for (size_t n = 0; n < N; n++)
208  {
209  T sum(0);
210  for (size_t k = 0; k < K; k++)
211  sum += A[lda * m + k] * B[ldb * n + k];
212  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
213  }
214  }
215  }
216  else if (transa == 'T' && transb == 'T')
217  {
218  PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(Aarray, Barray, Carray)")
219  for (size_t iw = 0; iw < batch_count; iw++)
220  {
221  auto A = Aarray[iw];
222  auto B = Barray[iw];
223  auto C = Carray[iw];
224  PRAGMA_OFFLOAD("omp parallel for collapse(2)")
225  for (size_t m = 0; m < M; m++)
226  for (size_t n = 0; n < N; n++)
227  {
228  T sum(0);
229  for (size_t k = 0; k < K; k++)
230  sum += A[lda * m + k] * B[ldb * k + n];
231  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
232  }
233  }
234  }
235  else if (transa == 'N' && transb == 'T')
236  {
237  PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(Aarray, Barray, Carray)")
238  for (size_t iw = 0; iw < batch_count; iw++)
239  {
240  auto A = Aarray[iw];
241  auto B = Barray[iw];
242  auto C = Carray[iw];
243  PRAGMA_OFFLOAD("omp parallel for collapse(2)")
244  for (size_t m = 0; m < M; m++)
245  for (size_t n = 0; n < N; n++)
246  {
247  T sum(0);
248  for (size_t k = 0; k < K; k++)
249  sum += A[lda * k + m] * B[ldb * k + n];
250  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
251  }
252  }
253  }
254  else if (transa == 'N' && transb == 'N')
255  {
256  PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(Aarray, Barray, Carray)")
257  for (size_t iw = 0; iw < batch_count; iw++)
258  {
259  auto A = Aarray[iw];
260  auto B = Barray[iw];
261  auto C = Carray[iw];
262  PRAGMA_OFFLOAD("omp parallel for collapse(2)")
263  for (size_t n = 0; n < N; n++)
264  for (size_t m = 0; m < M; m++)
265  {
266  T sum(0);
267  for (size_t k = 0; k < K; k++)
268  sum += A[lda * k + m] * B[ldb * n + k];
269  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
270  }
271  }
272  }
273  else
274  throw std::runtime_error("Error: trans=='C' not yet implemented for ompBLAS::gemm.");
275 
276  return 0;
277 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemm_impl()

ompBLAS_status qmcplusplus::ompBLAS::gemm_impl ( ompBLAS_handle &  handle,
const char  transa,
const char  transb,
const int  M,
const int  N,
const int  K,
const T &  alpha,
const T *const  A,
const int  lda,
const T *const  B,
const int  ldb,
const T &  beta,
T *const  C,
const int  ldc 
)

Definition at line 27 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, B(), qmcplusplus::Units::charge::C, qmcplusplus::Units::energy::K, qmcplusplus::lda, qmcplusplus::Units::distance::m, qmcplusplus::Units::force::N, and qmcplusplus::n.

Referenced by gemm< double >(), gemm< float >(), gemm< std::complex< double > >(), and gemm< std::complex< float > >().

41 {
42  if (M == 0 || N == 0 || K == 0)
43  return 0;
44 
45  if (transa == 'T' && transb == 'N') //A(ji) * B(jk) -> C(ik)
46  {
47  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(A, B, C)")
48  for (size_t m = 0; m < M; m++)
49  for (size_t n = 0; n < N; n++)
50  {
51  T sum(0);
52  for (size_t k = 0; k < K; k++)
53  sum += A[lda * m + k] * B[ldb * n + k];
54  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
55  }
56  }
57  else if (transa == 'T' && transb == 'T')
58  {
59  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(A, B, C)")
60  for (size_t m = 0; m < M; m++)
61  for (size_t n = 0; n < N; n++)
62  {
63  T sum(0);
64  for (size_t k = 0; k < K; k++)
65  sum += A[lda * m + k] * B[ldb * k + n];
66  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
67  }
68  }
69  else if (transa == 'N' && transb == 'T')
70  {
71  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(A, B, C)")
72  for (size_t m = 0; m < M; m++)
73  for (size_t n = 0; n < N; n++)
74  {
75  T sum(0);
76  for (size_t k = 0; k < K; k++)
77  sum += A[lda * k + m] * B[ldb * k + n];
78  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
79  }
80  }
81  else if (transa == 'N' && transb == 'N')
82  {
83  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(A, B, C)")
84  for (size_t n = 0; n < N; n++)
85  for (size_t m = 0; m < M; m++)
86  {
87  T sum(0);
88  for (size_t k = 0; k < K; k++)
89  sum += A[lda * k + m] * B[ldb * n + k];
90  C[n * ldc + m] = alpha * sum + (beta == T(0) ? T(0) : C[n * ldc + m] * beta);
91  }
92  }
93  else
94  throw std::runtime_error("Error: trans=='C' not yet implemented for ompBLAS::gemm.");
95 
96  return 0;
97 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])
double B(double x, int k, int i, const std::vector< double > &t)

◆ gemv()

ompBLAS_status qmcplusplus::ompBLAS::gemv ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const T  alpha,
const T *const  A,
const int  lda,
const T *const  x,
const int  incx,
const T  beta,
T *const  y,
const int  incy 
)

◆ gemv< double >()

ompBLAS_status qmcplusplus::ompBLAS::gemv< double > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const double  alpha,
const double *const  A,
const int  lda,
const double *const  x,
const int  incx,
const double  beta,
double *const  y,
const int  incy 
)

Definition at line 438 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

450 {
451  return gemv_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
452 }
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362

◆ gemv< float >()

ompBLAS_status qmcplusplus::ompBLAS::gemv< float > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const float  alpha,
const float *const  A,
const int  lda,
const float *const  x,
const int  incx,
const float  beta,
float *const  y,
const int  incy 
)

Definition at line 421 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

433 {
434  return gemv_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
435 }
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362

◆ gemv< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::gemv< std::complex< double > > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const std::complex< double >  alpha,
const std::complex< double > *const  A,
const int  lda,
const std::complex< double > *const  x,
const int  incx,
const std::complex< double >  beta,
std::complex< double > *const  y,
const int  incy 
)

Definition at line 473 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

485 {
486  return gemv_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
487 }
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362

◆ gemv< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::gemv< std::complex< float > > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const std::complex< float >  alpha,
const std::complex< float > *const  A,
const int  lda,
const std::complex< float > *const  x,
const int  incx,
const std::complex< float >  beta,
std::complex< float > *const  y,
const int  incy 
)

Definition at line 456 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

468 {
469  return gemv_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
470 }
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362

◆ gemv_batched()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const T *  alpha,
const T *const  A[],
const int  lda,
const T *const  x[],
const int  incx,
const T *  beta,
T *const  y[],
const int  incy,
const int  batch_count 
)

◆ gemv_batched< double >()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched< double > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const double *  alpha,
const double *const  A[],
const int  lda,
const double *const  x[],
const int  incx,
const double *  beta,
double *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 574 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

587 {
588  return gemv_batched_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count);
589 }
ompBLAS_status gemv_batched_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:492

◆ gemv_batched< float >()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched< float > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const float *  alpha,
const float *const  A[],
const int  lda,
const float *const  x[],
const int  incx,
const float *  beta,
float *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 556 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

569 {
570  return gemv_batched_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count);
571 }
ompBLAS_status gemv_batched_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:492

◆ gemv_batched< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched< std::complex< double > > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const std::complex< double > *  alpha,
const std::complex< double > *const  A[],
const int  lda,
const std::complex< double > *const  x[],
const int  incx,
const std::complex< double > *  beta,
std::complex< double > *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 611 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

624 {
625  return gemv_batched_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count);
626 }
ompBLAS_status gemv_batched_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:492

◆ gemv_batched< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched< std::complex< float > > ( ompBLAS_handle handle,
const char  trans,
const int  m,
const int  n,
const std::complex< float > *  alpha,
const std::complex< float > *const  A[],
const int  lda,
const std::complex< float > *const  x[],
const int  incx,
const std::complex< float > *  beta,
std::complex< float > *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 593 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, gemv_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

606 {
607  return gemv_batched_impl(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy, batch_count);
608 }
ompBLAS_status gemv_batched_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T *alpha, const T *const A[], const int lda, const T *const x[], const int incx, const T *beta, T *const y[], const int incy, const int batch_count)
Definition: ompBLAS.cpp:492

◆ gemv_batched_impl()

ompBLAS_status qmcplusplus::ompBLAS::gemv_batched_impl ( ompBLAS_handle &  handle,
const char  trans,
const int  m,
const int  n,
const T *  alpha,
const T *const  A[],
const int  lda,
const T *const  x[],
const int  incx,
const T *  beta,
T *const  y[],
const int  incy,
const int  batch_count 
)

Definition at line 492 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by gemv_batched< double >(), gemv_batched< float >(), gemv_batched< std::complex< double > >(), and gemv_batched< std::complex< float > >().

505 {
506  if (m == 0 || n == 0 || batch_count == 0)
507  return 0;
508 
509  if (trans == 'T')
510  {
511  if (incx != 1)
512  throw std::runtime_error("incx!=1 are not implemented in ompBLAS::gemv_batched_impl trans='T'!");
513 
514  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(batch_count * n) \
515  is_device_ptr(A, x, y, alpha, beta)")
516  for (uint32_t ib = 0; ib < batch_count; ib++)
517  for (uint32_t i = 0; i < n; i++)
518  {
519  T dot_sum(0);
520  PRAGMA_OFFLOAD("omp parallel for simd reduction(+: dot_sum)")
521  for (uint32_t j = 0; j < m; j++)
522  dot_sum += x[ib][j] * A[ib][i * lda + j];
523  if (beta[ib] == T(0))
524  y[ib][i * incy] = alpha[ib] * dot_sum; // protecting NaN from y
525  else
526  y[ib][i * incy] = alpha[ib] * dot_sum + beta[ib] * y[ib][i * incy];
527  }
528  return 0;
529  }
530  else if (trans == 'N')
531  {
532  if (incx != 1)
533  throw std::runtime_error("incx!=1 are not implemented in ompBLAS::gemv_batched_impl trans='N'!");
534 
535  PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(batch_count * n) \
536  is_device_ptr(A, x, y, alpha, beta)")
537  for (uint32_t ib = 0; ib < batch_count; ib++)
538  for (uint32_t i = 0; i < m; i++)
539  {
540  T dot_sum(0);
541  PRAGMA_OFFLOAD("omp parallel for simd reduction(+: dot_sum)")
542  for (uint32_t j = 0; j < n; j++)
543  dot_sum += x[ib][j] * A[ib][j * lda + i];
544  if (beta[ib] == T(0))
545  y[ib][i * incy] = alpha[ib] * dot_sum; // protecting NaN from y
546  else
547  y[ib][i * incy] = alpha[ib] * dot_sum + beta[ib] * y[ib][i * incy];
548  }
549  return 0;
550  }
551  else
552  throw std::runtime_error("Error: trans=='C' not yet implemented for ompBLAS::gemv_impl.");
553 }
if(c->rank()==0)
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ gemv_impl()

ompBLAS_status qmcplusplus::ompBLAS::gemv_impl ( ompBLAS_handle &  handle,
const char  trans,
const int  m,
const int  n,
const T  alpha,
const T *const  A,
const int  lda,
const T *const  x,
const int  incx,
const T  beta,
T *const  y,
const int  incy 
)

Definition at line 362 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

Referenced by gemv< double >(), gemv< float >(), gemv< std::complex< double > >(), and gemv< std::complex< float > >().

374 {
375  if (m == 0 || n == 0)
376  return 0;
377 
378  if (trans == 'T')
379  {
380  if (incx != 1 || incy != 1)
381  throw std::runtime_error("incx!=1 or incy!=1 are not implemented in ompBLAS::gemv_impl trans='T'!");
382 
383  PRAGMA_OFFLOAD("omp target teams distribute num_teams(n) is_device_ptr(A, x, y)")
384  for (uint32_t i = 0; i < n; i++)
385  {
386  T dot_sum(0);
387  PRAGMA_OFFLOAD("omp parallel for simd reduction(+: dot_sum)")
388  for (uint32_t j = 0; j < m; j++)
389  dot_sum += x[j] * A[i * lda + j];
390  if (beta == T(0))
391  y[i] = alpha * dot_sum; // protecting NaN from y
392  else
393  y[i] = alpha * dot_sum + beta * y[i];
394  }
395  return 0;
396  }
397  else if (trans == 'N')
398  {
399  if (incx != 1 || incy != 1)
400  throw std::runtime_error("incx !=1 or incy != 1 are not implemented in ompBLAS::gemv_impl trans='N'!");
401 
402  PRAGMA_OFFLOAD("omp target teams distribute num_teams(m) is_device_ptr(A, x, y)")
403  for (uint32_t i = 0; i < m; i++)
404  {
405  T dot_sum(0);
406  PRAGMA_OFFLOAD("omp parallel for simd reduction(+: dot_sum)")
407  for (uint32_t j = 0; j < n; j++)
408  dot_sum += x[j] * A[j * lda + i];
409  if (beta == T(0))
410  y[i] = alpha * dot_sum; // protecting NaN from y
411  else
412  y[i] = alpha * dot_sum + beta * y[i];
413  }
414  return 0;
415  }
416  else
417  throw std::runtime_error("Error: trans=='C' not yet implemented for ompBLAS::gemv_impl.");
418 }
if(c->rank()==0)
ompBLAS_status gemv_impl(ompBLAS_handle &handle, const char trans, const int m, const int n, const T alpha, const T *const A, const int lda, const T *const x, const int incx, const T beta, T *const y, const int incy)
Definition: ompBLAS.cpp:362
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ ger()

ompBLAS_status qmcplusplus::ompBLAS::ger ( ompBLAS_handle handle,
const int  m,
const int  n,
const T  alpha,
const T *const  x,
const int  incx,
const T *const  y,
const int  incy,
T *const  A,
const int  lda 
)

◆ ger< double >()

ompBLAS_status qmcplusplus::ompBLAS::ger< double > ( ompBLAS_handle handle,
const int  m,
const int  n,
const double  alpha,
const double *const  x,
const int  incx,
const double *const  y,
const int  incy,
double *const  A,
const int  lda 
)

Definition at line 672 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

682 {
683  return ger_impl(handle, m, n, alpha, x, incx, y, incy, A, lda);
684 }
ompBLAS_status ger_impl(ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
Definition: ompBLAS.cpp:631

◆ ger< float >()

ompBLAS_status qmcplusplus::ompBLAS::ger< float > ( ompBLAS_handle handle,
const int  m,
const int  n,
const float  alpha,
const float *const  x,
const int  incx,
const float *const  y,
const int  incy,
float *const  A,
const int  lda 
)

Definition at line 657 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

667 {
668  return ger_impl(handle, m, n, alpha, x, incx, y, incy, A, lda);
669 }
ompBLAS_status ger_impl(ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
Definition: ompBLAS.cpp:631

◆ ger< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::ger< std::complex< double > > ( ompBLAS_handle handle,
const int  m,
const int  n,
const std::complex< double >  alpha,
const std::complex< double > *const  x,
const int  incx,
const std::complex< double > *const  y,
const int  incy,
std::complex< double > *const  A,
const int  lda 
)

Definition at line 703 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

713 {
714  return ger_impl(handle, m, n, alpha, x, incx, y, incy, A, lda);
715 }
ompBLAS_status ger_impl(ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
Definition: ompBLAS.cpp:631

◆ ger< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::ger< std::complex< float > > ( ompBLAS_handle handle,
const int  m,
const int  n,
const std::complex< float >  alpha,
const std::complex< float > *const  x,
const int  incx,
const std::complex< float > *const  y,
const int  incy,
std::complex< float > *const  A,
const int  lda 
)

Definition at line 688 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

698 {
699  return ger_impl(handle, m, n, alpha, x, incx, y, incy, A, lda);
700 }
ompBLAS_status ger_impl(ompBLAS_handle &handle, const int m, const int n, const T alpha, const T *const x, const int incx, const T *const y, const int incy, T *const A, const int lda)
Definition: ompBLAS.cpp:631

◆ ger_batched()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched ( ompBLAS_handle handle,
const int  m,
const int  n,
const T *  alpha,
const T *const  x[],
const int  incx,
const T *const  y[],
const int  incy,
T *const  A[],
const int  lda,
const int  batch_count 
)

◆ ger_batched< double >()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched< double > ( ompBLAS_handle handle,
const int  m,
const int  n,
const double *  alpha,
const double *const  x[],
const int  incx,
const double *const  y[],
const int  incy,
double *const  A[],
const int  lda,
const int  batch_count 
)

Definition at line 764 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

775 {
776  return ger_batched_impl(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count);
777 }
ompBLAS_status ger_batched_impl(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
Definition: ompBLAS.cpp:720

◆ ger_batched< float >()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched< float > ( ompBLAS_handle handle,
const int  m,
const int  n,
const float *  alpha,
const float *const  x[],
const int  incx,
const float *const  y[],
const int  incy,
float *const  A[],
const int  lda,
const int  batch_count 
)

Definition at line 748 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

759 {
760  return ger_batched_impl(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count);
761 }
ompBLAS_status ger_batched_impl(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
Definition: ompBLAS.cpp:720

◆ ger_batched< std::complex< double > >()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched< std::complex< double > > ( ompBLAS_handle handle,
const int  m,
const int  n,
const std::complex< double > *  alpha,
const std::complex< double > *const  x[],
const int  incx,
const std::complex< double > *const  y[],
const int  incy,
std::complex< double > *const  A[],
const int  lda,
const int  batch_count 
)

Definition at line 797 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

808 {
809  return ger_batched_impl(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count);
810 }
ompBLAS_status ger_batched_impl(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
Definition: ompBLAS.cpp:720

◆ ger_batched< std::complex< float > >()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched< std::complex< float > > ( ompBLAS_handle handle,
const int  m,
const int  n,
const std::complex< float > *  alpha,
const std::complex< float > *const  x[],
const int  incx,
const std::complex< float > *const  y[],
const int  incy,
std::complex< float > *const  A[],
const int  lda,
const int  batch_count 
)

Definition at line 781 of file ompBLAS.cpp.

References qmcplusplus::Units::distance::A, ger_batched_impl(), qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n.

792 {
793  return ger_batched_impl(handle, m, n, alpha, x, incx, y, incy, A, lda, batch_count);
794 }
ompBLAS_status ger_batched_impl(ompBLAS_handle &handle, const int m, const int n, const T *alpha, const T *const x[], const int incx, const T *const y[], const int incy, T *const A[], const int lda, const int batch_count)
Definition: ompBLAS.cpp:720

◆ ger_batched_impl()

ompBLAS_status qmcplusplus::ompBLAS::ger_batched_impl ( ompBLAS_handle &  handle,
const int  m,
const int  n,
const T *  alpha,
const T *const  x[],
const int  incx,
const T *const  y[],
const int  incy,
T *const  A[],
const int  lda,
const int  batch_count 
)

Definition at line 720 of file ompBLAS.cpp.

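References (as generated) qmcplusplus::Units::distance::A, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n. Note: these cross-links are spurious name matches; A, lda, m, and n in the body are this function's own parameters.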

Referenced by ger_batched< double >(), ger_batched< float >(), ger_batched< std::complex< double > >(), and ger_batched< std::complex< float > >().

731 {
732  if (m == 0 || n == 0 || batch_count == 0)
733  return 0;
734 
735 
736  if (incx != 1)
737  throw std::runtime_error("incx!=1 are not implemented in ompBLAS::ger_batched_impl!");
738 
739  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(3) is_device_ptr(A, x, y, alpha)")
740  for (uint32_t ib = 0; ib < batch_count; ib++)
741  for (uint32_t i = 0; i < n; i++)
742  for (uint32_t j = 0; j < m; j++)
743  A[ib][i * lda + j] += alpha[ib] * x[ib][j] * y[ib][i * incy];
744  return 0;
745 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])

◆ ger_impl()

ompBLAS_status qmcplusplus::ompBLAS::ger_impl ( ompBLAS_handle handle,
const int  m,
const int  n,
const T  alpha,
const T *const  x,
const int  incx,
const T *const  y,
const int  incy,
T *const  A,
const int  lda 
)

Definition at line 631 of file ompBLAS.cpp.

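References (as generated) qmcplusplus::Units::distance::A, qmcplusplus::lda, qmcplusplus::Units::distance::m, and qmcplusplus::n. Note: these cross-links are spurious name matches; A, lda, m, and n in the body are this function's own parameters.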

Referenced by ger< double >(), ger< float >(), ger< std::complex< double > >(), and ger< std::complex< float > >().

641 {
642  if (m == 0 || n == 0)
643  return 0;
644 
645  if (incx != 1 || incy != 1)
646  throw std::runtime_error("incx !=1 or incy != 1 are not implemented in ompBLAS::ger_impl!");
647 
648  //BLAS::ger(m, n, alpha, x, incx, y, incy, A, lda);
649  PRAGMA_OFFLOAD("omp target teams distribute parallel for collapse(2) is_device_ptr(A, x, y)")
650  for (uint32_t i = 0; i < n; i++)
651  for (uint32_t j = 0; j < m; j++)
652  A[i * lda + j] += alpha * x[j] * y[i];
653  return 0;
654 }
for(int i=0;i< size_test;++i) CHECK(Approx(gauss_random_vals[offset_for_rs+i])