QMCPACK
qmcplusplus::cuBLAS_LU Namespace Reference

Functions

template<typename T >
void computeInverseAndDetLog_batched (cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], T *LU_diags, int *pivots, int *host_infos, int *infos, std::complex< double > *log_dets, const int batch_size)
 Takes PsiM in column major layout and uses LU factorization to compute the log determinant and invPsiM. More...
 
template<typename T >
void computeGetrf_batched (cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], int *pivots, int *host_infos, int *infos, const int batch_size)
 
template<typename T >
void computeLogDet_batched (cudaStream_t &hstream, const int n, const int lda, T **Ms, const int *pivots, std::complex< double > *logdets, const int batch_size)
 
template<typename T >
void computeGetri_batched (cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], int *pivots, int *host_infos, int *infos, const int batch_size)
 
template void computeInverseAndDetLog_batched< double > (cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, double *Ms[], double *Cs[], double *LU_diags, int *pivots, int *host_infos, int *infos, std::complex< double > *log_dets, const int batch_size)
 
template void computeInverseAndDetLog_batched< std::complex< double > > (cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, std::complex< double > *Ms[], std::complex< double > *Cs[], std::complex< double > *LU_diags, int *pivots, int *host_infos, int *infos, std::complex< double > *log_dets, const int batch_size)
 

Function Documentation

◆ computeGetrf_batched()

void qmcplusplus::cuBLAS_LU::computeGetrf_batched ( cublasHandle_t h_cublas,
cudaStream_t hstream,
const int  n,
const int  lda,
T *  Ms[],
int *  pivots,
int *  host_infos,
int *  infos,
const int  batch_size 
)

Referenced by qmcplusplus::TEST_CASE().

◆ computeGetri_batched()

void qmcplusplus::cuBLAS_LU::computeGetri_batched ( cublasHandle_t h_cublas,
cudaStream_t hstream,
const int  n,
const int  lda,
T *  Ms[],
T *  Cs[],
int *  pivots,
int *  host_infos,
int *  infos,
const int  batch_size 
)

Referenced by qmcplusplus::TEST_CASE().

◆ computeInverseAndDetLog_batched()

void qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched ( cublasHandle_t h_cublas,
cudaStream_t hstream,
const int  n,
const int  lda,
T *  Ms[],
T *  Cs[],
T *  LU_diags,
int *  pivots,
int *  host_infos,
int *  infos,
std::complex< double > *  log_dets,
const int  batch_size 
)

Takes PsiM in column major layout and uses LU factorization to compute the log determinant and invPsiM.

This is the call that QMCPACK should use.

Parameters
[in,out]Ms- device pointers to pointers to Ms on input and to LU matrices on output
[out]Cs- device pointers to memory space the same size as M, which is overwritten with invM
[in]pivots- pointer to n * nw ints allocated in device memory for pivots array.
[in]host_infos- pointer to nw ints allocated in pinned host memory for factorization infos
[in]infos- pointer to nw ints allocated in device memory for factorization infos
[out]log_dets- pointer to device memory for the nw log determinant values to be returned; will be zeroed.
[in]batch_size- if this changes over the course of a run, a huge performance hit will be taken, as memory allocation syncs the device.

The host infos is an exception to this that may be changed in the future. The logic for this should probably be in the next class up. This would obviously split the computeInverseAndDetLog_batched call.

Referenced by DiracMatrixComputeCUDA< VALUE_FP >::mw_computeInvertAndLog(), and DiracMatrixComputeCUDA< VALUE_FP >::mw_computeInvertAndLog_stride().

◆ computeInverseAndDetLog_batched< double >()

template void qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched< double > ( cublasHandle_t h_cublas,
cudaStream_t hstream,
const int  n,
const int  lda,
double *  Ms[],
double *  Cs[],
double *  LU_diags,
int *  pivots,
int *  host_infos,
int *  infos,
std::complex< double > *  log_dets,
const int  batch_size 
)

◆ computeInverseAndDetLog_batched< std::complex< double > >()

template void qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched< std::complex< double > > ( cublasHandle_t h_cublas,
cudaStream_t hstream,
const int  n,
const int  lda,
std::complex< double > *  Ms[],
std::complex< double > *  Cs[],
std::complex< double > *  LU_diags,
int *  pivots,
int *  host_infos,
int *  infos,
std::complex< double > *  log_dets,
const int  batch_size 
)

◆ computeLogDet_batched()

void qmcplusplus::cuBLAS_LU::computeLogDet_batched ( cudaStream_t hstream,
const int  n,
const int  lda,
T **  Ms,
const int *  pivots,
std::complex< double > *  logdets,
const int  batch_size 
)

Referenced by qmcplusplus::TEST_CASE().