12 #ifndef QMCPLUSPLUS_CUBLAS_LU_HPP 13 #define QMCPLUSPLUS_CUBLAS_LU_HPP 16 #include <type_traits> 20 #include <cublas_v2.h> 21 #include <cuComplex.h> 23 #include <hipblas/hipblas.h> 24 #include <hip/hip_complex.h> 61 std::complex<double>* log_dets,
81 std::complex<double>* logdets,
106 std::complex<double>* log_dets,
109 extern template void computeInverseAndDetLog_batched<std::complex<double>>(
cublasHandle_t& h_cublas,
113 std::complex<double>*
Ms[],
114 std::complex<double>* Cs[],
115 std::complex<double>* LU_diags,
119 std::complex<double>* log_dets,
Helper functions for EinsplineSetBuilder.
Handles CUDA/HIP runtime selection.
template void computeInverseAndDetLog_batched< double >(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, double *Ms[], double *Cs[], double *LU_diags, int *pivots, int *host_infos, int *infos, std::complex< double > *log_dets, const int batch_size)
void computeInverseAndDetLog_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], T *LU_diags, int *pivots, int *host_infos, int *infos, std::complex< double > *log_dets, const int batch_size)
Takes PsiM in column-major layout and uses LU factorization to compute the log determinant and the inverse of PsiM (invPsiM).
void computeGetrf_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], int *pivots, int *host_infos, int *infos, const int batch_size)
void computeLogDet_batched(cudaStream_t &hstream, const int n, const int lda, T **Ms, const int *pivots, std::complex< double > *logdets, const int batch_size)
std::vector< int, CUDAHostAllocator< int > > pivots
std::vector< int, CUDAHostAllocator< int > > infos(8, 1.0)
std::vector< double *, CUDAHostAllocator< double * > > Ms
void computeGetri_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], int *pivots, int *host_infos, int *infos, const int batch_size)