12 #ifndef QMCPLUSPLUS_SYCL_MKL_SOLVERINVERTOR_H 13 #define QMCPLUSPLUS_SYCL_MKL_SOLVERINVERTOR_H 28 template<
typename T_FP>
50 getrf_ws = syclSolver::getrf_scratchpad_size<T_FP>(m_queue, norb, norb, norb);
51 getri_ws = syclSolver::getri_scratchpad_size<T_FP>(m_queue, norb, norb);
61 template<typename TMAT, typename TREAL, typename = std::enable_if_t<std::is_same<TMAT, T_FP>::value>>
65 std::complex<TREAL>& log_value,
68 const int norb = logdetT.
rows();
71 m_queue.memcpy(
Mat1_gpu.data(), logdetT.
data(), logdetT.
size() *
sizeof(TMAT));
79 catch (sycl::exception
const& ex)
81 std::ostringstream err;
82 err <<
"\t\tCaught synchronous SYCL exception during getrf:\n" 83 << ex.what() <<
" status: " << ex.code() << std::endl;
84 std::cerr << err.str();
85 throw std::runtime_error(err.str());
88 log_value = computeLogDet_sycl<TREAL>(m_queue, norb, Ainv_gpu.cols(), Ainv_gpu.data(),
ipiv.data());
92 m_queue.memcpy(Ainv.
data(), Ainv_gpu.data(), Ainv.
size() *
sizeof(TMAT)).wait();
99 template<typename TMAT, typename TREAL, typename = std::enable_if_t<!std::is_same<TMAT, T_FP>::value>>
103 std::complex<TREAL>& log_value,
106 const int norb = logdetT.
rows();
109 m_queue.memcpy(Ainv_gpu.data(), logdetT.
data(), logdetT.
size() *
sizeof(TMAT));
120 catch (sycl::exception
const& ex)
122 std::ostringstream err;
123 err <<
"\t\tCaught synchronous SYCL exception during getrf:\n" 124 << ex.what() <<
" status: " << ex.code() << std::endl;
125 std::cerr << err.str();
126 throw std::runtime_error(err.str());
129 log_value = computeLogDet_sycl<TREAL>(m_queue, norb,
Mat1_gpu.cols(),
Mat1_gpu.data(),
ipiv.data());
135 m_queue.memcpy(Ainv.
data(), Ainv_gpu.data(), Ainv.
size() *
sizeof(TMAT)).wait();
137 for(
int i = 0; i < norb; i++)
139 throw std::runtime_error(
"Ainv[i][i] is NaN. i = " + std::to_string(i));
144 #endif // QMCPLUSPLUS_CUSOLVERINVERTOR_H
helper functions for EinsplineSetBuilder
allocator for SYCL device memory T data type ALIGN alignment in bytes
Matrix< T_FP, SYCLAllocator< T_FP > > Mat1_gpu
scratch memory for cusolverDN
std::enable_if_t<!std::is_same< TMAT, T_FP >::value > invert_transpose(const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
implements matrix inversion via cuSolverDN
sycl::event transpose(sycl::queue &q, const T1 *restrict in, int m, int lda, T2 *restrict out, int n, int ldb, const std::vector< sycl::event > &events)
double norm(const zVec &c)
rocblas_status getri(rocblas_handle &handle, int n, double *A, int lda, int *ipiv, int *info)
Vector< T_FP, SYCLAllocator< T_FP > > workspace
workspace
Declaraton of Vector<T,Alloc> Manage memory through Alloc directly and allow referencing an existing ...
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
this file provides three C++ memory allocators using SYCL specific memory allocation functions...
std::enable_if_t< std::is_same< TMAT, T_FP >::value > invert_transpose(const Matrix< TMAT > &logdetT, Matrix< TMAT > &Ainv, Matrix< TMAT, SYCLAllocator< TMAT >> &Ainv_gpu, std::complex< TREAL > &log_value, sycl::queue &m_queue)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
void resize(int norb, sycl::queue &m_queue)
resize the internal storage
Vector< std::int64_t, SYCLAllocator< std::int64_t > > ipiv
pivot array + info
cusolverStatus_t getrf(cusolverDnHandle_t &handle, int m, int n, double *A, int lda, double *work, int *ipiv, int *info)
bool isnan(float a)
return true if the value is NaN.