template<typename T>
void computeInverseAndDetLog_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], T *LU_diags, int *pivots, int *host_infos, int *infos, std::complex<double> *log_dets, const int batch_size)
    Takes PsiM in column-major layout and uses LU factorization to compute the log determinant and invPsiM.

template<typename T>
void computeGetrf_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], int *pivots, int *host_infos, int *infos, const int batch_size)

template<typename T>
void computeLogDet_batched(cudaStream_t &hstream, const int n, const int lda, T **Ms, const int *pivots, std::complex<double> *logdets, const int batch_size)
    (see the host-side sketch after this list)

template<typename T>
void computeGetri_batched(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, T *Ms[], T *Cs[], int *pivots, int *host_infos, int *infos, const int batch_size)

template void computeInverseAndDetLog_batched<double>(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, double *Ms[], double *Cs[], double *LU_diags, int *pivots, int *host_infos, int *infos, std::complex<double> *log_dets, const int batch_size)

template void computeInverseAndDetLog_batched<std::complex<double>>(cublasHandle_t &h_cublas, cudaStream_t &hstream, const int n, const int lda, std::complex<double> *Ms[], std::complex<double> *Cs[], std::complex<double> *LU_diags, int *pivots, int *host_infos, int *infos, std::complex<double> *log_dets, const int batch_size)
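For orientation, computeLogDet_batched forms the complex log determinant of each matrix in the batch from the LU diagonal and the pivot array produced by the factorization. The following host-side sketch shows the per-matrix quantity it evaluates; the helper name and loop are illustrative only, not the actual CUDA kernel, which processes all nw matrices on the device.

```cpp
#include <cmath>
#include <complex>

// Hypothetical host-side helper illustrating the per-matrix result of
// computeLogDet_batched. LU is the column-major factor produced by getrf
// with leading dimension lda; pivots uses the 1-based LAPACK convention.
std::complex<double> logDetFromLU(const double* LU, const int* pivots, const int n, const int lda)
{
  std::complex<double> log_det{0.0, 0.0};
  for (int i = 0; i < n; ++i)
  {
    double diag = LU[i * lda + i]; // diagonal entry of U
    if (pivots[i] != i + 1)        // each row interchange flips the sign of det(A)
      diag = -diag;
    // Accumulate log(det) as a complex value so negative diagonals are handled.
    log_det += std::log(std::complex<double>(diag, 0.0));
  }
  return log_det;
}
```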
|
template<typename T>
void qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched(cublasHandle_t &h_cublas,
                                                             cudaStream_t &hstream,
                                                             const int n,
                                                             const int lda,
                                                             T *Ms[],
                                                             T *Cs[],
                                                             T *LU_diags,
                                                             int *pivots,
                                                             int *host_infos,
                                                             int *infos,
                                                             std::complex<double> *log_dets,
                                                             const int batch_size)
Takes PsiM in column-major layout and uses LU factorization to compute the log determinant and invPsiM.

This is the call that QMCPACK should use.

Parameters
    [in,out]  Ms          device pointers to pointers to the Ms on input and to the LU matrices on output
    [out]     Cs          device pointers to memory of the same size as M, which is overwritten with invM
    [in]      pivots      pointer to n * nw ints allocated in device memory for the pivots array
    [in]      host_infos  pointer to nw ints allocated in pinned host memory for the factorization infos
    [in]      infos       pointer to nw ints allocated in device memory for the factorization infos
    [out]     log_dets    pointer to device memory for the nw log-determinant values to be returned; will be zeroed
    [in]      batch_size  if this changes over a run, a huge performance hit is taken because memory allocation synchronizes the device

The host infos are an exception to this and may be changed in the future. The logic for this should probably live in the next class up, which would obviously split the computeInverseAndDetLog_batched call.
Referenced by DiracMatrixComputeCUDA< VALUE_FP >::mw_computeInvertAndLog(), and DiracMatrixComputeCUDA< VALUE_FP >::mw_computeInvertAndLog_stride().
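A minimal usage sketch follows, assuming the batch matrices already live in device memory and that the cuBLAS_LU declarations are visible through the appropriate QMCPACK header (path omitted here). QMCPACK's actual callers such as DiracMatrixComputeCUDA manage these buffers with their own allocators; this is only an illustration of the buffer sizes described in the parameter table, with error checking and cleanup omitted.

```cpp
#include <complex>
#include <cublas_v2.h>
#include <cuda_runtime.h>
// The declaration of qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched is
// assumed to be available via the corresponding QMCPACK header (not shown).

// host_M_ptrs / host_C_ptrs: host arrays whose entries each point to an
// n x lda column-major matrix already resident in device memory.
void invertBatch(double* const* host_M_ptrs, double* const* host_C_ptrs,
                 const int n, const int lda, const int batch_size)
{
  cublasHandle_t h_cublas;
  cudaStream_t hstream;
  cublasCreate(&h_cublas);
  cudaStreamCreate(&hstream);
  cublasSetStream(h_cublas, hstream);

  // Device arrays of matrix pointers: Ms become LU factors, Cs receive invM.
  double **Ms, **Cs;
  cudaMalloc(&Ms, batch_size * sizeof(double*));
  cudaMalloc(&Cs, batch_size * sizeof(double*));
  cudaMemcpyAsync(Ms, host_M_ptrs, batch_size * sizeof(double*), cudaMemcpyHostToDevice, hstream);
  cudaMemcpyAsync(Cs, host_C_ptrs, batch_size * sizeof(double*), cudaMemcpyHostToDevice, hstream);

  // Work and result buffers sized per the parameter table (nw == batch_size).
  double* LU_diags;
  int *pivots, *infos, *host_infos;
  std::complex<double>* log_dets;
  cudaMalloc(&LU_diags, n * batch_size * sizeof(double));
  cudaMalloc(&pivots, n * batch_size * sizeof(int));
  cudaMalloc(&infos, batch_size * sizeof(int));
  cudaMallocHost(&host_infos, batch_size * sizeof(int)); // pinned host memory
  cudaMalloc(&log_dets, batch_size * sizeof(std::complex<double>));

  qmcplusplus::cuBLAS_LU::computeInverseAndDetLog_batched(
      h_cublas, hstream, n, lda, Ms, Cs, LU_diags, pivots, host_infos, infos,
      log_dets, batch_size);

  cudaStreamSynchronize(hstream); // Cs and log_dets are valid after the sync
}
```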