These are unit tests for the low-level LU factorization used by the full inversion and calculation of log determinant for Dirac determinants. More...

Include dependency graph for test_cuBLAS_LU.cpp:

Classes
struct	CUDAHandles
	Doesn't depend on the resource management scheme; that's out of scope for unit tests. More...

Namespaces
	qmcplusplus
	helper functions for EinsplineSetBuilder

	qmcplusplus::testing
	SpaceGrid refactored for use with batched estimator design. NE should be dropped when QMCHamiltonian/SpaceGrid has been deleted.

Typedefs
using	StdComp = std::complex< double >

Functions
	TEST_CASE ("cuBLAS_LU::computeLogDet", "[wavefunction][CUDA]")
	Single double computeLogDet. More...

	TEST_CASE ("cuBLAS_LU::computeLogDet_complex", "[wavefunction][CUDA]")

	TEST_CASE ("cuBLAS_LU::computeLogDet_float", "[wavefunction][CUDA]")
	While this working is a good test, in production code it's likely we want to widen the matrix M to double, and thereby the LU matrix as well. More...

std::vector< StdComp, CUDAHostAllocator< StdComp > >	dev_lu (lu.size())

std::vector< StdComp, CUDAHostAllocator< StdComp > >	dev_lu2 (lu2.size())

std::vector< StdComp , CUDAHostAllocator< StdComp > >	lus (batch_size)

std::vector< StdComp , CUDAAllocator< StdComp > >	dev_lus (batch_size)

std::vector< StdComp, CUDAHostAllocator< StdComp > >	log_values (batch_size)

std::vector< StdComp, CUDAAllocator< StdComp > >	dev_log_values (batch_size)

std::vector< int, CUDAAllocator< int > >	dev_pivots (pivots.size())

	cudaErrorCheck (cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")

	cudaErrorCheck (cudaMemcpyAsync(dev_lu2.data(), lu2.data(), sizeof(decltype(lu2)::value_type) *lu2.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")

	cudaErrorCheck (cudaMemcpyAsync(dev_lus.data(), lus.data(), sizeof(decltype(lus)::value_type) *lus.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")

	cudaErrorCheck (cudaMemcpyAsync(dev_pivots.data(), pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")

	cudaErrorCheck (cudaMemcpyAsync(log_values.data(), dev_log_values.data(), sizeof(std::complex< double >) *2, cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying log_values from device")

	cudaErrorCheck (cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!")

	CHECK (log_values[0]==ComplexApprox(std::complex< double >{ 5.603777579195571, -6.1586603331188225 }))

	CHECK (log_values[1]==ComplexApprox(std::complex< double >{ 5.531331998282581, -8.805487075984523 }))

	TEST_CASE ("cuBLAS_LU::getrf_batched_complex", "[wavefunction][CUDA]")

std::vector< double, CUDAAllocator< double > >	devM_vec (M_vec.size())

std::vector< double, CUDAAllocator< double > >	devM2_vec (M2_vec.size())

std::vector< double , CUDAAllocator< double > >	devMs (Ms.size())

std::vector< int, CUDAHostAllocator< int > >	pivots (8, -1.0)

std::vector< int, CUDAHostAllocator< int > >	infos (8, 1.0)

std::vector< int, CUDAAllocator< int > >	dev_infos (pivots.size())

	cudaErrorCheck (cudaMemcpyAsync(devM_vec.data(), M_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M to device")

	cudaErrorCheck (cudaMemcpyAsync(devM2_vec.data(), M2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M2 to device")

	cudaErrorCheck (cudaMemcpyAsync(devMs.data(), Ms.data(), sizeof(decltype(Ms)::value_type) *Ms.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying Ms to device")

	cudaErrorCheck (cudaMemcpyAsync(M_vec.data(), devM_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device")

	cudaErrorCheck (cudaMemcpyAsync(M2_vec.data(), devM2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device")

	cudaErrorCheck (cudaMemcpyAsync(pivots.data(), dev_pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying pivots from device")

testing::MatrixAccessor< double >	M_mat (M_vec.data(), 4, 4)

testing::MatrixAccessor< double >	lu_mat (lu.data(), 4, 4)

testing::MatrixAccessor< double >	M2_mat (M2_vec.data(), 4, 4)

testing::MatrixAccessor< double >	lu2_mat (lu2.data(), 4, 4)

	checkArray (real_pivot, pivots, 8)

	CHECKED_ELSE (check_matrix_result.result)

	TEST_CASE ("cuBLAS_LU::getri_batched", "[wavefunction][CUDA]")

Variables
int	n = 4

int	lda = 4

auto &	hstream = cuda_handles->hstream

int	batch_size = 2

std::vector< StdComp, CUDAHostAllocator< StdComp > >	lu

std::vector< StdComp, CUDAHostAllocator< StdComp > >	lu2

	lus [0] = dev_lu.data()

std::vector< int, CUDAHostAllocator< int > >	pivots = {3, 4, 3, 4, 3, 4, 4, 4}

std::vector< double, CUDAHostAllocator< double > >	M_vec {2, 5, 7, 5, 5, 2, 5, 4, 8, 2, 6, 4, 7, 8, 6, 8}

std::vector< double, CUDAHostAllocator< double > >	M2_vec {6, 5, 7, 5, 2, 2, 5, 4, 8, 2, 6, 4, 3, 8, 6, 8}

std::vector< double , CUDAHostAllocator< double > >	Ms {devM_vec.data(), devM2_vec.data()}

std::vector< int >	real_pivot {3, 3, 4, 4, 3, 3, 3, 4}

auto	checkArray

auto	check_matrix_result = checkMatrix(lu_mat, M_mat)

Detailed Description

These are unit tests for the low-level LU factorization used by the full inversion and calculation of log determinant for Dirac determinants.

Fundamental testing of these kernels requires full knowledge of the memory layout and data movement. As such, OhmmsMatrices and custom allocators are not used; they have their own unit tests (hopefully!). This is also documentation of how these calls expect the memory handed to them to look. Please leave this intact. Someday those container abstractions will change; if inversion breaks and this still works, you will have a fighting chance to know how to change these routines or fix the bug you introduced in the higher-level abstractions.

Reference data generated by qmcpack/tests/scripts/inversion_ref.py

Definition in file test_cuBLAS_LU.cpp.

Classes

Namespaces

Typedefs

Functions

Variables

Detailed Description