QMCPACK
test_cuBLAS_LU.cpp File Reference

These are unit tests for the low level LU factorization used by the full inversion and calculation of log determinant for Dirac determinants. More...

+ Include dependency graph for test_cuBLAS_LU.cpp:

Go to the source code of this file.

Classes

struct  CUDAHandles
 Doesn't depend on the resource management scheme, which is out of scope for unit tests. More...
 

Namespaces

 qmcplusplus
 helper functions for EinsplineSetBuilder
 
 qmcplusplus::testing
 SpaceGrid refactored for use with batched estimator design. NE should be dropped when QMCHamiltonian/SpaceGrid has been deleted.
 

Typedefs

using StdComp = std::complex< double >
 

Functions

 TEST_CASE ("cuBLAS_LU::computeLogDet", "[wavefunction][CUDA]")
 Single double computeLogDet. More...
 
 TEST_CASE ("cuBLAS_LU::computeLogDet_complex", "[wavefunction][CUDA]")
 
 TEST_CASE ("cuBLAS_LU::computeLogDet_float", "[wavefunction][CUDA]")
 While this working is a good test, in production code it's likely we want to widen the matrix M to double and thereby the LU matrix as well. More...
 
std::vector< StdComp, CUDAHostAllocator< StdComp > > dev_lu (lu.size())
 
std::vector< StdComp, CUDAHostAllocator< StdComp > > dev_lu2 (lu2.size())
 
std::vector< StdComp *, CUDAHostAllocator< StdComp * > > lus (batch_size)
 
std::vector< StdComp *, CUDAAllocator< StdComp * > > dev_lus (batch_size)
 
std::vector< StdComp, CUDAHostAllocator< StdComp > > log_values (batch_size)
 
std::vector< StdComp, CUDAAllocator< StdComp > > dev_log_values (batch_size)
 
std::vector< int, CUDAAllocator< int > > dev_pivots (pivots.size())
 
 cudaErrorCheck (cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
 
 cudaErrorCheck (cudaMemcpyAsync(dev_lu2.data(), lu2.data(), sizeof(decltype(lu2)::value_type) *lu2.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
 
 cudaErrorCheck (cudaMemcpyAsync(dev_lus.data(), lus.data(), sizeof(decltype(lus)::value_type) *lus.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
 
 cudaErrorCheck (cudaMemcpyAsync(dev_pivots.data(), pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
 
 cudaErrorCheck (cudaMemcpyAsync(log_values.data(), dev_log_values.data(), sizeof(std::complex< double >) *2, cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying log_values from device")
 
 cudaErrorCheck (cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!")
 
 CHECK (log_values[0]==ComplexApprox(std::complex< double >{ 5.603777579195571, -6.1586603331188225 }))
 
 CHECK (log_values[1]==ComplexApprox(std::complex< double >{ 5.531331998282581, -8.805487075984523 }))
 
 TEST_CASE ("cuBLAS_LU::getrf_batched_complex", "[wavefunction][CUDA]")
 
std::vector< double, CUDAAllocator< double > > devM_vec (M_vec.size())
 
std::vector< double, CUDAAllocator< double > > devM2_vec (M2_vec.size())
 
std::vector< double *, CUDAAllocator< double * > > devMs (Ms.size())
 
std::vector< int, CUDAHostAllocator< int > > pivots (8, -1.0)
 
std::vector< int, CUDAHostAllocator< int > > infos (8, 1.0)
 
std::vector< int, CUDAAllocator< int > > dev_infos (pivots.size())
 
 cudaErrorCheck (cudaMemcpyAsync(devM_vec.data(), M_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M to device")
 
 cudaErrorCheck (cudaMemcpyAsync(devM2_vec.data(), M2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M2 to device")
 
 cudaErrorCheck (cudaMemcpyAsync(devMs.data(), Ms.data(), sizeof(decltype(Ms)::value_type) *Ms.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying Ms to device")
 
 cudaErrorCheck (cudaMemcpyAsync(M_vec.data(), devM_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device")
 
 cudaErrorCheck (cudaMemcpyAsync(M2_vec.data(), devM2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device")
 
 cudaErrorCheck (cudaMemcpyAsync(pivots.data(), dev_pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying pivots from device")
 
testing::MatrixAccessor< double > M_mat (M_vec.data(), 4, 4)
 
testing::MatrixAccessor< double > lu_mat (lu.data(), 4, 4)
 
testing::MatrixAccessor< double > M2_mat (M2_vec.data(), 4, 4)
 
testing::MatrixAccessor< double > lu2_mat (lu2.data(), 4, 4)
 
 checkArray (real_pivot, pivots, 8)
 
 CHECKED_ELSE (check_matrix_result.result)
 
 TEST_CASE ("cuBLAS_LU::getri_batched", "[wavefunction][CUDA]")
 

Variables

int n = 4
 
int lda = 4
 
auto & hstream = cuda_handles->hstream
 
int batch_size = 2
 
std::vector< StdComp, CUDAHostAllocator< StdComp > > lu
 
std::vector< StdComp, CUDAHostAllocator< StdComp > > lu2
 
 lus [0] = dev_lu.data()
 
std::vector< int, CUDAHostAllocator< int > > pivots = {3, 4, 3, 4, 3, 4, 4, 4}
 
std::vector< double, CUDAHostAllocator< double > > M_vec {2, 5, 7, 5, 5, 2, 5, 4, 8, 2, 6, 4, 7, 8, 6, 8}
 
std::vector< double, CUDAHostAllocator< double > > M2_vec {6, 5, 7, 5, 2, 2, 5, 4, 8, 2, 6, 4, 3, 8, 6, 8}
 
std::vector< double *, CUDAHostAllocator< double * > > Ms {devM_vec.data(), devM2_vec.data()}
 
std::vector< int > real_pivot {3, 3, 4, 4, 3, 3, 3, 4}
 
auto checkArray
 
auto check_matrix_result = checkMatrix(lu_mat, M_mat)
 

Detailed Description

These are unit tests for the low level LU factorization used by the full inversion and calculation of log determinant for Dirac determinants.

Fundamental testing of these kernels requires full knowledge of the memory layout and data movement. As such, OhmmsMatrices and custom allocators are not used. They have their own unit tests (hopefully!). This is also documentation of how these calls expect the memory handed to them to look. Please leave this intact. Someday those container abstractions will change; if inversion breaks and this still works, you will have a fighting chance to know how to change these routines or fix the bug you introduced in the higher level abstractions.

Reference data generated by qmcpack/tests/scripts/inversion_ref.py

Definition in file test_cuBLAS_LU.cpp.