![]() |
QMCPACK
|
These are unit tests for the low-level LU factorization used by the full inversion and calculation of the log determinant for Dirac determinants. More...
Go to the source code of this file.
Classes | |
struct | CUDAHandles |
Doesn't depend on the resource management scheme; that's out of scope for unit tests. More... | |
Namespaces | |
qmcplusplus | |
helper functions for EinsplineSetBuilder | |
qmcplusplus::testing | |
SpaceGrid refactored for use with batched estimator design. NE should be dropped when QMCHamiltonian/SpaceGrid has been deleted. | |
Typedefs | |
using | StdComp = std::complex< double > |
Functions | |
TEST_CASE ("cuBLAS_LU::computeLogDet", "[wavefunction][CUDA]") | |
Single double computeLogDet. More... | |
TEST_CASE ("cuBLAS_LU::computeLogDet_complex", "[wavefunction][CUDA]") | |
TEST_CASE ("cuBLAS_LU::computeLogDet_float", "[wavefunction][CUDA]") | |
While this working is a good test, in production code it's likely we want to widen the matrix M to double and thereby the LU matrix as well. More... | |
std::vector< StdComp, CUDAHostAllocator< StdComp > > | dev_lu (lu.size()) |
std::vector< StdComp, CUDAHostAllocator< StdComp > > | dev_lu2 (lu2.size()) |
std::vector< StdComp *, CUDAHostAllocator< StdComp * > > | lus (batch_size) |
std::vector< StdComp *, CUDAAllocator< StdComp * > > | dev_lus (batch_size) |
std::vector< StdComp, CUDAHostAllocator< StdComp > > | log_values (batch_size) |
std::vector< StdComp, CUDAAllocator< StdComp > > | dev_log_values (batch_size) |
std::vector< int, CUDAAllocator< int > > | dev_pivots (pivots.size()) |
cudaErrorCheck (cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device") | |
cudaErrorCheck (cudaMemcpyAsync(dev_lu2.data(), lu2.data(), sizeof(decltype(lu2)::value_type) *lu2.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device") | |
cudaErrorCheck (cudaMemcpyAsync(dev_lus.data(), lus.data(), sizeof(decltype(lus)::value_type) *lus.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device") | |
cudaErrorCheck (cudaMemcpyAsync(dev_pivots.data(), pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device") | |
cudaErrorCheck (cudaMemcpyAsync(log_values.data(), dev_log_values.data(), sizeof(std::complex< double >) *2, cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying log_values from device") | |
cudaErrorCheck (cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!") | |
CHECK (log_values[0]==ComplexApprox(std::complex< double >{ 5.603777579195571, -6.1586603331188225 })) | |
CHECK (log_values[1]==ComplexApprox(std::complex< double >{ 5.531331998282581, -8.805487075984523 })) | |
TEST_CASE ("cuBLAS_LU::getrf_batched_complex", "[wavefunction][CUDA]") | |
std::vector< double, CUDAAllocator< double > > | devM_vec (M_vec.size()) |
std::vector< double, CUDAAllocator< double > > | devM2_vec (M2_vec.size()) |
std::vector< double *, CUDAAllocator< double * > > | devMs (Ms.size()) |
std::vector< int, CUDAHostAllocator< int > > | pivots (8, -1.0) |
std::vector< int, CUDAHostAllocator< int > > | infos (8, 1.0) |
std::vector< int, CUDAAllocator< int > > | dev_infos (pivots.size()) |
cudaErrorCheck (cudaMemcpyAsync(devM_vec.data(), M_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M to device") | |
cudaErrorCheck (cudaMemcpyAsync(devM2_vec.data(), M2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying M2 to device") | |
cudaErrorCheck (cudaMemcpyAsync(devMs.data(), Ms.data(), sizeof(decltype(Ms)::value_type) *Ms.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying Ms to device") | |
cudaErrorCheck (cudaMemcpyAsync(M_vec.data(), devM_vec.data(), sizeof(decltype(M_vec)::value_type) *M_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device") | |
cudaErrorCheck (cudaMemcpyAsync(M2_vec.data(), devM2_vec.data(), sizeof(decltype(M2_vec)::value_type) *M2_vec.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying invM from device") | |
cudaErrorCheck (cudaMemcpyAsync(pivots.data(), dev_pivots.data(), sizeof(int) *pivots.size(), cudaMemcpyDeviceToHost, hstream), "cudaMemcpyAsync failed copying pivots from device") | |
testing::MatrixAccessor< double > | M_mat (M_vec.data(), 4, 4) |
testing::MatrixAccessor< double > | lu_mat (lu.data(), 4, 4) |
testing::MatrixAccessor< double > | M2_mat (M2_vec.data(), 4, 4) |
testing::MatrixAccessor< double > | lu2_mat (lu2.data(), 4, 4) |
checkArray (real_pivot, pivots, 8) | |
CHECKED_ELSE (check_matrix_result.result) | |
TEST_CASE ("cuBLAS_LU::getri_batched", "[wavefunction][CUDA]") | |
Variables | |
int | n = 4 |
int | lda = 4 |
auto & | hstream = cuda_handles->hstream |
int | batch_size = 2 |
std::vector< StdComp, CUDAHostAllocator< StdComp > > | lu |
std::vector< StdComp, CUDAHostAllocator< StdComp > > | lu2 |
lus [0] = dev_lu.data() | |
std::vector< int, CUDAHostAllocator< int > > | pivots = {3, 4, 3, 4, 3, 4, 4, 4} |
std::vector< double, CUDAHostAllocator< double > > | M_vec {2, 5, 7, 5, 5, 2, 5, 4, 8, 2, 6, 4, 7, 8, 6, 8} |
std::vector< double, CUDAHostAllocator< double > > | M2_vec {6, 5, 7, 5, 2, 2, 5, 4, 8, 2, 6, 4, 3, 8, 6, 8} |
std::vector< double *, CUDAHostAllocator< double * > > | Ms {devM_vec.data(), devM2_vec.data()} |
std::vector< int > | real_pivot {3, 3, 4, 4, 3, 3, 3, 4} |
auto | checkArray |
auto | check_matrix_result = checkMatrix(lu_mat, M_mat) |
These are unit tests for the low-level LU factorization used by the full inversion and calculation of the log determinant for Dirac determinants.
Fundamental testing of these kernels requires full knowledge of the memory layout and data movement. As such, OhmmsMatrices and custom allocators are not used. They have their own unit tests (hopefully!). This is also documentation of how these calls expect the memory handed to them to look. Please leave this intact. Someday those container abstractions will change; if inversion breaks and this still works, you will have a fighting chance to know how to change these routines or fix the bug you introduced in the higher-level abstractions.
Reference data generated by qmcpack/tests/scripts/inversion_ref.py
Definition in file test_cuBLAS_LU.cpp.