QMCPACK
test_DiracMatrixComputeCUDA.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2021 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
8 //
9 // File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 #include <catch.hpp>
13 #include <algorithm>
14 #include "OhmmsData/Libxml2Doc.h"
15 #include "OhmmsPETE/OhmmsMatrix.h"
16 #include "OhmmsPETE/OhmmsVector.h"
19 #include "makeRngSpdMatrix.hpp"
25 
26 // Legacy CPU inversion for temporary testing
28 
29 
30 namespace qmcplusplus
31 {
32 template<typename T>
33 using OffloadPinnedMatrix = Matrix<T, PinnedDualAllocator<T>>;
34 template<typename T>
35 using OffloadPinnedVector = Vector<T, PinnedDualAllocator<T>>;
36 
37 TEST_CASE("DiracMatrixComputeCUDA_cuBLAS_geam_call", "[wavefunction][fermion]")
38 {
40  int n = 4;
41  mat_a.resize(n, n);
43  temp_mat.resize(n, n);
45  mat_c.resize(n, n);
46 
47  double host_one(1.0);
48  double host_zero(0.0);
49 
50  std::vector<double> A{2, 5, 8, 7, 5, 2, 2, 8, 7, 5, 6, 6, 5, 4, 4, 8};
51  std::copy_n(A.begin(), 16, mat_a.data());
54  int lda = n;
55  cudaCheck(cudaMemcpyAsync((void*)(temp_mat.device_data()), (void*)(mat_a.data()), mat_a.size() * sizeof(double),
56  cudaMemcpyHostToDevice, queue.getNative()));
57  cublasErrorCheck(cuBLAS::geam(cuda_handles.h_cublas, CUBLAS_OP_T, CUBLAS_OP_N, n, n, &host_one,
58  temp_mat.device_data(), lda, &host_zero, mat_c.device_data(), lda, mat_a.device_data(),
59  lda),
60  "cuBLAS::geam failed.");
61 }
62 
63 TEST_CASE("DiracMatrixComputeCUDA_different_batch_sizes", "[wavefunction][fermion]")
64 {
66  mat_a.resize(4, 4);
67  std::vector<double> A{2, 5, 8, 7, 5, 2, 2, 8, 7, 5, 6, 6, 5, 4, 4, 8};
68  std::copy_n(A.data(), 16, mat_a.data());
70  log_values.resize(1);
72  inv_mat_a.resize(4, 4);
75 
76  dmcc.invert_transpose(queue, mat_a, inv_mat_a, log_values);
77 
78 
80  mat_b.resize(4, 4);
81  double invA[16]{-0.08247423, -0.26804124, 0.26804124, 0.05154639, 0.18556701, -0.89690722, 0.39690722, 0.13402062,
82  0.24742268, -0.19587629, 0.19587629, -0.15463918, -0.29896907, 1.27835052, -0.77835052, 0.06185567};
83  std::copy_n(invA, 16, mat_b.data());
84 
85  auto check_matrix_result = checkMatrix(inv_mat_a, mat_b);
86  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
87 
89  mat_a2.resize(4, 4);
90  std::copy_n(A.begin(), 16, mat_a2.data());
91  OffloadPinnedMatrix<double> inv_mat_a2;
92  inv_mat_a2.resize(4, 4);
93 
94  RefVector<const OffloadPinnedMatrix<double>> a_mats{mat_a, mat_a2};
95  RefVector<OffloadPinnedMatrix<double>> inv_a_mats{inv_mat_a, inv_mat_a2};
96 
97  log_values.resize(2);
98  dmcc.mw_invertTranspose(queue, a_mats, inv_a_mats, log_values);
99 
100  check_matrix_result = checkMatrix(inv_mat_a, mat_b);
101  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
102  check_matrix_result = checkMatrix(inv_mat_a2, mat_b);
103  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
104 
105  CHECK(log_values[0] == ComplexApprox(std::complex<double>{5.267858159063328, 6.283185307179586}));
106  CHECK(log_values[1] == ComplexApprox(std::complex<double>{5.267858159063328, 6.283185307179586}));
107 
109  mat_a3.resize(4, 4);
110  std::copy_n(A.begin(), 16, mat_a3.data());
111  OffloadPinnedMatrix<double> inv_mat_a3;
112  inv_mat_a3.resize(4, 4);
113 
114  a_mats[1] = mat_a3;
115 
116  RefVector<const OffloadPinnedMatrix<double>> a_mats3{mat_a, mat_a2, mat_a3};
117  RefVector<OffloadPinnedMatrix<double>> inv_a_mats3{inv_mat_a, inv_mat_a2, inv_mat_a3};
118 
119  log_values.resize(3);
120  dmcc.mw_invertTranspose(queue, a_mats3, inv_a_mats3, log_values);
121 
122  check_matrix_result = checkMatrix(inv_mat_a, mat_b);
123  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
124  check_matrix_result = checkMatrix(inv_mat_a2, mat_b);
125  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
126  check_matrix_result = checkMatrix(inv_mat_a3, mat_b);
127  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
128 
129  CHECK(log_values[0] == ComplexApprox(std::complex<double>{5.267858159063328, 6.283185307179586}));
130  CHECK(log_values[1] == ComplexApprox(std::complex<double>{5.267858159063328, 6.283185307179586}));
131  CHECK(log_values[2] == ComplexApprox(std::complex<double>{5.267858159063328, 6.283185307179586}));
132 }
133 
134 TEST_CASE("DiracMatrixComputeCUDA_complex_determinants_against_legacy", "[wavefunction][fermion]")
135 {
136  int n = 64;
138 
140 
142  mat_spd.resize(n, n);
144  makeRngSpdMatrix(mat_spd);
145  // You would hope you could do this
146  // OffloadPinnedMatrix<double> mat_a(mat_spd);
147  // But you can't
149  for (int i = 0; i < n; ++i)
150  for (int j = 0; j < n; ++j)
151  mat_a(i, j) = mat_spd(i, j);
152 
153  Matrix<std::complex<double>> mat_spd2;
154  mat_spd2.resize(n, n);
155  makeRngSpdMatrix(mat_spd2);
156  // You would hope you could do this
157  // OffloadPinnedMatrix<double> mat_a(mat_spd);
158  // But you can't
160  for (int i = 0; i < n; ++i)
161  for (int j = 0; j < n; ++j)
162  mat_a2(i, j) = mat_spd2(i, j);
163 
165  log_values.resize(2);
167  inv_mat_a.resize(n, n);
169  inv_mat_a2.resize(n, n);
170 
172  RefVector<OffloadPinnedMatrix<std::complex<double>>> inv_a_mats{inv_mat_a, inv_mat_a2};
173 
174  dmcc.mw_invertTranspose(queue, a_mats, inv_a_mats, log_values);
175 
177  Matrix<std::complex<double>> inv_mat_test(n, n);
178  std::complex<double> det_log_value;
179  dmat.invert_transpose(mat_spd, inv_mat_test, det_log_value);
180 
181  auto check_matrix_result = checkMatrix(inv_mat_a, inv_mat_test);
182  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
183 
184  dmat.invert_transpose(mat_spd2, inv_mat_test, det_log_value);
185  check_matrix_result = checkMatrix(inv_mat_a2, inv_mat_test);
186  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
187 }
188 
189 TEST_CASE("DiracMatrixComputeCUDA_large_determinants_against_legacy", "[wavefunction][fermion]")
190 {
191  int n = 64;
194 
195  Matrix<double> mat_spd;
196  mat_spd.resize(n, n);
198  makeRngSpdMatrix(mat_spd);
199  // You would hope you could do this
200  // OffloadPinnedMatrix<double> mat_a(mat_spd);
201  // But you can't
203  for (int i = 0; i < n; ++i)
204  for (int j = 0; j < n; ++j)
205  mat_a(i, j) = mat_spd(i, j);
206 
207  Matrix<double> mat_spd2;
208  mat_spd2.resize(n, n);
209  makeRngSpdMatrix(mat_spd2);
210  // You would hope you could do this
211  // OffloadPinnedMatrix<double> mat_a(mat_spd);
212  // But you can't
214  for (int i = 0; i < n; ++i)
215  for (int j = 0; j < n; ++j)
216  mat_a2(i, j) = mat_spd2(i, j);
217 
219  log_values.resize(2);
220  OffloadPinnedMatrix<double> inv_mat_a;
221  inv_mat_a.resize(n, n);
222  OffloadPinnedMatrix<double> inv_mat_a2;
223  inv_mat_a2.resize(n, n);
224 
225  RefVector<const OffloadPinnedMatrix<double>> a_mats{mat_a, mat_a2};
226  RefVector<OffloadPinnedMatrix<double>> inv_a_mats{inv_mat_a, inv_mat_a2};
227 
228  dmcc.mw_invertTranspose(queue, a_mats, inv_a_mats, log_values);
229 
230  DiracMatrix<double> dmat;
231  Matrix<double> inv_mat_test(n, n);
232  std::complex<double> det_log_value;
233  dmat.invert_transpose(mat_spd, inv_mat_test, det_log_value);
234 
235  auto check_matrix_result = checkMatrix(inv_mat_a, inv_mat_test);
236  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
237 
238  dmat.invert_transpose(mat_spd2, inv_mat_test, det_log_value);
239  check_matrix_result = checkMatrix(inv_mat_a2, inv_mat_test);
240  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
241 }
242 
243 } // namespace qmcplusplus
#define CUBLAS_OP_N
Definition: cuda2hip.h:19
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
Vector< T, PinnedDualAllocator< T > > OffloadPinnedVector
std::vector< StdComp, CUDAHostAllocator< StdComp > > log_values(batch_size)
TEST_CASE("complex_helper", "[type_traits]")
CHECKED_ELSE(check_matrix_result.result)
auto check_matrix_result
void resize(size_type n, size_type m)
Resize the container.
Definition: OhmmsMatrix.h:99
std::enable_if_t<!std::is_same< VALUE_FP, TMAT >::value > mw_invertTranspose(compute::Queue< PlatformKind::CUDA > &queue, const RefVector< const DualMatrix< TMAT >> &a_mats, const RefVector< DualMatrix< TMAT >> &inv_a_mats, DualVector< LogValue > &log_values)
Mixed precision specialization When TMAT is not full precision we need to still do the inversion and ...
size_type size() const
Definition: OhmmsMatrix.h:76
void makeRngSpdMatrix(testing::RandomForTest< RngValueType< T >> &rng, Matrix< T > &mat_spd)
helper class to compute matrix inversion and the log value of determinant
Definition: DiracMatrix.h:111
These allocators are to make code that should be generic with the respect to accelerator code flavor ...
void invert_transpose(compute::Queue< PlatformKind::CUDA > &queue, DualMatrix< TMAT > &a_mat, DualMatrix< TMAT > &inv_a_mat, DualVector< LogValue > &log_values)
Given a_mat returns inverted amit and log determinant of a_matches.
#define CUBLAS_OP_T
Definition: cuda2hip.h:20
#define cudaMemcpyHostToDevice
Definition: cuda2hip.h:139
pointer device_data()
Definition: OhmmsMatrix.h:188
std::vector< std::reference_wrapper< T > > RefVector
Declaraton of Vector<T,Alloc> Manage memory through Alloc directly and allow referencing an existing ...
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
class defining a compute and memory resource to compute matrix inversion and the log determinants of ...
CheckMatrixResult checkMatrix(M1 &a_mat, M2 &b_mat, const bool check_all=false, std::optional< const double > eps=std::nullopt)
This function checks equality a_mat and b_mat elements M1, M2 need to have their element type declare...
Definition: checkMatrix.hpp:63
cublasStatus_t geam(cublasHandle_t &handle, cublasOperation_t &transa, cublasOperation_t &transb, int m, int n, const float *alpha, const float *A, int lda, const float *beta, const float *B, int ldb, float *C, int ldc)
Definition: cuBLAS.hpp:110
CHECK(log_values[0]==ComplexApprox(std::complex< double >{ 5.603777579195571, -6.1586603331188225 }))
Declaration of WaveFunctionComponent.
#define cublasErrorCheck(ans, cause)
Definition: cuBLAS.hpp:34
#define cudaCheck(ans)
Definition: CUDAerror.h:27
Matrix< T, PinnedDualAllocator< T > > OffloadPinnedMatrix
std::enable_if_t< std::is_same< T_FP, TMAT >::value > invert_transpose(const Matrix< TMAT, ALLOC1 > &amat, Matrix< TMAT, ALLOC2 > &invMat, std::complex< TREAL > &LogDet)
compute the inverse of the transpose of matrix A and its determinant value in log when T_FP and TMAT ...
Definition: DiracMatrix.h:188
Functor to provide scope for rng when making SpdMatrix for testing.
#define cudaMemcpyAsync
Definition: cuda2hip.h:136