QMCPACK
AccelMatrixUpdateCUDA.hpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2024 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 #ifndef QMCPLUSPLUS_COMPUTE_MATRIX_UPDATE_CUDA_H
14 #define QMCPLUSPLUS_COMPUTE_MATRIX_UPDATE_CUDA_H
15 
16 #include <QueueAliases.hpp>
17 #include "matrix_update_helper.hpp"
18 
19 namespace qmcplusplus
20 {
21 
22 namespace compute
23 {
24 
/** Batched dispatch of the SM-1 Fahy-update helper on the CUDA queue.
 *
 * For each of the batch_count walkers, copies row `rowchanged` of Ainv into
 * rcopy (via temp) and saves the gradient/laplacian components of the phi VGL
 * buffer into dphi_out / d2phi_out. Thin host-side wrapper: all work is done
 * asynchronously on queue's native CUDA stream by
 * CUDA::copyAinvRow_saveGL_batched; cudaErrorCheck aborts with the given
 * message on a non-cudaSuccess return.
 *
 * \param queue          compute queue wrapping the CUDA stream to launch on
 * \param rowchanged     index of the changed row within each Ainv
 * \param n              number of columns (orbitals) processed per walker
 * \param Ainv           per-walker device pointers to the inverse matrices
 * \param lda            leading dimension of each Ainv
 * \param temp           per-walker device scratch row buffers
 * \param rcopy          per-walker device row-copy output buffers
 * \param phi_vgl_in     per-walker device VGL input (value/gradient/laplacian)
 * \param phi_vgl_stride element stride between V, G, L sections of phi_vgl_in
 * \param dphi_out       per-walker device gradient outputs
 * \param d2phi_out      per-walker device laplacian outputs
 * \param batch_count    number of walkers in the batch
 */
template<typename T>
void copyAinvRow_saveGL_batched(Queue<PlatformKind::CUDA>& queue,
                                const int rowchanged,
                                const int n,
                                const T* const Ainv[],
                                const int lda,
                                T* const temp[],
                                T* const rcopy[],
                                const T* const phi_vgl_in[],
                                const size_t phi_vgl_stride,
                                T* const dphi_out[],
                                T* const d2phi_out[],
                                const int batch_count)
{
  // Error message names the batched helper actually invoked (was "_cuda").
  cudaErrorCheck(CUDA::copyAinvRow_saveGL_batched(queue.getNative(), rowchanged, n, Ainv, lda, temp, rcopy, phi_vgl_in,
                                                  phi_vgl_stride, dphi_out, d2phi_out, batch_count),
                 "CUDA::copyAinvRow_saveGL_batched failed!");
}
43 
/** Batched gradient calculation dispatched on the CUDA queue.
 *
 * For each of the batch_count walkers, contracts the Ainv row with the
 * corresponding dpsiM row (length n) to produce the current gradients in
 * grads_now. Thin host-side wrapper: the kernel is enqueued asynchronously on
 * queue's native CUDA stream by CUDA::calcGradients_batched; cudaErrorCheck
 * aborts with the given message on a non-cudaSuccess return.
 *
 * \param queue       compute queue wrapping the CUDA stream to launch on
 * \param n           row length (number of orbitals) per walker
 * \param Ainvrow     per-walker device pointers to the relevant Ainv row
 * \param dpsiMrow    per-walker device pointers to the relevant dpsiM row
 * \param grads_now   device output buffer for the computed gradients
 * \param batch_count number of walkers in the batch
 */
template<typename T>
void calcGradients_batched(Queue<PlatformKind::CUDA>& queue,
                           const int n,
                           const T* const Ainvrow[],
                           const T* const dpsiMrow[],
                           T* const grads_now,
                           const int batch_count)
{
  // Error message names the batched helper actually invoked (was "_cuda").
  cudaErrorCheck(CUDA::calcGradients_batched(queue.getNative(), n, Ainvrow, dpsiMrow, grads_now, batch_count),
                 "CUDA::calcGradients_batched failed!");
}
55 
/** Batched delayed-update bookkeeping dispatched on the CUDA queue.
 *
 * For the first n_accepted walkers, records rowchanged in the per-walker
 * delay_list at position delay_count, scales the binv block by ratio_inv, and
 * saves the phi value/gradient/laplacian components from phi_vgl_in into
 * phi_out / dphi_out / d2phi_out. Thin host-side wrapper: all work is enqueued
 * asynchronously on queue's native CUDA stream by
 * CUDA::add_delay_list_save_sigma_VGL_batched; cudaErrorCheck aborts with the
 * given message on a non-cudaSuccess return.
 *
 * \param queue          compute queue wrapping the CUDA stream to launch on
 * \param delay_list     per-walker device delay index lists (updated)
 * \param rowchanged     index of the changed row recorded in the delay list
 * \param delay_count    current number of delayed updates per walker
 * \param binv           per-walker device B-inverse matrices (updated)
 * \param binv_lda       leading dimension of each binv
 * \param ratio_inv      device array of inverse acceptance ratios
 * \param phi_vgl_in     per-walker device VGL input (value/gradient/laplacian)
 * \param phi_vgl_stride element stride between V, G, L sections of phi_vgl_in
 * \param phi_out        per-walker device value outputs
 * \param dphi_out       per-walker device gradient outputs
 * \param d2phi_out      per-walker device laplacian outputs
 * \param norb           number of orbitals per walker
 * \param n_accepted     number of accepted moves (walkers actually processed)
 * \param batch_count    number of walkers in the batch
 */
template<typename T>
void add_delay_list_save_sigma_VGL_batched(Queue<PlatformKind::CUDA>& queue,
                                           int* const delay_list[],
                                           const int rowchanged,
                                           const int delay_count,
                                           T* const binv[],
                                           const int binv_lda,
                                           const T* const ratio_inv,
                                           const T* const phi_vgl_in[],
                                           const size_t phi_vgl_stride,
                                           T* const phi_out[],
                                           T* const dphi_out[],
                                           T* const d2phi_out[],
                                           const int norb,
                                           const int n_accepted,
                                           const int batch_count)
{
  // Error message names the helper actually invoked (was "..._save_y_VGL_batched").
  cudaErrorCheck(CUDA::add_delay_list_save_sigma_VGL_batched(queue.getNative(), delay_list, rowchanged, delay_count,
                                                             binv, binv_lda, ratio_inv, phi_vgl_in, phi_vgl_stride,
                                                             phi_out, dphi_out, d2phi_out, norb, n_accepted,
                                                             batch_count),
                 "CUDA::add_delay_list_save_sigma_VGL_batched failed!");
}
79 
80 
/** Batched application of the W update to the temporary matrices.
 *
 * For each of the batch_count walkers, applies the W correction to tempMat
 * at the positions recorded in that walker's delay_list (delay_count entries).
 * Thin host-side wrapper: the kernel is enqueued asynchronously on queue's
 * native CUDA stream by CUDA::applyW_batched; cudaErrorCheck aborts with the
 * given message on a non-cudaSuccess return.
 *
 * \param queue       compute queue wrapping the CUDA stream to launch on
 * \param delay_list  per-walker device delay index lists
 * \param delay_count number of delayed updates per walker
 * \param tempMat     per-walker device temporary matrices (updated in place)
 * \param lda         leading dimension of each tempMat
 * \param batch_count number of walkers in the batch
 */
template<typename T>
void applyW_batched(Queue<PlatformKind::CUDA>& queue,
                    const int* const delay_list[],
                    const int delay_count,
                    T* const tempMat[],
                    const int lda,
                    const int batch_count)
{
  cudaErrorCheck(CUDA::applyW_batched(queue.getNative(), delay_list, delay_count, tempMat, lda, batch_count),
                 "CUDA::applyW_batched failed!");
}
92 
93 
94 } // namespace compute
95 } // namespace qmcplusplus
96 #endif
void add_delay_list_save_sigma_VGL_batched(Queue< PlatformKind::CUDA > &queue, int *const delay_list[], const int rowchanged, const int delay_count, T *const binv[], const int binv_lda, const T *const ratio_inv, const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const phi_out[], T *const dphi_out[], T *const d2phi_out[], const int norb, const int n_accepted, const int batch_count)
void applyW_batched(Queue< PlatformKind::CUDA > &queue, const int *const delay_list[], const int delay_count, T *const tempMat[], const int lda, const int batch_count)
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
cudaError_t add_delay_list_save_sigma_VGL_batched(cudaStream_t hstream, int *const delay_list[], const int rowchanged, const int delay_count, T *const binv[], const int binv_lda, const T *const ratio_inv, const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const phi_out[], T *const dphi_out[], T *const d2phi_out[], const int norb, const int n_accepted, const int batch_count)
cudaError_t calcGradients_batched(cudaStream_t hstream, const int n, const T *const Ainvrow[], const T *const dpsiMrow[], T *const grads_now, const int batch_count)
calculate gradients
void calcGradients_batched(Queue< PlatformKind::CUDA > &queue, const int n, const T *const Ainvrow[], const T *const dpsiMrow[], T *const grads_now, const int batch_count)
cudaErrorCheck(cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
void copyAinvRow_saveGL_batched(Queue< PlatformKind::CUDA > &queue, const int rowchanged, const int n, const T *const Ainv[], const int lda, T *const temp[], T *const rcopy[], const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const dphi_out[], T *const d2phi_out[], const int batch_count)
cudaError_t applyW_batched(cudaStream_t hstream, const int *const delay_list[], const int delay_count, T *const tempMat[], const int lda, const int batch_count)
cudaError_t copyAinvRow_saveGL_batched(cudaStream_t hstream, const int rowchanged, const int n, const T *const Ainv[], const int lda, T *const temp[], T *const rcopy[], const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const dphi_out[], T *const d2phi_out[], const int batch_count)
helper function for SM-1 Fahy update subtract one in temp copy Ainv changed row to rcopy save phi G a...