13 #ifndef QMCPLUSPLUS_COMPUTE_MATRIX_UPDATE_CUDA_H 14 #define QMCPLUSPLUS_COMPUTE_MATRIX_UPDATE_CUDA_H 29 const T*
const Ainv[],
33 const T*
const phi_vgl_in[],
34 const size_t phi_vgl_stride,
37 const int batch_count)
40 phi_vgl_stride, dphi_out, d2phi_out, batch_count),
41 "CUDA::copyAinvRow_saveGL_cuda failed!");
47 const T*
const Ainvrow[],
48 const T*
const dpsiMrow[],
50 const int batch_count)
53 "CUDA::calcGradients_cuda failed!");
58 int*
const delay_list[],
60 const int delay_count,
63 const T*
const ratio_inv,
64 const T*
const phi_vgl_in[],
65 const size_t phi_vgl_stride,
71 const int batch_count)
74 binv, binv_lda, ratio_inv, phi_vgl_in, phi_vgl_stride,
75 phi_out, dphi_out, d2phi_out, norb, n_accepted,
77 "CUDA::add_delay_list_save_y_VGL_batched failed!");
83 const int*
const delay_list[],
84 const int delay_count,
87 const int batch_count)
90 "CUDA::applyW_batched failed!");
void add_delay_list_save_sigma_VGL_batched(Queue< PlatformKind::CUDA > &queue, int *const delay_list[], const int rowchanged, const int delay_count, T *const binv[], const int binv_lda, const T *const ratio_inv, const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const phi_out[], T *const dphi_out[], T *const d2phi_out[], const int norb, const int n_accepted, const int batch_count)
void applyW_batched(Queue< PlatformKind::CUDA > &queue, const int *const delay_list[], const int delay_count, T *const tempMat[], const int lda, const int batch_count)
helper functions for EinsplineSetBuilder
cudaError_t add_delay_list_save_sigma_VGL_batched(cudaStream_t hstream, int *const delay_list[], const int rowchanged, const int delay_count, T *const binv[], const int binv_lda, const T *const ratio_inv, const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const phi_out[], T *const dphi_out[], T *const d2phi_out[], const int norb, const int n_accepted, const int batch_count)
cudaError_t calcGradients_batched(cudaStream_t hstream, const int n, const T *const Ainvrow[], const T *const dpsiMrow[], T *const grads_now, const int batch_count)
calculate gradients
void calcGradients_batched(Queue< PlatformKind::CUDA > &queue, const int n, const T *const Ainvrow[], const T *const dpsiMrow[], T *const grads_now, const int batch_count)
cudaErrorCheck(cudaMemcpyAsync(dev_lu.data(), lu.data(), sizeof(decltype(lu)::value_type) *lu.size(), cudaMemcpyHostToDevice, hstream), "cudaMemcpyAsync failed copying log_values to device")
void copyAinvRow_saveGL_batched(Queue< PlatformKind::CUDA > &queue, const int rowchanged, const int n, const T *const Ainv[], const int lda, T *const temp[], T *const rcopy[], const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const dphi_out[], T *const d2phi_out[], const int batch_count)
cudaError_t applyW_batched(cudaStream_t hstream, const int *const delay_list[], const int delay_count, T *const tempMat[], const int lda, const int batch_count)
cudaError_t copyAinvRow_saveGL_batched(cudaStream_t hstream, const int rowchanged, const int n, const T *const Ainv[], const int lda, T *const temp[], T *const rcopy[], const T *const phi_vgl_in[], const size_t phi_vgl_stride, T *const dphi_out[], T *const d2phi_out[], const int batch_count)
Helper function for the SM-1 Fahy update: subtract one in temp; copy the changed row of Ainv to rcopy; save phi G a...