13 #ifndef CUDA_DELAYED_UPDATE_HELPER_H 14 #define CUDA_DELAYED_UPDATE_HELPER_H 19 #include <cuda_runtime_api.h> 21 #include <hip/hip_runtime.h> 30 const int delay_count,
39 const int delay_count,
40 std::complex<float>* temp_gpu,
43 std::complex<float>* V_gpu,
44 const std::complex<float>* Ainv,
48 const int delay_count,
57 const int delay_count,
58 std::complex<double>* temp_gpu,
61 std::complex<double>* V_gpu,
62 const std::complex<double>* Ainv,
80 const std::complex<double>* mat,
82 std::complex<double>* diag,
105 const std::complex<double>* mat_in,
107 std::complex<float>* mat_out,
113 const std::complex<float>* mat_in,
115 std::complex<double>* mat_out,
void make_identity_matrix_cuda(const int nrows, double *mat, const int lda, cudaStream_t hstream)
Create an identity matrix on the device.
void extract_matrix_diagonal_cuda(const int nrows, const double *mat, const int lda, double *diag, cudaStream_t hstream)
Extract the diagonal of a matrix.
void copy_matrix_cuda(const int nrows, const int ncols, const double *mat_in, const int lda, float *mat_out, const int ldb, cudaStream_t hstream)
Copy a matrix while converting between precisions (here, from a double-precision input to a single-precision output).
void applyW_stageV_cuda(const int *delay_list_gpu, const int delay_count, float *temp_gpu, const int numorbs, const int ndelay, float *V_gpu, const float *Ainv, cudaStream_t hstream)
Helper function for the delayed update algorithm: the W matrix is applied, and selected rows of Ainv are copied into ...