![]() |
QMCPACK
|
Implements Dirac matrix delayed update using OpenMP offload and CUDA. More...
Classes | |
struct | MultiWalkerResource |
Public Types | |
using | This_t = DelayedUpdateBatched< PL, VALUE > |
using | Value = VALUE |
using | Real = RealAlias< Value > |
using | Complex = std::complex< Real > |
template<typename DT > | |
using | UnpinnedDualVector = Vector< DT, OffloadAllocator< DT > > |
template<typename DT > | |
using | DualVector = Vector< DT, OffloadPinnedAllocator< DT > > |
template<typename DT > | |
using | DualMatrix = Matrix< DT, OffloadPinnedAllocator< DT > > |
template<typename DT > | |
using | DualVGLVector = VectorSoaContainer< DT, QMCTraits::DIM+2, OffloadPinnedAllocator< DT > > |
template<typename DT > | |
using | OffloadMWVGLArray = Array< DT, 3, OffloadPinnedAllocator< DT > > |
template<typename DT > | |
using | OffloadMatrix = Matrix< DT, OffloadPinnedAllocator< DT > > |
Public Member Functions | |
DelayedUpdateBatched (size_t norb, size_t max_delay) | |
constructor taking the orbital count and maximum delay (not a default constructor) More... | |
DelayedUpdateBatched (const DelayedUpdateBatched &)=delete | |
template<typename VVT , typename FPVT > | |
void | updateRow (DualMatrix< Value > &Ainv, int rowchanged, const VVT &phiV, FPVT c_ratio_in) |
Update the "local" psiMinv_ on the device. More... | |
Static Public Member Functions | |
template<typename GT > | |
static void | mw_evalGrad (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const std::vector< const Value *> &dpsiM_row_list, const int rowchanged, std::vector< GT > &grad_now) |
template<typename GT > | |
static void | mw_evalGradWithSpin (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const std::vector< const Value *> &dpsiM_row_list, OffloadMatrix< Complex > &mw_dspin, const int rowchanged, std::vector< GT > &grad_now, std::vector< Complex > &spingrad_now) |
static void | mw_accept_rejectRow (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const int rowchanged, const std::vector< Value *> &psiM_g_list, const std::vector< Value *> &psiM_l_list, const std::vector< bool > &isAccepted, const OffloadMWVGLArray< Value > &phi_vgl_v, const std::vector< Value > &ratios) |
Accept or reject row updates. Many of these const arguments provide pointers or references to objects that do get modified. More... | |
static void | mw_updateInvMat (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs) |
update the full Ainv and reset delay_count More... | |
static std::vector< const Value * > | mw_getInvRow (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const int row_id, bool on_host) |
Return invRow host or device pointers based on the on_host request; prepare invRow first if it is not already prepared. More... | |
static void | mw_transferAinv_D2H (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs) |
transfer Ainv to the host More... | |
Private Types | |
template<typename DT > | |
using | DeviceMatrix = Matrix< DT, OffloadDeviceAllocator< DT > > |
template<typename DT > | |
using | DeviceVector = Vector< DT, OffloadDeviceAllocator< DT > > |
Private Member Functions | |
void | resize (int norb, int delay) |
resize the internal storage More... | |
void | guard_no_delay () const |
ensure no previous delay left. More... | |
Static Private Member Functions | |
static void | mw_prepareInvRow (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const int rowchanged) |
compute the row of up-to-date Ainv More... | |
static void | mw_updateRow (const RefVectorWithLeader< This_t > &engines, MultiWalkerResource &mw_rsc, const RefVector< DualMatrix< Value >> &psiMinv_refs, const int rowchanged, const std::vector< Value *> &psiM_g_list, const std::vector< Value *> &psiM_l_list, const std::vector< bool > &isAccepted, const OffloadMWVGLArray< Value > &phi_vgl_v, const std::vector< Value > &ratios) |
Do complete row updates. Many of these const arguments provide pointers or references. Somewhere in here is an update that doesn't get where it belongs, resulting in a 0 gradient later. More... | |
Private Attributes | |
UnpinnedDualVector< Value > | temp |
scratch space for rank-1 update More... | |
UnpinnedDualVector< Value > | invRow |
row of up-to-date Ainv More... | |
int | invRow_id |
row id corresponding to the up-to-date invRow. More... | |
UnpinnedDualVector< Value > | rcopy |
DeviceMatrix< Value > | U_gpu |
orbital values of delayed electrons More... | |
DeviceMatrix< Value > | V_gpu |
rows of Ainv corresponding to delayed electrons More... | |
DeviceMatrix< Value > | Binv_gpu |
Matrix inverse of B, at maximum KxK. More... | |
DeviceMatrix< Value > | tempMat_gpu |
scratch space, used during inverse update More... | |
DeviceVector< Value > | p_gpu |
new column of B More... | |
DeviceVector< int > | delay_list_gpu |
list of delayed electrons More... | |
int | delay_count |
current number of delays; increased by one for each acceptance, reset to 0 after updating Ainv More... | |
const bool | no_delayed_update_ |
if true, updates are not delayed. More... | |
Implements Dirac matrix delayed update using OpenMP offload and CUDA.
It is used as DET_ENGINE in DiracDeterminantBatched. This is a one-per-walker class.
T | base precision for most computation |
T_FP | high precision for matrix inversion, T_FP >= T |
Definition at line 36 of file DelayedUpdateBatched.h.
Definition at line 42 of file DelayedUpdateBatched.h.
|
private |
Definition at line 126 of file DelayedUpdateBatched.h.
|
private |
Definition at line 128 of file DelayedUpdateBatched.h.
using DualMatrix = Matrix<DT, OffloadPinnedAllocator<DT> > |
Definition at line 50 of file DelayedUpdateBatched.h.
using DualVector = Vector<DT, OffloadPinnedAllocator<DT> > |
Definition at line 48 of file DelayedUpdateBatched.h.
using DualVGLVector = VectorSoaContainer<DT, QMCTraits::DIM + 2, OffloadPinnedAllocator<DT> > |
Definition at line 52 of file DelayedUpdateBatched.h.
using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT> > |
Definition at line 56 of file DelayedUpdateBatched.h.
using OffloadMWVGLArray = Array<DT, 3, OffloadPinnedAllocator<DT> > |
Definition at line 54 of file DelayedUpdateBatched.h.
Definition at line 41 of file DelayedUpdateBatched.h.
using This_t = DelayedUpdateBatched<PL, VALUE> |
Definition at line 39 of file DelayedUpdateBatched.h.
using UnpinnedDualVector = Vector<DT, OffloadAllocator<DT> > |
Definition at line 46 of file DelayedUpdateBatched.h.
using Value = VALUE |
Definition at line 40 of file DelayedUpdateBatched.h.
|
inline |
constructor taking the orbital count and maximum delay (not a default constructor)
Definition at line 344 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::resize().
|
delete |
|
inlineprivate |
ensure no previous delay left.
This looks like it should be an assert
Definition at line 164 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::delay_count.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_updateRow(), and DelayedUpdateBatched< PL, VALUE >::updateRow().
|
inlinestatic |
Accept or reject row updates. Many of these const arguments provide pointers or references to objects that do get modified.
[in] | engines | |
[in] | rowchanged | |
[in] | psiM_g_list | |
[in] | psiM_l_list | |
[in] | isAccepted | |
[in] | phi_vgl_v | multiple walker orbital VGL |
[in,out] | ratios |
Definition at line 542 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::accept_rejectRow_buffer_H2D, qmcplusplus::compute::add_delay_list_save_sigma_VGL_batched(), DelayedUpdateBatched< PL, VALUE >::Binv_gpu, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::blas_handle, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::cminusone_vec, Matrix< T, Alloc >::cols(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::cone_vec, qmcplusplus::compute::BLAS::copy_batched(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::czero_vec, DelayedUpdateBatched< PL, VALUE >::delay_count, DelayedUpdateBatched< PL, VALUE >::delay_list_gpu, Matrix< T, Alloc >::device_data(), Array< T, D, ALLOC >::device_data_at(), qmcplusplus::compute::BLAS::gemv_batched(), qmcplusplus::compute::BLAS::ger_batched(), RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::invRow_id, qmcplusplus::lda, DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), DelayedUpdateBatched< PL, VALUE >::mw_updateRow(), DelayedUpdateBatched< PL, VALUE >::p_gpu, qmcplusplus::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::resize_fill_constant_arrays(), DelayedUpdateBatched< PL, VALUE >::U_gpu, and DelayedUpdateBatched< PL, VALUE >::V_gpu.
|
inlinestatic |
Definition at line 354 of file DelayedUpdateBatched.h.
References qmcplusplus::compute::calcGradients_batched(), Matrix< T, Alloc >::cols(), Matrix< T, Alloc >::device_data(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::evalGrad_buffer_H2D, RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::grads_value_v, DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), qmcplusplus::queue, and DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue.
|
inlinestatic |
Definition at line 403 of file DelayedUpdateBatched.h.
References Matrix< T, Alloc >::cols(), Matrix< T, Alloc >::data(), Matrix< T, Alloc >::device_data(), qmcplusplus::ewaldref::DIM, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::evalGrad_buffer_H2D, RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::grads_value_v, and DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::spingrads_value_v.
|
inlinestatic |
Return invRow host or device pointers based on the on_host request; prepare invRow first if it is not already prepared.
Definition at line 763 of file DelayedUpdateBatched.h.
References RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), qmcplusplus::queue, and DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue.
|
inlinestaticprivate |
compute the row of up-to-date Ainv
Ainv | inverse matrix |
rowchanged | the row id corresponding to the proposed electron |
Definition at line 174 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::Binv_gpu, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::blas_handle, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::cminusone_vec, Matrix< T, Alloc >::cols(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::cone_vec, qmcplusplus::compute::BLAS::copy_batched(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::czero_vec, DelayedUpdateBatched< PL, VALUE >::delay_count, Matrix< T, Alloc >::device_data(), qmcplusplus::compute::BLAS::gemv_batched(), RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::invRow, DelayedUpdateBatched< PL, VALUE >::p_gpu, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::prepare_inv_row_buffer_H2D, qmcplusplus::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::resize_fill_constant_arrays(), DelayedUpdateBatched< PL, VALUE >::U_gpu, and DelayedUpdateBatched< PL, VALUE >::V_gpu.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_evalGrad(), and DelayedUpdateBatched< PL, VALUE >::mw_getInvRow().
|
inlinestatic |
transfer Ainv to the host
Definition at line 813 of file DelayedUpdateBatched.h.
References RefVectorWithLeader< T >::getLeader(), qmcplusplus::queue, and DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue.
|
inlinestatic |
update the full Ainv and reset delay_count
Ainv | inverse matrix |
Definition at line 675 of file DelayedUpdateBatched.h.
References qmcplusplus::compute::applyW_batched(), DelayedUpdateBatched< PL, VALUE >::Binv_gpu, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::blas_handle, DelayedUpdateBatched< PL, VALUE >::delay_count, DelayedUpdateBatched< PL, VALUE >::delay_list_gpu, Matrix< T, Alloc >::device_data(), qmcplusplus::compute::BLAS::gemm_batched(), RefVectorWithLeader< T >::getLeader(), qmcplusplus::lda, qmcplusplus::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::resize_fill_constant_arrays(), DelayedUpdateBatched< PL, VALUE >::tempMat_gpu, DelayedUpdateBatched< PL, VALUE >::U_gpu, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::updateInv_buffer_H2D, and DelayedUpdateBatched< PL, VALUE >::V_gpu.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow().
|
inlinestaticprivate |
Do complete row updates. Many of these const arguments provide pointers or references. Somewhere in here is an update that doesn't get where it belongs, resulting in a 0 gradient later.
Sad example of OpenMP target code that is far from clear and a poor substitute for a clear CPU reference implementation.
[in] | engines | |
[in] | rowchanged | |
[in] | psiM_g_list | device ptrs |
[in] | psiM_l_list | device ptrs |
[in] | isAccepted | bool but wait some lists are also filtered |
[in] | phi_vgl_v | multiple walker orbital VGL |
[in,out] | ratios |
Definition at line 252 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::blas_handle, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::cone_vec, qmcplusplus::compute::copyAinvRow_saveGL_batched(), DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::czero_vec, Matrix< T, Alloc >::device_data(), Array< T, D, ALLOC >::device_data_at(), qmcplusplus::compute::BLAS::gemv_batched(), qmcplusplus::compute::BLAS::ger_batched(), RefVectorWithLeader< T >::getLeader(), DelayedUpdateBatched< PL, VALUE >::guard_no_delay(), qmcplusplus::lda, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::mw_rcopy, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::mw_temp, qmcplusplus::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::queue, DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::resize_fill_constant_arrays(), and DelayedUpdateBatched< PL, VALUE >::MultiWalkerResource::updateRow_buffer_H2D.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow().
|
inlineprivate |
resize the internal storage
norb | number of electrons/orbitals |
delay | maximum delay, 0<delay<=norb |
Definition at line 150 of file DelayedUpdateBatched.h.
References DelayedUpdateBatched< PL, VALUE >::Binv_gpu, DelayedUpdateBatched< PL, VALUE >::delay_list_gpu, DelayedUpdateBatched< PL, VALUE >::invRow, DelayedUpdateBatched< PL, VALUE >::p_gpu, DelayedUpdateBatched< PL, VALUE >::tempMat_gpu, DelayedUpdateBatched< PL, VALUE >::U_gpu, and DelayedUpdateBatched< PL, VALUE >::V_gpu.
Referenced by DelayedUpdateBatched< PL, VALUE >::DelayedUpdateBatched().
|
inline |
Update the "local" psiMinv_ on the device.
Side Effect Transfers:
Forced to use OpenMP target since resources are banned for single-walker function APIs and the acquireRelease pattern for a single DDB was removed by #3324.
Definition at line 493 of file DelayedUpdateBatched.h.
References Matrix< T, Alloc >::cols(), BLAS::cone, BLAS::czero, Matrix< T, Alloc >::data(), qmcplusplus::ompBLAS::gemv(), qmcplusplus::ompBLAS::ger(), DelayedUpdateBatched< PL, VALUE >::guard_no_delay(), qmcplusplus::lda, DelayedUpdateBatched< PL, VALUE >::rcopy, Matrix< T, Alloc >::rows(), and DelayedUpdateBatched< PL, VALUE >::temp.
|
private |
Matrix inverse of B, at maximum KxK.
Definition at line 134 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
current number of delays; increased by one for each acceptance, reset to 0 after updating Ainv
Definition at line 142 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::guard_no_delay(), DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), and DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat().
|
private |
list of delayed electrons
Definition at line 140 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
row of up-to-date Ainv
Definition at line 115 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
row id corresponding to the up-to-date invRow.
[0, norb): invRow is ready; -1: invRow is not valid. This id is set after calling getInvRow, indicating invRow has been prepared for the invRow_id row. ratioGrad checks if invRow_id is consistent; if not, invRow needs to be recomputed. acceptMove and completeUpdates mark invRow invalid by setting invRow_id to -1.
Definition at line 121 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow().
|
private |
if true, updates are not delayed.
Definition at line 144 of file DelayedUpdateBatched.h.
|
private |
new column of B
Definition at line 138 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
Definition at line 123 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::updateRow().
|
private |
scratch space for rank-1 update
Definition at line 113 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::updateRow().
|
private |
scratch space, used during inverse update
Definition at line 136 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
orbital values of delayed electrons
Definition at line 130 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and DelayedUpdateBatched< PL, VALUE >::resize().
|
private |
rows of Ainv corresponding to delayed electrons
Definition at line 132 of file DelayedUpdateBatched.h.
Referenced by DelayedUpdateBatched< PL, VALUE >::mw_accept_rejectRow(), DelayedUpdateBatched< PL, VALUE >::mw_prepareInvRow(), DelayedUpdateBatched< PL, VALUE >::mw_updateInvMat(), and DelayedUpdateBatched< PL, VALUE >::resize().