QMCPACK
DiracDeterminantBatched.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2021 QMCPACK developers.
6 //
7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
8 // Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
9 //
10 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
16 #include "CPU/BLAS.hpp"
17 #include "OhmmsPETE/OhmmsMatrix.h"
20 #ifndef QMC_COMPLEX
22 #endif
24 #include <cassert>
25 
26 namespace qmcplusplus
27 {
28 template<PlatformKind PL, typename VT, typename FPVT>
30 {
31  DiracDeterminantBatchedMultiWalkerResource() : Resource("DiracDeterminantBatched") {}
34  {}
35 
36  std::unique_ptr<Resource> makeClone() const override
37  {
38  return std::make_unique<DiracDeterminantBatchedMultiWalkerResource>(*this);
39  }
41  /// value, grads, laplacian of single-particle orbital for particle-by-particle update and multi walker [5][nw][norb]
43  /// multi walker of ratio
44  std::vector<Value> ratios_local;
45  /// multi walker of grads
46  std::vector<Grad> grad_new_local;
47  /// multi walker of spingrads
48  std::vector<Value> spingrad_new_local;
49  /// mw spin gradients of orbitals, matrix is [nw][norb]
51  /// reference to per DDB psiMinvs in a crowd
53  ///
54  typename UpdateEngine::MultiWalkerResource engine_rsc;
55 };
56 
57 /** constructor
58  *@param spos the single-particle orbital set
59  *@param first index of the first particle
60  */
61 template<PlatformKind PL, typename VT, typename FPVT>
63  int first,
64  int last,
65  int ndelay,
66  DetMatInvertor matrix_inverter_kind)
67  : DiracDeterminantBase("DiracDeterminantBatched", std::move(spos), first, last),
68  det_engine_(NumOrbitals, ndelay),
69  ndelay_(ndelay),
70  matrix_inverter_kind_(matrix_inverter_kind),
71  D2HTimer(createGlobalTimer("DiracDeterminantBatched::D2H", timer_level_fine)),
72  H2DTimer(createGlobalTimer("DiracDeterminantBatched::H2D", timer_level_fine))
73 {
74  static_assert(std::is_same<SPOSet::ValueType, typename UpdateEngine::Value>::value);
76 
77 #ifndef QMC_COMPLEX
78  RotatedSPOs* rot_spo = dynamic_cast<RotatedSPOs*>(Phi.get());
79  if (rot_spo)
80  rot_spo->buildOptVariables(NumPtcls);
81 #endif
82 }
83 
84 template<PlatformKind PL, typename VT, typename FPVT>
86 {
87  ScopedTimer inverse_timer(InverseTimer);
88  host_inverter_.invert_transpose(psiM, psiMinv, log_value_);
89  psiMinv.updateTo();
90 
91 #ifndef NDEBUG
92  // This is easily breakable in that it assumes this function gets psiMinv == det_engine_.psiMinv_
93  auto& engine_psiMinv = psiMinv_;
94  dummy_vmt.attachReference(engine_psiMinv.data(), engine_psiMinv.rows(), engine_psiMinv.cols());
95 #endif
96 }
97 
98 template<PlatformKind PL, typename VT, typename FPVT>
100  const RefVector<const DualMatrix<Value>>& logdetT_list,
101  const RefVector<DualMatrix<Value>>& a_inv_list)
102 {
103  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
104  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
105  ScopedTimer inverse_timer(wfc_leader.InverseTimer);
106  const auto nw = wfc_list.size();
107 
108  if (wfc_leader.matrix_inverter_kind_ == DetMatInvertor::ACCEL)
109  {
110  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
111  engine_list.reserve(nw);
112 
113  mw_res.log_values.resize(nw);
114 
115  for (int iw = 0; iw < nw; iw++)
116  {
118  engine_list.push_back(det.det_engine_);
119  mw_res.log_values[iw] = {0.0, 0.0};
120  }
121 
122  wfc_leader.accel_inverter_.getResource().mw_invertTranspose(mw_res.engine_rsc.queue, logdetT_list, a_inv_list,
123  mw_res.log_values);
124 
125  for (int iw = 0; iw < nw; ++iw)
126  {
128  det.log_value_ = mw_res.log_values[iw];
129  }
130  }
131  else
132  {
133  for (int iw = 0; iw < nw; iw++)
134  {
136  DualMatrix<Value>& psiMinv = a_inv_list[iw];
137  det.host_inverter_.invert_transpose(logdetT_list[iw].get(), psiMinv, det.log_value_);
138  psiMinv.updateTo();
139  }
140  }
141 
142 #ifndef NDEBUG
143  for (int iw = 0; iw < nw; ++iw)
144  {
146  auto& engine_psiMinv = det.psiMinv_;
147  det.dummy_vmt.attachReference(engine_psiMinv.data(), engine_psiMinv.rows(), engine_psiMinv.cols());
148  }
149 #endif
150 }
151 
152 ///reset the size: with the number of particles and number of orbtials
153 template<PlatformKind PL, typename VT, typename FPVT>
155 {
156  int norb = morb;
157  if (norb <= 0)
158  norb = nel; // for morb == -1 (default)
159  psiMinv_.resize(norb, getAlignedSize<Value>(norb));
160  psiM_vgl.resize(nel * norb);
161  // attach pointers VGL
162  psiM_temp.attachReference(psiM_vgl, psiM_vgl.data(0), nel, norb);
163  psiM_host.attachReference(psiM_vgl.data(0), nel, norb);
164  dpsiM.attachReference(reinterpret_cast<Grad*>(psiM_vgl.data(1)), nel, norb);
165  d2psiM.attachReference(psiM_vgl.data(4), nel, norb);
166 
167  psiV.resize(NumOrbitals);
168  psiV_host_view.attachReference(psiV.data(), NumOrbitals);
169  dpsiV.resize(NumOrbitals);
170  dpsiV_host_view.attachReference(dpsiV.data(), NumOrbitals);
171  d2psiV.resize(NumOrbitals);
172  d2psiV_host_view.attachReference(d2psiV.data(), NumOrbitals);
173  dspin_psiV.resize(NumOrbitals);
174  dspin_psiV_host_view.attachReference(dspin_psiV.data(), NumOrbitals);
175 }
176 
177 template<PlatformKind PL, typename VT, typename FPVT>
179  int iat)
180 {
181  ScopedTimer local_timer(RatioTimer);
182  const int WorkingIndex = iat - FirstIndex;
183  Grad g = simd::dot(psiMinv_[WorkingIndex], dpsiM[WorkingIndex], NumOrbitals);
184  assert(checkG(g));
185  return g;
186 }
187 
188 template<PlatformKind PL, typename VT, typename FPVT>
190  const RefVectorWithLeader<ParticleSet>& p_list,
191  int iat,
192  std::vector<Grad>& grad_now) const
193 {
194  assert(this == &wfc_list.getLeader());
195  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
196  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
197  ScopedTimer local_timer(RatioTimer);
198 
199  const int nw = wfc_list.size();
200  std::vector<const Value*> dpsiM_row_list(nw, nullptr);
201  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
202  engine_list.reserve(nw);
203 
204  const int WorkingIndex = iat - FirstIndex;
205  for (int iw = 0; iw < nw; iw++)
206  {
208  // capacity is the size of each vector in the VGL so this advances us to the g then makes
209  // an offset into the gradients
210  dpsiM_row_list[iw] = det.psiM_vgl.device_data() + psiM_vgl.capacity() + NumOrbitals * WorkingIndex * DIM;
211  engine_list.push_back(det.det_engine_);
212  }
213 
214  UpdateEngine::mw_evalGrad(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc, mw_res.psiMinv_refs,
215  dpsiM_row_list, WorkingIndex, grad_now);
216 
217 #ifndef NDEBUG
218  for (int iw = 0; iw < nw; iw++)
219  checkG(grad_now[iw]);
220 #endif
221 }
222 
223 template<PlatformKind PL, typename VT, typename FPVT>
225  ParticleSet& P,
226  int iat,
227  ComplexType& spingrad)
228 {
229  Phi->evaluate_spin(P, iat, psiV_host_view, dspin_psiV_host_view);
230  ScopedTimer local_timer(RatioTimer);
231  const int WorkingIndex = iat - FirstIndex;
232  Grad g = simd::dot(psiMinv_[WorkingIndex], dpsiM[WorkingIndex], NumOrbitals);
233  ComplexType spin_g = simd::dot(psiMinv_[WorkingIndex], dspin_psiV.data(), NumOrbitals);
234  assert(checkG(g));
235  spingrad += spin_g;
236  return g;
237 }
238 
239 template<PlatformKind PL, typename VT, typename FPVT>
242  const RefVectorWithLeader<ParticleSet>& p_list,
243  int iat,
244  std::vector<Grad>& grad_now,
245  std::vector<ComplexType>& spingrad_now) const
246 {
247  assert(this == &wfc_list.getLeader());
248  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
249  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
250  auto& mw_dspin = mw_res.mw_dspin;
251  ScopedTimer local_timer(RatioTimer);
252 
253  const int nw = wfc_list.size();
254  const int num_orbitals = wfc_leader.Phi->size();
255  mw_dspin.resize(nw, num_orbitals);
256 
257  //Here, we are just always recomputing the spin gradients from the SPOSet for simplicity.
258  //If we stored and modified the accept/reject to include updating stored spin gradients, we could the
259  //mw_evaluateVGLWithSpin call below and just use the stored spin gradients.
260  //May revisit this in the future.
261  RefVectorWithLeader<SPOSet> phi_list(*Phi);
262  RefVector<SPOSet::ValueVector> psi_v_list, lap_v_list;
263  RefVector<SPOSet::GradVector> grad_v_list;
264  for (int iw = 0; iw < wfc_list.size(); iw++)
265  {
267  phi_list.push_back(*det.Phi);
268  psi_v_list.push_back(det.psiV_host_view);
269  grad_v_list.push_back(det.dpsiV_host_view);
270  lap_v_list.push_back(det.d2psiV_host_view);
271  }
272 
273  auto& phi_leader = phi_list.getLeader();
274  phi_leader.mw_evaluateVGLWithSpin(phi_list, p_list, iat, psi_v_list, grad_v_list, lap_v_list, mw_dspin);
275 
276  std::vector<const Value*> dpsiM_row_list(nw, nullptr);
277  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
278  engine_list.reserve(nw);
279 
280  const int WorkingIndex = iat - FirstIndex;
281  for (int iw = 0; iw < nw; iw++)
282  {
284  // capacity is the size of each vector in the VGL so this advances us to the g then makes
285  // an offset into the gradients
286  dpsiM_row_list[iw] = det.psiM_vgl.device_data() + psiM_vgl.capacity() + NumOrbitals * WorkingIndex * DIM;
287  engine_list.push_back(det.det_engine_);
288  }
289 
290  UpdateEngine::mw_evalGradWithSpin(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc,
291  mw_res.psiMinv_refs, dpsiM_row_list, mw_dspin, WorkingIndex, grad_now,
292  spingrad_now);
293 
294 #ifndef NDEBUG
295  for (int iw = 0; iw < nw; iw++)
296  checkG(grad_now[iw]);
297 #endif
298 }
299 
300 template<PlatformKind PL, typename VT, typename FPVT>
302  ParticleSet& P,
303  int iat,
304  Grad& grad_iat)
305 {
306  UpdateMode = ORB_PBYP_PARTIAL;
307 
308  {
309  ScopedTimer local_timer(SPOVGLTimer);
310  Phi->evaluateVGL(P, iat, psiV_host_view, dpsiV_host_view, d2psiV_host_view);
311  }
312 
313  {
314  ScopedTimer local_timer(RatioTimer);
315  const int WorkingIndex = iat - FirstIndex;
316  curRatio = simd::dot(psiMinv_[WorkingIndex], psiV.data(), NumOrbitals);
317  grad_iat += static_cast<Value>(static_cast<PsiValue>(1.0) / curRatio) *
318  simd::dot(psiMinv_[WorkingIndex], dpsiV.data(), NumOrbitals);
319  }
320  return curRatio;
321 }
322 
323 template<PlatformKind PL, typename VT, typename FPVT>
325  const RefVectorWithLeader<ParticleSet>& p_list,
326  int iat,
327  std::vector<PsiValue>& ratios,
328  std::vector<Grad>& grad_new) const
329 {
330  assert(this == &wfc_list.getLeader());
331  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
332  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
333  auto& phi_vgl_v = mw_res.phi_vgl_v;
334  auto& ratios_local = mw_res.ratios_local;
335  auto& grad_new_local = mw_res.grad_new_local;
336  {
337  ScopedTimer local_timer(SPOVGLTimer);
338  RefVectorWithLeader<SPOSet> phi_list(*Phi);
339  phi_list.reserve(wfc_list.size());
340  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
341  engine_list.reserve(wfc_list.size());
342  const int WorkingIndex = iat - FirstIndex;
343  for (int iw = 0; iw < wfc_list.size(); iw++)
344  {
346  phi_list.push_back(*det.Phi);
347  engine_list.push_back(det.det_engine_);
348  }
349 
350  auto psiMinv_row_dev_ptr_list =
351  UpdateEngine::mw_getInvRow(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc, mw_res.psiMinv_refs,
352  WorkingIndex, !Phi->isOMPoffload());
353 
354  phi_vgl_v.resize(DIM_VGL, wfc_list.size(), NumOrbitals);
355  ratios_local.resize(wfc_list.size());
356  grad_new_local.resize(wfc_list.size());
357 
358  wfc_leader.Phi->mw_evaluateVGLandDetRatioGrads(phi_list, p_list, iat, psiMinv_row_dev_ptr_list, phi_vgl_v,
359  ratios_local, grad_new_local);
360  }
361 
362  wfc_leader.UpdateMode = ORB_PBYP_PARTIAL;
363  for (int iw = 0; iw < wfc_list.size(); iw++)
364  {
366  det.UpdateMode = ORB_PBYP_PARTIAL;
367  ratios[iw] = det.curRatio = ratios_local[iw];
368  grad_new[iw] += grad_new_local[iw];
369  }
370 }
371 
372 template<PlatformKind PL, typename VT, typename FPVT>
375  const RefVectorWithLeader<ParticleSet>& p_list,
376  int iat,
377  std::vector<PsiValue>& ratios,
378  std::vector<Grad>& grad_new,
379  std::vector<ComplexType>& spingrad_new) const
380 {
381  assert(this == &wfc_list.getLeader());
382  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
383  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
384  auto& phi_vgl_v = mw_res.phi_vgl_v;
385  auto& ratios_local = mw_res.ratios_local;
386  auto& grad_new_local = mw_res.grad_new_local;
387  auto& spingrad_new_local = mw_res.spingrad_new_local;
388  {
389  ScopedTimer local_timer(SPOVGLTimer);
390  RefVectorWithLeader<SPOSet> phi_list(*Phi);
391  phi_list.reserve(wfc_list.size());
392  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
393  engine_list.reserve(wfc_list.size());
394  const int WorkingIndex = iat - FirstIndex;
395  for (int iw = 0; iw < wfc_list.size(); iw++)
396  {
398  phi_list.push_back(*det.Phi);
399  engine_list.push_back(det.det_engine_);
400  }
401 
402  auto psiMinv_row_dev_ptr_list =
403  UpdateEngine::mw_getInvRow(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc, mw_res.psiMinv_refs,
404  WorkingIndex, !Phi->isOMPoffload());
405 
406  phi_vgl_v.resize(DIM_VGL, wfc_list.size(), NumOrbitals);
407  ratios_local.resize(wfc_list.size());
408  grad_new_local.resize(wfc_list.size());
409  spingrad_new_local.resize(wfc_list.size());
410 
411  wfc_leader.Phi->mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, p_list, iat, psiMinv_row_dev_ptr_list, phi_vgl_v,
412  ratios_local, grad_new_local, spingrad_new_local);
413  }
414 
415  wfc_leader.UpdateMode = ORB_PBYP_PARTIAL;
416  for (int iw = 0; iw < wfc_list.size(); iw++)
417  {
419  det.UpdateMode = ORB_PBYP_PARTIAL;
420  ratios[iw] = det.curRatio = ratios_local[iw];
421  grad_new[iw] += grad_new_local[iw];
422  spingrad_new[iw] += spingrad_new_local[iw];
423  }
424 }
425 
426 template<PlatformKind PL, typename VT, typename FPVT>
428  ParticleSet& P,
429  int iat,
430  Grad& grad_iat,
431  ComplexType& spingrad_iat)
432 {
433  UpdateMode = ORB_PBYP_PARTIAL;
434 
435  {
436  ScopedTimer local_timer(SPOVGLTimer);
437  Phi->evaluateVGL_spin(P, iat, psiV_host_view, dpsiV_host_view, d2psiV_host_view, dspin_psiV_host_view);
438  }
439 
440  {
441  ScopedTimer local_timer(RatioTimer);
442  const int WorkingIndex = iat - FirstIndex;
443  curRatio = simd::dot(psiMinv_[WorkingIndex], psiV.data(), NumOrbitals);
444  grad_iat += static_cast<Value>(static_cast<PsiValue>(1.0) / curRatio) *
445  simd::dot(psiMinv_[WorkingIndex], dpsiV.data(), NumOrbitals);
446  spingrad_iat += static_cast<Value>(static_cast<PsiValue>(1.0) / curRatio) *
447  simd::dot(psiMinv_[WorkingIndex], dspin_psiV.data(), NumOrbitals);
448  }
449  return curRatio;
450 }
451 
452 
453 /** move was accepted, update the real container
454 */
455 template<PlatformKind PL, typename VT, typename FPVT>
457 {
458  if (curRatio == PsiValue(0))
459  {
460  std::ostringstream msg;
461  msg << "DiracDeterminantBatched::acceptMove curRatio is " << curRatio << "! Report a bug." << std::endl;
462  throw std::runtime_error(msg.str());
463  }
464  const int WorkingIndex = iat - FirstIndex;
465  log_value_ += convertValueToLog(curRatio);
466  {
467  ScopedTimer local_timer(UpdateTimer);
468  psiV.updateTo();
469  det_engine_.updateRow(psiMinv_, WorkingIndex, psiV, curRatio);
470  if (UpdateMode == ORB_PBYP_PARTIAL)
471  {
472  simd::copy(dpsiM[WorkingIndex], dpsiV.data(), NumOrbitals);
473  simd::copy(d2psiM[WorkingIndex], d2psiV.data(), NumOrbitals);
474  }
475  }
476  curRatio = 1.0;
477 }
478 
479 template<PlatformKind PL, typename VT, typename FPVT>
482  const RefVectorWithLeader<ParticleSet>& p_list,
483  int iat,
484  const std::vector<bool>& isAccepted,
485  bool safe_to_delay) const
486 {
487  assert(this == &wfc_list.getLeader());
488  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
489  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
490  auto& phi_vgl_v = mw_res.phi_vgl_v;
491  auto& ratios_local = mw_res.ratios_local;
492 
493  ScopedTimer update(UpdateTimer);
494 
495  const int nw = wfc_list.size();
496  int count = 0;
497  for (int iw = 0; iw < nw; iw++)
498  if (isAccepted[iw])
499  count++;
500  const int n_accepted = count;
501 
502  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
503  engine_list.reserve(nw);
504  std::vector<Value*> psiM_g_dev_ptr_list(n_accepted, nullptr);
505  std::vector<Value*> psiM_l_dev_ptr_list(n_accepted, nullptr);
506 
507  const int WorkingIndex = iat - FirstIndex;
508  for (int iw = 0, count = 0; iw < nw; iw++)
509  {
510  // This can be auto but some debuggers can't figure the type out.
512  engine_list.push_back(det.det_engine_);
513  if (isAccepted[iw])
514  {
515  psiM_g_dev_ptr_list[count] = det.psiM_vgl.device_data() + psiM_vgl.capacity() + NumOrbitals * WorkingIndex * DIM;
516  psiM_l_dev_ptr_list[count] = det.psiM_vgl.device_data() + psiM_vgl.capacity() * 4 + NumOrbitals * WorkingIndex;
517  if (det.curRatio == PsiValue(0))
518 
519  {
520  std::ostringstream msg;
521  msg << "DiracDeterminantBatched::mw_accept_rejectMove det.curRatio is " << det.curRatio << "! Report a bug."
522  << std::endl;
523  throw std::runtime_error(msg.str());
524  }
525  det.log_value_ += convertValueToLog(det.curRatio);
526  count++;
527  }
528  det.curRatio = 1.0;
529  }
530 
531  UpdateEngine::mw_accept_rejectRow(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc,
532  mw_res.psiMinv_refs, WorkingIndex, psiM_g_dev_ptr_list, psiM_l_dev_ptr_list,
533  isAccepted, phi_vgl_v, ratios_local);
534 
535  if (!safe_to_delay)
536  UpdateEngine::mw_updateInvMat(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc, mw_res.psiMinv_refs);
537 }
538 
539 /** move was rejected. copy the real container to the temporary to move on
540 */
541 template<PlatformKind PL, typename VT, typename FPVT>
543 {
544  curRatio = 1.0;
545 }
546 
547 template<PlatformKind PL, typename VT, typename FPVT>
549 {
550  ScopedTimer update(UpdateTimer);
551  if (UpdateMode == ORB_PBYP_PARTIAL)
552  {
553  // dpsiM, d2psiM on the device needs to be aligned as host.
554  auto* psiM_vgl_ptr = psiM_vgl.data();
555  // transfer host to device, total size 4, g(3) + l(1)
556  PRAGMA_OFFLOAD("omp target update to(psiM_vgl_ptr[psiM_vgl.capacity():psiM_vgl.capacity()*4])")
557  }
558 }
559 
560 template<PlatformKind PL, typename VT, typename FPVT>
562  const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
563 {
564  assert(this == &wfc_list.getLeader());
565  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
566  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
567  const auto nw = wfc_list.size();
568  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
569  engine_list.reserve(nw);
570  for (int iw = 0; iw < nw; iw++)
571  {
573  engine_list.push_back(det.det_engine_);
574  }
575 
576  {
577  ScopedTimer update(UpdateTimer);
578  UpdateEngine::mw_updateInvMat(engine_list, mw_res.engine_rsc, mw_res.psiMinv_refs);
579  }
580 
581  { // transfer dpsiM, d2psiM, psiMinv to host
582  ScopedTimer d2h(D2HTimer);
583 
584  // this call also completes all the device copying of dpsiM, d2psiM before the target update
585  UpdateEngine::mw_transferAinv_D2H(engine_list, mw_res.engine_rsc, mw_res.psiMinv_refs);
586 
587  if (UpdateMode == ORB_PBYP_PARTIAL)
588  {
589  RefVector<DualVGLVector> psiM_vgl_list;
590  psiM_vgl_list.reserve(nw);
591  for (int iw = 0; iw < nw; iw++)
592  {
594  psiM_vgl_list.push_back(det.psiM_vgl);
595  }
596 
597  auto& queue = mw_res.engine_rsc.queue;
598  for (DualVGLVector& psiM_vgl : psiM_vgl_list)
599  {
600  const size_t stride = psiM_vgl.capacity();
601  // transfer device to host, total size 4, g(3) + l(1), skipping v
602  queue.enqueueD2H(psiM_vgl, stride * 4, stride);
603  }
604  queue.sync();
605  }
606  }
607 }
608 
609 template<PlatformKind PL, typename VT, typename FPVT>
612 {
613  for (size_t i = 0, iat = FirstIndex; i < NumPtcls; ++i, ++iat)
614  {
615  Grad rv = simd::dot(psiMinv_[i], dpsiM[i], NumOrbitals);
616  Value lap = simd::dot(psiMinv_[i], d2psiM[i], NumOrbitals);
617  G[iat] += rv;
618  L[iat] += lap - dot(rv, rv);
619  }
620 }
621 
622 template<PlatformKind PL, typename VT, typename FPVT>
624  const ParticleSet& P,
627  bool fromscratch)
628 {
629  if (fromscratch)
630  // this updates LogValue
631  evaluateLog(P, G, L);
632  else
633  {
634  if (UpdateMode == ORB_PBYP_RATIO)
635  { //need to compute dpsiM and d2psiM. Do not touch psiM!
636  ScopedTimer spo_timer(SPOVGLTimer);
637  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_host, dpsiM, d2psiM);
638  }
639  UpdateMode = ORB_WALKER;
640  computeGL(G, L);
641  }
642  return log_value_;
643 }
644 
645 template<PlatformKind PL, typename VT, typename FPVT>
647  const RefVectorWithLeader<ParticleSet>& p_list,
650  bool fromscratch) const
651 {
652  assert(this == &wfc_list.getLeader());
653  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
654  if (fromscratch)
655  mw_evaluateLog(wfc_list, p_list, G_list, L_list);
656  else
657  {
658  const auto nw = wfc_list.size();
659  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
660  engine_list.reserve(nw);
661 
662  if (UpdateMode == ORB_PBYP_RATIO)
663  { //need to compute dpsiM and d2psiM. psiMinv is not touched!
664  ScopedTimer spo_timer(SPOVGLTimer);
665 
666  RefVectorWithLeader<SPOSet> phi_list(*Phi);
667  RefVector<Matrix<Value>> psiM_temp_list;
668  RefVector<Matrix<Grad>> dpsiM_list;
669  RefVector<Matrix<Value>> d2psiM_list;
670  phi_list.reserve(wfc_list.size());
671  psiM_temp_list.reserve(nw);
672  dpsiM_list.reserve(nw);
673  d2psiM_list.reserve(nw);
674 
675  for (int iw = 0; iw < nw; iw++)
676  {
678  engine_list.push_back(det.det_engine_);
679  phi_list.push_back(*det.Phi);
680  psiM_temp_list.push_back(det.psiM_host);
681  dpsiM_list.push_back(det.dpsiM);
682  d2psiM_list.push_back(det.d2psiM);
683  }
684 
685  Phi->mw_evaluate_notranspose(phi_list, p_list, FirstIndex, LastIndex, psiM_temp_list, dpsiM_list, d2psiM_list);
686  }
687 
688  for (int iw = 0; iw < nw; iw++)
689  {
691 
692 #ifndef NDEBUG
693  // at this point, host and device should have exactly the same G and L values.
694  GradMatrix dpsiM_from_device = det.dpsiM;
695  Matrix<Value> d2psiM_from_device = det.d2psiM;
696 
697  auto& my_psiM_vgl = det.psiM_vgl;
698  auto* psiM_vgl_ptr = my_psiM_vgl.data();
699  // transfer device to host, total size 4, g(3) + l(1)
700  PRAGMA_OFFLOAD("omp target update from(psiM_vgl_ptr[my_psiM_vgl.capacity():my_psiM_vgl.capacity()*4])")
701  assert(dpsiM_from_device == det.dpsiM);
702  assert(d2psiM_from_device == det.d2psiM);
703 #endif
704 
705  det.UpdateMode = ORB_WALKER;
706  det.computeGL(G_list[iw], L_list[iw]);
707  }
708  }
709 }
710 
711 template<PlatformKind PL, typename VT, typename FPVT>
713 {
714  buf.add(psiMinv_.first_address(), psiMinv_.last_address());
715  buf.add(dpsiM.first_address(), dpsiM.last_address());
716  buf.add(d2psiM.first_address(), d2psiM.last_address());
717  buf.add(log_value_);
718 }
719 
720 template<PlatformKind PL, typename VT, typename FPVT>
722  ParticleSet& P,
723  WFBufferType& buf,
724  bool fromscratch)
725 {
726  evaluateGL(P, P.G, P.L, fromscratch);
727  ScopedTimer local_timer(BufferTimer);
728  buf.put(psiMinv_.first_address(), psiMinv_.last_address());
729  buf.put(dpsiM.first_address(), dpsiM.last_address());
730  buf.put(d2psiM.first_address(), d2psiM.last_address());
731  buf.put(log_value_);
732  return log_value_;
733 }
734 
735 template<PlatformKind PL, typename VT, typename FPVT>
737 {
738  ScopedTimer local_timer(BufferTimer);
739  buf.get(psiMinv_.first_address(), psiMinv_.last_address());
740  buf.get(dpsiM.first_address(), dpsiM.last_address());
741  buf.get(d2psiM.first_address(), d2psiM.last_address());
742  psiMinv_.updateTo();
743  auto* psiM_vgl_ptr = psiM_vgl.data();
744  const size_t psiM_vgl_stride = psiM_vgl.capacity();
745  // transfer host to device, total size 4, g(3) + l(1)
746  PRAGMA_OFFLOAD("omp target update to(psiM_vgl_ptr[psiM_vgl_stride:psiM_vgl_stride*4])")
747  buf.get(log_value_);
748 }
749 
750 /** return the ratio only for the iat-th partcle move
751  * @param P current configuration
752  * @param iat the particle thas is being moved
753  */
754 template<PlatformKind PL, typename VT, typename FPVT>
756  int iat)
757 {
758  UpdateMode = ORB_PBYP_RATIO;
759  const int WorkingIndex = iat - FirstIndex;
760  {
761  ScopedTimer local_timer(SPOVTimer);
762  Phi->evaluateValue(P, iat, psiV_host_view);
763  }
764  {
765  ScopedTimer local_timer(RatioTimer);
766  curRatio = simd::dot(psiMinv_[WorkingIndex], psiV.data(), NumOrbitals);
767  }
768  return curRatio;
769 }
770 
771 template<PlatformKind PL, typename VT, typename FPVT>
773  const RefVectorWithLeader<ParticleSet>& p_list,
774  int iat,
775  std::vector<PsiValue>& ratios) const
776 {
777  assert(this == &wfc_list.getLeader());
778  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
779  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
780  auto& phi_vgl_v = mw_res.phi_vgl_v;
781  auto& ratios_local = mw_res.ratios_local;
782  auto& grad_new_local = mw_res.grad_new_local;
783 
784  {
785  ScopedTimer local_timer(SPOVTimer);
786  RefVectorWithLeader<SPOSet> phi_list(*Phi);
787  phi_list.reserve(wfc_list.size());
788  RefVectorWithLeader<UpdateEngine> engine_list(wfc_leader.det_engine_);
789  engine_list.reserve(wfc_list.size());
790 
791  const int WorkingIndex = iat - FirstIndex;
792  for (int iw = 0; iw < wfc_list.size(); iw++)
793  {
795  phi_list.push_back(*det.Phi);
796  engine_list.push_back(det.det_engine_);
797  }
798 
799  auto psiMinv_row_dev_ptr_list =
800  UpdateEngine::mw_getInvRow(engine_list, wfc_leader.mw_res_handle_.getResource().engine_rsc, mw_res.psiMinv_refs,
801  WorkingIndex, !Phi->isOMPoffload());
802 
803  phi_vgl_v.resize(DIM_VGL, wfc_list.size(), NumOrbitals);
804  ratios_local.resize(wfc_list.size());
805  grad_new_local.resize(wfc_list.size());
806 
807  // calling Phi->mw_evaluateVGLandDetRatioGrads is a temporary workaround.
808  // We may implement mw_evaluateVandDetRatio in the future.
809  wfc_leader.Phi->mw_evaluateVGLandDetRatioGrads(phi_list, p_list, iat, psiMinv_row_dev_ptr_list, phi_vgl_v,
810  ratios_local, grad_new_local);
811  }
812 
813  wfc_leader.UpdateMode = ORB_PBYP_RATIO;
814  for (int iw = 0; iw < wfc_list.size(); iw++)
815  {
817  det.UpdateMode = ORB_PBYP_RATIO;
818  ratios[iw] = det.curRatio = ratios_local[iw];
819  }
820 }
821 
822 template<PlatformKind PL, typename VT, typename FPVT>
824 {
825  {
826  ScopedTimer local_timer(RatioTimer);
827  const int WorkingIndex = VP.refPtcl - FirstIndex;
828  std::copy_n(psiMinv_[WorkingIndex], d2psiV.size(), d2psiV.data());
829  }
830  {
831  ScopedTimer local_timer(SPOVTimer);
832  Phi->evaluateDetRatios(VP, psiV_host_view, d2psiV_host_view, ratios);
833  }
834 }
835 
836 template<PlatformKind PL, typename VT, typename FPVT>
838  const VirtualParticleSet& VP,
839  const std::pair<ValueVector, ValueVector>& spinor_multipler,
840  std::vector<Value>& ratios)
841 {
842  {
843  ScopedTimer local_timer(RatioTimer);
844  const int WorkingIndex = VP.refPtcl - FirstIndex;
845  std::copy_n(psiMinv_[WorkingIndex], d2psiV.size(), d2psiV.data());
846  }
847  {
848  ScopedTimer local_timer(SPOVTimer);
849  Phi->evaluateDetSpinorRatios(VP, psiV_host_view, spinor_multipler, d2psiV_host_view, ratios);
850  }
851 }
852 
853 template<PlatformKind PL, typename VT, typename FPVT>
857  std::vector<std::vector<Value>>& ratios) const
858 {
859  assert(this == &wfc_list.getLeader());
860  const size_t nw = wfc_list.size();
861 
862  RefVectorWithLeader<SPOSet> phi_list(*Phi);
863  RefVector<Vector<Value>> psiV_list;
864  std::vector<const Value*> invRow_ptr_list;
865  phi_list.reserve(nw);
866  psiV_list.reserve(nw);
867  invRow_ptr_list.reserve(nw);
868 
869  {
870  ScopedTimer local_timer(RatioTimer);
871  for (size_t iw = 0; iw < nw; iw++)
872  {
874  const VirtualParticleSet& vp(vp_list[iw]);
875  const int WorkingIndex = vp.refPtcl - FirstIndex;
876  // build lists
877  phi_list.push_back(*det.Phi);
878  psiV_list.push_back(det.psiV_host_view);
879  if (Phi->isOMPoffload())
880  invRow_ptr_list.push_back(det.psiMinv_.device_data() + WorkingIndex * psiMinv_.cols());
881  else
882  invRow_ptr_list.push_back(det.psiMinv_[WorkingIndex]);
883  }
884  }
885 
886  {
887  ScopedTimer local_timer(SPOVTimer);
888  Phi->mw_evaluateDetRatios(phi_list, vp_list, psiV_list, invRow_ptr_list, ratios);
889  }
890 }
891 
892 template<PlatformKind PL, typename VT, typename FPVT>
894  const opt_variables_type& optvars,
895  std::vector<ValueType>& ratios,
896  Matrix<ValueType>& dratios)
897 {
898  const int WorkingIndex = VP.refPtcl - FirstIndex;
899  assert(WorkingIndex >= 0);
900  std::copy_n(psiMinv_[WorkingIndex], d2psiV.size(), d2psiV.data());
901  Phi->evaluateDerivRatios(VP, optvars, psiV_host_view, d2psiV_host_view, ratios, dratios, FirstIndex, LastIndex);
902 }
903 
904 template<PlatformKind PL, typename VT, typename FPVT>
906 {
907  {
908  ScopedTimer local_timer(SPOVTimer);
909  Phi->evaluateValue(P, -1, psiV_host_view);
910  }
911  for (int i = 0; i < psiMinv_.rows(); i++)
912  ratios[FirstIndex + i] = simd::dot(psiMinv_[i], psiV.data(), NumOrbitals);
913 }
914 
915 template<PlatformKind PL, typename VT, typename FPVT>
917 {
918  grad_source_psiM.resize(NumPtcls, NumOrbitals);
919  grad_lapl_source_psiM.resize(NumPtcls, NumOrbitals);
920  grad_grad_source_psiM.resize(NumPtcls, NumOrbitals);
921  phi_alpha_Minv.resize(NumPtcls, NumOrbitals);
922  grad_phi_Minv.resize(NumPtcls, NumOrbitals);
923  lapl_phi_Minv.resize(NumPtcls, NumOrbitals);
924  grad_phi_alpha_Minv.resize(NumPtcls, NumOrbitals);
925 }
926 
927 template<PlatformKind PL, typename VT, typename FPVT>
929  ParticleSet& P,
930  ParticleSet& source,
931  int iat)
932 {
933  Grad g(0.0);
934  if (Phi->hasIonDerivs())
935  {
936  resizeScratchObjectsForIonDerivs();
937  Phi->evaluateGradSource(P, FirstIndex, LastIndex, source, iat, grad_source_psiM);
938  // psiMinv columns have padding but grad_source_psiM ones don't
939  for (int i = 0; i < psiMinv_.rows(); i++)
940  g += simd::dot(psiMinv_[i], grad_source_psiM[i], NumOrbitals);
941  }
942 
943  return g;
944 }
945 
946 template<PlatformKind PL, typename VT, typename FPVT>
948 {
949  // Hessian is not often used, so only resize/allocate if used
950  grad_grad_source_psiM.resize(psiMinv_.rows(), NumOrbitals);
951  //IM A HACK. Assumes evaluateLog has already been executed.
952  Matrix<Value> psiM_temp_host(psiM_temp.data(), psiM_temp.rows(), psiM_temp.cols());
953  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_temp_host, dpsiM, grad_grad_source_psiM);
954  invertPsiM(psiM_temp, psiMinv_);
955 
956  phi_alpha_Minv = 0.0;
957  grad_phi_Minv = 0.0;
958  lapl_phi_Minv = 0.0;
959  grad_phi_alpha_Minv = 0.0;
960  //grad_grad_psi.resize(NumPtcls);
961 
962  for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++)
963  {
964  Grad rv = simd::dot(psiMinv_[i], dpsiM[i], NumOrbitals);
965  // HessType hess_tmp=simd::dot(psiMinv_[i],grad_grad_source_psiM[i],NumOrbitals);
966  Hess hess_tmp;
967  hess_tmp = 0.0;
968  hess_tmp = simd::dot(psiMinv_[i], grad_grad_source_psiM[i], NumOrbitals);
969  grad_grad_psi[iat] = hess_tmp - outerProduct(rv, rv);
970  }
971 }
972 
973 template<PlatformKind PL, typename VT, typename FPVT>
975  ParticleSet& P,
976  ParticleSet& source,
977  int iat,
980 {
981  Grad gradPsi(0.0);
982  if (Phi->hasIonDerivs())
983  {
984  resizeScratchObjectsForIonDerivs();
985  Phi->evaluateGradSource(P, FirstIndex, LastIndex, source, iat, grad_source_psiM, grad_grad_source_psiM,
986  grad_lapl_source_psiM);
987 
988  // Compute matrices
989  phi_alpha_Minv = 0.0;
990  grad_phi_Minv = 0.0;
991  lapl_phi_Minv = 0.0;
992  grad_phi_alpha_Minv = 0.0;
993  for (int i = 0; i < NumPtcls; i++)
994  for (int j = 0; j < NumOrbitals; j++)
995  {
996  lapl_phi_Minv(i, j) = 0.0;
997  for (int k = 0; k < NumOrbitals; k++)
998  lapl_phi_Minv(i, j) += d2psiM(i, k) * psiMinv_(j, k);
999  }
1000  for (int dim = 0; dim < OHMMS_DIM; dim++)
1001  {
1002  for (int i = 0; i < NumPtcls; i++)
1003  for (int j = 0; j < NumOrbitals; j++)
1004  {
1005  for (int k = 0; k < NumOrbitals; k++)
1006  {
1007  phi_alpha_Minv(i, j)[dim] += grad_source_psiM(i, k)[dim] * psiMinv_(j, k);
1008  grad_phi_Minv(i, j)[dim] += dpsiM(i, k)[dim] * psiMinv_(j, k);
1009  for (int dim_el = 0; dim_el < OHMMS_DIM; dim_el++)
1010  grad_phi_alpha_Minv(i, j)(dim, dim_el) += grad_grad_source_psiM(i, k)(dim, dim_el) * psiMinv_(j, k);
1011  }
1012  }
1013  }
1014  for (int i = 0, iel = FirstIndex; i < NumPtcls; i++, iel++)
1015  {
1016  Hess dval(0.0);
1017  Grad d2val(0.0);
1018  for (int dim = 0; dim < OHMMS_DIM; dim++)
1019  for (int dim_el = 0; dim_el < OHMMS_DIM; dim_el++)
1020  dval(dim, dim_el) = grad_phi_alpha_Minv(i, i)(dim, dim_el);
1021  for (int j = 0; j < NumOrbitals; j++)
1022  {
1023  gradPsi += grad_source_psiM(i, j) * psiMinv_(i, j);
1024  for (int dim = 0; dim < OHMMS_DIM; dim++)
1025  for (int k = 0; k < OHMMS_DIM; k++)
1026  dval(dim, k) -= phi_alpha_Minv(j, i)[dim] * grad_phi_Minv(i, j)[k];
1027  }
1028  for (int dim = 0; dim < OHMMS_DIM; dim++)
1029  {
1030  for (int k = 0; k < OHMMS_DIM; k++)
1031  grad_grad[dim][iel][k] += dval(dim, k);
1032  for (int j = 0; j < NumOrbitals; j++)
1033  {
1034  // First term, eq 9
1035  lapl_grad[dim][iel] += grad_lapl_source_psiM(i, j)[dim] * psiMinv_(i, j);
1036  // Second term, eq 9
1037  if (j == i)
1038  for (int dim_el = 0; dim_el < OHMMS_DIM; dim_el++)
1039  lapl_grad[dim][iel] -= (Real)2.0 * grad_phi_alpha_Minv(j, i)(dim, dim_el) * grad_phi_Minv(i, j)[dim_el];
1040  // Third term, eq 9
1041  // First term, eq 10
1042  lapl_grad[dim][iel] -= phi_alpha_Minv(j, i)[dim] * lapl_phi_Minv(i, j);
1043  // Second term, eq 11
1044  for (int dim_el = 0; dim_el < OHMMS_DIM; dim_el++)
1045  lapl_grad[dim][iel] +=
1046  (Real)2.0 * phi_alpha_Minv(j, i)[dim] * grad_phi_Minv(i, i)[dim_el] * grad_phi_Minv(i, j)[dim_el];
1047  }
1048  }
1049  }
1050  }
1051  return gradPsi;
1052 }
1053 
1054 /** Calculate the log value of the Dirac determinant for particles
1055  *@param P input configuration containing N particles
1056  *@param G a vector containing N gradients
1057  *@param L a vector containing N laplacians
1058  *\return the complex value of the determinant
1059  *
1060  *\f$ (first,first+nel). \f$ Add the gradient and laplacian
1061  *contribution of the determinant to G(radient) and L(aplacian)
1062  *for local energy calculations.
1063  */
1064 template<PlatformKind PL, typename VT, typename FPVT>
1066  const ParticleSet& P,
1069 {
1070  recompute(P);
1071  computeGL(G, L);
1072  return log_value_;
1073 }
1074 
1075 template<PlatformKind PL, typename VT, typename FPVT>
1078  const RefVectorWithLeader<ParticleSet>& p_list,
1080  const RefVector<ParticleSet::ParticleLaplacian>& L_list) const
1081 {
1082  assert(this == &wfc_list.getLeader());
1083  const std::vector<bool> recompute_all(wfc_list.size(), true);
1084  mw_recompute(wfc_list, p_list, recompute_all);
1085 
1086  for (int iw = 0; iw < wfc_list.size(); iw++)
1087  {
1089  det.computeGL(G_list[iw], L_list[iw]);
1090  }
1091 }
1092 
1093 template<PlatformKind PL, typename VT, typename FPVT>
1095 {
1096  {
1097  ScopedTimer spo_timer(SPOVGLTimer);
1098 
1099  UpdateMode = ORB_WALKER;
1100  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_host, dpsiM, d2psiM);
1101  auto* psiM_vgl_ptr = psiM_vgl.data();
1102  // transfer host to device, total size 4, g(3) + l(1)
1103  PRAGMA_OFFLOAD("omp target update to(psiM_vgl_ptr[psiM_vgl.capacity():psiM_vgl.capacity()*4])")
1104  }
1105  invertPsiM(psiM_temp, psiMinv_);
1106 }
1107 
1108 template<PlatformKind PL, typename VT, typename FPVT>
1110  const RefVectorWithLeader<ParticleSet>& p_list,
1111  const std::vector<bool>& recompute) const
1112 {
1113  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
1114  const auto nw = wfc_list.size();
1115 
1116  RefVectorWithLeader<WaveFunctionComponent> wfc_filtered_list(wfc_list.getLeader());
1117  RefVectorWithLeader<ParticleSet> p_filtered_list(p_list.getLeader());
1118  RefVectorWithLeader<SPOSet> phi_list(*wfc_leader.Phi);
1119  std::vector<Matrix<Value>> psiM_host_views;
1120  RefVector<const DualMatrix<Value>> psiM_temp_list;
1121  RefVector<Matrix<Value>> psiM_host_list;
1122  RefVector<Matrix<Grad>> dpsiM_list;
1123  RefVector<Matrix<Value>> d2psiM_list;
1124  RefVector<DualMatrix<Value>> psiMinv_list;
1125 
1126  wfc_filtered_list.reserve(nw);
1127  p_filtered_list.reserve(nw);
1128  phi_list.reserve(nw);
1129  psiM_host_views.reserve(nw);
1130  psiM_temp_list.reserve(nw);
1131  dpsiM_list.reserve(nw);
1132  d2psiM_list.reserve(nw);
1133  psiMinv_list.reserve(nw);
1134 
1135  for (int iw = 0; iw < nw; iw++)
1136  if (recompute[iw])
1137  {
1138  wfc_filtered_list.push_back(wfc_list[iw]);
1139  p_filtered_list.push_back(p_list[iw]);
1140 
1142  phi_list.push_back(*det.Phi);
1143  psiM_temp_list.push_back(det.psiM_temp);
1144  psiM_host_list.push_back(det.psiM_host);
1145  dpsiM_list.push_back(det.dpsiM);
1146  d2psiM_list.push_back(det.d2psiM);
1147  psiMinv_list.push_back(det.psiMinv_);
1148  }
1149 
1150  if (!wfc_filtered_list.size())
1151  return;
1152 
1153  {
1154  ScopedTimer spo_timer(wfc_leader.SPOVGLTimer);
1155  // I think through the magic of OMPtarget psiM_host_list actually results in psiM_temp being updated
1156  // on the device. For dspiM_list, d2psiM_list I think they are calculated on CPU and this is not true
1157  // This is the reason for the strange look omp target update to below.
1158  wfc_leader.Phi->mw_evaluate_notranspose(phi_list, p_filtered_list, wfc_leader.FirstIndex, wfc_leader.LastIndex,
1159  psiM_host_list, dpsiM_list, d2psiM_list);
1160  }
1161 
1162  mw_invertPsiM(wfc_filtered_list, psiM_temp_list, psiMinv_list);
1163 
1164  { // transfer dpsiM, d2psiM, psiMinv to device
1165  ScopedTimer d2h(H2DTimer);
1166 
1167  RefVector<DualVGLVector> psiM_vgl_list;
1168  psiM_vgl_list.reserve(nw);
1169  for (int iw = 0; iw < wfc_filtered_list.size(); iw++)
1170  {
1171  auto& det = wfc_filtered_list.getCastedElement<DiracDeterminantBatched<PL, VT, FPVT>>(iw);
1172  psiM_vgl_list.push_back(det.psiM_vgl);
1173  det.UpdateMode = ORB_WALKER;
1174  }
1175 
1176  auto& queue = wfc_leader.mw_res_handle_.getResource().engine_rsc.queue;
1177  for (DualVGLVector& psiM_vgl : psiM_vgl_list)
1178  {
1179  const size_t stride = psiM_vgl.capacity();
1180  // transfer host to device, total size 4, g(3) + l(1), skipping v
1181  queue.enqueueH2D(psiM_vgl, stride * 4, stride);
1182  }
1183  queue.sync();
1184  }
1185 }
1186 
1187 template<PlatformKind PL, typename VT, typename FPVT>
1189  const opt_variables_type& active,
1190  Vector<Value>& dlogpsi,
1191  Vector<Value>& dhpsioverpsi)
1192 {
1193  Phi->evaluateDerivatives(P, active, dlogpsi, dhpsioverpsi, FirstIndex, LastIndex);
1194 }
1195 
1196 template<PlatformKind PL, typename VT, typename FPVT>
1198  const opt_variables_type& active,
1199  Vector<ValueType>& dlogpsi)
1200 {
1201  Phi->evaluateDerivativesWF(P, active, dlogpsi, FirstIndex, LastIndex);
1202 }
1203 
1204 template<PlatformKind PL, typename VT, typename FPVT>
1206  TWFFastDerivWrapper& twf) const
1207 {
1208  twf.addGroup(P, P.getGroupID(FirstIndex), Phi.get());
1209 }
1210 
1211 template<PlatformKind PL, typename VT, typename FPVT>
1212 std::unique_ptr<DiracDeterminantBase> DiracDeterminantBatched<PL, VT, FPVT>::makeCopy(
1213  std::unique_ptr<SPOSet>&& spo) const
1214 {
1215  return std::make_unique<DiracDeterminantBatched<PL, VT, FPVT>>(std::move(spo), FirstIndex, LastIndex, ndelay_,
1216  matrix_inverter_kind_);
1217 }
1218 
1219 template<PlatformKind PL, typename VT, typename FPVT>
1221 {
1222  collection.addResource(std::make_unique<DiracDeterminantBatchedMultiWalkerResource>());
1223  Phi->createResource(collection);
1224  collection.addResource(std::make_unique<DetInverter>());
1225 }
1226 
1227 template<PlatformKind PL, typename VT, typename FPVT>
1229  ResourceCollection& collection,
1230  const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
1231 {
1232  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
1234  auto& mw_res = wfc_leader.mw_res_handle_.getResource();
1235  mw_res.psiMinv_refs.reserve(wfc_list.size());
1236 
1237  RefVectorWithLeader<SPOSet> phi_list(*wfc_leader.Phi);
1238  for (WaveFunctionComponent& wfc : wfc_list)
1239  {
1240  auto& det = static_cast<DiracDeterminantBatched<PL, VT, FPVT>&>(wfc);
1241  phi_list.push_back(*det.Phi);
1242  mw_res.psiMinv_refs.push_back(det.psiMinv_);
1243  }
1244  wfc_leader.Phi->acquireResource(collection, phi_list);
1245  wfc_leader.accel_inverter_ = collection.lendResource<DetInverter>();
1246 }
1247 
1248 template<PlatformKind PL, typename VT, typename FPVT>
1250  ResourceCollection& collection,
1251  const RefVectorWithLeader<WaveFunctionComponent>& wfc_list) const
1252 {
1253  auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<PL, VT, FPVT>>();
1254  wfc_leader.mw_res_handle_.getResource().psiMinv_refs.clear();
1255  collection.takebackResource(wfc_leader.mw_res_handle_);
1256  RefVectorWithLeader<SPOSet> phi_list(*wfc_leader.Phi);
1257  for (WaveFunctionComponent& wfc : wfc_list)
1258  {
1259  auto& det = static_cast<DiracDeterminantBatched<PL, VT, FPVT>&>(wfc);
1260  phi_list.push_back(*det.Phi);
1261  }
1262  wfc_leader.Phi->releaseResource(collection, phi_list);
1263  collection.takebackResource(wfc_leader.accel_inverter_);
1264 }
1265 
1267 #if defined(ENABLE_CUDA) && defined(ENABLE_OFFLOAD)
1269 #endif
1270 #if defined(ENABLE_SYCL) && defined(ENABLE_OFFLOAD)
1272 #endif
1273 
1274 } // namespace qmcplusplus
void copyFromBuffer(ParticleSet &P, WFBufferType &buf) override
For particle-by-particle move.
Fixed-size array.
Definition: OhmmsTinyMeta.h:30
void resizeScratchObjectsForIonDerivs()
Resize all temporary arrays required for force computation.
size_t addResource(std::unique_ptr< Resource > &&res, bool noprint=false)
WaveFunctionComponent::PsiValue PsiValue
Definition: SlaterDet.cpp:25
void takebackResource(ResourceHandle< RS > &res_handle)
LogValue evaluateGL(const ParticleSet &P, ParticleSet::ParticleGradient &G, ParticleSet::ParticleLaplacian &L, bool fromscratch) override
compute gradients and laplacian of the TWF with respect to each particle.
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
Tensor< T, D >::Type_t det(const Tensor< T, D > &a)
Definition: TensorOps.h:838
T dot(const T *restrict a, const T *restrict b, int n, TRES res=TRES())
dot product
void mw_ratioGrad(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, std::vector< PsiValue > &ratios, std::vector< Grad > &grad_new) const override
void restore(int iat) override
move was rejected.
void computeGL(ParticleSet::ParticleGradient &G, ParticleSet::ParticleLaplacian &L) const
compute G and L assuming psiMinv, dpsiM, d2psiM are ready for use
void acquireResource(ResourceCollection &collection, const RefVectorWithLeader< WaveFunctionComponent > &wfc_list) const override
acquire a shared resource from a collection
void mw_completeUpdates(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list) const override
complete all the delayed or asynchronous operations for all the walkers in a batch before leaving the...
DetMatInvertor
determinant matrix inverter select
A ParticleSet that handles virtual moves of a selected particle of a given physical ParticleSet Virtu...
typename DetInverterSelector< PL, FPVT >::Inverter DetInverter
Grad evalGradWithSpin(ParticleSet &P, int iat, ComplexType &spingrad) override
return the current spin gradient for the iat-th particle Default implementation assumes that WaveFunc...
void mw_evalGrad(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, std::vector< Grad > &grad_now) const override
int refPtcl
Reference particle.
QTBase::ComplexType ComplexType
Definition: Configuration.h:59
void evaluateHessian(ParticleSet &P, HessVector &grad_grad_psi) override
static void mw_invertPsiM(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVector< const DualMatrix< Value >> &logdetT_list, const RefVector< DualMatrix< Value >> &a_inv_lis)
Inverts and finds log det for a batch of matrices.
void copy(T1 *restrict target, const T2 *restrict source, size_t n)
copy function using memcpy
Definition: algorithm.hpp:40
Attaches a unit to a Vector for IO.
std::complex< T > convertValueToLog(const std::complex< T > &logpsi)
evaluate log(psi) as log(|psi|) and phase
ParticleLaplacian L
laplacians of the particles
Definition: ParticleSet.h:85
TWFFastDerivWrapper is a wrapper class for TrialWavefunction that provides separate and low level acc...
Grad evalGrad(ParticleSet &P, int iat) override
return the current gradient for the iat-th particle
void recompute(const ParticleSet &P) override
recompute the value of the WaveFunctionComponents which require critical accuracy.
PsiValue ratioGradWithSpin(ParticleSet &P, int iat, Grad &grad_iat, ComplexType &spingrad) override
#define OHMMS_DIM
Definition: config.h:64
void evaluateRatios(const VirtualParticleSet &VP, std::vector< Value > &ratios) override
compute multiple ratios for a particle move
void mw_calcRatio(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, std::vector< PsiValue > &ratios) const override
void evaluateDerivativesWF(ParticleSet &P, const opt_variables_type &optvars, Vector< ValueType > &dlogpsi) override
Compute the derivatives of the log of the wavefunction with respect to optimizable parameters...
void buildOptVariables(size_t nel)
An abstract class for a component of a many-body trial wave function.
RefVector< DualMatrix< Value > > psiMinv_refs
reference to per DDB psiMinvs in a crowd
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
void updateTo(size_type size=0, std::ptrdiff_t offset=0)
Definition: OhmmsMatrix.h:371
const std::unique_ptr< SPOSet > Phi
a set of single-particle orbitals used to fill in the values of the matrix
void mw_evaluateRatios(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< const VirtualParticleSet > &vp_list, std::vector< std::vector< Value >> &ratios) const override
void mw_evaluateGL(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, const RefVector< ParticleSet::ParticleGradient > &G_list, const RefVector< ParticleSet::ParticleLaplacian > &L_list, bool fromscratch) const override
evaluate gradients and laplacian of the same type WaveFunctionComponent of multiple walkers ...
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > outerProduct(const TinyVector< T1, D > &lhs, const TinyVector< T2, D > &rhs)
Definition: TinyVector.h:211
void createResource(ResourceCollection &collection) const override
initialize a shared resource and hand it to a collection
ParticleGradient G
gradients of the particles
Definition: ParticleSet.h:83
Grad evalGradSource(ParticleSet &P, ParticleSet &source, int iat) override
CASTTYPE & getCastedElement(size_t i) const
NewTimer & createGlobalTimer(const std::string &myname, timer_levels mylevel)
LogValue evaluateLog(const ParticleSet &P, ParticleSet::ParticleGradient &G, ParticleSet::ParticleLaplacian &L) override
evaluate from scratch pretty much everything for this single walker determinant
class to handle a set of variables that can be modified during optimizations
Definition: VariableSet.h:49
void evaluateSpinorRatios(const VirtualParticleSet &VP, const std::pair< ValueVector, ValueVector > &spinor_multiplier, std::vector< Value > &ratios) override
PsiValue ratioGrad(ParticleSet &P, int iat, Grad &grad_iat) override
DiracDeterminantBatchedMultiWalkerResource(const DiracDeterminantBatchedMultiWalkerResource &)
void evaluateDerivRatios(const VirtualParticleSet &VP, const opt_variables_type &optvars, std::vector< ValueType > &ratios, Matrix< ValueType > &dratios) override
evaluate ratios to evaluate the non-local PP
typename SPOSet::OffloadMWVGLArray OffloadMWVGLArray
Define determinant operators.
std::vector< std::reference_wrapper< T > > RefVector
void registerTWFFastDerivWrapper(const ParticleSet &P, TWFFastDerivWrapper &twf) const override
Register the component with the TWFFastDerivWrapper wrapper.
OrbitalSetTraits< ValueType >::GradMatrix GradMatrix
Declaration of DiracDeterminantBatched with a S(ingle)P(article)O(rbital)Set.
sycl::event copy_n(sycl::queue &aq, const T1 *restrict VA, size_t array_size, T2 *restrict VC, const std::vector< sycl::event > &events)
Definition: syclBLAS.cpp:548
OffloadMWVGLArray phi_vgl_v
value, grads, laplacian of single-particle orbital for particle-by-particle update and multi walker [...
virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader< SPOSet > &spo_list, const RefVectorWithLeader< ParticleSet > &P_list, int iat, const RefVector< ValueVector > &psi_v_list, const RefVector< GradVector > &dpsi_v_list, const RefVector< ValueVector > &d2psi_v_list, OffloadMatrix< ComplexType > &mw_dspin) const
evaluate the values, gradients and laplacians and spin gradient of this single-particle orbital sets ...
Definition: SPOSet.cpp:115
void invertPsiM(const DualMatrix< Value > &psiM, DualMatrix< Value > &psiMinv)
single invert logdetT(psiM) as a side effect this->log_value_ gets the log determinant of logdetT ...
void mw_recompute(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, const std::vector< bool > &recompute) const override
Does a Phi->mw_evaluate_notranspose then mw_invertPsiM over a set of elements filtered based on the r...
void acceptMove(ParticleSet &P, int iat, bool safe_to_delay=false) override
move was accepted, update the real container
OrbitalSetTraits< ValueType >::HessVector HessVector
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
ResourceHandle< RS > lendResource()
void completeUpdates() override
complete any left over determinant matrix updates.
void releaseResource(ResourceCollection &collection, const RefVectorWithLeader< WaveFunctionComponent > &wfc_list) const override
return a shared resource to a collection
const int NumPtcls
number of particles which belong to this Dirac determinant
void mw_ratioGradWithSpin(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, std::vector< PsiValue > &ratios, std::vector< Grad > &grad_new, std::vector< ComplexType > &spingrad_new) const override
LogValue updateBuffer(ParticleSet &P, WFBufferType &buf, bool fromscratch=false) override
For particle-by-particle move.
void evaluateDerivatives(ParticleSet &P, const opt_variables_type &active, Vector< Value > &dlogpsi, Vector< Value > &dhpsioverpsi) override
ResourceHandle< DiracDeterminantBatchedMultiWalkerResource > mw_res_handle_
void resize(int nel, int morb)
reset the size: with the number of particles and number of orbtials
DiracDeterminantBatched(std::unique_ptr< SPOSet > &&spos, int first, int last, int ndelay=1, DetMatInvertor matrix_inverter_kind=DetMatInvertor::ACCEL)
constructor
void put(std::complex< T1 > &x)
Definition: PooledMemory.h:165
std::unique_ptr< DiracDeterminantBase > makeCopy(std::unique_ptr< SPOSet > &&spo) const override
cloning function
void evaluateRatiosAlltoOne(ParticleSet &P, std::vector< Value > &ratios) override
void mw_accept_rejectMove(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, const std::vector< bool > &isAccepted, bool safe_to_delay=false) const override
moves of the iat-th particle on some walkers in a batch is accepted.
void mw_evaluateLog(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, const RefVector< ParticleSet::ParticleGradient > &G_list, const RefVector< ParticleSet::ParticleLaplacian > &L_list) const override
evaluate from scratch the same type WaveFunctionComponent of multiple walkers
void registerData(ParticleSet &P, WFBufferType &buf) override
For particle-by-particle move.
int getGroupID(int iat) const
return the group id of a given particle in the particle set.
Definition: ParticleSet.h:520
OffloadMatrix< ComplexType > mw_dspin
mw spin gradients of orbitals, matrix is [nw][norb]
void mw_evalGradWithSpin(const RefVectorWithLeader< WaveFunctionComponent > &wfc_list, const RefVectorWithLeader< ParticleSet > &p_list, int iat, std::vector< Grad > &grad_now, std::vector< ComplexType > &spingrad_now) const override
void add(std::complex< T1 > &x)
Definition: PooledMemory.h:113
ResourceHandle< DetInverter > accel_inverter_
matrix inversion engine this a crowd scope resource and only the leader engine gets it ...
PsiValue ratio(ParticleSet &P, int iat) override
return the ratio only for the iat-th partcle move
void get(std::complex< T1 > &x)
Definition: PooledMemory.h:132