QMCPACK
QMCDriverNew.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2022 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
8 //
9 // File refactored from: QMCDriver.cpp
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 #include <limits>
13 #include <typeinfo>
14 #include <cmath>
15 #include <sstream>
16 #include <numeric>
17 
18 #include "QMCDriverNew.h"
22 #include "Utilities/FairDivide.h"
23 #include "OhmmsData/AttributeSet.h"
24 #include "Message/Communicate.h"
25 #include "Message/CommOperators.h"
26 #include "RandomNumberControl.h"
28 #include "hdf/HDFVersion.h"
29 #include "Utilities/qmc_common.h"
30 #include "Concurrency/Info.hpp"
33 #include "Utilities/Timer.h"
36 #include "WalkerLogManager.h"
37 
38 
39 namespace qmcplusplus
40 {
41 /** Has nasty workaround for RandomNumberControl
42  *
43  * Num crowds must be less than omp_get_max_threads because RandomNumberControl is global c lib function
44  * masquerading as a C++ object.
45  */
48  const std::optional<EstimatorManagerInput>& global_emi,
50  MCPopulation&& population,
51  const std::string timer_prefix,
53  const std::string& QMC_driver_type)
55  qmcdriver_input_(std::move(input)),
56  QMCType(QMC_driver_type),
57  population_(std::move(population)),
58  dispatchers_(!qmcdriver_input_.areWalkersSerialized()),
59  estimator_manager_(nullptr),
60  timers_(timer_prefix),
61  driver_scope_profiler_(qmcdriver_input_.get_scoped_profiling()),
62  project_data_(project_data),
63  walker_configs_ref_(wc)
64 {
65  // This is done so that the application level input structures reflect the actual input to the code.
66  // While the actual simulation objects still take singular input structures at construction.
67  auto makeEstimatorManagerInput = [](auto& global_emi, auto& local_emi) -> EstimatorManagerInput {
68  if (global_emi.has_value() && local_emi.has_value())
69  return {global_emi.value(), local_emi.value()};
70  else if (global_emi.has_value())
71  return {global_emi.value()};
72  else if (local_emi.has_value())
73  return {local_emi.value()};
74  else
75  return {};
76  };
77 
79  std::make_unique<EstimatorManagerNew>(comm,
80  makeEstimatorManagerInput(global_emi,
82  population_.get_golden_hamiltonian(), population.get_golden_electrons(),
83  population.get_golden_twf());
84 
85  drift_modifier_.reset(
87 
88  // This needs to be done here to keep dependency on CrystalLattice out of the QMCDriverInput.
89  max_disp_sq_ = input.get_max_disp_sq();
90  if (max_disp_sq_ < 0)
91  {
92  auto& lattice = population.get_golden_electrons().getLattice();
93  max_disp_sq_ = lattice.LR_rc * lattice.LR_rc;
94  }
95 
96  wOut = std::make_unique<HDFWalkerOutput>(population.get_golden_electrons().getTotalNum(), get_root_name(), myComm);
97 }
98 
99 // The Rng pointers are transferred from global storage (RandomNumberControl::Children)
100 // to local storage (Rng) for the duration of QMCDriverNew.
101 // They are transferred to local storage in createRngsStepContext (called from startup,
102 // which is usually called from the "process" function in the derived class.)
103 // The local storage is moved back to the global storage in the destructor.
104 // In optimization, there are two instances of QMCDriverNew - one for the optimizer and one
105 // for the vmc engine. As long as the vmc engine calls process first, it gets valid
106 // Rng pointers. The optimizer is called second and gets nullptr, but it doesn't use Rng,
107 // so it doesn't matter.
108 // Upon restore, the vmc engine would need to be restored last (otherwise the global storage gets
109 // the nullptr from the optimizer). However, the order is fixed by the order the destructors
110 // are called.
111 // To work around the issue, check the local pointer for nullptr before restoring to global storage.
113 {
114  for (int i = 0; i < Rng.size(); ++i)
115  if (Rng[i])
116  RandomNumberControl::Children[i].reset(Rng[i].release());
117 }
118 
120 {
121  int num_threads(Concurrency::maxCapacity<>());
122  if (num_crowds > num_threads)
123  {
124  std::stringstream error_msg;
125  error_msg << "Bad Input: num_crowds (" << num_crowds << ") > num_threads (" << num_threads << ")\n";
126  throw UniformCommunicateError(error_msg.str());
127  }
128 }
129 
131 {
132  ScopedTimer local_timer(timers_.startup_timer);
133 
134  app_summary() << QMCType << " Driver running with" << std::endl
135  << " total_walkers = " << awc.global_walkers << std::endl
136  << " walkers_per_rank = " << awc.walkers_per_rank << std::endl
137  << " num_crowds = " << awc.walkers_per_crowd.size() << std::endl
138  << " on rank 0, walkers_per_crowd = " << awc.walkers_per_crowd << std::endl
139  << std::endl
140  << " steps = " << steps_per_block_
142  ? ""
143  : " (different from input value " + std::to_string(qmcdriver_input_.get_requested_steps()) + ")")
144  << std::endl
145  << " blocks = " << qmcdriver_input_.get_max_blocks() << std::endl
146  << std::endl;
147 
148  // set num_global_walkers explicitly and then make local walkers.
150 
152  {
153  if (estimator_manager_->areThereListeners())
154  throw UniformCommunicateError("Serialized walkers ignore multiwalker API's and multiwalker resources and are "
155  "incompatible with estimators requiring per particle listeners");
156  }
157  else
158  {
159  // This needs to happen before walkers are made. i.e. this allows hamiltonian operators to update state
160  // the based on the presence of per particle listeners. In the case immediately encountered the operator CoulombPBCAA will
161  // call its associated particle set and turnOnPerParticleSK.
162  // The design for "initialization" of walker elements is for the golden elements to go through all pre walking state changes
163  // and then for the golden elements to be cloned for each walker.
164  if (estimator_manager_->areThereListeners())
166 
167  app_debug() << "Creating multi walker shared resources" << std::endl;
171  app_debug() << "Multi walker shared resources creation completed" << std::endl;
172  }
173 
175 
176  crowds_.resize(awc.walkers_per_crowd.size());
177 
178  // at this point we can finally construct the Crowd objects.
179  for (int i = 0; i < crowds_.size(); ++i)
180  {
181  crowds_[i] =
184  }
185 
186  //now give walkers references to their walkers
188 
189  // Once they are created move contexts can be created.
191 
193  measureImbalance("Startup");
194 }
195 
196 /** QMCDriverNew ignores h5name if you want to read and h5 config you have to explicitly
197  * do so.
198  */
199 void QMCDriverNew::setStatus(const std::string& aname, const std::string& h5name, bool append)
200 {
201  app_log() << "\n=========================================================" << "\n Start " << QMCType
202  << "\n File Root " << get_root_name();
203  app_log() << "\n=========================================================" << std::endl;
204 
205  if (h5name.size())
206  h5_file_root_ = h5name;
207 }
208 
209 
210 /** Read walker configurations from *.config.h5 files
211  * @param wset list of xml elements containing mcwalkerset
212  *
213  * All this does is look in the walker xml section for the hdf file.
214  * It reads that (I think) and if there are active walkers
215  * declares it a restart run.
216  *
217  * This inferred behavior is asking for trouble.
218  * Unified driver will not support until restart feature is
219  * re-architected
220  */
221 void QMCDriverNew::putWalkers(std::vector<xmlNodePtr>& wset)
222 {
223  if (wset.empty())
224  return;
225  const int nfile = wset.size();
226 
228  for (int i = 0; i < wset.size(); i++)
229  if (W_in.put(wset[i]))
230  h5_file_root_ = W_in.getFileRoot();
231  //clear the walker set
232  wset.clear();
234  myComm->bcast(nwtot);
235  if (nwtot)
237 }
238 
240 {
242  {
243  ScopedTimer local_timer(timers_.checkpoint_timer);
246  wOut->dump(walker_configs_ref_, block);
248  }
249 }
250 
251 bool QMCDriverNew::finalize(int block, bool dumpwalkers)
252 {
255  app_log() << " Carry over " << walker_configs_ref_.getGlobalNumWalkers()
256  << " walker configurations to the next QMC driver." << std::endl;
257 
258  const bool DumpConfig = qmcdriver_input_.get_dump_config();
259  if (DumpConfig && dumpwalkers)
260  wOut->dump(walker_configs_ref_, block);
261 
262  infoSummary.flush();
263  infoLog.flush();
264 
265  if (DumpConfig)
267 
268  return true;
269 }
270 
272 {
274  // ensure nwalkers local walkers in population_
275  if (population_.get_walkers().size() == 0)
276  population_.createWalkers(nwalkers, walker_configs_ref_, reserve);
277  else if (population_.get_walkers().size() < nwalkers)
278  {
279  throw std::runtime_error("Unexpected walker count resulting in dangerous spawning");
280  IndexType num_additional_walkers = nwalkers - population_.get_walkers().size();
281  for (int i = 0; i < num_additional_walkers; ++i)
283  }
284  else
285  {
286  IndexType num_walkers_to_kill = population_.get_walkers().size() - nwalkers;
287  for (int i = 0; i < num_walkers_to_kill; ++i)
289  }
290 }
291 
292 /** Creates Random Number generators for crowds and step contexts
293  *
294  * This is quite dangerous in that number of crowds can be > omp_get_max_threads()
295  * This is used instead of actually passing number of threads/crowds
296  * controlling threads all over RandomNumberControl.
297  */
299 {
300  step_contexts_.resize(num_crowds);
301  Rng.resize(num_crowds);
302 
303  if (RandomNumberControl::Children.size() == 0)
304  {
305  app_warning() << " Initializing global RandomNumberControl! "
306  << "This message should not be seen in production code but only in unit tests." << std::endl;
308  }
309 
310  for (int i = 0; i < num_crowds; ++i)
311  {
312  Rng[i].reset(RandomNumberControl::Children[i].release());
313  step_contexts_[i] = std::make_unique<ContextForSteps>(*(Rng[i]));
314  }
315 }
316 
318  UPtrVector<Crowd>& crowds,
319  UPtrVector<ContextForSteps>& context_for_steps)
320 {
321  Crowd& crowd = *(crowds[crowd_id]);
322  if (crowd.size() == 0)
323  return;
324 
325  crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen());
326  auto& ps_dispatcher = crowd.dispatchers_.ps_dispatcher_;
327  auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_;
328  auto& ham_dispatcher = crowd.dispatchers_.ham_dispatcher_;
329 
330  const RefVectorWithLeader<ParticleSet> walker_elecs(crowd.get_walker_elecs()[0], crowd.get_walker_elecs());
331  const RefVectorWithLeader<TrialWaveFunction> walker_twfs(crowd.get_walker_twfs()[0], crowd.get_walker_twfs());
332  const RefVectorWithLeader<QMCHamiltonian> walker_hamiltonians(crowd.get_walker_hamiltonians()[0],
333  crowd.get_walker_hamiltonians());
334 
335  ResourceCollectionTeamLock<ParticleSet> pset_res_lock(crowd.getSharedResource().pset_res, walker_elecs);
336  ResourceCollectionTeamLock<TrialWaveFunction> twfs_res_lock(crowd.getSharedResource().twf_res, walker_twfs);
337  ResourceCollectionTeamLock<QMCHamiltonian> hams_res_lock(crowd.getSharedResource().ham_res, walker_hamiltonians);
338 
339  auto& walkers = crowd.get_walkers();
340  std::vector<bool> recompute_mask(walkers.size(), true);
341  ps_dispatcher.flex_loadWalker(walker_elecs, walkers, recompute_mask, true);
342  ps_dispatcher.flex_donePbyP(walker_elecs);
343  twf_dispatcher.flex_evaluateLog(walker_twfs, walker_elecs);
344 
345  // For consistency this should be in ParticleSet as a flex call, but I think its a problem
346  // in the algorithm logic and should be removed.
347  auto saveElecPosAndGLToWalkers = [](ParticleSet& pset, ParticleSet::Walker_t& walker) { pset.saveWalker(walker); };
348  for (int iw = 0; iw < crowd.size(); ++iw)
349  saveElecPosAndGLToWalkers(walker_elecs[iw], walkers[iw]);
350 
351  std::vector<QMCHamiltonian::FullPrecRealType> local_energies(
352  ham_dispatcher.flex_evaluate(walker_hamiltonians, walker_twfs, walker_elecs));
353 
354  // \todo rename these are sets not resets.
355  auto resetSigNLocalEnergy = [](MCPWalker& walker, TrialWaveFunction& twf, auto local_energy) {
356  walker.resetProperty(twf.getLogPsi(), twf.getPhase(), local_energy);
357  };
358  for (int iw = 0; iw < crowd.size(); ++iw)
359  resetSigNLocalEnergy(walkers[iw], walker_twfs[iw], local_energies[iw]);
360 
361  auto evaluateNonPhysicalHamiltonianElements = [](QMCHamiltonian& ham, ParticleSet& pset, MCPWalker& walker) {
363  };
364  for (int iw = 0; iw < crowd.size(); ++iw)
365  evaluateNonPhysicalHamiltonianElements(walker_hamiltonians[iw], walker_elecs[iw], walkers[iw]);
366 
367  auto savePropertiesIntoWalker = [](QMCHamiltonian& ham, MCPWalker& walker) {
368  ham.saveProperty(walker.getPropertyBase());
369  };
370  for (int iw = 0; iw < crowd.size(); ++iw)
371  savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);
372 
373  auto doesDoinTheseLastMatter = [](MCPWalker& walker) {
374  walker.Weight = 1.;
375  walker.wasTouched = false;
376  };
377  for (int iw = 0; iw < crowd.size(); ++iw)
378  doesDoinTheseLastMatter(walkers[iw]);
379 }
380 
381 
382 void QMCDriverNew::putWalkerLogs(xmlNodePtr wlxml)
383 {
384  walker_logs_input.present = false;
385  if(wlxml)
386  {
387  walker_logs_input.readXML(wlxml);
388  walker_logs_input.present = true;
389  }
390 }
391 
392 
393 std::ostream& operator<<(std::ostream& o_stream, const QMCDriverNew& qmcd)
394 {
395  o_stream << " time step = " << qmcd.qmcdriver_input_.get_tau() << '\n';
396  o_stream << " blocks = " << qmcd.qmcdriver_input_.get_max_blocks() << '\n';
397  o_stream << " steps = " << qmcd.steps_per_block_ << '\n';
398  o_stream << " substeps = " << qmcd.qmcdriver_input_.get_sub_steps() << '\n';
399  o_stream << " current = " << qmcd.current_step_ << '\n';
400  o_stream << " target samples = " << qmcd.target_samples_ << '\n';
401  o_stream << " walkers/mpi = " << qmcd.population_.get_num_local_walkers() << std::endl;
402  app_log().flush();
403 
404  return o_stream;
405 }
406 
408  const IndexType current_configs,
409  const IndexType requested_total_walkers,
410  const IndexType requested_walkers_per_rank,
411  const RealType reserve_walkers,
412  int num_crowds)
413 {
414  const int num_ranks = comm.size();
415  const int rank_id = comm.rank();
416 
417  // Step 1. set num_crowds by input and Concurrency::maxCapacity<>()
418  checkNumCrowdsLTNumThreads(num_crowds);
419  if (num_crowds == 0)
420  num_crowds = Concurrency::maxCapacity<>();
421 
422  AdjustedWalkerCounts awc{0, {}, {}, reserve_walkers};
423  awc.walkers_per_rank.resize(num_ranks, 0);
424 
425  // Step 2. decide awc.global_walkers and awc.walkers_per_rank based on input values
426  if (requested_total_walkers != 0)
427  {
428  if (requested_total_walkers < num_ranks)
429  {
430  std::ostringstream error;
431  error << "Running on " << num_ranks << " MPI ranks. The request of " << requested_total_walkers
432  << " global walkers cannot be satisfied! Need at least one walker per MPI rank.";
433  throw UniformCommunicateError(error.str());
434  }
435  if (requested_walkers_per_rank != 0 && requested_total_walkers != requested_walkers_per_rank * num_ranks)
436  {
437  std::ostringstream error;
438  error << "Running on " << num_ranks << " MPI ranks, The request of " << requested_total_walkers
439  << " global walkers and " << requested_walkers_per_rank << " walkers per rank cannot be satisfied!";
440  throw UniformCommunicateError(error.str());
441  }
442  awc.global_walkers = requested_total_walkers;
443  awc.walkers_per_rank = fairDivide(requested_total_walkers, num_ranks);
444  }
445  else // requested_total_walkers == 0
446  {
447  if (requested_walkers_per_rank != 0)
448  awc.walkers_per_rank[rank_id] = requested_walkers_per_rank;
449  else if (current_configs) // requested_walkers_per_rank == 0 and current_configs > 0
450  awc.walkers_per_rank[rank_id] = current_configs;
451  else // requested_walkers_per_rank == 0 and current_configs == 0
452  awc.walkers_per_rank[rank_id] = num_crowds;
453  comm.allreduce(awc.walkers_per_rank);
454  awc.global_walkers = std::accumulate(awc.walkers_per_rank.begin(), awc.walkers_per_rank.end(), 0);
455  }
456 
457  if (awc.global_walkers % num_ranks)
458  app_warning() << "TotalWalkers (" << awc.global_walkers << ") not divisible by number of ranks (" << num_ranks
459  << "). This will result in a loss of efficiency.\n";
460 
461  // Step 3. decide awc.walkers_per_crowd
462  awc.walkers_per_crowd = fairDivide(awc.walkers_per_rank[rank_id], num_crowds);
463 
464  if (awc.walkers_per_rank[rank_id] % num_crowds)
465  app_warning() << "Walkers per rank (" << awc.walkers_per_rank[rank_id] << ") not divisible by number of crowds ("
466  << num_crowds << "). This will result in a loss of efficiency.\n";
467 
468  // \todo some warning if unreasonable number of threads are being used.
469 
470  return awc;
471 }
472 
474  IndexType requested_samples,
475  IndexType requested_steps,
476  IndexType blocks)
477 {
478  assert(global_walkers > 0 && "QMCDriverNew::determineStepsPerBlock global_walkers must be positive!");
479 
480  if (blocks <= 0)
481  throw UniformCommunicateError("QMCDriverNew::determineStepsPerBlock blocks must be positive!");
482 
483  if (requested_samples > 0 && requested_steps > 0)
484  {
485  if (requested_samples <= global_walkers * requested_steps * blocks)
486  return requested_steps;
487  else
488  throw UniformCommunicateError("The requested number of samples is more than the total number of walkers "
489  "multiplies the requested number of steps and blocks");
490  }
491  else if (requested_samples > 0)
492  {
493  IndexType one_step_minimal_samples = global_walkers * blocks;
494  return (requested_samples + one_step_minimal_samples - 1) / one_step_minimal_samples;
495  }
496  else if (requested_steps > 0)
497  return requested_steps;
498  else // neither requested_samples nor requested_steps is positive
499  return 1;
500 }
501 
502 /** The scalar estimator collection is quite strange
503  *
504  */
506 {
507  ScopedTimer local_timer(timers_.endblock_timer);
508  RefVector<ScalarEstimatorBase> main_scalar_estimators;
509 
510  FullPrecRealType total_block_weight = 0.0;
511  // Collect all the ScalarEstimatorsFrom EMCrowds
512  unsigned long block_accept = 0;
513  unsigned long block_reject = 0;
514 
515  std::vector<RefVector<OperatorEstBase>> crowd_operator_estimators;
516  // Seems uneeded see EstimatorManagerNew scalar_ests_ documentation.
517  std::vector<RefVector<ScalarEstimatorBase>> crowd_scalar_estimators;
518 
519  for (const UPtr<Crowd>& crowd : crowds_)
520  {
521  crowd->stopBlock();
522  main_scalar_estimators.push_back(crowd->get_estimator_manager_crowd().get_main_estimator());
523  crowd_scalar_estimators.emplace_back(crowd->get_estimator_manager_crowd().get_scalar_estimators());
524  total_block_weight += crowd->get_estimator_manager_crowd().get_block_weight();
525  block_accept += crowd->get_accept();
526  block_reject += crowd->get_reject();
527 
528  // This seems altogether easier and more sane.
529  crowd_operator_estimators.emplace_back(crowd->get_estimator_manager_crowd().get_operator_estimators());
530  }
531 
532 #ifdef DEBUG_PER_STEP_ACCEPT_REJECT
533  app_warning() << "accept: " << block_accept << " reject: " << block_reject;
534  FullPrecRealType total_accept_ratio =
535  static_cast<FullPrecRealType>(block_accept) / static_cast<FullPrecRealType>(block_accept + block_reject);
536  std::cerr << " total_accept_ratio: << " << total_accept_ratio << '\n';
537 #endif
538  estimator_manager_->collectMainEstimators(main_scalar_estimators);
539  estimator_manager_->collectScalarEstimators(crowd_scalar_estimators);
540  estimator_manager_->collectOperatorEstimators(crowd_operator_estimators);
541 
542  /// get the average cpu_block time per crowd
543  /// cpu_block_time /= crowds_.size();
544 
545  estimator_manager_->stopBlock(block_accept, block_reject, total_block_weight);
546 }
547 
548 void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location)
549 {
550  bool success = true;
551  auto& ps_dispatcher = crowd.dispatchers_.ps_dispatcher_;
552  auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_;
553 
554  const RefVectorWithLeader<ParticleSet> walker_elecs(crowd.get_walker_elecs()[0], crowd.get_walker_elecs());
555  const RefVectorWithLeader<TrialWaveFunction> walker_twfs(crowd.get_walker_twfs()[0], crowd.get_walker_twfs());
556  std::vector<TrialWaveFunction::LogValue> log_values(walker_twfs.size());
557  std::vector<ParticleSet::ParticleGradient> Gs;
558  std::vector<ParticleSet::ParticleLaplacian> Ls;
559  Gs.reserve(log_values.size());
560  Ls.reserve(log_values.size());
561 
562  for (int iw = 0; iw < log_values.size(); iw++)
563  {
564  log_values[iw] = {walker_twfs[iw].getLogPsi(), walker_twfs[iw].getPhase()};
565  Gs.push_back(walker_twfs[iw].G);
566  Ls.push_back(walker_twfs[iw].L);
567  }
568 
569  ps_dispatcher.flex_update(walker_elecs);
570  twf_dispatcher.flex_evaluateLog(walker_twfs, walker_elecs);
571 
572  RealType threshold;
573  // mixed precision can't make this test with cuda direct inversion
574  if constexpr (std::is_same<RealType, FullPrecRealType>::value)
575  threshold = 100 * std::numeric_limits<float>::epsilon();
576  else
577  threshold = 500 * std::numeric_limits<float>::epsilon();
578 
579  std::ostringstream msg;
580  for (int iw = 0; iw < log_values.size(); iw++)
581  {
582  auto& ref_G = walker_twfs[iw].G;
583  auto& ref_L = walker_twfs[iw].L;
584  TrialWaveFunction::LogValue ref_log{walker_twfs[iw].getLogPsi(), walker_twfs[iw].getPhase()};
585  if (std::abs(std::exp(log_values[iw]) - std::exp(ref_log)) > std::abs(std::exp(ref_log)) * threshold)
586  {
587  success = false;
588  msg << "Logpsi walker[" << iw << "] " << log_values[iw] << " ref " << ref_log << std::endl;
589  }
590 
591  for (int iel = 0; iel < ref_G.size(); iel++)
592  {
593  auto grad_diff = ref_G[iel] - Gs[iw][iel];
594  if (std::sqrt(std::abs(dot(grad_diff, grad_diff))) > std::sqrt(std::abs(dot(ref_G[iel], ref_G[iel]))) * threshold)
595  {
596  success = false;
597  msg << "walker[" << iw << "] Grad[" << iel << "] ref = " << ref_G[iel] << " wrong = " << Gs[iw][iel]
598  << " Delta " << grad_diff << std::endl;
599  }
600 
601  auto lap_diff = ref_L[iel] - Ls[iw][iel];
602  if (std::abs(lap_diff) > std::abs(ref_L[iel]) * threshold)
603  {
604  // very hard to check mixed precision case, only print, no error out
605  if (std::is_same<RealType, FullPrecRealType>::value)
606  success = false;
607  msg << "walker[" << iw << "] lap[" << iel << "] ref = " << ref_L[iel] << " wrong = " << Ls[iw][iel] << " Delta "
608  << lap_diff << std::endl;
609  }
610  }
611  }
612 
613  std::cerr << msg.str();
614  if (!success)
615  throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n"));
616 }
617 
618 void QMCDriverNew::measureImbalance(const std::string& tag) const
619 {
620  ScopedTimer local_timer(timers_.imbalance_timer);
621  Timer only_this_barrier;
622  myComm->barrier();
623  std::vector<double> my_barrier_time(1, only_this_barrier.elapsed());
624  std::vector<double> barrier_time_all_ranks(myComm->size(), 0.0);
625  myComm->gather(my_barrier_time, barrier_time_all_ranks, 0);
626  if (!myComm->rank())
627  {
628  auto const count = static_cast<double>(barrier_time_all_ranks.size());
629  const auto max_it = std::max_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
630  const auto min_it = std::min_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
631  app_log() << std::endl
632  << tag << " MPI imbalance measured by an additional barrier (slow ranks wait less):" << std::endl
633  << " average wait seconds = "
634  << std::accumulate(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end(), 0.0) / count << std::endl
635  << " min wait at rank " << std::distance(barrier_time_all_ranks.begin(), min_it)
636  << ", seconds = " << *min_it << std::endl
637  << " max wait at rank " << std::distance(barrier_time_all_ranks.begin(), max_it)
638  << ", seconds = " << *max_it << std::endl;
639  }
640 }
641 
643 {
644  std::vector<int> nw(comm->size(), 0);
645  std::vector<int> nwoff(comm->size() + 1, 0);
646  nw[comm->rank()] = walker_configs.getActiveWalkers();
647  comm->allreduce(nw);
648  for (int ip = 0; ip < comm->size(); ip++)
649  nwoff[ip + 1] = nwoff[ip] + nw[ip];
650 
651  walker_configs.setWalkerOffsets(nwoff);
652 }
653 
654 } // namespace qmcplusplus
IndexType target_samples_
the number of saved samples
Definition: QMCDriverNew.h:382
void informOperatorsOfListener()
Some Hamiltonian components need to be informed that they are in a per particle reporting situation s...
size_t steps_per_block_
actual number of steps per block
Definition: QMCDriverNew.h:407
double elapsed() const
Definition: Timer.h:30
QMCDriverNew(const ProjectData &project_data, QMCDriverInput &&input, const std::optional< EstimatorManagerInput > &global_emi, WalkerConfigurations &wc, MCPopulation &&population, const std::string timer_prefix, Communicate *comm, const std::string &QMC_driver_type)
Constructor.
WalkerElementsRef spawnWalker()
State Requirement:
const ParticleSet & get_golden_electrons() const
Definition: MCPopulation.h:176
RealType get_drift_modifier_unr_a() const
void barrier() const
std::ostream & app_warning()
Definition: OutputManager.h:69
Abstraction of information on executor environments.
Base class for any object which needs to know about a MPI communicator.
Definition: MPIObjectBase.h:26
MCPopulation population_
the entire (on node) walker population it serves VMCBatch and DMCBatch right now but will be polymorp...
Definition: QMCDriverNew.h:426
QMCHamiltonian & get_golden_hamiltonian()
Definition: MCPopulation.h:181
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
int rank() const
return the rank
Definition: Communicate.h:116
void createResource(ResourceCollection &collection) const
initialize a shared resource and hand it to a collection
size_t getActiveWalkers() const
return the number of active walkers
This is a data structure strictly for QMCDriver and its derived classes.
Definition: QMCDriverNew.h:116
void recordBlock(int block) override
record the state of the block
InfoStream infoSummary
UPtrVector< MCPWalker > & get_walkers()
Definition: MCPopulation.h:194
IndexType get_requested_steps() const
#define app_debug
Definition: OutputManager.h:75
MakeReturn< UnaryNode< FnFabs, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t abs(const Vector< T1, C1 > &l)
WalkerLogInput walker_logs_input
walker logs input
Definition: QMCDriverNew.h:108
std::vector< StdComp, CUDAHostAllocator< StdComp > > log_values(batch_size)
QMCDriverNew Base class for Unified Drivers.
Definition: QMCDriverNew.h:75
size_t getTotalNum() const
Definition: ParticleSet.h:493
void makeLocalWalkers(int nwalkers, RealType reserve)
Adjust populations local walkers to this number.
std::ostream & app_log()
Definition: OutputManager.h:65
Declaration of QMCDriverNew.
void putWalkerLogs(xmlNodePtr wlxml) override
class ProjectData
Definition: ProjectData.h:36
void saveProperty(IT first)
save the values of Hamiltonian elements to the Properties
std::ostream & app_summary()
Definition: OutputManager.h:63
A set of light weight walkers that are carried between driver sections and restart.
WaveFunctionComponent::LogValue LogValue
Collection of Local Energy Operators.
void createResource(ResourceCollection &collection) const
initialize a shared resource and hand it to a collection
Timer class.
void setWalkerOffsets(const std::vector< int > &o)
return the total number of active walkers among a MPI group
std::vector< std::unique_ptr< T > > UPtrVector
const char walkers[]
Definition: HDFVersion.h:36
static void checkNumCrowdsLTNumThreads(const int num_crowds)
Driver synchronized step context.
Definition: Crowd.h:38
void createRngsStepContexts(int num_crowds)
Creates Random Number generators for crowds and step contexts.
static void write(const std::string &fname, Communicate *comm)
write in parallel or serial
std::unique_ptr< EstimatorManagerNew > estimator_manager_
Observables manager Has very problematic owner ship and life cycle.
Definition: QMCDriverNew.h:443
IndexType get_sub_steps() const
int size() const
return the number of tasks
Definition: Communicate.h:118
void createResource(ResourceCollection &collection) const
initialize a shared resource and hand it to a collection
size_t getGlobalNumWalkers() const
return the total number of active walkers among a MPI group
InfoStream infoLog
const MultiWalkerDispatchers dispatchers_
multi walker dispatchers
Definition: QMCDriverNew.h:436
Wrapping information on parallelism.
Definition: Communicate.h:68
void allreduce(T &)
static void setWalkerOffsets(WalkerConfigurations &, Communicate *comm)
update the global offsets of walker configurations after active walkers being touched.
CrystalLattice< OHMMS_PRECISION, OHMMS_DIM > lattice
void error(char const *m)
Definition: Standard.h:204
std::unique_ptr< HDFWalkerOutput > wOut
record engine for walkers
Definition: QMCDriverNew.h:446
static size_t determineStepsPerBlock(IndexType global_walkers, IndexType requested_samples, IndexType requested_steps, IndexType blocks)
pure function calculating the actual number of steps per block
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
bool finalize(int block, bool dumpwalkers=true)
finalize a qmc section
const std::string & get_root_name() const override
Definition: QMCDriverNew.h:331
const std::string QMCType
type of qmc: assigned by subclasses
Definition: QMCDriverNew.h:421
A collection of functions for dividing fairly.
static void initialLogEvaluation(int crowd_id, UPtrVector< Crowd > &crowds, UPtrVector< ContextForSteps > &step_context)
WalkerConfigurations & walker_configs_ref_
Definition: QMCDriverNew.h:481
Compilation units that construct QMCDriverInput need visibility to the actual input classes types in ...
Communicate * myComm
pointer to Communicate
Definition: MPIObjectBase.h:62
This a subclass for runtime errors that will occur on all ranks.
void readXML(xmlNodePtr cur)
Read variable values (initialize) from XML input, call checkValid.
void gather(T &sb, T &rb, int dest=0)
struct DriverWalkerResourceCollection golden_resource_
the golden multi walker shared resource serves ParticleSet TrialWaveFunction right now but actually s...
Definition: QMCDriverNew.h:433
std::ostream & operator<<(std::ostream &out, const AntiSymTensor< T, D > &rhs)
QMCTraits::FullPrecRealType FullPrecRealType
Definition: QMCDriverNew.h:80
const std::string get_drift_modifier() const
void set_num_global_walkers(IndexType num_global_walkers)
Definition: MCPopulation.h:183
void setStatus(const std::string &aname, const std::string &h5name, bool append) override
Set the status of the QMCDriver.
static QMCDriverNew::AdjustedWalkerCounts adjustGlobalWalkerCount(Communicate &comm, const IndexType current_configs, const IndexType requested_total_walkers, const IndexType requested_walkers_per_rank, const RealType reserve_walkers, int num_crowds)
}@
QMCTraits::IndexType IndexType
Definition: QMCDriverNew.h:79
UPtrVector< ContextForSteps > step_contexts_
Per crowd move contexts, this is where the DistanceTables etc.
Definition: QMCDriverNew.h:450
MakeReturn< UnaryNode< FnExp, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t exp(const Vector< T1, C1 > &l)
static UPtrVector< RandomBase< FullPrecRealType > > Children
RefVector< RandomBase< FullPrecRealType > > getRngRefs() const
Definition: QMCDriverNew.h:201
std::unique_ptr< T > UPtr
void createWalkers(IndexType num_walkers, const WalkerConfigurations &walker_configs, RealType reserve=1.0)
}@
void saveWalkerConfigurations(WalkerConfigurations &walker_configs)
save walker configurations to walker_configs_ref_
std::unique_ptr< DriftModifierBase > drift_modifier_
drift modifer
Definition: QMCDriverNew.h:394
const TrialWaveFunction & get_golden_twf() const
Definition: MCPopulation.h:178
static void checkLogAndGL(Crowd &crowd, const std::string_view location)
check logpsi and grad and lap against values computed from scratch
void auxHevaluate(ParticleSet &P)
const std::optional< EstimatorManagerInput > & get_estimator_manager_input() const
std::vector< std::reference_wrapper< T > > RefVector
MakeReturn< UnaryNode< FnSqrt, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t sqrt(const Vector< T1, C1 > &l)
input::PeriodStride get_check_point_period() const
void flush()
flush stream buffer
Definition: InfoStream.cpp:39
Class to represent a many-body trial wave function.
Input type for EstimatorManagerNew Parses Estimators level of input and and delegates child estimator...
handles acquire/release resource by the consumer (RefVectorWithLeader type).
Tensor< typename BinaryReturn< T1, T2, OpMultiply >::Type_t, D > dot(const AntiSymTensor< T1, D > &lhs, const AntiSymTensor< T2, D > &rhs)
IndexType get_max_blocks() const
void bcast(T &)
QMCTraits::RealType RealType
Definition: QMCDriverNew.h:78
UPtrVector< Crowd > crowds_
}@
Definition: QMCDriverNew.h:389
UPtrVector< RandomBase< FullPrecRealType > > Rng
Random number generators.
Definition: QMCDriverNew.h:453
QMCDriverInput qmcdriver_input_
Definition: QMCDriverNew.h:372
void measureImbalance(const std::string &tag) const
inject additional barrier and measure load imbalance.
static void make_seeds()
reset the generator
IndexType get_num_local_walkers() const
Definition: MCPopulation.h:169
DriverTimers timers_
period of dumping walker configurations and everything else for restart
Definition: QMCDriverNew.h:472
RealType max_disp_sq_
they should be limited to values that can be changed from input or are live state.
Definition: QMCDriverNew.h:380
std::vector< IV > fairDivide(IV ntot, IV npart)
return the occupation vector for ntot entities partitioned npart ways.
Definition: FairDivide.h:77
A container class to represent a walker.
Definition: Walker.h:49
void putWalkers(std::vector< xmlNodePtr > &wset) override
Read walker configurations from *.config.h5 files.
void redistributeWalkers(WTTV &walker_consumers)
distributes walkers and their "cloned" elements to the elements of a vector of unique_ptr to "walker_...
Definition: MCPopulation.h:131
void initializeQMC(const AdjustedWalkerCounts &awc)
Do common section starting tasks for VMC and DMC.
void killLastWalker()
Kill last walker (just barely)
Input representation for Driver base class runtime parameters.
void endBlock()
end of a block operations. Aggregates statistics across all MPI ranks and write to disk...
DriftModifierBase * createDriftModifier(xmlNodePtr cur, const Communicate *myComm)
create DriftModifier