QMCDriverNew::QMCDriverNew(const ProjectData& project_data,
                           QMCDriverInput&& input,
                           const std::optional<EstimatorManagerInput>& global_emi,
                           WalkerConfigurations& wc,
                           MCPopulation&& population,
                           const std::string timer_prefix,
                           Communicate* comm,
                           const std::string& QMC_driver_type)
    : MPIObjectBase(comm),
      qmcdriver_input_(std::move(input)),
      QMCType(QMC_driver_type),
      population_(std::move(population)),
      dispatchers_(!qmcdriver_input_.areWalkersSerialized()),
      estimator_manager_(nullptr),
      timers_(timer_prefix),
      driver_scope_profiler_(qmcdriver_input_.get_scoped_profiling()),
      project_data_(project_data),
      walker_configs_ref_(wc)
{
  // Merge the global estimator input with the driver-local one; either may be absent.
  auto makeEstimatorManagerInput = [](auto& global_emi, auto& local_emi) -> EstimatorManagerInput {
    if (global_emi.has_value() && local_emi.has_value())
      return {global_emi.value(), local_emi.value()};
    else if (global_emi.has_value())
      return {global_emi.value()};
    else if (local_emi.has_value())
      return {local_emi.value()};
    else
      return {};
  };
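// Standalone sketch, not part of QMCDriverNew.cpp: the same std::optional merging pattern in
// isolation. "Input" and merge() are hypothetical stand-ins for EstimatorManagerInput and the
// lambda above; the real type list-initializes from one or two EstimatorManagerInput values.
#include <cassert>
#include <optional>
#include <vector>

struct Input
{
  std::vector<int> blocks;
  Input() = default;
  Input(int a) : blocks{a} {}
  Input(int a, int b) : blocks{a, b} {}
};

Input merge(const std::optional<int>& global, const std::optional<int>& local)
{
  if (global.has_value() && local.has_value())
    return {global.value(), local.value()};
  else if (global.has_value())
    return {global.value()};
  else if (local.has_value())
    return {local.value()};
  else
    return {};
}

int main()
{
  assert(merge(1, 2).blocks.size() == 2);             // both inputs present
  assert(merge(std::nullopt, 2).blocks.size() == 1);  // only the local input present
  assert(merge(std::nullopt, std::nullopt).blocks.empty());
  return 0;
}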
  estimator_manager_ =
      std::make_unique<EstimatorManagerNew>(comm,
                                            makeEstimatorManagerInput(global_emi,
                                                                      qmcdriver_input_.get_estimator_manager_input()),
                                            /* ... */
                                            population.get_golden_twf());
  // ...
  auto& lattice = population.get_golden_electrons().getLattice();
  // ...
  // wOut is the record engine for walkers (checkpoint/restart output).
  wOut = std::make_unique<HDFWalkerOutput>(population.get_golden_electrons().getTotalNum(), get_root_name(), myComm);
}
for (int i = 0; i < Rng.size(); ++i)
{
  // ...
}
void QMCDriverNew::checkNumCrowdsLTNumThreads(const int num_crowds)
{
  int num_threads(Concurrency::maxCapacity<>());
  if (num_crowds > num_threads)
  {
    std::stringstream error_msg;
    error_msg << "Bad Input: num_crowds (" << num_crowds << ") > num_threads (" << num_threads << ")\n";
    throw UniformCommunicateError(error_msg.str());
  }
}
155 "incompatible with estimators requiring per particle listeners");
167 app_debug() <<
"Creating multi walker shared resources" << std::endl;
171 app_debug() <<
"Multi walker shared resources creation completed" << std::endl;
  for (int i = 0; i < crowds_.size(); ++i)
  {
    // ...
  }
  app_log() << "\n========================================================="
            << "\n Start " << QMCType;
  // ...
  app_log() << "\n=========================================================" << std::endl;
void QMCDriverNew::putWalkers(std::vector<xmlNodePtr>& wset) // read walker configurations from *.config.h5 files
{
  const int nfile = wset.size();
  // ...
  for (int i = 0; i < wset.size(); i++)
    if (W_in.put(wset[i]))
      // ...
  // (excerpt, QMCDriverNew::finalize)
  app_log() /* ... */ << " walker configurations to the next QMC driver." << std::endl;
  if (DumpConfig && dumpwalkers)
    // ...
  // (excerpt, QMCDriverNew::makeLocalWalkers) adjust the local population up or down
  // ...
  throw std::runtime_error("Unexpected walker count resulting in dangerous spawning");
  // ...
  for (int i = 0; i < num_additional_walkers; ++i)
    population_.spawnWalker();
  // ...
  for (int i = 0; i < num_walkers_to_kill; ++i)
    population_.killLastWalker();
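// Standalone sketch, not part of QMCDriverNew.cpp: the same adjust-to-target idea on a plain
// container. adjust_to() is a hypothetical helper; "spawn" copies an existing element and
// "kill" removes from the back, mirroring spawnWalker()/killLastWalker() above.
#include <cassert>
#include <vector>

void adjust_to(std::vector<int>& walkers, int target)
{
  const int delta = target - static_cast<int>(walkers.size());
  if (delta > 0)
    for (int i = 0; i < delta; ++i)
      walkers.push_back(walkers.back()); // spawn: copy an existing walker
  else
    for (int i = 0; i < -delta; ++i)
      walkers.pop_back();                // kill the last walker
}

int main()
{
  std::vector<int> walkers{1, 2, 3};
  adjust_to(walkers, 5);
  assert(walkers.size() == 5);
  adjust_to(walkers, 2);
  assert(walkers.size() == 2);
  return 0;
}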
void QMCDriverNew::createRngsStepContexts(int num_crowds)
{
  Rng.resize(num_crowds);
  // ...
  app_warning() << " Initializing global RandomNumberControl! "
                << "This message should not be seen in production code but only in unit tests." << std::endl;
  // ...
  for (int i = 0; i < num_crowds; ++i)
    // ... clone a generator and build a ContextForSteps for each crowd
}
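// Standalone sketch, generic <random> rather than QMCPACK's RandomNumberControl: the idea of
// giving every crowd its own generator so per-crowd random streams stay independent.
#include <cstdint>
#include <memory>
#include <random>
#include <vector>

int main()
{
  const int num_crowds = 4;
  std::seed_seq sseq{2024, 7, 13};
  std::vector<std::uint32_t> crowd_seeds(num_crowds);
  sseq.generate(crowd_seeds.begin(), crowd_seeds.end());

  std::vector<std::unique_ptr<std::mt19937>> rng(num_crowds);
  for (int i = 0; i < num_crowds; ++i)
    rng[i] = std::make_unique<std::mt19937>(crowd_seeds[i]); // one independent stream per crowd
  return 0;
}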
void QMCDriverNew::initialLogEvaluation(int crowd_id, UPtrVector<Crowd>& crowds, UPtrVector<ContextForSteps>& context_for_steps)
{
  Crowd& crowd = *crowds[crowd_id];
  if (crowd.size() == 0)
    return;

  crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen());
  auto& ps_dispatcher  = crowd.dispatchers_.ps_dispatcher_;
  auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_;
  auto& ham_dispatcher = crowd.dispatchers_.ham_dispatcher_;
  // ... gather RefVectorWithLeader views of the crowd's walkers, electrons, wavefunctions, and
  //     Hamiltonians (walkers, walker_elecs, walker_twfs, walker_hamiltonians = crowd.get_walker_hamiltonians())
  // ...
  std::vector<bool> recompute_mask(walkers.size(), true);
  ps_dispatcher.flex_loadWalker(walker_elecs, walkers, recompute_mask, true);
  ps_dispatcher.flex_donePbyP(walker_elecs);
  twf_dispatcher.flex_evaluateLog(walker_twfs, walker_elecs);
  // ...
  for (int iw = 0; iw < crowd.size(); ++iw)
    saveElecPosAndGLToWalkers(walker_elecs[iw], walkers[iw]);

  std::vector<QMCHamiltonian::FullPrecRealType> local_energies(
      ham_dispatcher.flex_evaluate(walker_hamiltonians, walker_twfs, walker_elecs));

  // resetSigNLocalEnergy does: walker.resetProperty(twf.getLogPsi(), twf.getPhase(), local_energy);
  for (int iw = 0; iw < crowd.size(); ++iw)
    resetSigNLocalEnergy(walkers[iw], walker_twfs[iw], local_energies[iw]);

  for (int iw = 0; iw < crowd.size(); ++iw)
    evaluateNonPhysicalHamiltonianElements(walker_hamiltonians[iw], walker_elecs[iw], walkers[iw]);

  for (int iw = 0; iw < crowd.size(); ++iw)
    savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);

  // doesDoinTheseLastMatter does: walker.wasTouched = false;
  for (int iw = 0; iw < crowd.size(); ++iw)
    doesDoinTheseLastMatter(walkers[iw]);
}
QMCDriverNew::AdjustedWalkerCounts QMCDriverNew::adjustGlobalWalkerCount(Communicate& comm,
                                                                         const IndexType current_configs,
                                                                         const IndexType requested_total_walkers,
                                                                         const IndexType requested_walkers_per_rank,
                                                                         const RealType reserve_walkers,
                                                                         int num_crowds)
{
  const int num_ranks = comm.size();
  const int rank_id   = comm.rank();
  // ...
  if (num_crowds == 0)
    num_crowds = Concurrency::maxCapacity<>();
  // ... an AdjustedWalkerCounts awc is created; walkers_per_rank is sized to num_ranks

  if (requested_total_walkers != 0)
  {
    if (requested_total_walkers < num_ranks)
    {
      std::ostringstream error;
      error << "Running on " << num_ranks << " MPI ranks. The request of " << requested_total_walkers
            << " global walkers cannot be satisfied! Need at least one walker per MPI rank.";
      throw UniformCommunicateError(error.str());
    }
    if (requested_walkers_per_rank != 0 && requested_total_walkers != requested_walkers_per_rank * num_ranks)
    {
      std::ostringstream error;
      error << "Running on " << num_ranks << " MPI ranks, The request of " << requested_total_walkers
            << " global walkers and " << requested_walkers_per_rank << " walkers per rank cannot be satisfied!";
      throw UniformCommunicateError(error.str());
    }
    awc.global_walkers = requested_total_walkers;
    // ...
  }
  else
  {
    if (requested_walkers_per_rank != 0)
      awc.walkers_per_rank[rank_id] = requested_walkers_per_rank;
    else if (current_configs)
      awc.walkers_per_rank[rank_id] = current_configs;
    else
      awc.walkers_per_rank[rank_id] = num_crowds;
    // ...
    awc.global_walkers = std::accumulate(awc.walkers_per_rank.begin(), awc.walkers_per_rank.end(), 0);
  }

  if (awc.global_walkers % num_ranks)
    app_warning() << "TotalWalkers (" << awc.global_walkers << ") not divisible by number of ranks ("
                  << num_ranks << "). This will result in a loss of efficiency.\n";

  awc.walkers_per_crowd = fairDivide(awc.walkers_per_rank[rank_id], num_crowds);

  if (awc.walkers_per_rank[rank_id] % num_crowds)
    app_warning() << "Walkers per rank (" << awc.walkers_per_rank[rank_id] << ") not divisible by number of crowds ("
                  << num_crowds << "). This will result in a loss of efficiency.\n";
  // ...
  return awc;
}
size_t QMCDriverNew::determineStepsPerBlock(IndexType global_walkers, IndexType requested_samples,
                                            IndexType requested_steps, IndexType blocks)
{
  assert(global_walkers > 0 && "QMCDriverNew::determineStepsPerBlock global_walkers must be positive!");
  // ...
  if (requested_samples > 0 && requested_steps > 0)
  {
    if (requested_samples <= global_walkers * requested_steps * blocks)
      return requested_steps;
    else
      throw UniformCommunicateError(/* ... */ "multiplies the requested number of steps and blocks");
  }
  else if (requested_samples > 0)
  {
    IndexType one_step_minimal_samples = global_walkers * blocks;
    return (requested_samples + one_step_minimal_samples - 1) / one_step_minimal_samples;
  }
  else if (requested_steps > 0)
    return requested_steps;
  else
    return 1;
}
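// Standalone sketch of the samples-to-steps arithmetic above: with only a sample target given,
// the steps per block are the ceiling of requested_samples / (global_walkers * blocks), since
// every step of every block contributes one sample per walker.
#include <cassert>
#include <cstddef>

size_t steps_for_samples(size_t global_walkers, size_t requested_samples, size_t blocks)
{
  const size_t one_step_minimal_samples = global_walkers * blocks;
  return (requested_samples + one_step_minimal_samples - 1) / one_step_minimal_samples; // ceiling division
}

int main()
{
  // 1000 samples requested, 64 walkers, 10 blocks: 640 samples per step, so 2 steps per block
  // are needed (yielding 1280 >= 1000 samples).
  assert(steps_for_samples(64, 1000, 10) == 2);
  return 0;
}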
void QMCDriverNew::endBlock()
{
  // ...
  unsigned long block_accept = 0;
  unsigned long block_reject = 0;

  std::vector<RefVector<OperatorEstBase>> crowd_operator_estimators;
  // ...
  std::vector<RefVector<ScalarEstimatorBase>> crowd_scalar_estimators;
  // ...
  for (const UPtr<Crowd>& crowd : crowds_)
  {
    main_scalar_estimators.push_back(crowd->get_estimator_manager_crowd().get_main_estimator());
    crowd_scalar_estimators.emplace_back(crowd->get_estimator_manager_crowd().get_scalar_estimators());
    total_block_weight += crowd->get_estimator_manager_crowd().get_block_weight();
    block_accept += crowd->get_accept();
    block_reject += crowd->get_reject();
    // ...
    crowd_operator_estimators.emplace_back(crowd->get_estimator_manager_crowd().get_operator_estimators());
  }

#ifdef DEBUG_PER_STEP_ACCEPT_REJECT
  app_warning() << "accept: " << block_accept << " reject: " << block_reject;
  FullPrecRealType total_accept_ratio =
      static_cast<FullPrecRealType>(block_accept) / static_cast<FullPrecRealType>(block_accept + block_reject);
  std::cerr << " total_accept_ratio: " << total_accept_ratio << '\n';
#endif
  // ...
}
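// Standalone sketch of the acceptance-ratio aggregation above: per-crowd accept/reject counters
// are summed over the block and the ratio is formed in full precision. The counter values here
// are made up for illustration.
#include <iostream>
#include <vector>

int main()
{
  struct CrowdCounts { unsigned long accept, reject; };
  std::vector<CrowdCounts> crowds{{120, 8}, {115, 13}, {118, 10}};

  unsigned long block_accept = 0;
  unsigned long block_reject = 0;
  for (const auto& c : crowds)
  {
    block_accept += c.accept;
    block_reject += c.reject;
  }

  const double total_accept_ratio =
      static_cast<double>(block_accept) / static_cast<double>(block_accept + block_reject);
  std::cout << "accept: " << block_accept << " reject: " << block_reject
            << " total_accept_ratio: " << total_accept_ratio << '\n';
  return 0;
}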
// Check logpsi, gradients, and Laplacians against values recomputed from scratch.
void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location)
{
  auto& ps_dispatcher  = crowd.dispatchers_.ps_dispatcher_;
  auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_;
  // ...
  std::vector<TrialWaveFunction::LogValue> log_values(walker_twfs.size());
  std::vector<ParticleSet::ParticleGradient> Gs;
  std::vector<ParticleSet::ParticleLaplacian> Ls;
  // ...
  for (int iw = 0; iw < log_values.size(); iw++)
  {
    log_values[iw] = {walker_twfs[iw].getLogPsi(), walker_twfs[iw].getPhase()};
    Gs.push_back(walker_twfs[iw].G);
    Ls.push_back(walker_twfs[iw].L);
  }

  // recompute the reference values from scratch
  ps_dispatcher.flex_update(walker_elecs);
  twf_dispatcher.flex_evaluateLog(walker_twfs, walker_elecs);

  RealType threshold;
  // mixed precision needs a looser threshold
  if constexpr (std::is_same<RealType, FullPrecRealType>::value)
    threshold = 100 * std::numeric_limits<float>::epsilon();
  else
    threshold = 500 * std::numeric_limits<float>::epsilon();

  std::ostringstream msg;
  for (int iw = 0; iw < log_values.size(); iw++)
  {
    auto& ref_G = walker_twfs[iw].G;
    auto& ref_L = walker_twfs[iw].L;
    // ... if the stored log value disagrees with the recomputed reference (ref_log), report:
    msg << "Logpsi walker[" << iw << "] " << log_values[iw] << " ref " << ref_log << std::endl;

    for (int iel = 0; iel < ref_G.size(); iel++)
    {
      auto grad_diff = ref_G[iel] - Gs[iw][iel];
      // ... if the gradient difference exceeds threshold, report:
      msg << "walker[" << iw << "] Grad[" << iel << "] ref = " << ref_G[iel] << " wrong = " << Gs[iw][iel]
          << " Delta " << grad_diff << std::endl;

      auto lap_diff = ref_L[iel] - Ls[iw][iel];
      // ... if the Laplacian difference exceeds threshold, report:
      if (std::is_same<RealType, FullPrecRealType>::value)
        msg << "walker[" << iw << "] lap[" << iel << "] ref = " << ref_L[iel] << " wrong = " << Ls[iw][iel]
            << " Delta " << lap_diff << std::endl;
    }
  }
  // ... only if any check failed:
  std::cerr << msg.str();
  throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n"));
}
// Inject an additional barrier and measure load imbalance.
void QMCDriverNew::measureImbalance(const std::string& tag) const
{
  Timer only_this_barrier;
  myComm->barrier();
  std::vector<double> my_barrier_time(1, only_this_barrier.elapsed());
  std::vector<double> barrier_time_all_ranks(myComm->size(), 0.0);
  myComm->gather(my_barrier_time, barrier_time_all_ranks, 0);
  if (!myComm->rank())
  {
    auto const count  = static_cast<double>(barrier_time_all_ranks.size());
    const auto max_it = std::max_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
    const auto min_it = std::min_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
    app_log() << tag << " MPI imbalance measured by an additional barrier (slow ranks wait less):" << std::endl
              << " average wait seconds = "
              << std::accumulate(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end(), 0.0) / count << std::endl
              << " min wait at rank " << std::distance(barrier_time_all_ranks.begin(), min_it)
              << ", seconds = " << *min_it << std::endl
              << " max wait at rank " << std::distance(barrier_time_all_ranks.begin(), max_it)
              << ", seconds = " << *max_it << std::endl;
  }
}
// Update the global offsets of walker configurations after active walkers have been touched.
void QMCDriverNew::setWalkerOffsets(WalkerConfigurations& walker_configs, Communicate* comm)
{
  std::vector<int> nw(comm->size(), 0);
  std::vector<int> nwoff(comm->size() + 1, 0);
  // ... each rank records its active walker count in nw and the counts are combined across ranks
  for (int ip = 0; ip < comm->size(); ip++)
    nwoff[ip + 1] = nwoff[ip] + nw[ip];
  walker_configs.setWalkerOffsets(nwoff);
}
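// Standalone sketch of the offset computation above: an exclusive prefix sum over the per-rank
// walker counts; rank r then owns the global walker index range [nwoff[r], nwoff[r+1]). The
// counts below are made up for illustration.
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
  const std::vector<int> nw{33, 33, 32, 32}; // active walkers owned by each rank
  std::vector<int> nwoff(nw.size() + 1, 0);
  for (std::size_t ip = 0; ip < nw.size(); ++ip)
    nwoff[ip + 1] = nwoff[ip] + nw[ip];

  for (int off : nwoff)
    std::cout << off << ' '; // prints: 0 33 66 98 130
  std::cout << '\n';
  return 0;
}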