QMCPACK
EstimatorManagerBase.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2020 QMCPACK developers.
6 //
7 // File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
8 // Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
9 // Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
11 // Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
12 // Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
13 // Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
14 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
15 //
16 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
17 //////////////////////////////////////////////////////////////////////////////////////
18 
20 #include "EstimatorManagerBase.h"
22 #include "Message/Communicate.h"
23 #include "Message/CommOperators.h"
24 #include "Message/CommUtilities.h"
32 #include "hdf/HDFVersion.h"
33 #include "OhmmsData/AttributeSet.h"
35 
36 #include <array>
37 #include <functional>
38 
39 //leave it for serialization debug
40 //#define DEBUG_ESTIMATOR_ARCHIVE
41 
42 namespace qmcplusplus
43 {
44 /** enumeration for EstimatorManagerBase.Options
45  */
46 enum
47 {
48  COLLECT = 0,
53 };
54 
55 //initialize the name of the primary estimator
57  : RecordCount(0),
58  myComm(0),
59  MainEstimator(0),
60  main_estimator_name_("LocalEnergy"),
61  max_output_scalar_dat_(8),
62  max_block_avg_name_(20)
63 {
64  setCommunicator(c);
65 }
66 
68  : Options(em.Options),
69  RecordCount(0),
70  myComm(0),
71  MainEstimator(0),
72  EstimatorMap(em.EstimatorMap),
73  main_estimator_name_(em.main_estimator_name_),
74  max_output_scalar_dat_(em.max_output_scalar_dat_),
75  max_block_avg_name_(20)
76 {
77  //inherit communicator
79 
80  // Here Estimators are ScalarEstimatorBase
81  for (int i = 0; i < em.Estimators.size(); i++)
82  Estimators.emplace_back(em.Estimators[i]->clone());
84  if (em.Collectables)
85  Collectables.reset(em.Collectables->clone());
86 }
87 
89 
91 {
92  // I think this is actually checking if this is the "Main Estimator"
93  if (myComm && myComm == c)
94  return;
95  myComm = c ? c : OHMMS::Controller;
96  //set the default options
97  // This is a flag to tell manager if there is more than one rank
98  // running walkers, its discovered by smelly query of myComm.
99  Options.set(COLLECT, myComm->size() > 1);
100  Options.set(MANAGE, myComm->rank() == 0);
101  if (RemoteData.empty())
102  {
103  RemoteData.push_back(std::make_unique<BufferType>());
104  RemoteData.push_back(std::make_unique<BufferType>());
105  }
106 }
107 
108 /** set CollectSum
109  * @param collect if true, global sum is done over the values
110  */
112 {
113  if (!myComm)
114  setCommunicator(0);
115  Options.set(COLLECT, (myComm->size() == 1) ? false : collect);
116  //force to be false for serial runs
117  //CollectSum = (myComm->size() == 1)? false:collect;
118 }
119 
120 /** reset names of the properties
121  *
122  * The number of estimators and their order can vary from the previous state.
123  * reinitialized properties before setting up a new BlockAverage data list.
124  *
125  */
127 {
128  weightInd = BlockProperties.add("BlockWeight");
129  cpuInd = BlockProperties.add("BlockCPU");
130  acceptInd = BlockProperties.add("AcceptRatio");
131  BlockAverages.clear(); //cleaup the records
132  for (int i = 0; i < Estimators.size(); i++)
133  Estimators[i]->add2Record(BlockAverages);
135  if (Collectables)
136  Collectables->add2Record(BlockAverages);
137 }
138 
140 
141 void EstimatorManagerBase::addHeader(std::ostream& o)
142 {
143  o.setf(std::ios::scientific, std::ios::floatfield);
144  o.setf(std::ios::left, std::ios::adjustfield);
145  o.precision(10);
146  for (int i = 0; i < BlockAverages.size(); i++)
147  max_block_avg_name_ = std::max(max_block_avg_name_, BlockAverages.Names[i].size() + 2);
148  for (int i = 0; i < BlockProperties.size(); i++)
151  o << "# index ";
152  for (int i = 0; i < maxobjs; i++)
153  o << std::setw(max_block_avg_name_) << BlockAverages.Names[i];
154  for (int i = 0; i < BlockProperties.size(); i++)
155  o << std::setw(max_block_avg_name_) << BlockProperties.Names[i];
156  o << std::endl;
157  o.setf(std::ios::right, std::ios::adjustfield);
158 }
159 
160 void EstimatorManagerBase::start(int blocks, bool record)
161 {
162  reset();
163  RecordCount = 0;
166  int nc = (Collectables) ? Collectables->size() : 0;
168  // \todo Collectables should just have its own data structures not change the EMBS layout.
172  //count the buffer size for message
174  int sources = 2;
175  //allocate buffer for data collection
176  if (RemoteData.empty())
177  for (int i = 0; i < sources; ++i)
178  RemoteData.push_back(std::make_unique<BufferType>(BufferSize));
179  else
180  for (int i = 0; i < RemoteData.size(); ++i)
181  RemoteData[i]->resize(BufferSize);
182 #if defined(DEBUG_ESTIMATOR_ARCHIVE)
183  if (record && !DebugArchive)
184  {
185  std::array<char, 128> fname;
186  if (std::snprintf(fname.data(), fname.size(), "%s.p%03d.scalar.dat", myComm->getName().c_str(), myComm->rank()) < 0)
187  throw std::runtime_error("Error generating filename");
188  DebugArchive = std::make_unique<std::ofstream>(fname.data());
189  addHeader(*DebugArchive);
190  }
191 #endif
192  //set Options[RECORD] to enable/disable output
193  Options.set(RECORD, record && Options[MANAGE]);
194  if (Options[RECORD])
195  {
196  std::filesystem::path fname(myComm->getName());
197  fname.concat(".scalar.dat");
198  Archive = std::make_unique<std::ofstream>(fname);
199  addHeader(*Archive);
200  if (!h5desc.empty())
201  {
202  h5desc.clear();
203  }
204  fname = myComm->getName() + ".stat.h5";
205  h_file.create(fname);
206  for (int i = 0; i < Estimators.size(); i++)
207  Estimators[i]->registerObservables(h5desc, h_file);
208  if (Collectables)
209  Collectables->registerObservables(h5desc, h_file);
210  }
211 }
212 
213 void EstimatorManagerBase::stop(const std::vector<EstimatorManagerBase*> est)
214 {
215  int num_threads = est.size();
216  //normalize by the number of threads per node
217  RealType tnorm = 1.0 / static_cast<RealType>(num_threads);
218  //add averages and divide them by the number of threads
219  AverageCache = est[0]->AverageCache;
220  for (int i = 1; i < num_threads; i++)
221  AverageCache += est[i]->AverageCache;
222  AverageCache *= tnorm;
223  SquaredAverageCache = est[0]->SquaredAverageCache;
224  for (int i = 1; i < num_threads; i++)
226  SquaredAverageCache *= tnorm;
227  //add properties and divide them by the number of threads except for the weight
228  PropertyCache = est[0]->PropertyCache;
229  for (int i = 1; i < num_threads; i++)
230  PropertyCache += est[i]->PropertyCache;
231  for (int i = 1; i < PropertyCache.size(); i++)
232  PropertyCache[i] *= tnorm;
233  stop();
234 }
235 
236 /** Stop a run
237  *
238  * Collect data in Cache and print out data into hdf5 and ascii file.
239  * This should not be called in a OpenMP parallel region or should
240  * be guarded by master/single.
241  * Keep the ascii output for now
242  */
244 {
245  //close any open files
246  Archive.reset();
247  h_file.close();
248 }
249 
250 
252 {
253  MyTimer.restart();
254  BlockWeight = 0.0;
255 }
256 
257 /** take statistics of a block
258  * @param accept acceptance rate of this block
259  * @param collectall if true, need to gather data over MPI tasks
260  */
261 void EstimatorManagerBase::stopBlock(RealType accept, bool collectall)
262 {
263  //take block averages and update properties per block
266  PropertyCache[acceptInd] = accept;
267  for (int i = 0; i < Estimators.size(); i++)
268  Estimators[i]->takeBlockAverage(AverageCache.begin(), SquaredAverageCache.begin());
269  if (Collectables)
270  {
271  Collectables->takeBlockAverage(AverageCache.begin(), SquaredAverageCache.begin());
272  }
273  if (collectall)
275 }
276 
277 void EstimatorManagerBase::stopBlock(const std::vector<EstimatorManagerBase*>& est)
278 {
279  //normalized it by the thread
280  int num_threads = est.size();
281  RealType tnorm = 1.0 / num_threads;
282  AverageCache = est[0]->AverageCache;
283  for (int i = 1; i < num_threads; i++)
284  AverageCache += est[i]->AverageCache;
285  AverageCache *= tnorm;
286  SquaredAverageCache = est[0]->SquaredAverageCache;
287  for (int i = 1; i < num_threads; i++)
289  SquaredAverageCache *= tnorm;
290  PropertyCache = est[0]->PropertyCache;
291  for (int i = 1; i < num_threads; i++)
292  PropertyCache += est[i]->PropertyCache;
293  for (int i = 1; i < PropertyCache.size(); i++)
294  PropertyCache[i] *= tnorm;
296 }
297 
299 {
300  if (Options[COLLECT])
301  {
302  //copy cached data to RemoteData[0]
303  int n1 = AverageCache.size();
304  int n2 = n1 + AverageCache.size();
305  int n3 = n2 + PropertyCache.size();
306  {
307  BufferType::iterator cur(RemoteData[0]->begin());
310  copy(PropertyCache.begin(), PropertyCache.end(), cur + n2);
311  }
312  myComm->reduce(*RemoteData[0]);
313  if (Options[MANAGE])
314  {
315  BufferType::iterator cur(RemoteData[0]->begin());
316  copy(cur, cur + n1, AverageCache.begin());
317  copy(cur + n1, cur + n2, SquaredAverageCache.begin());
318  copy(cur + n2, cur + n3, PropertyCache.begin());
319  RealType nth = 1.0 / static_cast<RealType>(myComm->size());
320  AverageCache *= nth;
321  SquaredAverageCache *= nth;
322  //do not weight weightInd
323  for (int i = 1; i < PropertyCache.size(); i++)
324  PropertyCache[i] *= nth;
325  }
326  }
327  //add the block average to summarize
330  if (Archive)
331  {
332  *Archive << std::setw(10) << RecordCount;
334  for (int j = 0; j < maxobjs; j++)
335  *Archive << std::setw(max_block_avg_name_) << AverageCache[j];
336  for (int j = 0; j < PropertyCache.size(); j++)
337  *Archive << std::setw(max_block_avg_name_) << PropertyCache[j];
338  *Archive << std::endl;
339  for (int o = 0; o < h5desc.size(); ++o)
341  h_file.flush();
342  }
343  RecordCount++;
344 }
345 
346 /** accumulate Local energies and collectables
347  * @param W ensemble
348  */
350 {
352  RealType norm = 1.0 / W.getGlobalNumWalkers();
353  for (int i = 0; i < Estimators.size(); i++)
354  Estimators[i]->accumulate(W, W.begin(), W.end(), norm);
355  if (Collectables) //collectables are normalized by QMC drivers
356  Collectables->accumulate_all(W.Collectables, 1.0);
357 }
358 
362 {
363  BlockWeight += it_end - it;
364  RealType norm = 1.0 / W.getGlobalNumWalkers();
365  for (int i = 0; i < Estimators.size(); i++)
366  Estimators[i]->accumulate(W, it, it_end, norm);
367  if (Collectables)
368  Collectables->accumulate_all(W.Collectables, 1.0);
369 }
370 
372 {
373  if (Options[COLLECT]) //need to broadcast the value
374  {
375  RealType tmp[3];
376  tmp[0] = energyAccumulator.count();
377  tmp[1] = energyAccumulator.result();
378  tmp[2] = varAccumulator.result();
379  myComm->bcast(tmp, 3);
380  e = tmp[1] / tmp[0];
381  var = tmp[2] / tmp[0] - e * e;
382  }
383  else
384  {
386  var = varAccumulator.mean() - e * e;
387  }
388 }
389 
391 {
392  if (MainEstimator == nullptr)
393  add(std::make_unique<LocalEnergyOnlyEstimator>(), main_estimator_name_);
394  return MainEstimator;
395 }
396 
398 {
399  std::map<std::string, int>::iterator it = EstimatorMap.find(a);
400  if (it == EstimatorMap.end())
401  return nullptr;
402  else
403  return Estimators[(*it).second].get();
404 }
405 
406 /** This should be moved to branch engine */
408 {
409  std::vector<std::string> extra;
410  cur = cur->children;
411  while (cur != NULL)
412  {
413  std::string cname((const char*)(cur->name));
414  if (cname == "estimator")
415  {
416  std::string est_name(main_estimator_name_);
417  std::string use_hdf5("yes");
418  OhmmsAttributeSet hAttrib;
419  hAttrib.add(est_name, "name");
420  hAttrib.add(use_hdf5, "hdf5");
421  hAttrib.put(cur);
422  if ((est_name == main_estimator_name_) || (est_name == "elocal"))
423  {
425  add(std::make_unique<LocalEnergyEstimator>(H, use_hdf5 == "yes"), main_estimator_name_);
426  }
427  else if (est_name == "RMC")
428  {
429  int nobs(20);
430  OhmmsAttributeSet hAttrib;
431  hAttrib.add(nobs, "nobs");
432  hAttrib.put(cur);
433  max_output_scalar_dat_ = nobs * H.sizeOfObservables() + 3;
434  add(std::make_unique<RMCLocalEnergyEstimator>(H, nobs), main_estimator_name_);
435  }
436  else if (est_name == "CSLocalEnergy")
437  {
438  OhmmsAttributeSet hAttrib;
439  int nPsi = 1;
440  hAttrib.add(nPsi, "nPsi");
441  hAttrib.put(cur);
442  add(std::make_unique<CSEnergyEstimator>(H, nPsi), main_estimator_name_);
443  app_log() << " Adding a CSEnergyEstimator for the MainEstimator " << std::endl;
444  }
445  else
446  extra.push_back(est_name);
447  }
448  cur = cur->next;
449  }
450  if (Estimators.empty())
451  {
452  app_log() << " Adding a default LocalEnergyEstimator for the MainEstimator " << std::endl;
454  add(std::make_unique<LocalEnergyEstimator>(H, true), main_estimator_name_);
455  }
456  //Collectables is special and should not be added to Estimators
457  if (Collectables == nullptr && H.sizeOfCollectables())
458  {
459  app_log() << " Using CollectablesEstimator for collectables, e.g. sk, gofr, density " << std::endl;
460  Collectables = std::make_unique<CollectablesEstimator>(H);
461  }
462  return true;
463 }
464 
465 int EstimatorManagerBase::add(std::unique_ptr<EstimatorType> newestimator, const std::string& aname)
466 {
467  //check the name and set the MainEstimator
468  if (aname == main_estimator_name_)
469  {
470  MainEstimator = newestimator.get();
471  }
472  auto it = EstimatorMap.find(aname);
473  int n = Estimators.size();
474  if (it == EstimatorMap.end())
475  {
476  Estimators.push_back(std::move(newestimator));
477  EstimatorMap[aname] = n;
478  }
479  else
480  {
481  n = (*it).second;
482  app_log() << " EstimatorManagerBase::add replace " << aname << " estimator." << std::endl;
483  Estimators[n] = std::move(newestimator);
484  }
485  return n;
486 }
487 
489 {
490  int mine = BlockAverages.add(aname);
491  int add = TotalAverages.add(aname);
492  if (mine < Block2Total.size())
493  Block2Total[mine] = add;
494  else
495  Block2Total.push_back(add);
496  return mine;
497 }
498 
499 void EstimatorManagerBase::getData(int i, std::vector<RealType>& values)
500 {
501  int entries = TotalAveragesData.rows();
502  values.resize(entries);
503  for (int a = 0; a < entries; a++)
504  values[a] = TotalAveragesData(a, Block2Total[i]);
505 }
506 
507 } // namespace qmcplusplus
void resize(size_type n, Type_t val=Type_t())
Resize the container.
Definition: OhmmsVector.h:166
int add(std::unique_ptr< EstimatorType > newestimator, const std::string &aname)
add an Estimator
double elapsed() const
Definition: Timer.h:30
ScalarEstimatorBase * MainEstimator
pointer to the primary ScalarEstimatorBase
EstimatorType * getEstimator(const std::string &a)
return a pointer to the estimator aname
int sizeOfCollectables() const
return the size of collectables
void restart()
Definition: Timer.h:29
void reduce(T &)
A set of walkers that are to be advanced by Metropolis Monte Carlo.
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
void accumulate(MCWalkerConfiguration &W)
accumulate the measurements
int rank() const
return the rank
Definition: Communicate.h:116
RealType BlockWeight
total weight accumulated in a block
size_t getActiveWalkers() const
return the number of active walkers
int weightInd
index for the block weight PropertyCache(weightInd)
int acceptInd
index for the acceptance rate PropertyCache(acceptInd)
RecordNamedProperty< RealType > BlockProperties
manager of property data
std::ostream & app_log()
Definition: OutputManager.h:65
ScalarEstimatorBase::accumulator_type energyAccumulator
accumulator for the energy
QMCTraits::FullPrecRealType RealType
bool put(xmlNodePtr cur)
assign attributes to the set
Definition: AttributeSet.h:55
void close()
close all the open groups and file
Definition: hdf_archive.cpp:38
Collection of Local Energy Operators.
void getData(int i, std::vector< RealType > &values)
Vector< RealType > AverageCache
cached block averages of the values
int max_output_scalar_dat_
number of maximum data for a scalar.dat
int RecordCount
number of records in a block
ScalarEstimatorBase::accumulator_type varAccumulator
accumulator for the variance
Communicate * Controller
Global Communicator for a process.
Definition: Communicate.cpp:35
std::unique_ptr< std::ofstream > Archive
file handler to write data
int size() const
return the number of tasks
Definition: Communicate.h:118
size_t getGlobalNumWalkers() const
return the total number of active walkers among a MPI group
T min(T a, T b)
void addHeader(std::ostream &o)
add header to an std::ostream
void copy(const Array< T1, 3 > &src, Array< T2, 3 > &dest)
Definition: Blitz.h:639
void write(CT &anything, bool doappend)
Wrapping information on parallelism.
Definition: Communicate.h:68
std::vector< std::string > Names
void startBlock(int steps)
start a block
Specialized paritlce class for atomistic simulations.
Definition: ParticleSet.h:55
std::map< std::string, int > EstimatorMap
column map
const std::string & getName() const
Definition: Communicate.h:131
WalkerList_t::iterator iterator
FIX: a type alias of iterator for an object should not be for just one of many objects it holds...
size_type size() const
return the current size
Definition: OhmmsVector.h:162
double norm(const zVec &c)
Definition: VectorOps.h:118
virtual ~EstimatorManagerBase()
destructor
void setCollectionMode(bool collect)
set CollectSum
int add(const std::string &aname)
int BufferSize
size of the message buffer
class to handle a set of attributes of an xmlNode
Definition: AttributeSet.h:24
define convenience functions for mpi operations.
int sizeOfObservables() const
return the size of observables
int cpuInd
index for the block cpu PropertyCache(cpuInd)
Manager class of scalar estimators.
UPtrVector< EstimatorType > Estimators
estimators of simple scalars
size_type rows() const
Definition: OhmmsMatrix.h:77
RecordNamedProperty< RealType > TotalAverages
block averages: name to value
Buffer_t Collectables
observables in addition to those registered in Properties/PropertyList
Definition: ParticleSet.h:126
bool put(QMCHamiltonian &H, xmlNodePtr cur)
process xml tag associated with estimators
std::string main_estimator_name_
name of the primary estimator name
Class to manage a set of ScalarEstimators.
void setCommunicator(Communicate *c)
set the communicator
std::unique_ptr< CollectablesEstimator > Collectables
pointer to the CollectablesEstimator
void collectBlockAverages()
collect data and write
Matrix< RealType > TotalAveragesData
data accumulated over the blocks
return_type mean() const
return the mean
Definition: accumulators.h:126
bool create(const std::filesystem::path &fname, unsigned flags=H5F_ACC_TRUNC)
create a file
EstimatorType * getMainEstimator()
return a pointer to the estimator
return_type result() const
return the sum
Definition: accumulators.h:108
EstimatorManagerBase(Communicate *c=0)
default constructor
std::vector< ObservableHelper > h5desc
convenient descriptors for hdf5
Communicate * myComm
communicator to handle communication
std::vector< std::unique_ptr< BufferType > > RemoteData
void stopBlock(RealType accept, bool collectall=true)
stop a block
Vector< RealType > PropertyCache
cached block averages of properties, e.g. BlockCPU
RecordNamedProperty< RealType > BlockAverages
manager of scalar data
std::vector< int > Block2Total
index mapping between BlockAverages and TotalAverages
return_type count() const
return the count
Definition: accumulators.h:116
declare a handler of DMC branching
void bcast(T &)
size_t max_block_avg_name_
largest name in BlockAverages adding 2 characters
Abstract class for an estimator of a scalar operator.
void start(int blocks, bool record=true)
start a run
iterator end()
return the last iterator, [begin(), end())
Vector< RealType > SquaredAverageCache
cached block averages of the squared values
void add(PDT &aparam, const std::string &aname, std::vector< PDT > candidate_values={}, TagStatus status=TagStatus::OPTIONAL)
add a new attribute
Definition: AttributeSet.h:42
void getApproximateEnergyVariance(RealType &e, RealType &var)
get the average of per-block energy and variance of all the blocks Note: this is not weighted average...
Declaration of a MCWalkerConfiguration.
Declaration of QMCHamiltonian.
void flush()
flush a file
Definition: hdf_archive.h:146
iterator begin()
return the first iterator