QMCPACK
QMCCostFunctionBatched.cpp
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
//////////////////////////////////////////////////////////////////////////////////////


#include "QMCCostFunctionBatched.h"
#include "QMCDrivers/WFOpt/CostFunctionCrowdData.h"
#include "Concurrency/ParallelExecutor.hpp"
#include "Message/CommOperators.h"
#include "Utilities/FairDivide.h"
//#define QMCCOSTFUNCTION_DEBUG

namespace qmcplusplus
{
QMCCostFunctionBatched::QMCCostFunctionBatched(ParticleSet& w,
                                               TrialWaveFunction& psi,
                                               QMCHamiltonian& h,
                                               SampleStack& samples,
                                               const std::vector<int>& walkers_per_crowd,
                                               Communicate* comm)
    : QMCCostFunctionBase(w, psi, h, comm),
      samples_(samples),
      walkers_per_crowd_(walkers_per_crowd),
      check_config_timer_(createGlobalTimer("QMCCostFunctionBatched::checkConfigurations", timer_level_medium)),
      corr_sampling_timer_(createGlobalTimer("QMCCostFunctionBatched::correlatedSampling", timer_level_medium)),
      fill_timer_(createGlobalTimer("QMCCostFunctionBatched::fillOverlapHamiltonianMatrices", timer_level_medium))
{
  app_log() << " Using QMCCostFunctionBatched::QMCCostFunctionBatched" << std::endl;
}


/** Clean up the vector */
QMCCostFunctionBatched::~QMCCostFunctionBatched() = default;

void QMCCostFunctionBatched::GradCost(std::vector<Return_rt>& PGradient,
                                      const std::vector<Return_rt>& PM,
                                      Return_rt FiniteDiff)
{
  if (FiniteDiff > 0)
  {
    QMCTraits::RealType dh = 1.0 / (2.0 * FiniteDiff);
    for (int i = 0; i < NumOptimizables; i++)
    {
      for (int j = 0; j < NumOptimizables; j++)
        OptVariables[j] = PM[j];
      OptVariables[i] = PM[i] + FiniteDiff;
      QMCTraits::RealType CostPlus = this->Cost();
      OptVariables[i] = PM[i] - FiniteDiff;
      QMCTraits::RealType CostMinus = this->Cost();
      PGradient[i] = (CostPlus - CostMinus) * dh;
    }
  }
  else
  {
    for (int j = 0; j < NumOptimizables; j++)
      OptVariables[j] = PM[j];
    resetPsi();
    //evaluate new local energies and derivatives
    EffectiveWeight effective_weight = correlatedSampling(true);
    //Estimators::accumulate has been called by correlatedSampling
    curAvg_w = SumValue[SUM_E_WGT] / SumValue[SUM_WGT];
    //    Return_t curAvg2_w = curAvg_w*curAvg_w;
    curVar_w = SumValue[SUM_ESQ_WGT] / SumValue[SUM_WGT] - curAvg_w * curAvg_w;
    std::vector<Return_rt> EDtotals(NumOptimizables, 0.0);
    std::vector<Return_rt> EDtotals_w(NumOptimizables, 0.0);
    std::vector<Return_rt> E2Dtotals_w(NumOptimizables, 0.0);
    std::vector<Return_rt> URV(NumOptimizables, 0.0);
    std::vector<Return_rt> HD_avg(NumOptimizables, 0.0);
    Return_rt wgtinv = 1.0 / SumValue[SUM_WGT];
    Return_rt delE_bar = 0;
    {
      for (int iw = 0; iw < rank_local_num_samples_; iw++)
      {
        const Return_rt* restrict saved = RecordsOnNode_[iw];
        Return_rt weight = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new = saved[ENERGY_NEW];
        delE_bar += weight * std::pow(std::abs(eloc_new - EtargetEff), PowerE);
        const Return_rt* HDsaved = HDerivRecords_[iw];
        for (int pm = 0; pm < NumOptimizables; pm++)
          HD_avg[pm] += HDsaved[pm];
      }
    }
    myComm->allreduce(HD_avg);
    myComm->allreduce(delE_bar);
    for (int pm = 0; pm < NumOptimizables; pm++)
      HD_avg[pm] *= 1.0 / static_cast<Return_rt>(NumSamples);
    {
      for (int iw = 0; iw < rank_local_num_samples_; iw++)
      {
        const Return_rt* restrict saved = RecordsOnNode_[iw];
        Return_rt weight = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new = saved[ENERGY_NEW];
        Return_rt delta_l = (eloc_new - curAvg_w);
        bool ltz(true);
        if (eloc_new - EtargetEff < 0)
          ltz = false;
        Return_rt delE = std::pow(std::abs(eloc_new - EtargetEff), PowerE);
        Return_rt ddelE = PowerE * std::pow(std::abs(eloc_new - EtargetEff), PowerE - 1);
        const Return_t* Dsaved = DerivRecords_[iw];
        const Return_rt* HDsaved = HDerivRecords_[iw];
        for (int pm = 0; pm < NumOptimizables; pm++)
        {
          //From Toulouse J. Chem. Phys. 126, 084102 (2007), this is H_0j+H_j0, which are independent
          //estimates of 1/2 the energy gradient g. So g1+g2 is an estimate of g.
          EDtotals_w[pm] += weight * (HDsaved[pm] + 2.0 * std::real(Dsaved[pm]) * delta_l);
          URV[pm] += 2.0 * (eloc_new * HDsaved[pm] - curAvg * HD_avg[pm]);
          if (ltz)
            EDtotals[pm] += weight * (2.0 * std::real(Dsaved[pm]) * (delE - delE_bar) + ddelE * HDsaved[pm]);
          else
            EDtotals[pm] += weight * (2.0 * std::real(Dsaved[pm]) * (delE - delE_bar) - ddelE * HDsaved[pm]);
        }
      }
    }
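    // Editorial note (added commentary, not in the original source): EDtotals_w
    // accumulates the standard energy-gradient estimator
    //   dE/dp_i ~ < dE_L/dp_i + 2 Re(D_i) (E_L - <E>_w) >_w ,
    // i.e. the H_0i + H_i0 combination from the Toulouse reference cited above,
    // while URV feeds the unreweighted-variance (w_w) term of the cost function.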
    myComm->allreduce(EDtotals);
    myComm->allreduce(EDtotals_w);
    myComm->allreduce(URV);
    Return_rt smpinv = 1.0 / static_cast<Return_rt>(NumSamples);
    {
      for (int iw = 0; iw < rank_local_num_samples_; iw++)
      {
        const Return_rt* restrict saved = RecordsOnNode_[iw];
        Return_rt weight = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new = saved[ENERGY_NEW];
        Return_rt delta_l = (eloc_new - curAvg_w);
        Return_rt sigma_l = delta_l * delta_l;
        const Return_t* Dsaved = DerivRecords_[iw];
        const Return_rt* HDsaved = HDerivRecords_[iw];
        for (int pm = 0; pm < NumOptimizables; pm++)
        {
          E2Dtotals_w[pm] +=
              weight * 2.0 * (std::real(Dsaved[pm]) * (sigma_l - curVar_w) + delta_l * (HDsaved[pm] - EDtotals_w[pm]));
        }
      }
    }
    myComm->allreduce(E2Dtotals_w);
    for (int pm = 0; pm < NumOptimizables; pm++)
      URV[pm] *= smpinv;
    for (int j = 0; j < NumOptimizables; j++)
    {
      PGradient[j] = 0.0;
      if (std::abs(w_var) > 1.0e-10)
        PGradient[j] += w_var * E2Dtotals_w[j];
      if (std::abs(w_en) > 1.0e-10)
        PGradient[j] += w_en * EDtotals_w[j];
      if (std::abs(w_w) > 1.0e-10)
        PGradient[j] += w_w * URV[j];
      if (std::abs(w_abs) > 1.0e-10)
        PGradient[j] += w_abs * EDtotals[j];
    }

    IsValid = isEffectiveWeightValid(effective_weight);
  }
}
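
// Editorial note (added commentary, not in the original source): the
// FiniteDiff > 0 branch above is a plain central difference,
//   PGradient[i] ~ (Cost(p + h e_i) - Cost(p - h e_i)) / (2 h),  with h = FiniteDiff,
// useful as a check on the analytic path. A minimal, hypothetical use:
//
//   std::vector<Return_rt> grad(NumOptimizables);
//   GradCost(grad, current_params, 1.0e-5); // finite-difference gradient
//   GradCost(grad, current_params);         // analytic gradient via correlatedSampling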

void QMCCostFunctionBatched::getConfigurations(const std::string& aroot)
{
  auto components = H.getTWFDependentComponents();
  app_log() << " Found " << components.size() << " wavefunction dependent components in the Hamiltonian";
  if (components.size())
    for (const OperatorBase& component : components)
      app_log() << " '" << component.getName() << "'";
  app_log() << "." << std::endl;

  rank_local_num_samples_ = samples_.getNumSamples();

  if (dLogPsi.size() != rank_local_num_samples_)
  {
    delete_iter(dLogPsi.begin(), dLogPsi.end());
    delete_iter(d2LogPsi.begin(), d2LogPsi.end());
    int nptcl = W.getTotalNum();
    dLogPsi.resize(rank_local_num_samples_);
    d2LogPsi.resize(rank_local_num_samples_);
    for (int i = 0; i < rank_local_num_samples_; ++i)
      dLogPsi[i] = new ParticleGradient(nptcl);
    for (int i = 0; i < rank_local_num_samples_; ++i)
      d2LogPsi[i] = new ParticleLaplacian(nptcl);
  }
}

/** Compute number of batches and final batch size given the number of samples
 *  and a batch size.
 *  \param[in] sample_size number of samples to process.
 *  \param[in] batch_size process samples in batch_size at a time (typically the number of walkers in a crowd).
 *  \param[out] num_batches number of batches to use.
 *  \param[out] final_batch_size the last batch size. May be smaller than batch_size
 *              if the number of samples is not a multiple of the batch size.
 *
 *  There may be cases where the batch size is zero. One cause is when the number of walkers per
 *  rank is less than the number of crowds.
 */
void compute_batch_parameters(int sample_size, int batch_size, int& num_batches, int& final_batch_size)
{
  if (batch_size == 0)
    num_batches = 0;
  else
    num_batches = sample_size / batch_size;

  final_batch_size = batch_size;
  if (batch_size != 0 && sample_size % batch_size != 0)
  {
    num_batches += 1;
    final_batch_size = sample_size % batch_size;
  }
}
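
// Illustrative sketch (hypothetical values, not part of the original file):
// 10 samples processed with a crowd of 4 walkers come out as batches of 4, 4, 2:
//
//   int num_batches, final_batch_size;
//   compute_batch_parameters(10, 4, num_batches, final_batch_size);
//   // num_batches == 3, final_batch_size == 2
//
// With batch_size == 0 (fewer walkers per rank than crowds) num_batches is 0,
// so the per-crowd loops below simply do no work for that crowd.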

/** evaluate everything before optimization */
void QMCCostFunctionBatched::checkConfigurations(EngineHandle& handle)
{
  ScopedTimer tmp_timer(check_config_timer_);

  RealType et_tot = 0.0;
  RealType e2_tot = 0.0;

  // Ensure number of samples did not change after getConfigurations
  assert(rank_local_num_samples_ == samples_.getNumSamples());

  if (RecordsOnNode_.size1() == 0)
  {
    RecordsOnNode_.resize(rank_local_num_samples_, SUM_INDEX_SIZE);
    if (needGrads)
    {
      DerivRecords_.resize(rank_local_num_samples_, NumOptimizables);
      HDerivRecords_.resize(rank_local_num_samples_, NumOptimizables);
    }
  }
  else if (RecordsOnNode_.size1() != rank_local_num_samples_)
  {
    RecordsOnNode_.resize(rank_local_num_samples_, SUM_INDEX_SIZE);
    if (needGrads)
    {
      DerivRecords_.resize(rank_local_num_samples_, NumOptimizables);
      HDerivRecords_.resize(rank_local_num_samples_, NumOptimizables);
    }
  }
  // synchronize the random number generator with the node
  (*MoverRng[0]) = (*RngSaved[0]);
  H.setRandomGenerator(MoverRng[0]);


  // Create crowd-local storage for evaluation
  outputManager.pause();
  const size_t opt_num_crowds = walkers_per_crowd_.size();
  std::vector<std::unique_ptr<CostFunctionCrowdData>> opt_eval(opt_num_crowds);
  for (int i = 0; i < opt_num_crowds; i++)
    opt_eval[i] = std::make_unique<CostFunctionCrowdData>(walkers_per_crowd_[i], W, Psi, H, *MoverRng[0]);
  outputManager.resume();


  // TODO - walkers per crowd may not be evenly divided, so the samples per crowd
  // might need to be divided differently for better load balancing.

  // Divide samples among the crowds
  std::vector<int> samples_per_crowd_offsets(opt_num_crowds + 1);
  FairDivide(rank_local_num_samples_, opt_num_crowds, samples_per_crowd_offsets);

  handle.prepareSampling(NumOptimizables, rank_local_num_samples_);
  // lambda to execute on each crowd
  auto evalOptConfig = [](int crowd_id, UPtrVector<CostFunctionCrowdData>& opt_crowds,
                          const std::vector<int>& samples_per_crowd_offsets, const std::vector<int>& walkers_per_crowd,
                          std::vector<ParticleGradient*>& gradPsi, std::vector<ParticleLaplacian*>& lapPsi,
                          Matrix<Return_rt>& RecordsOnNode, Matrix<Return_t>& DerivRecords,
                          Matrix<Return_rt>& HDerivRecords, const SampleStack& samples, opt_variables_type& optVars,
                          bool needGrads, EngineHandle& handle) {
    CostFunctionCrowdData& opt_data = *opt_crowds[crowd_id];

    const int local_samples = samples_per_crowd_offsets[crowd_id + 1] - samples_per_crowd_offsets[crowd_id];
    int num_batches;
    int final_batch_size;

    compute_batch_parameters(local_samples, walkers_per_crowd[crowd_id], num_batches, final_batch_size);

    for (int inb = 0; inb < num_batches; inb++)
    {
      int current_batch_size = walkers_per_crowd[crowd_id];
      if (inb == num_batches - 1)
        current_batch_size = final_batch_size;

      const int base_sample_index = inb * walkers_per_crowd[crowd_id] + samples_per_crowd_offsets[crowd_id];

      auto wf_list_no_leader = opt_data.get_wf_list(current_batch_size);
      auto p_list_no_leader = opt_data.get_p_list(current_batch_size);
      auto h_list_no_leader = opt_data.get_h_list(current_batch_size);
      const RefVectorWithLeader<ParticleSet> p_list(p_list_no_leader[0], p_list_no_leader);
      const RefVectorWithLeader<TrialWaveFunction> wf_list(wf_list_no_leader[0], wf_list_no_leader);
      const RefVectorWithLeader<QMCHamiltonian> h_list(h_list_no_leader[0], h_list_no_leader);

      ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(opt_data.getSharedResource().pset_res, p_list);
      ResourceCollectionTeamLock<TrialWaveFunction> twfs_res_lock(opt_data.getSharedResource().twf_res, wf_list);
      ResourceCollectionTeamLock<QMCHamiltonian> hams_res_lock(opt_data.getSharedResource().ham_res, h_list);

      auto ref_dLogPsi = convertPtrToRefVectorSubset(gradPsi, base_sample_index, current_batch_size);
      auto ref_d2LogPsi = convertPtrToRefVectorSubset(lapPsi, base_sample_index, current_batch_size);

      // Load samples into the crowd data
      for (int ib = 0; ib < current_batch_size; ib++)
      {
        samples.loadSample(p_list[ib], base_sample_index + ib);

        // Set the RNG used in QMCHamiltonian. This is used to offset the grid
        // during spherical integration in the non-local pseudopotential.
        // The RNG state gets reset to the same starting point in correlatedSampling
        // to use the same grid offsets in the correlated sampling values.
        // Currently this code sets the RNG to the same state for every configuration
        // on this node. Every configuration of electrons is different, and so in
        // theory using the same spherical integration grid should not be a problem.
        // If this needs to be changed, one possibility is to advance the RNG state
        // differently for each configuration. Make sure the same initialization is
        // performed in correlatedSampling.
        *opt_data.get_rng_ptr_list()[ib] = opt_data.get_rng_save();
        h_list[ib].setRandomGenerator(opt_data.get_rng_ptr_list()[ib].get());
      }

      // Compute distance tables.
      ParticleSet::mw_update(p_list);

      // Log psi and prepare for difference the log psi
      opt_data.zero_log_psi();

      TrialWaveFunction::mw_evaluateDeltaLogSetup(wf_list, p_list, opt_data.get_log_psi_fixed(),
                                                  opt_data.get_log_psi_opt(), ref_dLogPsi, ref_d2LogPsi);

      std::vector<QMCHamiltonian::FullPrecRealType> energy_list;
      if (needGrads)
      {
        // Compute parameter derivatives of the wavefunction
        const size_t nparams = optVars.size();
        RecordArray<Return_t> dlogpsi_array(current_batch_size, nparams);
        RecordArray<Return_t> dhpsioverpsi_array(current_batch_size, nparams);

        energy_list = QMCHamiltonian::mw_evaluateValueAndDerivatives(h_list, wf_list, p_list, optVars, dlogpsi_array,
                                                                     dhpsioverpsi_array);

        handle.takeSample(energy_list, dlogpsi_array, dhpsioverpsi_array, base_sample_index);

        for (int ib = 0; ib < current_batch_size; ib++)
        {
          const int is = base_sample_index + ib;
          for (int j = 0; j < nparams; j++)
          {
            //dlogpsi is in general complex if psi is complex.
            DerivRecords[is][j] = dlogpsi_array[ib][j];
            //but E_L and d E_L/dc are real if c is real.
            HDerivRecords[is][j] = std::real(dhpsioverpsi_array[ib][j]);
          }
          RecordsOnNode[is][LOGPSI_FIXED] = opt_data.get_log_psi_fixed()[ib];
          RecordsOnNode[is][LOGPSI_FREE] = opt_data.get_log_psi_opt()[ib];
        }
      }
      else
      { // Energy
        energy_list = QMCHamiltonian::mw_evaluate(h_list, wf_list, p_list);
      }
      for (int ib = 0; ib < current_batch_size; ib++)
      {
        const int is = base_sample_index + ib;
        auto etmp = energy_list[ib];
        opt_data.get_e0() += etmp;
        opt_data.get_e2() += etmp * etmp;

        RecordsOnNode[is][ENERGY_NEW] = etmp;
        RecordsOnNode[is][ENERGY_TOT] = etmp;
        RecordsOnNode[is][REWEIGHT] = 1.0;
        RecordsOnNode[is][ENERGY_FIXED] = etmp;

        const auto twf_dependent_components = h_list[ib].getTWFDependentComponents();
        for (const OperatorBase& component : twf_dependent_components)
          RecordsOnNode[is][ENERGY_FIXED] -= component.getValue();
      }
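      // Editorial note (added commentary, not in the original source):
      // ENERGY_FIXED keeps the part of the local energy that cannot change when
      // only wave-function parameters move; the TWF-dependent contributions are
      // subtracted here and re-evaluated on top of it in correlatedSampling.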
    }
  };

  ParallelExecutor<> crowd_tasks;
  crowd_tasks(opt_num_crowds, evalOptConfig, opt_eval, samples_per_crowd_offsets, walkers_per_crowd_, dLogPsi,
              d2LogPsi, RecordsOnNode_, DerivRecords_, HDerivRecords_, samples_, OptVariablesForPsi, needGrads, handle);
  // Sum energy values over crowds
  for (int i = 0; i < opt_eval.size(); i++)
  {
    et_tot += opt_eval[i]->get_e0();
    e2_tot += opt_eval[i]->get_e2();
  }

  //  app_log() << "  VMC Efavg = " << eft_tot / static_cast<Return_t>(wPerNode[NumThreads]) << std::endl;
  //Need to sum over the processors
  std::vector<Return_rt> etemp(3);
  etemp[0] = et_tot;
  etemp[1] = static_cast<Return_rt>(rank_local_num_samples_);
  etemp[2] = e2_tot;
  // Sum energy values over nodes
  myComm->allreduce(etemp);
  Etarget = static_cast<Return_rt>(etemp[0] / etemp[1]);
  NumSamples = static_cast<int>(etemp[1]);
  app_log() << "  VMC Eavg = " << Etarget << std::endl;
  app_log() << "  VMC Evar = " << etemp[2] / etemp[1] - Etarget * Etarget << std::endl;
  app_log() << "  Total weights = " << etemp[1] << std::endl;

  handle.finishSampling();

  app_log().flush();

  ReportCounter = 0;
  IsValid = true;

  //collect SumValue for computedCost
  SumValue[SUM_WGT] = etemp[1];
  SumValue[SUM_WGTSQ] = etemp[1];
  SumValue[SUM_E_WGT] = etemp[0];
  SumValue[SUM_ESQ_WGT] = etemp[2];
  SumValue[SUM_E_BARE] = etemp[0];
  SumValue[SUM_ESQ_BARE] = etemp[2];
  SumValue[SUM_ABSE_BARE] = 0.0;
}
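
// Editorial sketch (added commentary, not in the original source): with all
// reweighting factors initialized to 1, SUM_WGT equals the global sample count,
// and the quantities logged above are
//   Eavg = SUM_E_WGT / SUM_WGT,   Evar = SUM_ESQ_WGT / SUM_WGT - Eavg^2,
// the same ratios the cost evaluation later rebuilds from SumValue.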

#ifdef HAVE_LMY_ENGINE
void QMCCostFunctionBatched::engine_checkConfigurations(cqmc::engine::LMYEngine<Return_t>* EngineObj,
                                                        DescentEngine& descentEngineObj,
                                                        const std::string& MinMethod)
{
  APP_ABORT("LMYEngine not implemented with batch optimization");
}
#endif


void QMCCostFunctionBatched::resetPsi(bool final_reset)
{
  if (OptVariables.size() < OptVariablesForPsi.size())
    for (int i = 0; i < equalVarMap.size(); ++i)
      OptVariablesForPsi[equalVarMap[i][0]] = OptVariables[equalVarMap[i][1]];
  else
    for (int i = 0; i < OptVariables.size(); ++i)
      OptVariablesForPsi[i] = OptVariables[i];

  //cout << "######### QMCCostFunctionBatched::resetPsi " << std::endl;
  //OptVariablesForPsi.print(std::cout);
  //cout << "-------------------------------------- " << std::endl;
  resetOptimizableObjects(Psi, OptVariablesForPsi);
}

QMCCostFunctionBatched::EffectiveWeight QMCCostFunctionBatched::correlatedSampling(bool needGrad)
{
  ScopedTimer tmp_timer(corr_sampling_timer_);

  {
    // synchronize the random number generator with the node
    (*MoverRng[0]) = (*RngSaved[0]);
    H.setRandomGenerator(MoverRng[0]);
  }

  //Return_rt wgt_node = 0.0, wgt_node2 = 0.0;
  Return_rt wgt_tot = 0.0;
  Return_rt wgt_tot2 = 0.0;

  // Ensure number of samples did not change after getConfiguration
  assert(rank_local_num_samples_ == samples_.getNumSamples());

  Return_rt inv_n_samples = 1.0 / samples_.getGlobalNumSamples();

  const size_t opt_num_crowds = walkers_per_crowd_.size();
  // Divide samples among crowds
  std::vector<int> samples_per_crowd_offsets(opt_num_crowds + 1);
  FairDivide(rank_local_num_samples_, opt_num_crowds, samples_per_crowd_offsets);

  // Create crowd-local storage for evaluation
  outputManager.pause();
  std::vector<std::unique_ptr<CostFunctionCrowdData>> opt_eval(opt_num_crowds);
  for (int i = 0; i < opt_num_crowds; i++)
    opt_eval[i] = std::make_unique<CostFunctionCrowdData>(walkers_per_crowd_[i], W, Psi, H, *MoverRng[0]);
  outputManager.resume();

  // lambda to execute on each crowd
  auto evalOptCorrelated =
      [](int crowd_id, UPtrVector<CostFunctionCrowdData>& opt_crowds, const std::vector<int>& samples_per_crowd_offsets,
         const std::vector<int>& walkers_per_crowd, std::vector<ParticleGradient*>& gradPsi,
         std::vector<ParticleLaplacian*>& lapPsi, Matrix<Return_rt>& RecordsOnNode, Matrix<Return_t>& DerivRecords,
         Matrix<Return_rt>& HDerivRecords, const SampleStack& samples, const opt_variables_type& optVars,
         bool compute_all_from_scratch, Return_rt vmc_or_dmc, bool needGrad) {
        CostFunctionCrowdData& opt_data = *opt_crowds[crowd_id];

        const int local_samples = samples_per_crowd_offsets[crowd_id + 1] - samples_per_crowd_offsets[crowd_id];

        int num_batches;
        int final_batch_size;
        compute_batch_parameters(local_samples, walkers_per_crowd[crowd_id], num_batches, final_batch_size);

        for (int inb = 0; inb < num_batches; inb++)
        {
          int current_batch_size = walkers_per_crowd[crowd_id];
          if (inb == num_batches - 1)
          {
            current_batch_size = final_batch_size;
          }

          const int base_sample_index = inb * walkers_per_crowd[crowd_id] + samples_per_crowd_offsets[crowd_id];

          auto p_list_no_leader = opt_data.get_p_list(current_batch_size);
          auto wf_list_no_leader = opt_data.get_wf_list(current_batch_size);
          auto h0_list_no_leader = opt_data.get_h0_list(current_batch_size);
          const RefVectorWithLeader<ParticleSet> p_list(p_list_no_leader[0], p_list_no_leader);
          const RefVectorWithLeader<TrialWaveFunction> wf_list(wf_list_no_leader[0], wf_list_no_leader);
          const RefVectorWithLeader<QMCHamiltonian> h0_list(h0_list_no_leader[0], h0_list_no_leader);

          ResourceCollectionTeamLock<ParticleSet> mw_pset_lock(opt_data.getSharedResource().pset_res, p_list);
          ResourceCollectionTeamLock<TrialWaveFunction> twfs_res_lock(opt_data.getSharedResource().twf_res, wf_list);
          ResourceCollectionTeamLock<QMCHamiltonian> hams_res_lock(opt_data.get_h0_res(), h0_list);

          // Load this batch of samples into the crowd data
          for (int ib = 0; ib < current_batch_size; ib++)
          {
            samples.loadSample(p_list[ib], base_sample_index + ib);
            // Copy the saved RNG state
            *opt_data.get_rng_ptr_list()[ib] = opt_data.get_rng_save();
            h0_list[ib].setRandomGenerator(opt_data.get_rng_ptr_list()[ib].get());
          }

          // Update distance tables, etc for the loaded sample positions
          ParticleSet::mw_update(p_list, true);

          // Evaluate difference in log psi

          std::vector<std::unique_ptr<ParticleSet::ParticleGradient>> dummyG_ptr_list;
          std::vector<std::unique_ptr<ParticleSet::ParticleLaplacian>> dummyL_ptr_list;
          RefVector<ParticleSet::ParticleGradient> dummyG_list;
          RefVector<ParticleSet::ParticleLaplacian> dummyL_list;
          if (compute_all_from_scratch)
          {
            int nptcl = gradPsi[0]->size();
            dummyG_ptr_list.reserve(current_batch_size);
            dummyL_ptr_list.reserve(current_batch_size);
            for (int i = 0; i < current_batch_size; i++)
            {
              dummyG_ptr_list.emplace_back(std::make_unique<ParticleGradient>(nptcl));
              dummyL_ptr_list.emplace_back(std::make_unique<ParticleLaplacian>(nptcl));
            }
            dummyG_list = convertUPtrToRefVector(dummyG_ptr_list);
            dummyL_list = convertUPtrToRefVector(dummyL_ptr_list);
          }
          opt_data.zero_log_psi();

          TrialWaveFunction::mw_evaluateDeltaLog(wf_list, p_list, opt_data.get_log_psi_opt(), dummyG_list, dummyL_list,
                                                 compute_all_from_scratch);

          Return_rt inv_n_samples = 1.0 / samples.getGlobalNumSamples();

          for (int ib = 0; ib < current_batch_size; ib++)
          {
            const int is = base_sample_index + ib;
            wf_list[ib].G += *gradPsi[is];
            wf_list[ib].L += *lapPsi[is];
            // This is needed to get the KE correct in QMCHamiltonian::mw_evaluate below
            p_list[ib].G += *gradPsi[is];
            p_list[ib].L += *lapPsi[is];
            Return_rt weight = vmc_or_dmc * (opt_data.get_log_psi_opt()[ib] - RecordsOnNode[is][LOGPSI_FREE]);
            RecordsOnNode[is][REWEIGHT] = weight;
            // move to opt_data
            opt_data.get_wgt() += inv_n_samples * weight;
            opt_data.get_wgt2() += inv_n_samples * weight * weight;
          }
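          // Editorial note (added commentary, not in the original source): at
          // this point REWEIGHT holds the *log* of the reweighting factor,
          // vmc_or_dmc * (ln psi_new - ln psi_old); for VMC sampling this is
          // ln |psi_new/psi_old|^2. It is exponentiated, normalized by the mean
          // log weight, and clamped to MaxWeight after the crowd reduction below.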

          if (needGrad)
          {
            // Parameter derivatives
            const size_t nparams = optVars.size();
            RecordArray<Return_t> dlogpsi_array(current_batch_size, nparams);
            RecordArray<Return_t> dhpsioverpsi_array(current_batch_size, nparams);

            // Energy
            auto energy_list = QMCHamiltonian::mw_evaluateValueAndDerivatives(h0_list, wf_list, p_list, optVars,
                                                                              dlogpsi_array, dhpsioverpsi_array);

            for (int ib = 0; ib < current_batch_size; ib++)
            {
              const int is = base_sample_index + ib;
              auto etmp = energy_list[ib];
              RecordsOnNode[is][ENERGY_NEW] = etmp + RecordsOnNode[is][ENERGY_FIXED];
              for (int j = 0; j < nparams; j++)
              {
                if (optVars.recompute(j))
                {
                  //In general, dlogpsi is complex.
                  DerivRecords[is][j] = dlogpsi_array[ib][j];
                  //However, E_L is always real, and so d E_L/dc is real, provided c is real.
                  HDerivRecords[is][j] = std::real(dhpsioverpsi_array[ib][j]);
                }
              }
            }
          }
          else
          {
            // Just energy needed if no gradients
            auto energy_list = QMCHamiltonian::mw_evaluate(h0_list, wf_list, p_list);
            for (int ib = 0; ib < current_batch_size; ib++)
            {
              const int is = base_sample_index + ib;
              auto etmp = energy_list[ib];
              RecordsOnNode[is][ENERGY_NEW] = etmp + RecordsOnNode[is][ENERGY_FIXED];
            }
          }
        }
      };

  //if we have more than KE depending on TWF, TWF must be fully recomputed.
  const bool compute_all_from_scratch = H.getTWFDependentComponents().size() > 1;
  ParallelExecutor<> crowd_tasks;
  crowd_tasks(opt_num_crowds, evalOptCorrelated, opt_eval, samples_per_crowd_offsets, walkers_per_crowd_, dLogPsi,
              d2LogPsi, RecordsOnNode_, DerivRecords_, HDerivRecords_, samples_, OptVariablesForPsi,
              compute_all_from_scratch, vmc_or_dmc, needGrad);
  // Sum weights over crowds
  for (int i = 0; i < opt_eval.size(); i++)
  {
    wgt_tot += opt_eval[i]->get_wgt();
    wgt_tot2 += opt_eval[i]->get_wgt2();
  }

  //this is MPI barrier
  myComm->barrier();
  //collect the total weight for normalization and apply maximum weight
  myComm->allreduce(wgt_tot);
  myComm->allreduce(wgt_tot2);
  //  app_log()<<"Before Purge"<<wgt_tot<<" "<<wgt_tot2<< std::endl;
  Return_rt wgtnorm = (wgt_tot == 0) ? 0 : wgt_tot;
  wgt_tot = 0.0;
  {
    for (int iw = 0; iw < rank_local_num_samples_; iw++)
    {
      Return_rt* restrict saved = RecordsOnNode_[iw];
      saved[REWEIGHT] =
          std::min(std::exp(saved[REWEIGHT] - wgtnorm), std::numeric_limits<Return_rt>::max() * (RealType)0.1);
      wgt_tot += inv_n_samples * saved[REWEIGHT];
    }
  }
  myComm->allreduce(wgt_tot);
  //  app_log()<<"During Purge"<<wgt_tot<<" "<< std::endl;
  wgtnorm = (wgt_tot == 0) ? 1 : 1.0 / wgt_tot;
  wgt_tot = 0.0;
  {
    for (int iw = 0; iw < rank_local_num_samples_; iw++)
    {
      Return_rt* restrict saved = RecordsOnNode_[iw];
      saved[REWEIGHT] = std::min(saved[REWEIGHT] * wgtnorm, MaxWeight);
      wgt_tot += inv_n_samples * saved[REWEIGHT];
    }
  }
  myComm->allreduce(wgt_tot);
  //  app_log()<<"After Purge"<<wgt_tot<<" "<< std::endl;
  for (int i = 0; i < SumValue.size(); i++)
    SumValue[i] = 0.0;
  {
    for (int iw = 0; iw < rank_local_num_samples_; iw++)
    {
      const Return_rt* restrict saved = RecordsOnNode_[iw];
      //      Return_t weight=saved[REWEIGHT]*wgt_tot;
      Return_rt eloc_new = saved[ENERGY_NEW];
      Return_rt delE = std::pow(std::abs(eloc_new - EtargetEff), PowerE);
      SumValue[SUM_E_BARE] += eloc_new;
      SumValue[SUM_ESQ_BARE] += eloc_new * eloc_new;
      SumValue[SUM_ABSE_BARE] += delE;
      SumValue[SUM_E_WGT] += eloc_new * saved[REWEIGHT];
      SumValue[SUM_ESQ_WGT] += eloc_new * eloc_new * saved[REWEIGHT];
      SumValue[SUM_ABSE_WGT] += delE * saved[REWEIGHT];
      SumValue[SUM_WGT] += saved[REWEIGHT];
      SumValue[SUM_WGTSQ] += saved[REWEIGHT] * saved[REWEIGHT];
    }
  }
  //collect everything
  myComm->allreduce(SumValue);

  return SumValue[SUM_WGT] * SumValue[SUM_WGT] / (SumValue[SUM_WGTSQ] * samples_.getGlobalNumSamples());
}
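
// Editorial sketch (added commentary, not in the original source): the return
// value above is the standard effective-sample-size ratio for correlated
// sampling,
//   N_eff / N = (sum_i w_i)^2 / (N * sum_i w_i^2),
// equal to 1 when all reweighting factors are identical and approaching 0 when
// a few samples dominate; isEffectiveWeightValid() tests it to decide whether
// the proposed parameters strayed too far from the sampled wave function.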


// Construct the overlap and Hamiltonian matrices for the linear method
// A sum over samples. Inputs are
//   DerivRecords - derivative of log psi ( d ln (psi) / dp = 1/psi * d psi / dp )
//   HDerivRecords - derivative of Hamiltonian
//   RecordsOnNode - energies and weights (for reweighting)
//   SumValue - sums of energies and weights
// Outputs
//   Left - Hamiltonian matrix
//   Right - overlap matrix
//

QMCCostFunctionBatched::Return_rt QMCCostFunctionBatched::fillOverlapHamiltonianMatrices(Matrix<Return_rt>& Left,
                                                                                         Matrix<Return_rt>& Right)
{
  ScopedTimer tmp_timer(fill_timer_);

  Right = 0.0;
  Left = 0.0;

  curAvg_w = SumValue[SUM_E_WGT] / SumValue[SUM_WGT];
  Return_rt curAvg2_w = SumValue[SUM_ESQ_WGT] / SumValue[SUM_WGT];
  RealType V_avg = curAvg2_w - curAvg_w * curAvg_w;
  std::vector<Return_t> D_avg(getNumParams(), 0.0);
  Return_rt wgtinv = 1.0 / SumValue[SUM_WGT];

  for (int iw = 0; iw < rank_local_num_samples_; iw++)
  {
    const Return_rt* restrict saved = RecordsOnNode_[iw];
    Return_rt weight = saved[REWEIGHT] * wgtinv;
    const Return_t* Dsaved = DerivRecords_[iw];
    for (int pm = 0; pm < getNumParams(); pm++)
    {
      D_avg[pm] += Dsaved[pm] * weight;
    }
  }

  myComm->allreduce(D_avg);

  for (int iw = 0; iw < rank_local_num_samples_; iw++)
  {
    const Return_rt* restrict saved = RecordsOnNode_[iw];
    Return_rt weight = saved[REWEIGHT] * wgtinv;
    Return_rt eloc_new = saved[ENERGY_NEW];
    const Return_t* Dsaved = DerivRecords_[iw];
    const Return_rt* HDsaved = HDerivRecords_[iw];

    size_t opt_num_crowds = walkers_per_crowd_.size();
    std::vector<int> params_per_crowd(opt_num_crowds + 1);
    FairDivide(getNumParams(), opt_num_crowds, params_per_crowd);


    auto constructMatrices = [](int crowd_id, std::vector<int>& crowd_ranges, int numParams, const Return_t* Dsaved,
                                const Return_rt* HDsaved, Return_rt weight, Return_rt eloc_new, RealType V_avg,
                                std::vector<Return_t>& D_avg, RealType b2, RealType curAvg_w, Matrix<Return_rt>& Left,
                                Matrix<Return_rt>& Right) {
      int local_pm_start = crowd_ranges[crowd_id];
      int local_pm_end = crowd_ranges[crowd_id + 1];

      for (int pm = local_pm_start; pm < local_pm_end; pm++)
      {
        Return_t wfe = (HDsaved[pm] + (Dsaved[pm] - D_avg[pm]) * eloc_new) * weight;
        Return_t wfd = (Dsaved[pm] - D_avg[pm]) * weight;
        Return_t vterm = HDsaved[pm] * (eloc_new - curAvg_w) +
            (Dsaved[pm] - D_avg[pm]) * eloc_new * (eloc_new - RealType(2.0) * curAvg_w);
        // Variance
        Left(0, pm + 1) += b2 * std::real(vterm) * weight;
        Left(pm + 1, 0) += b2 * std::real(vterm) * weight;
        // Hamiltonian
        Left(0, pm + 1) += (1 - b2) * std::real(wfe);
        Left(pm + 1, 0) += (1 - b2) * std::real(wfd) * eloc_new;
        for (int pm2 = 0; pm2 < numParams; pm2++)
        {
          // Hamiltonian
          Left(pm + 1, pm2 + 1) +=
              std::real((1 - b2) * std::conj(wfd) * (HDsaved[pm2] + (Dsaved[pm2] - D_avg[pm2]) * eloc_new));
          // Overlap
          RealType ovlij = std::real(std::conj(wfd) * (Dsaved[pm2] - D_avg[pm2]));
          Right(pm + 1, pm2 + 1) += ovlij;
          // Variance
          RealType varij = weight *
              std::real((HDsaved[pm] - RealType(2.0) * std::conj(Dsaved[pm] - D_avg[pm]) * eloc_new) *
                        (HDsaved[pm2] - RealType(2.0) * (Dsaved[pm2] - D_avg[pm2]) * eloc_new));
          Left(pm + 1, pm2 + 1) += b2 * (varij + V_avg * ovlij);
        }
      }
    };

    ParallelExecutor<> crowd_tasks;
    crowd_tasks(opt_num_crowds, constructMatrices, params_per_crowd, getNumParams(), Dsaved, HDsaved, weight, eloc_new,
                V_avg, D_avg, w_beta, curAvg_w, Left, Right);
  }
  myComm->allreduce(Right);
  myComm->allreduce(Left);
  Left(0, 0) = (1 - w_beta) * curAvg_w + w_beta * V_avg;
  Right(0, 0) = 1.0;

  return 1.0;
}
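
// Editorial sketch (added commentary, not in the original source): with
// D_i = d ln(psi)/dp_i and w_beta the energy/variance mixing weight, the blocks
// assembled above are
//   Right(i+1, j+1) = S_ij = < conj(D_i - <D_i>) (D_j - <D_j>) >_w   (overlap)
//   Left = (1 - w_beta) * H + w_beta * (Var + V_avg * S)
// where H is the nonsymmetric Hamiltonian matrix built from HDsaved and the
// deviations (Dsaved - D_avg), and Left(0,0) mixes the current weighted energy
// and variance with the same w_beta weighting.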
} // namespace qmcplusplus