QMCPACK
QMCCostFunction.cpp
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
//////////////////////////////////////////////////////////////////////////////////////

#include "QMCCostFunction.h"
#include "Message/CommOperators.h"
//#define QMCCOSTFUNCTION_DEBUG

namespace qmcplusplus
{
QMCCostFunction::QMCCostFunction(MCWalkerConfiguration& w, TrialWaveFunction& psi, QMCHamiltonian& h, Communicate* comm)
    : QMCCostFunctionBase(w, psi, h, comm),
      fill_timer_(createGlobalTimer("QMCCostFunction::fillOverlapHamiltonianMatrices", timer_level_medium))
{
  CSWeight = 1.0;
  app_log() << " Using QMCCostFunction::QMCCostFunction" << std::endl;
}


/** Clean up the vectors */
QMCCostFunction::~QMCCostFunction()
{
  delete_iter(RecordsOnNode.begin(), RecordsOnNode.end());
  delete_iter(DerivRecords.begin(), DerivRecords.end());
  delete_iter(HDerivRecords.begin(), HDerivRecords.end());
}

void QMCCostFunction::GradCost(std::vector<Return_rt>& PGradient,
                               const std::vector<Return_rt>& PM,
                               Return_rt FiniteDiff)
{
  if (FiniteDiff > 0)
  {
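    // Central finite difference in each parameter:
    // dC/dp_i ~ [C(p_i + FiniteDiff) - C(p_i - FiniteDiff)] / (2 * FiniteDiff),
    // with all other parameters reset to PM on every pass.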
    QMCTraits::RealType dh = 1.0 / (2.0 * FiniteDiff);
    for (int i = 0; i < NumOptimizables; i++)
    {
      for (int j = 0; j < NumOptimizables; j++)
        OptVariables[j] = PM[j];
      OptVariables[i]               = PM[i] + FiniteDiff;
      QMCTraits::RealType CostPlus  = this->Cost();
      OptVariables[i]               = PM[i] - FiniteDiff;
      QMCTraits::RealType CostMinus = this->Cost();
      PGradient[i]                  = (CostPlus - CostMinus) * dh;
    }
  }
  else
  {
    for (int j = 0; j < NumOptimizables; j++)
      OptVariables[j] = PM[j];
    resetPsi();
    //evaluate new local energies and derivatives
    EffectiveWeight effective_weight = correlatedSampling(true);
    //Estimators::accumulate has been called by correlatedSampling
    // Return_t curAvg2_w = curAvg_w*curAvg_w;
    std::vector<Return_rt> EDtotals(NumOptimizables, 0.0);
    std::vector<Return_rt> EDtotals_w(NumOptimizables, 0.0);
    std::vector<Return_rt> E2Dtotals_w(NumOptimizables, 0.0);
    std::vector<Return_rt> URV(NumOptimizables, 0.0);
    std::vector<Return_rt> HD_avg(NumOptimizables, 0.0);
    Return_rt wgtinv   = 1.0 / SumValue[SUM_WGT];
    Return_rt delE_bar = 0;
    for (int ip = 0; ip < NumThreads; ip++)
    {
      int nw = wClones[ip]->numSamples();
      for (int iw = 0; iw < nw; iw++)
      {
        const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
        Return_rt weight                = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new              = saved[ENERGY_NEW];
        delE_bar += weight * std::pow(std::abs(eloc_new - EtargetEff), PowerE);
        const Return_rt* HDsaved = (*HDerivRecords[ip])[iw];
        for (int pm = 0; pm < NumOptimizables; pm++)
          HD_avg[pm] += HDsaved[pm];
      }
    }
    myComm->allreduce(HD_avg);
    myComm->allreduce(delE_bar);
    for (int pm = 0; pm < NumOptimizables; pm++)
      HD_avg[pm] *= 1.0 / static_cast<Return_rt>(NumSamples);
    for (int ip = 0; ip < NumThreads; ip++)
    {
      int nw = wClones[ip]->numSamples();
      for (int iw = 0; iw < nw; iw++)
      {
        const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
        Return_rt weight                = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new              = saved[ENERGY_NEW];
        Return_rt delta_l               = (eloc_new - curAvg_w);
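        // ltz records the sign of (E_new - E_target): true when it is non-negative.
        // It selects the sign of the d|E - E_target|^PowerE/dp term below,
        // since d|x|^p/dx = p * |x|^(p-1) * sign(x).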
        bool ltz(true);
        if (eloc_new - EtargetEff < 0)
          ltz = false;
        Return_rt delE           = std::pow(std::abs(eloc_new - EtargetEff), PowerE);
        Return_rt ddelE          = PowerE * std::pow(std::abs(eloc_new - EtargetEff), PowerE - 1);
        const Return_t* Dsaved   = (*DerivRecords[ip])[iw];
        const Return_rt* HDsaved = (*HDerivRecords[ip])[iw];
        for (int pm = 0; pm < NumOptimizables; pm++)
        {
          EDtotals_w[pm] += weight * (HDsaved[pm] + 2.0 * std::real(Dsaved[pm]) * delta_l);
          URV[pm] += 2.0 * (eloc_new * HDsaved[pm] - curAvg * HD_avg[pm]);
          if (ltz)
            EDtotals[pm] += weight * (2.0 * std::real(Dsaved[pm]) * (delE - delE_bar) + ddelE * HDsaved[pm]);
          else
            EDtotals[pm] += weight * (2.0 * std::real(Dsaved[pm]) * (delE - delE_bar) - ddelE * HDsaved[pm]);
        }
      }
    }
    myComm->allreduce(EDtotals);
    myComm->allreduce(EDtotals_w);
    myComm->allreduce(URV);
    Return_rt smpinv = 1.0 / static_cast<Return_rt>(NumSamples);
    for (int ip = 0; ip < NumThreads; ip++)
    {
      int nw = wClones[ip]->numSamples();
      for (int iw = 0; iw < nw; iw++)
      {
        const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
        Return_rt weight                = saved[REWEIGHT] * wgtinv;
        Return_rt eloc_new              = saved[ENERGY_NEW];
        Return_rt delta_l               = (eloc_new - curAvg_w);
        Return_rt sigma_l               = delta_l * delta_l;
        const Return_t* Dsaved          = (*DerivRecords[ip])[iw];
        const Return_rt* HDsaved        = (*HDerivRecords[ip])[iw];
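        // Variance gradient accumulation:
        // dVar/dp = 2 <Re(D_p) (sigma_l - Var_w)> + 2 <delta_l (H_D,p - dE_w/dp)>,
        // where EDtotals_w already holds the reduced energy gradient dE_w/dp.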
        for (int pm = 0; pm < NumOptimizables; pm++)
        {
          E2Dtotals_w[pm] +=
              weight * 2.0 * (std::real(Dsaved[pm]) * (sigma_l - curVar_w) + delta_l * (HDsaved[pm] - EDtotals_w[pm]));
        }
      }
    }
    myComm->allreduce(E2Dtotals_w);
    for (int pm = 0; pm < NumOptimizables; pm++)
      URV[pm] *= smpinv;
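    // Assemble the total gradient as the weighted sum of the cost components:
    // w_en * dE/dp + w_var * dVar/dp + w_w * dURV/dp + w_abs * d<|E - E_T|^PowerE>/dp.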
    for (int j = 0; j < NumOptimizables; j++)
    {
      PGradient[j] = 0.0;
      if (std::abs(w_var) > 1.0e-10)
        PGradient[j] += w_var * E2Dtotals_w[j];
      if (std::abs(w_en) > 1.0e-10)
        PGradient[j] += w_en * EDtotals_w[j];
      if (std::abs(w_w) > 1.0e-10)
        PGradient[j] += w_w * URV[j];
      if (std::abs(w_abs) > 1.0e-10)
        PGradient[j] += w_abs * EDtotals[j];
    }

    IsValid = isEffectiveWeightValid(effective_weight);
  }
}


void QMCCostFunction::getConfigurations(const std::string& aroot)
{
  //makeClones(W,Psi,H);
  if (H_KE_Node.empty())
  {
    app_log() << " QMCCostFunction is created with " << NumThreads << " threads." << std::endl;
    //make H_KE_Node
    H_KE_Node.resize(NumThreads);
    RecordsOnNode.resize(NumThreads, 0);
    DerivRecords.resize(NumThreads, 0);
    HDerivRecords.resize(NumThreads, 0);
  }

  //#pragma omp parallel for
  for (int ip = 0; ip < NumThreads; ++ip)
    if (!H_KE_Node[ip])
    {
      auto components = hClones[ip]->getTWFDependentComponents();
      if (ip == 0)
      {
        app_log() << " Found " << components.size() << " wavefunction dependent components in the Hamiltonian";
        if (components.size())
          for (const OperatorBase& component : components)
            app_log() << " '" << component.getName() << "'";
        app_log() << "." << std::endl;
      }
      H_KE_Node[ip] = std::make_unique<HamiltonianRef>(components);
    }
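  // Each thread now owns a HamiltonianRef holding only the TWF-dependent operators;
  // the remaining contributions are evaluated once and cached as ENERGY_FIXED in
  // checkConfigurations.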

  //load samples from SampleStack
  app_log() << " Number of samples loaded to each thread : ";
  wPerRank[0] = 0;
  for (int ip = 0; ip < NumThreads; ++ip)
  {
    wPerRank[ip + 1] = wPerRank[ip] + wClones[ip]->numSamples();
    app_log() << wClones[ip]->numSamples() << " ";
  }
  app_log() << std::endl;
  app_log().flush();

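  // (Re)allocate per-sample storage for the fixed-component gradients and Laplacians
  // of log(Psi) whenever the total number of samples on this rank changes.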
  if (dLogPsi.size() != wPerRank[NumThreads])
  {
    delete_iter(dLogPsi.begin(), dLogPsi.end());
    delete_iter(d2LogPsi.begin(), d2LogPsi.end());
    int nptcl = W.getTotalNum();
    int nwtot = wPerRank[NumThreads];
    dLogPsi.resize(nwtot);
    d2LogPsi.resize(nwtot);
    for (int i = 0; i < nwtot; ++i)
      dLogPsi[i] = new ParticleGradient(nptcl);
    for (int i = 0; i < nwtot; ++i)
      d2LogPsi[i] = new ParticleLaplacian(nptcl);
  }
}

/** evaluate everything before optimization */
void QMCCostFunction::checkConfigurations(EngineHandle& handle)
{
  RealType et_tot = 0.0;
  RealType e2_tot = 0.0;
#pragma omp parallel reduction(+ : et_tot, e2_tot)
  {
    int ip = omp_get_thread_num();
    MCWalkerConfiguration& wRef(*wClones[ip]);
    if (RecordsOnNode[ip] == 0)
    {
      RecordsOnNode[ip] = new Matrix<Return_rt>;
      RecordsOnNode[ip]->resize(wRef.numSamples(), SUM_INDEX_SIZE);
      if (needGrads)
      {
        DerivRecords[ip] = new Matrix<Return_t>;
        DerivRecords[ip]->resize(wRef.numSamples(), NumOptimizables);
        HDerivRecords[ip] = new Matrix<Return_rt>;
        HDerivRecords[ip]->resize(wRef.numSamples(), NumOptimizables);
      }
    }
    else if (RecordsOnNode[ip]->size1() != wRef.numSamples())
    {
      RecordsOnNode[ip]->resize(wRef.numSamples(), SUM_INDEX_SIZE);
      if (needGrads)
      {
        DerivRecords[ip]->resize(wRef.numSamples(), NumOptimizables);
        HDerivRecords[ip]->resize(wRef.numSamples(), NumOptimizables);
      }
    }
    // Populate local to global index mapping into psiClone internal component 'myVars',
    // because psiClones persist between different sections and need update.
    psiClones[ip]->checkOutVariables(OptVariablesForPsi);
    // synchronize the random number generator with the node
    (*MoverRng[ip]) = (*RngSaved[ip]);
    hClones[ip]->setRandomGenerator(MoverRng[ip]);
    //int nat = wRef.getTotalNum();
    Return_rt e0 = 0.0;
    // Return_t ef=0.0;
    Return_rt e2 = 0.0;
    for (int iw = 0, iwg = wPerRank[ip]; iw < wRef.numSamples(); ++iw, ++iwg)
    {
      wRef.loadSample(wRef, iw);
      wRef.update();
      Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      psiClones[ip]->evaluateDeltaLogSetup(wRef, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], *dLogPsi[iwg],
                                           *d2LogPsi[iwg]);
      saved[REWEIGHT] = 1.0;
      Return_rt etmp;
      if (needGrads)
      {
        //allocate vectors
        Vector<Return_rt> rDsaved(NumOptimizables, 0.0);
        Vector<Return_rt> rHDsaved(NumOptimizables, 0.0);

        Vector<Return_t> Dsaved(NumOptimizables, 0.0);
        Vector<Return_t> HDsaved(NumOptimizables, 0.0);

        etmp = hClones[ip]->evaluateValueAndDerivatives(wRef, OptVariablesForPsi, Dsaved, HDsaved);

        //FIXME the ifdef should be removed after the optimizer is made compatible with complex coefficients
        for (int i = 0; i < NumOptimizables; i++)
        {
          rHDsaved[i] = std::real(HDsaved[i]);
        }
        std::copy(Dsaved.begin(), Dsaved.end(), (*DerivRecords[ip])[iw]);
        std::copy(rHDsaved.begin(), rHDsaved.end(), (*HDerivRecords[ip])[iw]);
      }
      else
        etmp = hClones[ip]->evaluate(wRef);

      e0 += saved[ENERGY_TOT] = saved[ENERGY_NEW] = etmp;
      e2 += etmp * etmp;
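      // ENERGY_FIXED caches the part of the local energy that does not depend on the
      // trial wavefunction: start from the total and subtract every TWF-dependent term.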
      saved[ENERGY_FIXED] = saved[ENERGY_TOT];
      const auto twf_dependent_components = hClones[ip]->getTWFDependentComponents();
      for (const OperatorBase& component : twf_dependent_components)
        saved[ENERGY_FIXED] -= component.getValue();
    }
    //add them all using reduction
    et_tot += e0;
    e2_tot += e2;
    // #pragma omp atomic
    //  eft_tot+=ef;
  }
  // app_log() << " VMC Efavg = " << eft_tot/static_cast<Return_t>(wPerRank[NumThreads]) << std::endl;
  //Need to sum over the processors
  std::vector<Return_rt> etemp(3);
  etemp[0] = et_tot;
  etemp[1] = static_cast<Return_rt>(wPerRank[NumThreads]);
  etemp[2] = e2_tot;
  myComm->allreduce(etemp);
  Etarget    = static_cast<Return_rt>(etemp[0] / etemp[1]);
  NumSamples = static_cast<int>(etemp[1]);
  app_log() << " VMC Eavg = " << Etarget << std::endl;
  app_log() << " VMC Evar = " << etemp[2] / etemp[1] - Etarget * Etarget << std::endl;
  app_log() << " Total weights = " << etemp[1] << std::endl;
  app_log().flush();
  ReportCounter = 0;
  IsValid       = true;
  //collect SumValue for computedCost
  SumValue[SUM_WGT]       = etemp[1];
  SumValue[SUM_WGTSQ]     = etemp[1];
  SumValue[SUM_E_WGT]     = etemp[0];
  SumValue[SUM_ESQ_WGT]   = etemp[2];
  SumValue[SUM_E_BARE]    = etemp[0];
  SumValue[SUM_ESQ_BARE]  = etemp[2];
  SumValue[SUM_ABSE_BARE] = 0.0;
}

#ifdef HAVE_LMY_ENGINE
/** evaluate everything before optimization
 * In future, both the LM and descent engines should be children of some parent engine base class.
 */
void QMCCostFunction::engine_checkConfigurations(cqmc::engine::LMYEngine<Return_t>* EngineObj,
                                                 DescentEngine& descentEngineObj,
                                                 const std::string& MinMethod)
{
  if (MinMethod == "descent")
  {
    //This line seems to be needed to get non-zero derivatives for traditional Jastrow parameters when using descent.
    resetPsi(true);
    //Reset vectors and scalars from any previous iteration
    descentEngineObj.prepareStorage(omp_get_max_threads(), NumOptimizables);
  }
  RealType et_tot = 0.0;
  RealType e2_tot = 0.0;
#pragma omp parallel reduction(+ : et_tot, e2_tot)
  {
    int ip = omp_get_thread_num();
    MCWalkerConfiguration& wRef(*wClones[ip]);
    if (RecordsOnNode[ip] == 0)
    {
      RecordsOnNode[ip] = new Matrix<Return_rt>;
      RecordsOnNode[ip]->resize(wRef.numSamples(), SUM_INDEX_SIZE);
      if (needGrads)
      {
        DerivRecords[ip] = new Matrix<Return_t>;
        //DerivRecords[ip]->resize(wRef.numSamples(),NumOptimizables);
        HDerivRecords[ip] = new Matrix<Return_rt>;
        //HDerivRecords[ip]->resize(wRef.numSamples(),NumOptimizables);
      }
    }
    else if (RecordsOnNode[ip]->size1() != wRef.numSamples())
    {
      RecordsOnNode[ip]->resize(wRef.numSamples(), SUM_INDEX_SIZE);
      if (needGrads)
      {
        //DerivRecords[ip]->resize(wRef.numSamples(),NumOptimizables);
        //HDerivRecords[ip]->resize(wRef.numSamples(),NumOptimizables);
      }
    }
    // Populate local to global index mapping into psiClone internal component 'myVars',
    // because psiClones persist between different sections and need update.
    psiClones[ip]->checkOutVariables(OptVariablesForPsi);
    // synchronize the random number generator with the node
    (*MoverRng[ip]) = (*RngSaved[ip]);
    hClones[ip]->setRandomGenerator(MoverRng[ip]);
    //int nat = wRef.getTotalNum();
    Return_rt e0 = 0.0;
    // Return_t ef=0.0;
    Return_rt e2 = 0.0;


    for (int iw = 0, iwg = wPerRank[ip]; iw < wRef.numSamples(); ++iw, ++iwg)
    {
      wRef.loadSample(wRef, iw);
      wRef.update();
      Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      psiClones[ip]->evaluateDeltaLogSetup(wRef, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], *dLogPsi[iwg],
                                           *d2LogPsi[iwg]);
      saved[REWEIGHT] = 1.0;
      Return_rt etmp;
      if (needGrads)
      {
        //allocate vectors
        Vector<Return_t> Dsaved(NumOptimizables, 0.0);
        Vector<Return_t> HDsaved(NumOptimizables, 0.0);

        etmp = hClones[ip]->evaluateValueAndDerivatives(wRef, OptVariablesForPsi, Dsaved, HDsaved);

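        // Build the engine input vectors: der_rat_samp = (1, D_1, ..., D_N) holds the
        // logarithmic parameter derivatives D_i = d log(Psi)/dp_i, and
        // le_der_samp = (E_loc, H_D,i + E_loc * D_i) holds the local-energy derivatives.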
        // add non-differentiated derivative vector
        std::vector<Return_t> der_rat_samp(NumOptimizables + 1, 0.0);
        std::vector<Return_t> le_der_samp(NumOptimizables + 1, 0.0);

        // derivative vectors
        der_rat_samp.at(0) = 1.0;
        for (int i = 0; i < Dsaved.size(); i++)
          der_rat_samp[i + 1] = Dsaved[i];

        // energy derivatives
        le_der_samp.at(0) = etmp;
        for (int i = 0; i < HDsaved.size(); i++)
          le_der_samp[i + 1] = HDsaved[i] + etmp * Dsaved[i];

#ifdef HAVE_LMY_ENGINE
        if (MinMethod == "adaptive")
        {
          // pass into engine
          EngineObj->take_sample(der_rat_samp, le_der_samp, le_der_samp, 1.0, saved[REWEIGHT]);
        }
        else if (MinMethod == "descent")
        {
          //Could remove this copying over if LM engine becomes compatible with complex numbers
          //so that der_rat_samp and le_der_samp are vectors of std::complex<double> when QMC_COMPLEX=1
          std::vector<FullPrecValueType> der_rat_samp_comp(der_rat_samp.begin(), der_rat_samp.end());
          std::vector<FullPrecValueType> le_der_samp_comp(le_der_samp.begin(), le_der_samp.end());

          descentEngineObj.takeSample(ip, der_rat_samp_comp, le_der_samp_comp, le_der_samp_comp, 1.0, saved[REWEIGHT]);
        }
#endif
      }
      else
        etmp = hClones[ip]->evaluate(wRef);

      e0 += saved[ENERGY_TOT] = etmp;
      e2 += etmp * etmp;

      saved[ENERGY_FIXED] = saved[ENERGY_TOT];
      const auto twf_dependent_components = hClones[ip]->getTWFDependentComponents();
      for (const OperatorBase& component : twf_dependent_components)
        saved[ENERGY_FIXED] -= component.getValue();
    }

    //add them all using reduction
    et_tot += e0;
    e2_tot += e2;
    // #pragma omp atomic
    //  eft_tot+=ef;
  }

  // app_log() << " VMC Efavg = " << eft_tot/static_cast<Return_t>(wPerRank[NumThreads]) << std::endl;
  //Need to sum over the processors
  std::vector<Return_rt> etemp(3);
  etemp[0] = et_tot;
  etemp[1] = static_cast<Return_rt>(wPerRank[NumThreads]);
  etemp[2] = e2_tot;
  myComm->allreduce(etemp);
  Etarget    = static_cast<Return_rt>(etemp[0] / etemp[1]);
  NumSamples = static_cast<int>(etemp[1]);
  app_log() << " VMC Eavg = " << Etarget << std::endl;
  app_log() << " VMC Evar = " << etemp[2] / etemp[1] - Etarget * Etarget << std::endl;
  app_log() << " Total weights = " << etemp[1] << std::endl;


#ifdef HAVE_LMY_ENGINE
  // engine finish taking samples
  if (MinMethod == "adaptive")
  {
    EngineObj->sample_finish();

    if (EngineObj->block_first())
    {
      app_log() << "calling setComputed function" << std::endl;
    }
  }
  else if (MinMethod == "descent")
  {
    descentEngineObj.sample_finish();
  }
#endif

  app_log().flush();

  ReportCounter = 0;
}
#endif


void QMCCostFunction::resetPsi(bool final_reset)
{
  if (OptVariables.size() < OptVariablesForPsi.size())
    for (int i = 0; i < equalVarMap.size(); ++i)
      OptVariablesForPsi[equalVarMap[i][0]] = OptVariables[equalVarMap[i][1]];
  else
    for (int i = 0; i < OptVariables.size(); ++i)
      OptVariablesForPsi[i] = OptVariables[i];
  //cout << "######### QMCCostFunction::resetPsi " << std::endl;
  //OptVariablesForPsi.print(std::cout);
  //cout << "-------------------------------------- " << std::endl;
  resetOptimizableObjects(Psi, OptVariablesForPsi);
  for (int i = 0; i < psiClones.size(); ++i)
    resetOptimizableObjects(*psiClones[i], OptVariablesForPsi);
}

QMCCostFunction::EffectiveWeight QMCCostFunction::correlatedSampling(bool needGrad)
{
  for (int ip = 0; ip < NumThreads; ++ip)
  {
    // synchronize the random number generator with the node
    (*MoverRng[ip]) = (*RngSaved[ip]);
    hClones[ip]->setRandomGenerator(MoverRng[ip]);
  }

  Return_rt wgt_tot       = 0.0;
  Return_rt wgt_tot2      = 0.0;
  Return_rt inv_n_samples = 1.0 / NumSamples;
#pragma omp parallel reduction(+ : wgt_tot, wgt_tot2)
  {
    const int ip = omp_get_thread_num();
    //if any component besides the kinetic energy depends on the TWF, the TWF must be fully recomputed.
    const bool compute_all_from_scratch = hClones[ip]->getTWFDependentComponents().size() > 1;

    MCWalkerConfiguration& wRef(*wClones[ip]);
    Return_rt wgt_node = 0.0, wgt_node2 = 0.0;
    for (int iw = 0, iwg = wPerRank[ip]; iw < wRef.numSamples(); ++iw, ++iwg)
    {
      wRef.loadSample(wRef, iw);
      wRef.update(true);
      Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      Return_rt logpsi          = psiClones[ip]->evaluateDeltaLog(wRef, compute_all_from_scratch);
      wRef.G += *dLogPsi[iwg];
      wRef.L += *d2LogPsi[iwg];
      Return_rt weight = saved[REWEIGHT] = vmc_or_dmc * (logpsi - saved[LOGPSI_FREE]);
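      // REWEIGHT first stores the *log* of the reweighting factor,
      // log w = vmc_or_dmc * (log psi_new - log psi_old); with vmc_or_dmc = 2 this is
      // log |psi_new/psi_old|^2. It is exponentiated and normalized after the parallel region.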
      if (needGrad)
      {
        Vector<Return_t> Dsaved(NumOptimizables, 0);
        Vector<Return_t> HDsaved(NumOptimizables, 0);

        Vector<Return_rt> rDsaved(NumOptimizables, 0);
        Vector<Return_rt> rHDsaved(NumOptimizables, 0);

        saved[ENERGY_NEW] =
            H_KE_Node[ip]->evaluateValueAndDerivatives(wRef, OptVariablesForPsi, Dsaved, HDsaved) + saved[ENERGY_FIXED];

        for (int i = 0; i < NumOptimizables; i++)
        {
          rDsaved[i]  = std::real(Dsaved[i]);
          rHDsaved[i] = std::real(HDsaved[i]);
        }

        for (int i = 0; i < NumOptimizables; i++)
        {
          (*DerivRecords[ip])(iw, i)  = rDsaved[i];
          (*HDerivRecords[ip])(iw, i) = rHDsaved[i];
        }
      }
      else
        saved[ENERGY_NEW] = H_KE_Node[ip]->evaluate(wRef) + saved[ENERGY_FIXED];
      wgt_node += inv_n_samples * weight;
      wgt_node2 += inv_n_samples * weight * weight;
    }
    wgt_tot += wgt_node;
    wgt_tot2 += wgt_node2;
  }
  //this is an MPI barrier
  OHMMS::Controller->barrier();
  //collect the total weight for normalization and apply maximum weight
  myComm->allreduce(wgt_tot);
  myComm->allreduce(wgt_tot2);
  // app_log()<<"Before Purge"<<wgt_tot<<" "<<wgt_tot2<< std::endl;
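  // wgt_tot now holds the average log weight; subtracting it before exponentiating
  // keeps std::exp() in a numerically safe range. The weights are then renormalized
  // to unit mean and capped at MaxWeight to tame outliers.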
  Return_rt wgtnorm = (wgt_tot == 0) ? 0 : wgt_tot;
  wgt_tot           = 0.0;
  for (int ip = 0; ip < NumThreads; ip++)
  {
    int nw = wClones[ip]->numSamples();
    for (int iw = 0; iw < nw; iw++)
    {
      Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      saved[REWEIGHT] =
          std::min(std::exp(saved[REWEIGHT] - wgtnorm), std::numeric_limits<Return_rt>::max() * (RealType)0.1);
      wgt_tot += inv_n_samples * saved[REWEIGHT];
    }
  }
  myComm->allreduce(wgt_tot);
  // app_log()<<"During Purge"<<wgt_tot<<" "<< std::endl;
  wgtnorm = (wgt_tot == 0) ? 1 : 1.0 / wgt_tot;
  wgt_tot = 0.0;
  for (int ip = 0; ip < NumThreads; ip++)
  {
    int nw = wClones[ip]->numSamples();
    for (int iw = 0; iw < nw; iw++)
    {
      Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      saved[REWEIGHT]           = std::min(saved[REWEIGHT] * wgtnorm, MaxWeight);
      wgt_tot += inv_n_samples * saved[REWEIGHT];
    }
  }
  myComm->allreduce(wgt_tot);
  // app_log()<<"After Purge"<<wgt_tot<<" "<< std::endl;
  for (int i = 0; i < SumValue.size(); i++)
    SumValue[i] = 0.0;
  CSWeight = wgt_tot = (wgt_tot == 0) ? 1 : 1.0 / wgt_tot;
  for (int ip = 0; ip < NumThreads; ip++)
  {
    int nw = wClones[ip]->numSamples();
    for (int iw = 0; iw < nw; iw++)
    {
      const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      // Return_t weight=saved[REWEIGHT]*wgt_tot;
      Return_rt eloc_new = saved[ENERGY_NEW];
      Return_rt delE     = std::pow(std::abs(eloc_new - EtargetEff), PowerE);
      SumValue[SUM_E_BARE] += eloc_new;
      SumValue[SUM_ESQ_BARE] += eloc_new * eloc_new;
      SumValue[SUM_ABSE_BARE] += delE;
      SumValue[SUM_E_WGT] += eloc_new * saved[REWEIGHT];
      SumValue[SUM_ESQ_WGT] += eloc_new * eloc_new * saved[REWEIGHT];
      SumValue[SUM_ABSE_WGT] += delE * saved[REWEIGHT];
      SumValue[SUM_WGT] += saved[REWEIGHT];
      SumValue[SUM_WGTSQ] += saved[REWEIGHT] * saved[REWEIGHT];
    }
  }
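  // Effective weight of the correlated-sampling estimate:
  // (sum_i w_i)^2 / (N_samples * sum_i w_i^2), which equals 1 for uniform weights.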
  //collect everything
  myComm->allreduce(SumValue);
  return SumValue[SUM_WGT] * SumValue[SUM_WGT] / (SumValue[SUM_WGTSQ] * NumSamples);
}


QMCCostFunction::Return_rt QMCCostFunction::fillOverlapHamiltonianMatrices(Matrix<Return_rt>& Left,
                                                                           Matrix<Return_rt>& Right)
{
  ScopedTimer tmp_timer(fill_timer_);

  RealType b2(w_beta);

  Right = 0.0;
  Left  = 0.0;

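  // Linear-method matrices in the basis {psi, d psi/dp_i}: Right accumulates the
  // overlap matrix S, while Left mixes Hamiltonian and variance contributions,
  // Left = (1 - b2) * H + b2 * Variance, with b2 = w_beta.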
  //  resetPsi();
  curAvg_w            = SumValue[SUM_E_WGT] / SumValue[SUM_WGT];
  Return_rt curAvg2_w = SumValue[SUM_ESQ_WGT] / SumValue[SUM_WGT];
  RealType V_avg      = curAvg2_w - curAvg_w * curAvg_w;
  std::vector<Return_t> D_avg(getNumParams(), 0.0);
  Return_rt wgtinv = 1.0 / SumValue[SUM_WGT];
  for (int ip = 0; ip < NumThreads; ip++)
  {
    int nw = wClones[ip]->numSamples();
    for (int iw = 0; iw < nw; iw++)
    {
      const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      Return_rt weight                = saved[REWEIGHT] * wgtinv;
      const Return_t* Dsaved          = (*DerivRecords[ip])[iw];
      for (int pm = 0; pm < getNumParams(); pm++)
      {
        D_avg[pm] += Dsaved[pm] * weight;
      }
    }
  }

  myComm->allreduce(D_avg);

  for (int ip = 0; ip < NumThreads; ip++)
  {
    int nw = wClones[ip]->numSamples();
    for (int iw = 0; iw < nw; iw++)
    {
      const Return_rt* restrict saved = (*RecordsOnNode[ip])[iw];
      Return_rt weight                = saved[REWEIGHT] * wgtinv;
      Return_rt eloc_new              = saved[ENERGY_NEW];
      const Return_t* Dsaved          = (*DerivRecords[ip])[iw];
      const Return_rt* HDsaved        = (*HDerivRecords[ip])[iw];
#pragma omp parallel for
      for (int pm = 0; pm < getNumParams(); pm++)
      {
        Return_t wfe   = (HDsaved[pm] + (Dsaved[pm] - D_avg[pm]) * eloc_new) * weight;
        Return_t wfd   = (Dsaved[pm] - D_avg[pm]) * weight;
        Return_t vterm = HDsaved[pm] * (eloc_new - curAvg_w) +
            (Dsaved[pm] - D_avg[pm]) * eloc_new * (eloc_new - RealType(2.0) * curAvg_w);
        // Return_t vterm = (HDsaved[pm]+(Dsaved[pm]-D_avg[pm])*eloc_new -curAvg_w)*(eloc_new-curAvg_w);
        // Variance
        Left(0, pm + 1) += b2 * std::real(vterm) * weight;
        Left(pm + 1, 0) += b2 * std::real(vterm) * weight;
        // Hamiltonian
        Left(0, pm + 1) += (1 - b2) * std::real(wfe);
        Left(pm + 1, 0) += (1 - b2) * std::real(wfd) * eloc_new;
        for (int pm2 = 0; pm2 < getNumParams(); pm2++)
        {
          // Hamiltonian
          Left(pm + 1, pm2 + 1) +=
              std::real((1 - b2) * std::conj(wfd) * (HDsaved[pm2] + (Dsaved[pm2] - D_avg[pm2]) * eloc_new));
          // Overlap
          RealType ovlij = std::real(std::conj(wfd) * (Dsaved[pm2] - D_avg[pm2]));
          Right(pm + 1, pm2 + 1) += ovlij;
          // Variance
          RealType varij = weight *
              std::real((HDsaved[pm] - RealType(2.0) * std::conj(Dsaved[pm] - D_avg[pm]) * eloc_new) *
                        (HDsaved[pm2] - RealType(2.0) * (Dsaved[pm2] - D_avg[pm2]) * eloc_new));
          // RealType varij=weight*(HDsaved[pm] +(Dsaved[pm]-D_avg[pm])*eloc_new-curAvg_w)*
          //                (HDsaved[pm2] + (Dsaved[pm2]-D_avg[pm2])*eloc_new-curAvg_w);
          Left(pm + 1, pm2 + 1) += b2 * (varij + V_avg * ovlij);
        }
      }
    }
  }
  myComm->allreduce(Right);
  myComm->allreduce(Left);
  Left(0, 0)  = (1 - b2) * curAvg_w + b2 * V_avg;
  Right(0, 0) = 1.0;

  return 1.0;
}
} // namespace qmcplusplus