QMCPACK
QPParser.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Anouar Benali, benali@anl.gov, Argonne National Laboratory
8 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
9 //
10 // File created by: Anouar Benali, benali@anl.gov, Argonne National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
14 #include "QPParser.h"
15 #include <fstream>
16 #include <iterator>
17 #include <algorithm>
18 #include <set>
19 #include <map>
20 
21 
23 {
24  basisName = "Gaussian";
25  Normalized = "no";
26  BohrUnit = true;
27  MOtype = "Canonical";
28  angular_type = "cartesian";
29  readtype = 0;
30  NFZC = 0;
31  numAO = 0;
32 }
33 
34 QPParser::QPParser(int argc, char** argv) : QMCGaussianParserBase(argc, argv)
35 {
36  basisName = "Gaussian";
37  Normalized = "no";
38  BohrUnit = true;
39  MOtype = "Canonical";
40  angular_type = "cartesian";
41  SpinRestricted = true;
42  readtype = 0;
43  NFZC = 0;
44  numAO = 0;
45 }
46 
47 void QPParser::parse(const std::string& fname)
48 {
49  std::ifstream fin(fname.c_str());
50  pivot_begin = fin.tellg();
51  std::string aline;
52 
53  search(fin, "do_pseudo", aline);
54  parsewords(aline.c_str(), currentWords);
55  if (currentWords[1] == "True")
56  {
57  ECP = true;
58  }
59  else
60  {
61  ECP = false;
62  }
63  std::cout << "usingECP: " << (ECP ? ("yes") : ("no")) << std::endl;
64  std::cout.flush();
65 
66  search(fin, "multi_det", aline);
67  parsewords(aline.c_str(), currentWords);
68  if (currentWords[1] == "True")
69  multideterminant = true;
70  else
71  multideterminant = false;
72 
73  std::cout << "Multideterminant: " << (multideterminant ? ("yes") : ("no")) << std::endl;
74 
75 
76  search(fin, "ao_num", aline);
77  parsewords(aline.c_str(), currentWords);
78  numAO = atoi(currentWords[1].c_str());
79  std::cout << "NUMBER OF AOs: " << numAO << std::endl;
81  std::cout << "Size of Basis Set: " << SizeOfBasisSet << std::endl;
82 
83  search(fin, "mo_num", aline);
84  parsewords(aline.c_str(), currentWords);
85  numMO = atoi(currentWords[1].c_str());
86  std::cout << "NUMBER OF MOs: " << numMO << std::endl;
87 
88 
89  search(fin, "elec_alpha_num", aline);
90  parsewords(aline.c_str(), currentWords);
91  NumberOfAlpha = atoi(currentWords[1].c_str());
92  std::cout << "Number of alpha electrons: " << NumberOfAlpha << std::endl;
93  search(fin, "elec_beta_num", aline);
94  parsewords(aline.c_str(), currentWords);
95  NumberOfBeta = atoi(currentWords[1].c_str());
96  std::cout << "Number of beta electrons: " << NumberOfBeta << std::endl;
97 
98 
99  search(fin, "elec_tot_num", aline);
100  parsewords(aline.c_str(), currentWords);
101  NumberOfEls = atoi(currentWords[1].c_str());
102  std::cout << "Number of electrons: " << NumberOfEls << std::endl;
103 
104 
105  search(fin, "spin_multiplicity", aline);
106  parsewords(aline.c_str(), currentWords);
107  SpinMultiplicity = atoi(currentWords[1].c_str());
108  std::cout << "SPIN MULTIPLICITY: " << SpinMultiplicity << std::endl;
109 
110  SpinRestricted = true;
111 
112 
113  search(fin, "nucl_num", aline);
114  parsewords(aline.c_str(), currentWords);
115  NumberOfAtoms = atoi(currentWords[1].c_str());
116  std::cout << "NUMBER OF ATOMS: " << NumberOfAtoms << std::endl;
117 
118 
120  GroupName.resize(NumberOfAtoms);
121  getGeometry(fin);
122  fin.seekg(pivot_begin);
123 
124  getGaussianCenters(fin);
125  fin.seekg(pivot_begin);
126  MOtype = "Canonical";
127  readtype = 0;
128  getMO(fin);
129  fin.close();
130 
131 
132  if (multideterminant)
133  {
134  fin.open(fname.c_str());
135  QP = true;
136  pivot_begin = fin.tellg();
137  search(fin, "BEGIN_DET", aline);
138  getwords(currentWords, fin); //Empty Line
139  getwords(currentWords, fin); //mo_num
140  getwords(currentWords, fin); //Number Of determinants
141  std::cout << "Found " << currentWords[1] << " Determinants" << std::endl;
142  ci_size = atoi(currentWords[1].c_str());
143  NFZC = 0;
144  NAC = numMO;
145  NEXT = 0;
146  NTOT = NEXT + NAC;
147  std::cout << "# core, #active, #external: " << NFZC << " " << NAC << " " << NEXT << std::endl;
148  //fin.seekg(pivot_begin);
149  getQPCI(fin);
150  }
151 }
152 
153 void QPParser::getGeometry(std::istream& is)
154 {
155  //atomic numbers
156  std::vector<int> atomic_number, core;
157  std::vector<double> q, pos;
158  int natms = 0;
159  tags.clear();
160  is.seekg(pivot_begin);
161  //read atomic info
162  bool notfound = true;
163 
164  do
165  {
166  if (is.eof())
167  {
168  std::cerr << "Could not find atomic coordinates. \n";
169  abort();
170  }
171  getwords(currentWords, is);
172  if (currentWords.size() < 4)
173  continue;
174  if (currentWords.size() == 4)
175  if (currentWords[0] == "Atomic" && currentWords[1] == "coord" && currentWords[2] == "in" &&
176  currentWords[3] == "Bohr")
177  {
178  notfound = false;
179  getwords(currentWords, is);
180  while (currentWords.size() != 0)
181  {
182  if (currentWords[0] == "BEGIN_BASIS_SET")
183  break;
184  natms++;
185  double z = atof(currentWords[1].c_str());
186  int zint = (int)z; // is this dangerous???
187  atomic_number.push_back(zint);
188  q.push_back(z); // if using ECPs, change below
189  tags.push_back(currentWords[0]);
190  pos.push_back(atof(currentWords[2].c_str()));
191  pos.push_back(atof(currentWords[3].c_str()));
192  pos.push_back(atof(currentWords[4].c_str()));
193  getwords(currentWords, is);
194  }
195  }
196  } while (notfound);
197  if (natms != NumberOfAtoms)
198  {
199  std::cerr << "Could not find atomic coordinates for all atoms. \n";
200  abort();
201  }
202  //ECP PART!!!
203  if (ECP == true)
204  {
205  is.seekg(pivot_begin);
206  notfound = true;
207 
208  while (notfound)
209  {
210  if (is.eof())
211  {
212  std::cerr << "Problem looking for ECPs, this should not happen. Contact developers for help. \n";
213  abort();
214  }
215  getwords(currentWords, is);
216  // this should appear below the ECP section in the output file
217  // so use this to avoid going all the way to the bottom
218  if (currentWords.size() == 0)
219  continue;
220  if (currentWords[0] == "BEGIN_PSEUDO")
221  {
222  core.resize(NumberOfAtoms);
223  // getwords(currentWords,is); // -------------
224  bool done = false;
225  while (!done)
226  {
227  if (is.eof())
228  {
229  std::cerr << "2 Found ECPs, but problem looking ZCORE data.\n";
230  abort();
231  }
232  getwords(currentWords, is);
233  if (currentWords.size() == 0)
234  continue;
235  if (currentWords.size() == 1)
236  {
237  if (currentWords[0] == "END_PSEUDO")
238  done = true;
239  }
240  if (currentWords[0] == "PARAMETERS" && currentWords[1] == "FOR")
241  {
242  //done=true;
243  std::vector<std::string>::iterator it, it0;
244  it = find(currentWords.begin(), currentWords.end(), "ZCORE");
245  it0 = find(currentWords.begin(), currentWords.end(), "ATOM");
246  if (it0 == currentWords.end())
247  {
248  std::cerr << "Problem with ECP data. Didn't found ATOM tag\n";
249  std::cerr << is.rdbuf() << std::endl;
250  abort();
251  }
252  it0++;
253  int nq0 = atoi(it0->c_str()) - 1;
254  if (it != currentWords.end())
255  {
256  it++;
257  core[nq0] = atoi(it->c_str());
258  q[nq0] -= core[nq0];
259 
260  std::cout << "1 Found ECP for atom " << nq0 << " with zcore " << core[nq0] << std::endl;
261  }
262  else
263  {
264  it = find(currentWords.begin(), currentWords.end(), "ATOM");
265  if (it == currentWords.end())
266  {
267  std::cerr << "Problem with ECP data. Didn't found ATOM tag\n";
268  std::cerr << "Atom: " << nq0 << std::endl;
269  abort();
270  }
271  std::vector<std::string>::iterator it2 = it;
272  it2++;
273  int nq = atoi(it2->c_str());
274  if (nq != nq0 + 1)
275  {
276  std::cerr << "Problem with ECP data. ID's don't agree\n";
277  std::cerr << "Atom: " << nq0 << std::endl;
278  abort();
279  }
280  it = find(it2, currentWords.end(), "ATOM");
281  if (it == currentWords.end())
282  {
283  std::cerr << "Problem with ECP data (2).\n";
284  std::cerr << "Atom: " << nq0 << std::endl;
285  abort();
286  }
287  nq = atoi((it + 1)->c_str());
288  core[nq0] = core[nq - 1];
289  q[nq0] -= core[nq0];
290  std::cout << "2 Found ECP for atom " << nq0 << " with zcore " << core[nq0] << std::endl;
291  }
292  }
293  }
294  notfound = false;
295  }
296  else
297  {
298  if (currentWords.size() < 2)
299  continue;
300  if (currentWords[0] == "END_PSEUDO")
301  break;
302  }
303  }
304  }
305 
306 
307  SpeciesSet& species(IonSystem.getSpeciesSet());
308  for (int i = 0, ii = 0; i < NumberOfAtoms; i++)
309  {
310  IonSystem.R[i][0] = pos[ii++];
311  IonSystem.R[i][1] = pos[ii++];
312  IonSystem.R[i][2] = pos[ii++];
313  GroupName[i] = IonName[atomic_number[i]];
314  int speciesID = species.addSpecies(GroupName[i]);
315  IonSystem.GroupID[i] = speciesID;
316  species(AtomicNumberIndex, speciesID) = atomic_number[i];
317  species(IonChargeIndex, speciesID) = q[i];
318  }
319 }
320 
321 void QPParser::getGaussianCenters(std::istream& is)
322 {
323  std::string Shell_temp;
324  gBound.resize(NumberOfAtoms + 1);
325  int ng;
326  std::string aline;
327  std::map<std::string, int> basisDataMap;
328  int nUniqAt = 0;
329  for (int i = 0; i < NumberOfAtoms; i++)
330  {
331  std::map<std::string, int>::iterator it(basisDataMap.find(tags[i]));
332  if (it == basisDataMap.end())
333  {
334  basisDataMap[tags[i]] = nUniqAt++;
335  }
336  }
337  std::vector<std::vector<double>> expo(nUniqAt), coef(nUniqAt), coef2(nUniqAt);
338  std::vector<int> nshll(nUniqAt, 0); //use this to
339  std::vector<std::vector<int>> ncoeffpershell(nUniqAt);
340  std::vector<std::vector<std::string>> shID(nUniqAt);
341  std::map<std::string, int> gsMap;
342  gsMap[std::string("S")] = 1;
343  gsMap[std::string("SP")] = 2;
344  gsMap[std::string("P")] = 3;
345  gsMap[std::string("D")] = 4;
346  gsMap[std::string("F")] = 5;
347  gsMap[std::string("G")] = 6;
348  gsMap[std::string("H")] = 7;
349  gsMap[std::string("I")] = 8;
350  is.seekg(pivot_begin);
351  bool found = false;
352  while (!found)
353  {
354  if (is.eof())
355  {
356  std::cerr << "Problem with basis set data.\n";
357  abort();
358  }
359  getwords(currentWords, is);
360  if (currentWords.size() > 2)
361  continue;
362  if (currentWords[0] == "BEGIN_BASIS_SET")
363  found = true;
364  }
365 
366 
367  is.seekg(pivot_begin);
368 
369 
370  int currPos = -1;
371  lookFor(is, "BEGIN_BASIS_SET");
372  int NbCoeffperShell = 0;
373  getwords(currentWords, is);
374  bool allbase = true;
375  while (allbase == true)
376  {
377  if (currentWords.empty())
378  getwords(currentWords, is);
379  if (currentWords.size() == 1)
380  {
381  std::map<std::string, int>::iterator it(basisDataMap.find(currentWords[0]));
382  if (it == basisDataMap.end())
383  {
384  std::cerr << "Error in parser.\n";
385  abort();
386  }
387  currPos = it->second;
388  nshll[currPos] = 0;
389  ncoeffpershell[currPos].clear();
390  ncoeffpershell[currPos].push_back(0);
391  shID[currPos].clear();
392  shID[currPos].push_back("NONE");
393 
394  bool group = true;
395  do
396  {
397  getwords(currentWords, is);
398  if (currentWords[0] == "S" || currentWords[0] == "P" || currentWords[0] == "D" || currentWords[0] == "F" ||
399  currentWords[0] == "G" || currentWords[0] == "H" || currentWords[0] == "I") //empty line after the shell
400  {
401  shID[currPos].push_back(currentWords[0]);
402  Shell_temp = currentWords[0];
403  NbCoeffperShell = atoi(currentWords[1].c_str());
404  ncoeffpershell[currPos][nshll[currPos]] = NbCoeffperShell;
405  for (int nbcoef = 0; nbcoef < NbCoeffperShell; nbcoef++)
406  {
407  getwords(currentWords, is);
408  expo[currPos].push_back(atof(currentWords[1].c_str()));
409  coef[currPos].push_back(atof(currentWords[2].c_str()));
410  shID[currPos][nshll[currPos]] = Shell_temp;
411  if (debug)
412  {
413  std::cout << currPos << ":" << expo[currPos].back() << " " << coef[currPos].back() << " "
414  << ncoeffpershell[currPos][nshll[currPos]] << " " << shID[currPos][nshll[currPos]] << std::endl;
415  }
416  }
417  nshll[currPos]++;
418  ncoeffpershell[currPos].push_back(0);
419  shID[currPos].push_back("NONE");
420  }
421  else
422  {
423  if (currentWords[0] == "END_BASIS_SET")
424  {
425  ng = SizeOfBasisSet;
426  group = false;
427  allbase = false;
428  break;
429  }
430  else
431  {
432  break;
433  }
434  }
435  } while (group == true);
436  }
437  else
438  {
439  std::cerr << "error in parser" << std::endl;
440  abort();
441  }
442  }
443 
444  gShell.clear();
445  gNumber.clear();
446  gExp.clear();
447  gC0.clear();
448  gC1.clear();
449  int gtot = 0;
450 
451  for (int i = 0; i < NumberOfAtoms; i++)
452  {
453  std::map<std::string, int>::iterator it(basisDataMap.find(tags[i]));
454  if (it == basisDataMap.end())
455  {
456  std::cerr << "Error in parser.\n";
457  abort();
458  }
459  gBound[i] = gtot;
460  int indx = it->second;
461  gtot += nshll[indx];
462  for (int k = 0; k < nshll[indx]; k++)
463  {
464  gShell.push_back(gsMap[shID[indx][k]]);
465  }
466  for (int k = 0; k < nshll[indx]; k++)
467  gNumber.push_back(ncoeffpershell[indx][k]);
468  for (int k = 0; k < expo[indx].size(); k++)
469  gExp.push_back(expo[indx][k]);
470  for (int k = 0; k < coef[indx].size(); k++)
471  gC0.push_back(coef[indx][k]);
472  }
473  gBound[NumberOfAtoms] = gtot;
474 }
475 
476 void QPParser::getMO(std::istream& is)
477 {
478  EigVal_alpha.resize(numMO);
479  EigVal_beta.resize(numMO);
480  EigVec.resize(2 * SizeOfBasisSet * numMO);
481  std::string aline;
482  search(is, "BEGIN_MO");
483  getwords(currentWords, is); // empty line
484 
485  std::vector<double> dummy(50);
487  std::streampos pivot;
488  pivot = is.tellg();
489  std::vector<std::string> CartLabels(SizeOfBasisSet);
490  getwords(currentWords, is);
491  for (int k = 0; k < SizeOfBasisSet; k++)
492  {
493  getwords(currentWords, is);
494  CartLabels[k] = currentWords[0];
495  }
496 
497  is.seekg(pivot);
498 
499  getMO_single_set(is, CartMat, EigVal_alpha);
500  int cnt = 0;
501  for (int i = 0; i < numMO; i++)
502  for (int k = 0; k < SizeOfBasisSet; k++)
503  EigVec[cnt++] = CartMat[i][k];
504 
505  // beta states for now
506  if (!SpinRestricted)
507  {
508  search(is, "BEGIN_MO");
509  getwords(currentWords, is); // empty line
510  getMO_single_set(is, CartMat, EigVal_beta);
511  }
512 
513  for (int i = 0; i < numMO; i++)
514  for (int k = 0; k < SizeOfBasisSet; k++)
515  EigVec[cnt++] = CartMat[i][k];
516  std::cout << "Finished reading MO." << std::endl;
517 }
518 
519 void QPParser::getMO_single_set(std::istream& is, Matrix<double>& CartMat, std::vector<value_type>& EigVal)
520 {
521  int nq = numMO / 4;
522  int rem = numMO % 4;
523  int cnt = 0;
524  for (int i = 0; i < nq; i++)
525  {
526  getwords(currentWords, is);
527  for (int k = 0; k < SizeOfBasisSet; k++)
528  {
529  getwords(currentWords, is);
530  if (currentWords.size() == 6)
531  {
532  CartMat[cnt][k] = atof(currentWords[2].c_str());
533  CartMat[cnt + 1][k] = atof(currentWords[3].c_str());
534  CartMat[cnt + 2][k] = atof(currentWords[4].c_str());
535  CartMat[cnt + 3][k] = atof(currentWords[5].c_str());
536  }
537  else
538  {
539  std::cerr << "Problem reading orbitals!!" << std::endl;
540  abort();
541  }
542  }
543  getwords(currentWords, is);
544  cnt += 4;
545  }
546  if (rem > 0)
547  {
548  getwords(currentWords, is);
549 
550  for (int k = 0; k < SizeOfBasisSet; k++)
551  {
552  getwords(currentWords, is);
553  for (int i = 0; i < rem; i++)
554  {
555  CartMat[cnt + i][k] = atof(currentWords[2 + i].c_str());
556  }
557  }
558  getwords(currentWords, is);
559  }
560 }
561 
562 
563 void QPParser::getQPCI(std::istream& is)
564 {
565  CIcoeff.clear();
566  CIalpha.clear();
567  CIbeta.clear();
568  CIcoeff.resize(ci_size);
569  CIalpha.resize(ci_size);
570  CIbeta.resize(ci_size);
571 
572 
573  for (int i = 0; i < ci_size; i++)
574  {
575  getwords(currentWords, is); //Empty Line
576  if (currentWords[0] == "END_DET")
577  {
578  std::cout << "Done reading determinants" << std::endl;
579  break;
580  }
581 
582  getwords(currentWords, is); //Coeff
583  CIcoeff[i] = atof(currentWords[0].c_str());
584 
585  //Alpha spin
586  getwords(currentWords, is);
587  CIalpha[i] = currentWords[0];
588  //Beta spin
589  getwords(currentWords, is);
590  CIbeta[i] = currentWords[0];
591  }
592  ci_nea = ci_neb = 0;
593  for (int i = 0; i < CIalpha[0].size(); i++)
594  if (CIalpha[0].at(i) == '1')
595  ci_nea++;
596  for (int i = 0; i < CIbeta[0].size(); i++)
597  if (CIbeta[0].at(i) == '1')
598  ci_neb++;
599  if (CIalpha[0].size() != CIbeta[0].size())
600  {
601  std::cerr << "QMCPACK can't handle different number of active orbitals in alpha and beta channels right now. "
602  "Contact developers for help (Miguel).\n";
603  abort();
604  }
605  // int ds=SpinMultiplicity-1;
606  // int neb= (NumberOfEls-ds)/2;
607  // int nea= NumberOfEls-NumberOfBeta;
608  ci_nca = 0;
609  ci_ncb = 0;
610  std::cout << " Done reading CIs!!" << std::endl;
611  ci_nstates = CIalpha[0].size();
612 }
std::vector< value_type > EigVal_beta
std::vector< std::string > currentWords
Definition: SimpleParser.h:49
int NAC
Definition: QPParser.h:35
std::vector< double > CIcoeff
void getMO(std::istream &is)
Definition: QPParser.cpp:476
int NEXT
Definition: QPParser.h:35
std::vector< std::string > tags
Definition: QPParser.h:32
std::string MOtype
Definition: QPParser.h:33
std::vector< std::string > GroupName
void parse(const std::string &fname)
Definition: QPParser.cpp:47
ParticleIndex GroupID
Species ID.
Definition: ParticleSet.h:77
std::vector< value_type > EigVal_alpha
int NFZC
Definition: QPParser.h:35
std::vector< value_type > gC0
int readtype
Definition: QPParser.h:34
unsigned parsewords(const char *inbuf, std::vector< std::string > &slist, const std::string &extra_tokens)
int numAO
Definition: QPParser.h:34
std::vector< value_type > EigVec
QPParser()
Definition: QPParser.cpp:22
int search(std::istream &is, const std::string &keyword)
Definition: SimpleParser.h:89
ParticlePos R
Position.
Definition: ParticleSet.h:79
std::vector< value_type > gExp
static std::map< int, std::string > IonName
SpeciesSet & getSpeciesSet()
retrun the SpeciesSet of this particle set
Definition: ParticleSet.h:231
std::vector< std::string > CIalpha
void create(const std::vector< int > &agroup)
create grouped particles
int NTOT
Definition: QPParser.h:35
bool lookFor(std::istream &is, const std::string &keyword)
Definition: SimpleParser.h:130
constexpr double done
Definition: BLAS.hpp:48
std::streampos pivot_begin
Definition: QPParser.h:31
std::vector< int > gNumber
Custom container for set of attributes for a set of species.
Definition: SpeciesSet.h:33
int getwords(std::vector< std::string > &slist, std::istream &fp, std::string &aline)
std::vector< std::string > CIbeta
std::vector< value_type > gC1
void getGeometry(std::istream &is)
Definition: QPParser.cpp:153
void getMO_single_set(std::istream &is, Matrix< double > &CartMat, std::vector< value_type > &EigVal_alpha)
Definition: QPParser.cpp:519
void getQPCI(std::istream &is)
Definition: QPParser.cpp:563
void getGaussianCenters(std::istream &is)
Definition: QPParser.cpp:321