QMCPACK
SimpleParser.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
8 // Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
9 // Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
10 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
11 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
12 // Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
13 //
14 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
15 //////////////////////////////////////////////////////////////////////////////////////
16 
17 
#include "SimpleParser.h"

#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <regex>
#include <stdexcept>
#include <string>
27 
/** Read one logical line from a stream into a caller-supplied buffer.
 *
 * Characters are consumed until a newline or a semicolon has been read; the
 * terminator itself is not stored. A backslash immediately followed by a
 * newline is a line continuation: both characters are dropped and reading
 * carries on with the next physical line. A backslash followed by any other
 * character is stored together with that character. Characters that do not
 * fit into the buffer are still consumed but discarded.
 *
 * @param s   destination buffer, must have room for max characters
 * @param max capacity of s including the terminating NUL
 * @param fp  stream to read from
 * @return s when the line was ended by a newline or semicolon, nullptr when
 *         the stream ran out first. If max > 0 the buffer is NUL terminated
 *         in both cases. A continuation newline that is the very last
 *         character of the stream is treated as the terminator.
 */
char* readLine(char* s, int max, std::istream& fp)
{
  // Initialized so that the terminator test after the loop is well defined
  // even when the very first read fails (empty or already exhausted stream).
  char ch = '\0';
  int i   = 0;
  while (fp.get(ch) && !(ch == '\n' || ch == ';'))
  {
    if (ch == '\\') // possible line continuation
    {
      if (!fp.get(ch))
      {
        // Input ended right after the backslash: keep that single backslash
        // and report end of file.
        if (i < max - 1)
          s[i++] = '\\';
        if (max > 0)
          s[i] = '\0';
        return nullptr;
      }
      if (ch != '\n')
      {
        // Backslash not followed by a newline: store both characters.
        if (i < max - 1)
          s[i++] = '\\';
        if (i < max - 1)
          s[i++] = ch;
      }
      // Otherwise it is a continuation: drop the backslash and the newline.
    }
    else if (i < max - 1)
    {
      s[i++] = ch;
    }
  }
  if (max > 0)
    s[i] = '\0'; // terminating NUL
  if (!(ch == '\n' || ch == ';'))
    return nullptr; // end of file reached before a terminator
  return s;
}
63 
64 
/** Split a C string into words and store them in a vector.
 *
 * Words are separated by any of the characters `=`, `,`, space, tab, newline
 * and double quote, plus every character of extra_tokens. Splitting stops at
 * the first word that begins with `#`, so the rest of the line acts as a
 * comment. Only strings are produced; nothing is converted.
 *
 * The scan keeps all of its state in local variables (no strtok), so it is
 * reentrant and does not disturb a strtok sequence in progress elsewhere.
 *
 * @param inbuf        NUL terminated text to split; nullptr yields no words
 * @param slist        receives the words, previous content is discarded
 * @param extra_tokens additional separator characters
 * @return number of words stored in slist
 */
unsigned parsewords(const char* inbuf, std::vector<std::string>& slist, const std::string& extra_tokens /* = "" */)
{
  slist.clear();
  if (inbuf == nullptr)
    return 0;

  std::string delimiters = "=, \t\n\"";
  delimiters.append(extra_tokens);
  const std::string text(inbuf);

  std::string::size_type first = text.find_first_not_of(delimiters);
  while (first != std::string::npos && text[first] != '#')
  {
    const std::string::size_type last = text.find_first_of(delimiters, first);
    if (last == std::string::npos)
    {
      // Final word runs to the end of the text.
      slist.push_back(text.substr(first));
      break;
    }
    slist.push_back(text.substr(first, last - first));
    first = text.find_first_not_of(delimiters, last);
  }
  return static_cast<unsigned>(slist.size());
}
83 
/** Split a C string into words and store them in a list.
 *
 * Words are separated by any of the characters `=`, `,`, space, tab and
 * newline. Splitting stops at the first word that begins with `#`. The scan
 * keeps all of its state in local variables (no strtok), so it is reentrant.
 *
 * @param inbuf NUL terminated text to split; nullptr yields no words
 * @param slist receives the words, previous content is discarded
 * @return number of words stored in slist
 */
unsigned parsewords(const char* inbuf, std::list<std::string>& slist)
{
  slist.clear();
  if (inbuf == nullptr)
    return 0;

  const std::string delimiters("=, \t\n");
  const std::string text(inbuf);

  unsigned num                 = 0;
  std::string::size_type first = text.find_first_not_of(delimiters);
  while (first != std::string::npos && text[first] != '#')
  {
    const std::string::size_type last = text.find_first_of(delimiters, first);
    ++num;
    if (last == std::string::npos)
    {
      // Final word runs to the end of the text.
      slist.push_back(text.substr(first));
      break;
    }
    slist.push_back(text.substr(first, last - first));
    first = text.find_first_not_of(delimiters, last);
  }
  return num;
}
99 
100 int getwords(std::vector<std::string>& slist, std::istream& fp, std::string& aline)
101 {
102  const int max = 1024;
103  char s[max];
104  if (readLine(s, max, fp))
105  {
106  aline.clear();
107  aline.append(s);
108  return parsewords(s, slist);
109  }
110  else
111  return -1;
112 }
113 
114 /* dummy argument present so function's type signature can be distinguished
115  from previous function */
116 
117 int getwords(std::vector<std::string>& slist,
118  std::istream& fp,
119  int dummy /* = 0*/,
120  const std::string& extra_tokens /* ="" */)
121 {
122  const int max = 1024;
123  char s[max];
124  if (readLine(s, max, fp))
125  return parsewords(s, slist, extra_tokens);
126  else
127  return -1;
128 }
129 
130 // Variation of getwords that splits merged numbers due to fortran fixed format
131 // Handles unambiguous cases with minus signs only "123-456" -> "123 -456"
132 int getwordsWithMergedNumbers(std::vector<std::string>& slist,
133  std::istream& fp,
134  int dummy /* = 0*/,
135  const std::string& extra_tokens /* ="" */)
136 {
137  const int max = 1024;
138  char s[max];
139  if (readLine(s, max, fp))
140  {
141  std::regex dash("-");
142  const std::string space_dash(" -");
143  std::string merged(s);
144  std::string unmerged = std::regex_replace(merged, dash, space_dash, std::regex_constants::format_default);
145  return parsewords(unmerged.c_str(), slist, extra_tokens);
146  }
147  else
148  {
149  return -1;
150  }
151 }
152 
153 void readXmol(std::istream& fxmol, double* data, int numvar)
154 {
155  std::vector<std::string> slist;
156  getwords(slist, fxmol);
157  unsigned natom = atoi(slist.front().c_str());
158  getwords(slist, fxmol);
159  int ii = 0;
160  for (int i = 0; i < natom; i++)
161  {
162  getwords(slist, fxmol);
163  for (int ivar = 1; ivar <= numvar; ivar++)
164  {
165  data[ii++] = atof(slist[ivar].c_str());
166  }
167  }
168 }
169 
170 
171 /* \fn
172 int getwords(std::vector<std::string>& slist,std::istream& fpos, const char* field, const char* terminate)
173 * \param slist, input strings between <field> </field>
174 * \param fpos std::istream
175 * \param field <filed> data </field>
176 * \param terminate std::string to stop searching
177 */
178 
179 int getwords(std::vector<std::string>& slist, std::istream& fpos, const char* field, const char* terminate)
180 {
181  slist.clear();
182  std::array<char, 128> end_key;
183  if (std::snprintf(end_key.data(), end_key.size(), "</%s>", field) < 0)
184  throw std::runtime_error("Error extract end_key from field.");
185 
186  std::vector<std::string> vlist;
187  while (true)
188  {
189  if (getwords(vlist, fpos) == 0)
190  continue;
191  if (vlist[0] == terminate || vlist[0] == end_key.data())
192  break;
193  slist.insert(slist.end(), std::make_move_iterator(vlist.begin()), std::make_move_iterator(vlist.end()));
194  };
195  return slist.size();
196 }
197 
////////////////////////////////////////////////////////
// simple parser to get around XML parser problem
////////////////////////////////////////////////////////
/** Split a C string into words, treating XML punctuation as separators.
 *
 * Words are separated by any of the characters `=`, `,`, space, `<`, `>`,
 * double quote, tab and newline. Splitting stops at the first word that
 * begins with `#`. The scan keeps all of its state in local variables
 * (no strtok), so it is reentrant.
 *
 * @param inbuf NUL terminated text to split; nullptr yields no words
 * @param slist receives the words, previous content is discarded
 * @return number of words stored in slist
 */
unsigned parseXwords(const char* inbuf, std::vector<std::string>& slist)
{
  slist.clear();
  if (inbuf == nullptr)
    return 0;

  const std::string delimiters("=, <>\"\t\n");
  const std::string text(inbuf);

  std::string::size_type first = text.find_first_not_of(delimiters);
  while (first != std::string::npos && text[first] != '#')
  {
    const std::string::size_type last = text.find_first_of(delimiters, first);
    if (last == std::string::npos)
    {
      // Final word runs to the end of the text.
      slist.push_back(text.substr(first));
      break;
    }
    slist.push_back(text.substr(first, last - first));
    first = text.find_first_not_of(delimiters, last);
  }
  return static_cast<unsigned>(slist.size());
}
217 
218 int getXwords(std::vector<std::string>& slist, std::istream& fp)
219 {
220  const int max = 1024;
221  char s[max];
222  if (readLine(s, max, fp))
223  return parseXwords(s, slist);
224  else
225  return -1;
226 }
char * readLine(char *s, int max, std::istream &fp)
unsigned parsewords(const char *inbuf, std::vector< std::string > &slist, const std::string &extra_tokens)
int getXwords(std::vector< std::string > &slist, std::istream &fp)
void readXmol(std::istream &fxmol, double *data, int numvar)
int getwordsWithMergedNumbers(std::vector< std::string > &slist, std::istream &fp, int dummy, const std::string &extra_tokens)
int getwords(std::vector< std::string > &slist, std::istream &fp, std::string &aline)
unsigned parseXwords(const char *inbuf, std::vector< std::string > &slist)