QMCPACK
XmlRep.cpp
Go to the documentation of this file.
1 /////////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois / NCSA Open Source License.
3 // See LICENSE file in top directory for details .
4 //
5 // Copyright ( c ) 2018 QMCPACK developers
6 //
7 // File developed by : Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
8 //
9 // File created by : Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
10 /////////////////////////////////////////////////////////////////////////////////////////
11 
12 #include "XmlRep.h"
13 #include <ctype.h>
14 #include <stdio.h>
15 #include <string.h>
16 using namespace std;
17 
18 string XmlStream::getTagName(const string& tagstr, tagType type) const
19 {
20  string tagName;
21 
22  size_t spaceLoc = tagstr.find(" ");
23  size_t closeLoc = tagstr.find('>');
24  size_t slashLoc = string::npos;
25 
26  if (type == tagType::selfClosing)
27  {
28  slashLoc = tagstr.find("/");
29  }
30 
31  int endChar = tagstr.size();
32  if (spaceLoc != string::npos)
33  {
34  endChar = spaceLoc;
35  }
36  if (closeLoc < endChar && closeLoc != string::npos)
37  {
38  endChar = closeLoc;
39  }
40  if (slashLoc < endChar && slashLoc != string::npos)
41  {
42  endChar = slashLoc;
43  }
44 
45  if (type == tagType::closing)
46  {
47  endChar -= 2;
48  tagName = tagstr.substr(2, endChar);
49  }
50  else
51  {
52  endChar -= 1;
53  tagName = tagstr.substr(1, endChar);
54  }
55  return tagName;
56 }
57 
58 int XmlStream::startComment(long position, long length) const
59 {
60  int isCommentStart = 0;
61  std::streampos curLoc = stream_->tellg();
62  if ((length - position) > 4)
63  {
64  char buf[4];
65  stream_->read(buf, 4);
66  if (strncmp(buf, "<!--", 4) == 0)
67  {
68  isCommentStart = 1;
69  }
70  }
71  stream_->seekg(curLoc);
72  return isCommentStart;
73 }
74 
75 int XmlStream::endComment(long position, long length) const
76 {
77  int isCommentEnd = 0;
78  std::streampos curLoc = stream_->tellg();
79  if ((length - position) > 3)
80  {
81  char buf[3];
82  stream_->read(buf, 3);
83  if (strncmp(buf, "-->", 3) == 0)
84  {
85  isCommentEnd = 1;
86  }
87  }
88  stream_->seekg(curLoc);
89  return isCommentEnd;
90 }
91 
92 int XmlStream::checkForPOD(const XmlElement& before, const XmlElement& after) const
93 {
94  int foundPOD = 0;
95  std::streampos curLoc = stream_->tellg();
96 
97  stream_->seekg(before.endLoc);
98 
99  long length = after.startLoc - before.endLoc;
100  long position = 0;
101  int c;
102  int inComment = 0;
103  while (foundPOD == 0 && position < length)
104  {
105  c = stream_->get();
106  if (!isspace(c))
107  {
108  //if (!isspace(c) && c != '\n') {
109  stream_->unget();
110  // if we're in a comment, check that we are not ending it
111  if (inComment == 1)
112  {
113  if (endComment(position, length) == 1)
114  {
115  // position ourselves after the end comment tag
116  stream_->seekg(3, std::ios_base::cur);
117  position += 3;
118  inComment = 0;
119  }
120  }
121  else
122  {
123  // if we're not in a comment, check that we aren't starting one
124  if (endComment(position, length) == 1)
125  {
126  // position ourselves after the comment tag
127  stream_->seekg(4, std::ios_base::cur);
128  position += 4;
129  inComment = 1;
130  }
131  else
132  {
133  // we've found POD !!
134  foundPOD = 1;
135  }
136  }
137  }
138  position++;
139  }
140 
141  stream_->seekg(curLoc);
142  return foundPOD;
143 }
144 
145 
147 {
148  std::streampos start;
149  std::streampos end;
150  int twoprev = 0;
151  int prev = 0;
152  int current = 0;
153 
154  int isProcessingInstruction = 0;
155  int isComment = 0;
156  int isClosingTag = 0;
157  int isSelfClosing = 0;
158 
159  int openCaretFound = 0;
160  int closeCaretFound = 0;
161 
162  int numSingleQuotes = 0;
163  int numDoubleQuotes = 0;
164  while ((current = stream_->get()) && current != EOF && closeCaretFound == 0)
165  {
166  if (current == '<')
167  {
168  if (prev != '\\')
169  {
170  // we found a live start string
171  stream_->unget();
172  start = stream_->tellg();
173  stream_->get();
174  openCaretFound = 1;
175  }
176  int one = stream_->get();
177  int two = stream_->get();
178  int three = stream_->get();
179  if (one == '\?')
180  {
181  isProcessingInstruction = 1;
182  }
183  if (one == '!' && two == '-' && three == '-')
184  {
185  isComment = 1;
186  }
187  if (one == '/')
188  {
189  isClosingTag = 1;
190  }
191  stream_->unget();
192  stream_->unget();
193  stream_->unget();
194  }
195  if (openCaretFound == 1)
196  {
197  if (current == '\'')
198  {
199  numSingleQuotes++;
200  }
201  else if (current == '\"')
202  {
203  numDoubleQuotes++;
204  }
205  else if (current == '>')
206  {
207  // check that we aren't currently in a quoted section
208  if (numSingleQuotes % 2 == 0 && numDoubleQuotes % 2 == 0)
209  {
210  // check that this close caret isn't escaped
211  if (prev != '\\')
212  {
213  if (isComment == 1)
214  {
215  if (prev == '-' && twoprev == '-')
216  {
217  closeCaretFound = 1;
218  end = stream_->tellg();
219  }
220  }
221  else
222  {
223  closeCaretFound = 1;
224  end = stream_->tellg();
225  if (prev == '/')
226  {
227  isSelfClosing = 1;
228  }
229  }
230  }
231  }
232  }
233  }
234  twoprev = prev;
235  prev = current;
236  }
237 
238  if (current == EOF)
239  {
240  stream_->clear();
241  stream_->seekg(0);
242  return -1;
243  }
244 
245  if (isProcessingInstruction == 0 && isComment == 0)
246  {
247  XmlElement elem;
248  elem.startLoc = start;
249  elem.endLoc = end;
250 
251  if (isSelfClosing == 1)
252  {
253  elem.type = tagType::selfClosing;
254  }
255  else if (isClosingTag == 1)
256  {
257  elem.type = tagType::closing;
258  }
259  else
260  {
261  elem.type = tagType::opening;
262  stream_->unget();
263  }
264  elem.name = getTagName(getTag(elem), elem.type);
265  elements.push_back(elem);
266  return 1;
267  }
268  if (isProcessingInstruction == 1 || isComment == 1)
269  {
270  stream_->unget();
271  return 0;
272  }
273  return -1;
274 }
275 
276 string XmlStream::getStreamSection(const std::streampos& start, const std::streampos& end) const
277 {
278  // save current place in the stream
279  std::streampos curLoc = stream_->tellg();
280 
281  std::string result(end - start, '\0');
282  stream_->seekg(start);
283  stream_->read(result.data(), end - start);
284 
285  // go back to current place in the stream
286  stream_->seekg(curLoc);
287  return result;
288 }
289 
290 void XmlStream::findChildElements(int start, vector<int>& childIndices, int& podIndex) const
291 {
292  int index = start + 1;
293  int level = 1;
294 
295  while (index < elements.size() && level > 0)
296  {
297  const tagType tt = elements[index].type;
298  if (tt == tagType::opening)
299  {
300  if (level == 1)
301  {
302  childIndices.push_back(index);
303  }
304  level++;
305  }
306  else if (tt == tagType::closing)
307  {
308  level--;
309  }
310  else if (tt == tagType::selfClosing)
311  {
312  if (level == 1)
313  {
314  childIndices.push_back(index);
315  }
316  }
317  else if (tt == tagType::pod)
318  {
319  if (level == 1)
320  {
321  podIndex = index;
322  }
323  }
324  else
325  {
326  //cout << "There is an error, in findChildElements and the XmlElement at index: " << index << " did not have a recognized tag type" << endl;
327  exit(1);
328  }
329  index++;
330  }
331 }
332 
333 void XmlStream::listAll() const
334 {
335  for (int i = 0; i < elements.size(); i++)
336  {
337  std::cout << i << " ";
338  tagType tt = elements[i].type;
339  if (tt == tagType::opening)
340  {
341  std::cout << "opening, name = " << elements[i].name << std::endl;
342  }
343  else if (tt == tagType::closing)
344  {
345  std::cout << "closing, name = " << elements[i].name << std::endl;
346  }
347  else if (tt == tagType::selfClosing)
348  {
349  std::cout << "selfClosing, name = " << elements[i].name << std::endl;
350  }
351  else if (tt == tagType::pod)
352  {
353  std::cout << "POD" << std::endl;
354  }
355  }
356 }
357 
358 XmlStream::XmlStream(std::istream* is) : stream_(is)
359 {
360  // on first pass, try to find all of the tags and encode their names
361  while (addNextTag() != -1) {}
362 
363  //now go through and look at space between live tags and see if there is POD there
364  std::vector<XmlElement> tags;
365  elements.swap(tags);
366 
367  elements.push_back(tags[0]);
368  for (int i = 1; i < tags.size(); i++)
369  {
370  if (checkForPOD(tags[i - 1], tags[i]) == 1)
371  {
372  // add POD element
373  XmlElement podElem;
374  podElem.startLoc = tags[i - 1].endLoc;
375  podElem.endLoc = tags[i].startLoc;
376  podElem.type = tagType::pod;
377  elements.push_back(podElem);
378  }
379  elements.push_back(tags[i]);
380  }
381 }
382 
383 
384 string XmlStream::getTag(const XmlElement& e) const { return getStreamSection(e.startLoc, e.endLoc); }
385 
386 string XmlStream::getTag(int i) const
387 {
388  if (i >= elements.size())
389  {
390  cerr << "requested a tag index past the end of the vector" << endl;
391  exit(1);
392  }
393  return getTag(elements[i]);
394 }
395 
396 void XmlNode::readToString(std::string& s) const
397 {
398  if (valueDeferred_)
399  {
400  std::streampos curLoc = stream_->tellg();
401 
402  s.resize(podEnd_ - podStart_);
403  stream_->seekg(podStart_);
404  stream_->read(s.data(), podEnd_ - podStart_);
405 
406  // go back to current place in the stream
407  stream_->seekg(curLoc);
408 
409  // now strip out any xml comments
410  std::stringstream ss;
411  int commentStart = -1;
412  int commentEnd = 0;
413 
414  // if we find a comment, will put everything but the comments in ss
415  while (s.find("<!--", commentEnd) != string::npos)
416  {
417  commentStart = s.find("<!--", commentEnd);
418  ss << s.substr(commentEnd, commentStart - commentEnd);
419  commentEnd = s.find("-->", commentStart);
420  }
421  // this means we didn't find a comment, so the string s is OK to return
422  if (commentStart == -1)
423  {
424  return;
425  }
426  else
427  {
428  // we found comments and we're putting everything after the last comment in ss
429  ss << s.substr(commentEnd + 3, s.size() - commentEnd - 3);
430  }
431  s = ss.str();
432  }
433  else
434  {
435  s = value_;
436  }
437 }
438 
439 // note, will return only the first child index that matches!
440 int XmlNode::getChildIndex(const string& childName, int strict) const
441 {
442  int index = -1;
443  for (int i = 0; i < children_.size(); i++)
444  {
445  if (children_[i].getName() == childName)
446  {
447  index = i;
448  }
449  }
450  if (strict != 0 && index < 0)
451  {
452  cerr << "In XmlNode with name: " << name_ << ", could not find index for child with name: " << childName << endl;
453  exit(1);
454  }
455  return index;
456 }
457 
458 XmlNode& XmlNode::getChild(const string& name) { return getChild(getChildIndex(name, 1)); }
459 
460 const XmlNode& XmlNode::getChild(const string& name) const { return getChild(getChildIndex(name, 1)); }
461 
463 {
464  if (i < 0 || i >= children_.size())
465  {
466  cerr << "Asked to get child node: " << i << ", but there are only " << getNumChildren() << "nodes" << endl;
467  exit(1);
468  }
469  return children_[i];
470 }
471 
472 const XmlNode& XmlNode::getChild(int i) const
473 {
474  if (i < 0 || i >= children_.size())
475  {
476  cerr << "Asked to get child node: " << i << ", but there are only " << getNumChildren() << "nodes" << endl;
477  exit(1);
478  }
479  return children_[i];
480 }
481 
482 int XmlNode::getAttributeIndex(const string& attrName, int strict) const
483 {
484  int index = -1;
485  for (int i = 0; i < attributes_.size(); i++)
486  {
487  if (attributes_[i].first == attrName)
488  {
489  index = i;
490  }
491  }
492  if (strict != 0 && index < 0)
493  {
494  cerr << "In XmlNode with name: " << name_ << ", could not find index for attribute with name: " << attrName << endl;
495  exit(1);
496  }
497  return index;
498 }
499 
500 string XmlNode::getAttributeName(int index) const { return attributes_[index].first; }
501 
502 string XmlNode::getAttribute(int index) const
503 {
504  if (index < 0 || index >= attributes_.size())
505  {
506  cerr << "in XmlNode with name: " << name_ << ", requested attribute with index " << index
507  << ", but this index is not present." << endl;
508  exit(1);
509  }
510  return attributes_[index].second;
511 }
512 
513 string XmlNode::getAttribute(const string& name) const { return attributes_[getAttributeIndex(name, 1)].second; }
514 
515 string XmlNode::getAttribute(const char* name) const
516 {
517  string sname(name);
518  return getAttribute(sname);
519 }
520 
521 std::string XmlNode::getValue() const
522 {
523  if (valueDeferred_)
524  {
525  std::string val;
526  readToString(val);
527  return val;
528  }
529  return value_;
530 }
531 
533 {
534  if (valueDeferred_)
535  {
536  return (podEnd_ - podStart_);
537  }
538  return value_.size();
539 }
540 
542 {
543  children_.push_back(nd);
544  return children_.back();
545 }
546 
548 {
549  XmlNode nd;
550  children_.push_back(nd);
551  return children_.back();
552 }
553 
554 // removes whitespace at the beginning and end of a string
555 string XmlNode::trimWhitespace(const string& str) const
556 {
557  size_t first = str.find_first_not_of(" \n\t\v\f\r");
558  if (string::npos == first)
559  {
560  return string("");
561  }
562  size_t last = str.find_last_not_of(" \n\t\v\f\r");
563  return str.substr(first, (last - first + 1));
564 }
565 
566 void XmlNode::handleTagString(const XmlStream& xs, int loc)
567 {
568  const XmlElement& elem = xs.elements[loc];
569  name_ = elem.name;
570  string tagstr = xs.getTag(elem);
571  if (elem.type == tagType::selfClosing)
572  {
573  isSelfClosing_ = true;
574  }
575  else
576  {
577  isSelfClosing_ = false;
578  }
579 
580  // take everything after the name
581  string decliningstring = tagstr.substr(tagstr.find(name_) + name_.size() + 1);
582 
583  // remove trailing > or /> if appropriate and trim whitespace from left and right ends
584  int numToRemove = 1;
585  if (isSelfClosing_)
586  {
587  numToRemove++;
588  }
589  decliningstring = decliningstring.substr(0, decliningstring.size() - numToRemove);
590  decliningstring = trimWhitespace(decliningstring);
591 
592  while (decliningstring.size() > 1)
593  {
594  attrpair att;
595  getNextKeyVal(decliningstring, att.first, att.second);
596  attributes_.push_back(att);
597  }
598 }
599 
600 void XmlNode::getNextKeyVal(string& contentstr, string& key, string& val) const
601 {
602  size_t breakone = getPosNextLiveChar(contentstr, '=');
603 
604  key = contentstr.substr(0, breakone);
605  key = trimWhitespace(key);
606  //cout << "in getNextKeyVal, key = \'" << key << "\'" << endl;
607  contentstr = contentstr.substr(breakone + 1);
608 
609  size_t firstquote = getPosNextLiveChar(contentstr, '\"');
610  if (firstquote == string::npos)
611  {
612  firstquote = getPosNextLiveChar(contentstr, '\'');
613  }
614  contentstr = contentstr.substr(firstquote + 1);
615  size_t secondquote = getPosNextLiveChar(contentstr, '\"');
616  if (secondquote == string::npos)
617  {
618  secondquote = getPosNextLiveChar(contentstr, '\'');
619  }
620  val = contentstr.substr(0, secondquote);
621  val = trimWhitespace(val);
622 
623  contentstr = contentstr.substr(secondquote + 1);
624 }
625 
626 size_t XmlNode::getPosNextLiveChar(const std::string& str, char c) const
627 {
628  size_t index = string::npos;
629  if (str[0] == c)
630  {
631  index = 0;
632  }
633  else
634  {
635  for (int i = 1; i < str.size(); i++)
636  {
637  if (str[i] == c && str[i - 1] != '\\')
638  {
639  index = i;
640  break;
641  }
642  }
643  }
644  return index;
645 }
646 
647 std::string XmlNode::getInStr(int is) const
648 {
649  std::stringstream ss;
650  for (int i = 0; i < is; i++)
651  {
652  ss << " ";
653  }
654  return ss.str();
655 }
656 
657 void XmlNode::write(ostream& os, int indentLevel) const
658 {
659  string str = getString(indentLevel);
660  os << str;
661 }
662 
663 string XmlNode::getString(int indentLevel) const
664 {
665  stringstream ss;
666  ss << getInStr(indentLevel);
667  ss << "<" << name_;
668 
669  for (int i = 0; i < attributes_.size(); i++)
670  {
671  ss << " " << attributes_[i].first << "=\"" << attributes_[i].second << "\"";
672  }
673  if (isSelfClosing_ == 1)
674  {
675  ss << "/>" << endl;
676  return ss.str();
677  }
678  else
679  {
680  ss << ">";
681  }
682  if (valInline_)
683  {
684  ss << getValue() << "</" << name_ << ">" << endl;
685  }
686  else
687  {
688  ss << endl;
689  for (int i = 0; i < children_.size(); i++)
690  {
691  ss << children_[i].getString(indentLevel + 2);
692  }
693 
694  if (getValueSize() > 0)
695  ss << getInStr(indentLevel + 2) << getValue() << endl;
696  ss << getInStr(indentLevel) << "</" << name_ << ">" << endl;
697  }
698  return ss.str();
699 }
700 
701 XmlNode::XmlNode(const XmlNode& c) : stream_(c.stream_)
702 {
704  name_ = c.name_;
707  value_ = c.value_;
709  podStart_ = c.podStart_;
710  podEnd_ = c.podEnd_;
711  children_ = c.children_;
712 }
713 
714 XmlNode::XmlNode(istream* _stream, int start, bool deferValue) : stream_(_stream)
715 {
716  XmlStream xl(_stream);
717  //cout << "Finished creating stream object" << endl;
718  //xl.listAll();
719  createFromStream(xl, start, deferValue);
720 }
721 
722 XmlNode::XmlNode(const XmlStream& xstream, std::istream* const _stream, int start, bool deferValue) : stream_(_stream)
723 {
724  createFromStream(xstream, start, deferValue);
725 }
726 
727 void XmlNode::createFromStream(const XmlStream& xstream, int start, bool deferValue)
728 {
729  valueDeferred_ = deferValue;
730  valInline_ = false;
731 
732  // this will populate the name and attributes
733  handleTagString(xstream, start);
734  tagType tt = xstream.elements[start].type;
735  if (tt == tagType::selfClosing)
736  {
737  // if self closing, then there is not POD and we are at the end
738  isSelfClosing_ = true;
739  }
740  else
741  {
742  // otherwise need to look for POD and subtags
743  isSelfClosing_ = false;
744 
745  vector<int> childIndices;
746  int podIndex = -1;
747  xstream.findChildElements(start, childIndices, podIndex);
748 
749  // if no children, try to put and POD inline
750  if (childIndices.size() == 0)
751  {
752  valInline_ = true;
753  }
754  else
755  {
756  valInline_ = false;
757  }
758 
759  // deal with POD if it exists
760  if (podIndex > 0)
761  {
762  podStart_ = xstream.elements[podIndex].startLoc;
763  podEnd_ = xstream.elements[podIndex].endLoc;
764  if (valueDeferred_ == false)
765  {
766  valueDeferred_ = true;
768  valueDeferred_ = false;
769  }
770  }
771 
772  // now sequentially create XmlNodes from subelements and add them to children vector
773  for (int i = 0; i < childIndices.size(); i++)
774  {
775  XmlNode child(xstream, this->stream_, childIndices[i], valueDeferred_);
776  children_.push_back(child);
777  }
778  }
779 }
XmlNode & addChild()
Definition: XmlRep.cpp:547
void getNextKeyVal(std::string &contentstr, std::string &key, std::string &val) const
Definition: XmlRep.cpp:600
void handleTagString(const XmlStream &xs, int loc)
Definition: XmlRep.cpp:566
int startComment(long position, long length) const
Definition: XmlRep.cpp:58
Definition: XmlRep.h:81
int checkForPOD(const XmlElement &before, const XmlElement &after) const
Definition: XmlRep.cpp:92
std::vector< attrpair > attributes_
Definition: XmlRep.h:90
std::vector< XmlElement > elements
Definition: XmlRep.h:75
int getChildIndex(const std::string &name, int strict=0) const
Definition: XmlRep.cpp:440
size_t getPosNextLiveChar(const std::string &str, char c) const
Definition: XmlRep.cpp:626
std::string trimWhitespace(const std::string &str) const
Definition: XmlRep.cpp:555
std::string getName() const
Definition: XmlRep.h:114
void listAll() const
Definition: XmlRep.cpp:333
std::string getValue() const
Definition: XmlRep.cpp:521
std::streampos podStart_
Definition: XmlRep.h:95
int addNextTag()
Definition: XmlRep.cpp:146
int getValueSize() const
Definition: XmlRep.cpp:532
bool isSelfClosing_
Definition: XmlRep.h:88
std::istream * stream_
Definition: XmlRep.h:98
XmlNode()
Definition: XmlRep.h:164
std::string getInStr(int is) const
Definition: XmlRep.cpp:647
std::pair< std::string, std::string > attrpair
Definition: XmlRep.h:84
std::string name
Definition: XmlRep.h:39
tagType
Definition: XmlRep.h:24
std::string value_
Definition: XmlRep.h:92
std::streampos endLoc
Definition: XmlRep.h:38
std::streampos startLoc
Definition: XmlRep.h:37
tagType type
Definition: XmlRep.h:40
std::vector< XmlNode > children_
Definition: XmlRep.h:97
void write(std::ostream &os, int indentLevel=0) const
Definition: XmlRep.cpp:657
void createFromStream(const XmlStream &stream, int start=0, bool deferValue=true)
Definition: XmlRep.cpp:727
bool valInline_
Definition: XmlRep.h:91
void findChildElements(int start, std::vector< int > &childIndices, int &podIndex) const
Definition: XmlRep.cpp:290
std::string name_
Definition: XmlRep.h:89
XmlNode & getChild(const std::string &name)
Definition: XmlRep.cpp:458
std::string getTagName(const std::string &tag, tagType type) const
Definition: XmlRep.cpp:18
std::string getStreamSection(const std::streampos &start, const std::streampos &end) const
Definition: XmlRep.cpp:276
int endComment(long position, long length) const
Definition: XmlRep.cpp:75
std::string getTag(const XmlElement &e) const
Definition: XmlRep.cpp:384
std::string getString(int indentLevel=0) const
Definition: XmlRep.cpp:663
int getNumChildren() const
Definition: XmlRep.h:150
int getAttributeIndex(const std::string &name, int strict=0) const
Definition: XmlRep.cpp:482
std::string getAttributeName(int index) const
Definition: XmlRep.cpp:500
std::streampos podEnd_
Definition: XmlRep.h:96
void getAttribute(const std::string &name, T &result) const
Definition: XmlRep.h:191
void readToString(std::string &s) const
Definition: XmlRep.cpp:396
bool valueDeferred_
Definition: XmlRep.h:93
XmlStream(std::istream *is)
Definition: XmlRep.cpp:358