QMCPACK
hdf_archive.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
8 // Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
9 //
10 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
14 #ifndef QMCPLUSPLUS_HDF5_ARCHIVE_H
15 #define QMCPLUSPLUS_HDF5_ARCHIVE_H
16 
17 #include <config.h>
18 #include "hdf_datatype.h"
19 #include "hdf_dataspace.h"
20 #include "hdf_dataproxy.h"
21 #include "hdf_error_suppression.h"
22 #include "hdf_path.h"
23 #include "hdf_pete.h"
24 #include "hdf_stl.h"
25 #include "hdf_hyperslab.h"
26 
27 #include <bitset>
28 #include <filesystem>
29 #include <stack>
30 
31 #ifdef HAVE_MPI
32 namespace boost
33 {
34 namespace mpi3
35 {
36 class communicator;
37 }
38 } // namespace boost
39 #endif
40 
41 class Communicate;
42 
43 namespace qmcplusplus
44 {
45 
46 /// Suppress HDF5 warning and error messages.
47 extern hdf_error_suppression hide_hdf_errors;
48 
49 /** class to handle hdf file
50  */
52 {
53 private:
/// Bit positions of the I/O mode flags stored in Mode.
/// NOTE(review): the enumerator lines were absent from this listing; the names and their
/// order are restored from the Mode documentation and from their uses in this class.
/// Confirm against the upstream header.
enum
{
  IS_PARALLEL = 0, ///< Mode[IS_PARALLEL] : true, if parallel
  IS_MASTER,       ///< Mode[IS_MASTER] : true, if the node is master
  NOIO             ///< Mode[NOIO] : true, if I/O is not performed
};
60  static const hid_t is_closed = -1;
61  /** bitset of the io mode
62  * Mode[IS_PARALLEL] : true, if parallel
63  * Mode[IS_MASTER] : true, if the node is master
64  * Mode[NOIO] : true, if I/O is not performed
65  */
66  std::bitset<4> Mode;
67  ///file id
68  hid_t file_id;
69  ///access id
70  hid_t access_id;
71  ///transfer property
72  hid_t xfer_plist;
73  /// Link creation property list identifier
74  hid_t lcpl_id;
75  ///FILO to handle H5Group
76  std::stack<hid_t> group_id;
77  ///Track group names corresponding to group_id
78  std::vector<std::string> group_names;
79 
80  /** Name of file that hdf_archive thinks is open.
81  * This may not correspond to the actual file because the open call failed,
82  * or the file was closed. This information is useful for debugging.
83  */
84  std::string possible_filename_;
85 
86  ///set the access property
87  void set_access_plist(Communicate* comm, bool request_pio);
88 #ifdef HAVE_MPI
89  void set_access_plist(boost::mpi3::communicator& comm, bool request_pio);
90 #endif
91  void set_access_plist();
92 
93 public:
94  /** constructor
95  * @param c communicator
96  * @param request_pio turns on parallel I/O,
97  * if true and PHDF5 is available, hdf_archive is in parallel collective IO mode
98  * if true and PHDF5 is not available, hdf_archive is in master-only IO mode
99  * if false, hdf_archive is in independent IO mode
100  */
101  template<class Comm = Communicate*>
102  hdf_archive(Comm c, bool request_pio = false)
103  : file_id(is_closed), access_id(H5P_DEFAULT), xfer_plist(H5P_DEFAULT), lcpl_id(H5P_DEFAULT)
104  {
106  throw std::runtime_error("HDF5 library warnings and errors not suppressed from output.\n");
107  set_access_plist(c, request_pio);
108  }
109 
110  hdf_archive() : file_id(is_closed), access_id(H5P_DEFAULT), xfer_plist(H5P_DEFAULT), lcpl_id(H5P_DEFAULT)
111  {
113  throw std::runtime_error("HDF5 library warnings and errors not suppressed from output.\n");
115  }
116  ///destructor
117  ~hdf_archive();
118 
119  ///return true if parallel i/o
120  inline bool is_parallel() const { return Mode[IS_PARALLEL]; }
121 
122  ///return true if master in parallel i/o
123  inline bool is_master() const { return Mode[IS_MASTER]; }
124 
125  ///return file_id. should be only be used for connecting to old codes when porting
126  hid_t getFileID() const { return file_id; }
127 
128  /** create a file
129  * @param fname name of hdf5 file
130  * @param flags i/o mode
131  * @return true, if creation is successful
132  */
133  bool create(const std::filesystem::path& fname, unsigned flags = H5F_ACC_TRUNC);
134 
135  /** open a file
136  * @param fname name of hdf5 file
137  * @param flags i/o mode
138  * @return true, if open is successful
139  */
140  bool open(const std::filesystem::path& fname, unsigned flags = H5F_ACC_RDWR);
141 
142  ///close all the open groups and file
143  void close();
144 
145  ///flush a file
146  inline void flush()
147  {
148  if (file_id != is_closed)
149  H5Fflush(file_id, H5F_SCOPE_LOCAL);
150  }
151 
152  ///return true if the file is closed
153  inline bool closed() { return file_id == is_closed; }
154 
155  /** check if aname is a group
156  * @param aname group's name
157  * @return true, if aname exists and it is a group
158  */
159  bool is_group(const std::string& aname);
160 
161  /** check if aname is a dataset
162  * @param aname dataset's name
163  * @return true, if aname exists and it is a dataset
164  */
165  bool is_dataset(const std::string& aname)
166  {
167  if (Mode[NOIO])
168  return true;
169  hid_t p = group_id.empty() ? file_id : group_id.top();
170  int dummy_data;
171  h5data_proxy<int> e(dummy_data);
172  return e.check_existence(p, aname);
173  }
174 
175  /** check if aname is a dataset of type T
176  * @param aname group's name
177  * @return true, if aname is a dataset of type T
178  */
179  template<typename T>
180  bool is_dataset_of_type(const std::string& aname)
181  {
182  if (Mode[NOIO])
183  return true;
184  hid_t p = group_id.empty() ? file_id : group_id.top();
185  T dummy_data;
186  h5data_proxy<T> e(dummy_data);
187  return e.check_type(p, aname);
188  }
189 
190  /** return the top of the group stack
191  */
192  inline hid_t top() const { return group_id.empty() ? file_id : group_id.top(); }
193 
194  /** check if any groups are open
195  * group stack will have entries if so
196  * @return true if any groups are open
197  */
198  inline bool open_groups() { return group_id.empty(); }
199 
200  /** push a group to the group stack
201  * @param gname name of the group
202  * @param createit if true, group is created when missing
203  */
204  void push(const std::string& gname, bool createit = true);
205  void push(const hdf_path& gname, bool createit = true);
206 
207 
208  inline void pop()
209  {
210  if (file_id == is_closed || group_id.empty())
211  return;
212  hid_t g = group_id.top();
213  group_id.pop();
214  group_names.pop_back();
215  herr_t err = H5Gclose(g);
216  if (err < 0)
217  throw std::runtime_error("H5Gclose failed with error.");
218  }
219 
220  /** Return a string representation of the current group stack
221  */
222  std::string group_path_as_string() const;
223 
224  /** read the shape of multidimensional filespace from the group aname
225  * this function can be used to query dataset for preparing containers.
226  * The dimensions contributed by T is excluded.
227  * See how exactly user dimensions are calculated in getDataShape function definition.
228  * @return true if successful
229  */
230  template<typename T>
231  bool getShape(const std::string& aname, std::vector<int>& sizes_out)
232  {
233  if (Mode[NOIO])
234  return true;
235  hid_t p = group_id.empty() ? file_id : group_id.top();
236  return getDataShape<T>(p, aname, sizes_out);
237  }
238 
239  /** write the data to the group aname and return status
240  * use write() for inbuilt error checking
241  * @return true if successful
242  */
243  template<typename T>
244  bool writeEntry(T& data, const std::string& aname)
245  {
246  if (Mode[NOIO])
247  return true;
248  if (!(Mode[IS_PARALLEL] || Mode[IS_MASTER]))
249  throw std::runtime_error("Only write data in parallel or by master but not every rank!");
250  hid_t p = group_id.empty() ? file_id : group_id.top();
252  return e.write(data, p, aname, xfer_plist);
253  }
254 
/** write the data to the group aname and check status
 *  Thin checked wrapper around writeEntry().
 *  @throws std::runtime_error if writeEntry() reports failure
 */
template<typename T>
void write(T& data, const std::string& aname)
{
  const bool written = writeEntry(data, aname);
  if (written)
    return;
  throw std::runtime_error("HDF5 write failure in hdf_archive::write " + aname);
}
266 
267  /** write the container data with a specific shape and check status
268  * @param data container, linear storage required.
269  * @param shape shape on the hdf file
270  * @param aname dataset name in the file
271  * runtime error is issued on I/O error
272  */
273  template<typename T, typename IT, std::size_t RANK>
274  void writeSlabReshaped(T& data, const std::array<IT, RANK>& shape, const std::string& aname)
275  {
276  std::array<hsize_t, RANK> globals, counts, offsets;
277  for (int dim = 0; dim < RANK; dim++)
278  {
279  globals[dim] = static_cast<hsize_t>(shape[dim]);
280  counts[dim] = static_cast<hsize_t>(shape[dim]);
281  offsets[dim] = 0;
282  }
283 
284  hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
285  write(pxy, aname);
286  }
287 
288  /** read the data from the group aname and return status
289  * use read() for inbuilt error checking
290  * @return true if successful
291  */
292  template<typename T, typename = std::enable_if_t<!std::is_const<T>::value>>
293  bool readEntry(T& data, const std::string& aname)
294  {
295  if (Mode[NOIO])
296  return true;
297  hid_t p = group_id.empty() ? file_id : group_id.top();
298  h5data_proxy<T> e(data);
299  return e.read(data, p, aname, xfer_plist);
300  }
301 
/** read the data from the group aname and check status
 *  Thin checked wrapper around readEntry().
 *  @throws std::runtime_error if readEntry() reports failure
 */
template<typename T, typename = std::enable_if_t<!std::is_const<T>::value>>
void read(T& data, const std::string& aname)
{
  const bool loaded = readEntry(data, aname);
  if (loaded)
    return;
  throw std::runtime_error("HDF5 read failure in hdf_archive::read " + aname);
}
313 
314  /** read file dataset with a specific shape into a container and check status
315  * @param data container, linear storage required.
316  * @param shape shape on the hdf file
317  * @param aname dataset name in the file
318  * runtime error is issued on I/O error
319  */
320  template<typename T, typename IT, std::size_t RANK, typename = std::enable_if_t<!std::is_const<T>::value>>
321  void readSlabReshaped(T& data, const std::array<IT, RANK>& shape, const std::string& aname)
322  {
323  std::array<hsize_t, RANK> globals, counts, offsets;
324  for (int dim = 0; dim < RANK; dim++)
325  {
326  globals[dim] = static_cast<hsize_t>(shape[dim]);
327  counts[dim] = static_cast<hsize_t>(shape[dim]);
328  offsets[dim] = 0;
329  }
330 
331  hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
332  read(pxy, aname);
333  }
334 
335  /** read a portion of the data from the group aname and check status
336  * runtime error is issued on I/O error
337  *
338  * note the readSpec array must have dimensionality corresponding to the dataset,
339  * values for a dimension must be [0,num_entries-1] for that dimension to specify
340  * which value to hold and a -1 to grab all elements from that dimension
341  * for example, if the dataset was [5,2,6] and the vector contained (2,1,-1),
342  * this would grab 6 elements corresponding to [2,1,:]
343  */
344  template<typename T, typename IT, std::size_t RANK, typename = std::enable_if_t<!std::is_const<T>::value>>
345  void readSlabSelection(T& data, const std::array<IT, RANK>& readSpec, const std::string& aname)
346  {
347  std::array<hsize_t, RANK> globals, counts, offsets;
348  for (int dim = 0; dim < RANK; dim++)
349  {
350  globals[dim] = 0;
351  if (readSpec[dim] < 0)
352  {
353  counts[dim] = 0;
354  offsets[dim] = 0;
355  }
356  else
357  {
358  counts[dim] = 1;
359  offsets[dim] = static_cast<hsize_t>(readSpec[dim]);
360  }
361  }
362 
363  hyperslab_proxy<T, RANK> pxy(data, globals, counts, offsets);
364  read(pxy, aname);
365  }
366 
367  inline void unlink(const std::string& aname)
368  {
369  if (Mode[NOIO])
370  return;
371  hid_t p = group_id.empty() ? file_id : group_id.top();
372  herr_t status = H5Ldelete(p, aname.c_str(), H5P_DEFAULT);
373  }
374 };
375 
376 } // namespace qmcplusplus
377 #endif
define h5_space_type to handle basic datatype for hdf5
bool closed()
return true if the file is closed
Definition: hdf_archive.h:153
void write(T &data, const std::string &aname)
write the data to the group aname and check status runtime error is issued on I/O error ...
Definition: hdf_archive.h:259
bool open(const std::filesystem::path &fname, unsigned flags=H5F_ACC_RDWR)
open a file
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
hid_t top() const
return the top of the group stack
Definition: hdf_archive.h:192
hid_t getFileID() const
return file_id. Should only be used for connecting to old codes when porting
Definition: hdf_archive.h:126
bool open_groups()
check if any groups are open group stack will have entries if so
Definition: hdf_archive.h:198
hid_t lcpl_id
Link creation property list identifier.
Definition: hdf_archive.h:74
class to use file space hyperslab with a serialized container
Definition: hdf_hyperslab.h:35
void unlink(const std::string &aname)
Definition: hdf_archive.h:367
void close()
close all the open groups and file
Definition: hdf_archive.cpp:38
bool is_master() const
return true if master in parallel i/o
Definition: hdf_archive.h:123
std::stack< hid_t > group_id
FILO to handle H5Group.
Definition: hdf_archive.h:76
std::string possible_filename_
Name of file that hdf_archive thinks is open.
Definition: hdf_archive.h:84
class to handle hdf file
Definition: hdf_archive.h:51
void readSlabReshaped(T &data, const std::array< IT, RANK > &shape, const std::string &aname)
read file dataset with a specific shape into a container and check status
Definition: hdf_archive.h:321
std::bitset< 4 > Mode
bitset of the io mode Mode[IS_PARALLEL] : true, if parallel Mode[IS_MASTER] : true, if the node is master Mode[NOIO] : true, if I/O is not performed
Definition: hdf_archive.h:66
bool is_dataset_of_type(const std::string &aname)
check if aname is a dataset of type T
Definition: hdf_archive.h:180
bool is_parallel() const
return true if parallel i/o
Definition: hdf_archive.h:120
Wrapping information on parallelism.
Definition: Communicate.h:68
void readSlabSelection(T &data, const std::array< IT, RANK > &readSpec, const std::string &aname)
read a portion of the data from the group aname and check status runtime error is issued on I/O error...
Definition: hdf_archive.h:345
hdf_archive(Comm c, bool request_pio=false)
constructor
Definition: hdf_archive.h:102
bool getShape(const std::string &aname, std::vector< int > &sizes_out)
read the shape of multidimensional filespace from the group aname this function can be used to query ...
Definition: hdf_archive.h:231
void writeSlabReshaped(T &data, const std::array< IT, RANK > &shape, const std::string &aname)
write the container data with a specific shape and check status
Definition: hdf_archive.h:274
hdf_error_suppression hide_hdf_errors
Suppress HDF5 warning and error messages.
Definition: hdf_archive.cpp:23
bool is_dataset(const std::string &aname)
check if aname is a dataset
Definition: hdf_archive.h:165
static const hid_t is_closed
Definition: hdf_archive.h:60
hid_t access_id
access id
Definition: hdf_archive.h:70
hid_t file_id
file id
Definition: hdf_archive.h:68
void push(const std::string &gname, bool createit=true)
push a group to the group stack
bool create(const std::filesystem::path &fname, unsigned flags=H5F_ACC_TRUNC)
create a file
std::string group_path_as_string() const
Return a string representation of the current group stack.
hid_t xfer_plist
transfer property
Definition: hdf_archive.h:72
bool is_group(const std::string &aname)
check if aname is a group
void read(T &data, const std::string &aname)
read the data from the group aname and check status runtime error is issued on I/O error ...
Definition: hdf_archive.h:306
bool readEntry(T &data, const std::string &aname)
read the data from the group aname and return status use read() for inbuilt error checking ...
Definition: hdf_archive.h:293
static bool enabled
status of hdf_error_suppression. An instance of this class changes enabled to true.
std::vector< std::string > group_names
Track group names corresponding to group_id.
Definition: hdf_archive.h:78
bool writeEntry(T &data, const std::string &aname)
write the data to the group aname and return status use write() for inbuilt error checking ...
Definition: hdf_archive.h:244
generic h5data_proxy<T> for scalar basic datatypes defined in hdf_dataspace.h Note if the dataset to ...
Definition: hdf_dataproxy.h:29
void flush()
flush a file
Definition: hdf_archive.h:146