QMCPACK
hdf_wrapper_functions.h
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2019 QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
8 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 // Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
10 // Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
11 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
12 //
13 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
14 //////////////////////////////////////////////////////////////////////////////////////
15 
16 
17 #ifndef QMCPLUSPLUS_HDF_WRAPPER_FUNCTIONS_H
18 #define QMCPLUSPLUS_HDF_WRAPPER_FUNCTIONS_H
19 
20 /**@file hdf_wrapper_functions.h
21  * @brief free template functions wrapping HDF5 calls.
22  */
23 
24 #include <vector>
25 #include "hdf_datatype.h"
26 #include "hdf_dataspace.h"
27 
28 namespace qmcplusplus
29 {
30 /** free template function to read the (user) dimension and shape of the dataset.
31  * The dimensions contributed by T is excluded.
32  * @tparam T data type supported by h5_space_type
33  * @param grp group id
34  * @param aname name of the dataspace
35  * @param sizes_out sizes of each direction. For a scalar, sizes_out.size() == 0
36  * @return true if sizes_out is extracted successfully
37  *
38  * For example, if the data on the file is Matrix<TinyVector<std::complex<double>, 3>>
39  * The dataset on the file has a rank of 2 (matrix) + 1 (TinyVector) + 1 (std::complex) + 0 (double) = 4
40  * getDataShape<TinyVector<std::complex<double>, 3>> only returns the first 2 dimension
41  * getDataShape<std::complex<double>> only returns the first 3 dimension
42  * getDataShape<double> returns all the 4 dimension
43  */
44 template<typename T, typename IT>
45 inline bool getDataShape(hid_t grp, const std::string& aname, std::vector<IT>& sizes_out)
46 {
47  using TSpaceType = h5_space_type<T, 0>;
48  TSpaceType TSpace;
49 
50  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
51  hid_t dataspace = H5Dget_space(h1);
52  int rank = H5Sget_simple_extent_ndims(dataspace);
53 
54  bool success = false;
55  if (h1 >= 0 && dataspace >= 0 && rank >= 0)
56  {
57  // check if the rank is sufficient to hold the data type
58  if (rank < TSpaceType::rank)
59  throw std::runtime_error(aname + " dataset is too small for the requested data type");
60  else
61  {
62  std::vector<hsize_t> sizes_in(rank);
63  int status_n = H5Sget_simple_extent_dims(dataspace, sizes_in.data(), NULL);
64 
65  // check if the lowest dimensions match the data type
66  int user_rank = rank - TSpaceType::added_rank();
67  bool size_match = true;
68  for (int dim = user_rank, dim_type = 0; dim < rank; dim++, dim_type++)
69  if (sizes_in[dim] != TSpace.dims[dim_type])
70  size_match = false;
71  if (!size_match)
72  throw std::runtime_error("The lower dimensions (container element type) of " + aname +
73  " dataset do not match the requested data type");
74  else
75  {
76  // save the sizes of each directions excluding dimensions contributed by the data type
77  sizes_out.resize(user_rank);
78  for (int dim = 0; dim < user_rank; dim++)
79  sizes_out[dim] = static_cast<IT>(sizes_in[dim]);
80  success = true;
81  }
82  }
83  }
84 
85  H5Sclose(dataspace);
86  H5Dclose(h1);
87  return success;
88 }
89 
90 /** free function to check dimension
91  * @param grp group id
92  * @param aname name of the dataspace
93  * @param rank rank of the multi-dimensional array
94  * @param dims[rank] size for each direction, return the actual size on file
95  * @return true if the dims is the same as the dataspace
96  */
97 template<typename T>
98 inline bool checkShapeConsistency(hid_t grp, const std::string& aname, int rank, hsize_t* dims)
99 {
100  using TSpaceType = h5_space_type<T, 0>;
101 
102  std::vector<hsize_t> dims_in;
103  if (getDataShape<T>(grp, aname, dims_in))
104  {
105  const int user_rank = rank - TSpaceType::added_rank();
106  if (dims_in.size() != user_rank)
107  throw std::runtime_error(aname + " dataspace rank does not match\n");
108 
109  bool is_same = true;
110  for (int i = 0; i < user_rank; ++i)
111  {
112  is_same &= (dims_in[i] == dims[i]);
113  dims[i] = dims_in[i];
114  }
115  return is_same;
116  }
117  else
118  return false;
119 }
120 
121 /** return true, if successful */
122 template<typename T>
123 inline bool h5d_read(hid_t grp, const std::string& aname, T* first, hid_t xfer_plist)
124 {
125  if (grp < 0)
126  return true;
127  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
128  if (h1 < 0)
129  return false;
130  hid_t h5d_type_id = get_h5_datatype(*first);
131  herr_t ret = H5Dread(h1, h5d_type_id, H5S_ALL, H5S_ALL, xfer_plist, first);
132  H5Dclose(h1);
133  return ret != -1;
134 }
135 
136 inline bool h5d_check_existence(hid_t grp, const std::string& aname)
137 {
138  if (grp < 0)
139  return true;
140  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
141  if (h1 < 0)
142  {
143  H5Dclose(h1);
144  return false;
145  }
146  H5Dclose(h1);
147  return true;
148 }
149 
150 template<typename T>
151 inline bool h5d_check_type(hid_t grp, const std::string& aname)
152 {
153  if (grp < 0)
154  return true;
155  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
156  T temp(0);
157  hid_t h5d_type_id = get_h5_datatype(temp);
158  hid_t datatype = H5Dget_type(h1);
159  if (datatype == H5I_INVALID_HID)
160  throw std::runtime_error(aname + " dataset either does not exist or there was an error determining its type.");
161  htri_t equality_check = H5Tequal(datatype, h5d_type_id);
162  H5Dclose(h1);
163  switch (equality_check)
164  {
165  case 1:
166  return true;
167  case 0:
168  return false;
169  default:
170  throw std::runtime_error("Type comparison attempted with an invalid type or nonexistent dataset " + aname);
171  }
172 }
173 
174 template<typename T>
175 inline bool h5d_write(hid_t grp,
176  const std::string& aname,
177  hsize_t ndims,
178  const hsize_t* dims,
179  const T* first,
180  hid_t xfer_plist)
181 {
182  if (grp < 0)
183  return true;
184  hid_t h5d_type_id = get_h5_datatype(*first);
185  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
186  herr_t ret = -1;
187  if (h1 < 0) //missing create one
188  {
189  hid_t dataspace = H5Screate_simple(ndims, dims, NULL);
190  hid_t dataset = H5Dcreate(grp, aname.c_str(), h5d_type_id, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
191  ret = H5Dwrite(dataset, h5d_type_id, H5S_ALL, H5S_ALL, xfer_plist, first);
192  H5Sclose(dataspace);
193  H5Dclose(dataset);
194  }
195  else
196  {
197  ret = H5Dwrite(h1, h5d_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, first);
198  }
199  H5Dclose(h1);
200  return ret != -1;
201 }
202 
203 // MAM: Make new h5d_read/write overloads that take more parameters which allow you to
204 // use a hyperslab on the memory space too. Then use it through a template specialization of
205 // hyperslap for multi::array that allows you to define the memory space hyperslab using
206 // shape and strides.
207 
208 /** return true, if successful */
209 template<typename T>
210 bool h5d_read(hid_t grp,
211  const std::string& aname,
212  hsize_t ndims,
213  const hsize_t* gcounts,
214  const hsize_t* counts,
215  const hsize_t* offsets,
216  T* first,
217  hid_t xfer_plist)
218 {
219  if (grp < 0)
220  return true;
221  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
222  if (h1 < 0)
223  return false;
224  //herr_t ret = H5Dread(h1, h5d_type_id, H5S_ALL, H5S_ALL, xfer_plist, first);
225 
226  hid_t dataspace = H5Dget_space(h1);
227  hid_t memspace = H5Screate_simple(ndims, counts, NULL);
228  // According to the HDF5 manual (https://support.hdfgroup.org/HDF5/doc/RM/H5S/H5Sselect_hyperslab.htm)
229  // , the fifth argument (count) means the number of hyper-slabs to select along each dimensions
230  // while the sixth argument (block) is the size of each hyper-slab.
231  // To write a single hyper-slab of size counts in a dataset, we call
232  // H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
233  // The vector "ones" means we want to write one hyper-slab (block) along each dimensions.
234  // The result is equivalent to calling
235  // H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offsets, NULL, counts, NULL);
236  // , but it implies writing count hyper-slabs along each dimension and each hyper-slab is of size one.
237  const std::vector<hsize_t> ones(ndims, 1);
238  herr_t ret = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
239 
240  hid_t h5d_type_id = get_h5_datatype(*first);
241  ret = H5Dread(h1, h5d_type_id, memspace, dataspace, xfer_plist, first);
242 
243  H5Sclose(dataspace);
244  H5Sclose(memspace);
245 
246  H5Dclose(h1);
247  return ret != -1;
248 }
249 
250 
251 template<typename T>
252 inline bool h5d_write(hid_t grp,
253  const std::string& aname,
254  hsize_t ndims,
255  const hsize_t* gcounts,
256  const hsize_t* counts,
257  const hsize_t* offsets,
258  const T* first,
259  hid_t xfer_plist)
260 {
261  if (grp < 0)
262  return true;
263  hid_t h5d_type_id = get_h5_datatype(*first);
264  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
265  hid_t filespace, memspace;
266  herr_t ret = -1;
267 
268  const std::vector<hsize_t> ones(ndims, 1);
269  if (h1 < 0) //missing create one
270  {
271  hid_t dataspace = H5Screate_simple(ndims, gcounts, NULL);
272  hid_t dataset = H5Dcreate(grp, aname.c_str(), h5d_type_id, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
273 
274  hid_t filespace = H5Dget_space(dataset);
275  ret = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
276 
277  hid_t memspace = H5Screate_simple(ndims, counts, NULL);
278  ret = H5Dwrite(dataset, h5d_type_id, memspace, filespace, xfer_plist, first);
279 
280  H5Dclose(memspace);
281  H5Sclose(filespace);
282  H5Dclose(dataset);
283  H5Sclose(dataspace);
284  }
285  else
286  {
287  filespace = H5Dget_space(h1);
288  ret = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
289 
290  memspace = H5Screate_simple(ndims, counts, NULL);
291  ret = H5Dwrite(h1, h5d_type_id, memspace, filespace, xfer_plist, first);
292 
293  H5Sclose(filespace);
294  H5Dclose(memspace);
295  }
296  H5Dclose(h1);
297  return ret != -1;
298 }
299 
300 /** return true, if successful */
301 template<typename T>
302 bool h5d_read(hid_t grp,
303  const std::string& aname,
304  hsize_t ndims,
305  const hsize_t* gcounts,
306  const hsize_t* counts,
307  const hsize_t* offsets,
308  hsize_t mem_ndims,
309  const hsize_t* mem_gcounts,
310  const hsize_t* mem_counts,
311  const hsize_t* mem_offsets,
312  T* first,
313  hid_t xfer_plist)
314 {
315  if (grp < 0)
316  return true;
317  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
318  if (h1 < 0)
319  return false;
320 
321  hid_t dataspace = H5Dget_space(h1);
322  if (ndims != H5Sget_simple_extent_ndims(dataspace))
323  throw std::runtime_error(aname + " dataspace does not match ");
324  // check gcounts???
325  const std::vector<hsize_t> ones(std::max(ndims, mem_ndims), 1);
326  herr_t ret = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
327 
328  hid_t memspace = H5Screate_simple(mem_ndims, mem_gcounts, NULL);
329  herr_t mem_ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, mem_offsets, NULL, ones.data(), mem_counts);
330 
331  hid_t h5d_type_id = get_h5_datatype(*first);
332  ret = H5Dread(h1, h5d_type_id, memspace, dataspace, xfer_plist, first);
333 
334  H5Sclose(dataspace);
335  H5Sclose(memspace);
336 
337  H5Dclose(h1);
338  return ret != -1;
339 }
340 
341 template<typename T>
342 inline bool h5d_write(hid_t grp,
343  const std::string& aname,
344  hsize_t ndims,
345  const hsize_t* gcounts,
346  const hsize_t* counts,
347  const hsize_t* offsets,
348  hsize_t mem_ndims,
349  const hsize_t* mem_gcounts,
350  const hsize_t* mem_counts,
351  const hsize_t* mem_offsets,
352  const T* first,
353  hid_t xfer_plist)
354 {
355  if (grp < 0)
356  return true;
357  std::cout << " h5d_write: " << mem_ndims << " " << *mem_gcounts << " " << *(mem_gcounts + 1) << " "
358  << *(mem_gcounts + 2) << " " << *mem_counts << " " << *(mem_counts + 1) << " " << *(mem_counts + 2) << " "
359  << *mem_offsets << " " << *(mem_offsets + 1) << " " << *(mem_offsets + 2) << " " << std::endl;
360  hid_t h5d_type_id = get_h5_datatype(*first);
361  hid_t h1 = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
362  herr_t ret = -1;
363 
364  const std::vector<hsize_t> ones(std::max(ndims, mem_ndims), 1);
365  if (h1 < 0) //missing create one
366  {
367  hid_t dataspace = H5Screate_simple(ndims, gcounts, NULL);
368  hid_t dataset = H5Dcreate(grp, aname.c_str(), h5d_type_id, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
369 
370  hid_t filespace = H5Dget_space(dataset);
371  ret = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
372 
373  hid_t memspace = H5Screate_simple(mem_ndims, mem_gcounts, NULL);
374  ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, mem_offsets, NULL, ones.data(), mem_counts);
375  ret = H5Dwrite(dataset, h5d_type_id, memspace, filespace, xfer_plist, first);
376 
377  H5Dclose(memspace);
378  H5Sclose(filespace);
379  H5Dclose(dataset);
380  H5Sclose(dataspace);
381  }
382  else
383  {
384  hid_t filespace = H5Dget_space(h1);
385  ret = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offsets, NULL, ones.data(), counts);
386 
387  hid_t memspace = H5Screate_simple(mem_ndims, mem_gcounts, NULL);
388  ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, mem_offsets, NULL, ones.data(), mem_counts);
389  ret = H5Dwrite(h1, h5d_type_id, memspace, filespace, xfer_plist, first);
390 
391  H5Sclose(filespace);
392  H5Dclose(memspace);
393  }
394  H5Dclose(h1);
395  return ret != -1;
396 }
397 
398 template<typename T>
399 inline bool h5d_append(hid_t grp,
400  const std::string& aname,
401  hsize_t& current,
402  hsize_t ndims,
403  const hsize_t* const dims,
404  const T* const first,
405  hsize_t chunk_size = 1,
406  hid_t xfer_plist = H5P_DEFAULT)
407 {
408  //app_log()<<omp_get_thread_num()<<" h5d_append group = "<<grp<<" name = "<<aname.c_str()<< std::endl;
409  if (grp < 0)
410  return true;
411  hid_t h5d_type_id = get_h5_datatype(*first);
412  hid_t dataspace;
413  hid_t memspace;
414  hid_t dataset = H5Dopen(grp, aname.c_str(), H5P_DEFAULT);
415  std::vector<hsize_t> max_dims(ndims);
416  max_dims[0] = H5S_UNLIMITED;
417  for (int d = 1; d < ndims; ++d)
418  max_dims[d] = dims[d];
419  herr_t ret = -1;
420  if (dataset < 0) //missing create one
421  {
422  //set file pointer
423  current = 0;
424  // set max and chunk dims
425  std::vector<hsize_t> chunk_dims(ndims);
426  chunk_dims[0] = chunk_size;
427  for (int d = 1; d < ndims; ++d)
428  chunk_dims[d] = dims[d];
429  // create a dataspace sized to the current buffer
430  dataspace = H5Screate_simple(ndims, dims, max_dims.data());
431  // create dataset property list
432  hid_t p = H5Pcreate(H5P_DATASET_CREATE);
433  // set layout (chunked, contiguous)
434  hid_t sl = H5Pset_layout(p, H5D_CHUNKED);
435  // set chunk size
436  hid_t cs = H5Pset_chunk(p, ndims, chunk_dims.data());
437  // create the dataset
438  dataset = H5Dcreate(grp, aname.c_str(), h5d_type_id, dataspace, H5P_DEFAULT, p, H5P_DEFAULT);
439  // create memory dataspace, size of current buffer
440  memspace = H5Screate_simple(ndims, dims, NULL);
441  // write the data for the first time
442  ret = H5Dwrite(dataset, h5d_type_id, memspace, dataspace, xfer_plist, first);
443  // update the "file pointer"
444  current = dims[0];
445 
446  //app_log()<<" creating dataset"<< std::endl;
447  //if(dataspace<0) app_log()<<" dataspace could not be created"<< std::endl;
448  //else app_log()<<" dataspace creation successful"<< std::endl;
449  //if(p<0) app_log()<<" property list could not be created"<< std::endl;
450  //else app_log()<<" property list creation successful"<< std::endl;
451  //if(sl<0) app_log()<<" layout could not be set"<< std::endl;
452  //else app_log()<<" layout set successfully"<< std::endl;
453  //if(cs<0) app_log()<<" chunk size could not be set"<< std::endl;
454  //else app_log()<<" chunk size set successfully"<< std::endl;
455  //if(dataset<0) app_log()<<" dataset could not be created"<< std::endl;
456  //else app_log()<<" dataset creation successful"<< std::endl;
457  //if(memspace<0) app_log()<<" memspace could not be created"<< std::endl;
458  //else app_log()<<" memspace creation successful"<< std::endl;
459  //if(ret<0) app_log()<<" data could not be written"<< std::endl;
460  //else app_log()<<" data write successful"<< std::endl;
461  //H5Eprint(NULL);
462 
463  // close the property list
464  H5Pclose(p);
465  }
466  else
467  {
468  // new end of file
469  std::vector<hsize_t> start(ndims);
470  std::vector<hsize_t> end(ndims);
471  for (int d = 1; d < ndims; ++d)
472  {
473  start[d] = 0;
474  end[d] = dims[d];
475  }
476  start[0] = current;
477  end[0] = start[0] + dims[0];
478  //extend the dataset (file)
479  herr_t he = H5Dset_extent(dataset, end.data());
480  //get the corresponding dataspace (filespace)
481  dataspace = H5Dget_space(dataset);
482  //set the extent
483  herr_t hse = H5Sset_extent_simple(dataspace, ndims, end.data(), max_dims.data());
484  //select hyperslab/slice of multidimensional data for appended write
485  const std::vector<hsize_t> ones(ndims, 1);
486  herr_t hsh = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start.data(), NULL, ones.data(), dims);
487  //create memory space describing current data block
488  memspace = H5Screate_simple(ndims, dims, NULL);
489  //append the datablock to the dataset
490  ret = H5Dwrite(dataset, h5d_type_id, memspace, dataspace, H5P_DEFAULT, first);
491  // update the "file pointer"
492  current = end[0];
493 
494  //app_log()<<" appending to dataset"<< std::endl;
495  //app_log()<<" ndims = "<<ndims<< std::endl;
496  //app_log()<<" dims = "<<dims[0]<<" "<<dims[1]<< std::endl;
497  //app_log()<<" start = "<<start[0]<<" "<<start[1]<< std::endl;
498  //app_log()<<" end = "<<end[0]<<" "<<end[1]<< std::endl;
499  //app_log()<<" current = "<<current<<" "<<&current<< std::endl;
500  //if(hse<0) app_log()<<" set_extent failed"<< std::endl;
501  //else app_log()<<" set_extent successful"<< std::endl;
502  //if(hsh<0) app_log()<<" select_hyperslab failed"<< std::endl;
503  //else app_log()<<" select_hyperslab successful"<< std::endl;
504  //if(he<0) app_log()<<" extend failed"<< std::endl;
505  //else app_log()<<" extend successful"<< std::endl;
506  //if(dataspace<0) app_log()<<" dataspace could not be gotten"<< std::endl;
507  //else app_log()<<" dataspace get successful"<< std::endl;
508  //if(memspace<0) app_log()<<" memspace could not be created"<< std::endl;
509  //else app_log()<<" memspace creation successful"<< std::endl;
510  //if(ret<0) app_log()<<" data could not be written"<< std::endl;
511  //else app_log()<<" data write successful"<< std::endl;
512  //H5Eprint(NULL);
513  }
514  // cleanup
515  H5Sclose(memspace);
516  H5Sclose(dataspace);
517  H5Dclose(dataset);
518  return ret != -1;
519 }
520 
521 } // namespace qmcplusplus
522 #endif
define h5_space_type to handle basic datatype for hdf5
bool h5d_append(hid_t grp, const std::string &aname, hsize_t &current, hsize_t ndims, const hsize_t *const dims, const T *const first, hsize_t chunk_size=1, hid_t xfer_plist=H5P_DEFAULT)
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
bool getDataShape(hid_t grp, const std::string &aname, std::vector< IT > &sizes_out)
free template function to read the (user) dimension and shape of the dataset.
bool is_same(const xmlChar *a, const char *b)
bool h5d_check_existence(hid_t grp, const std::string &aname)
bool h5d_check_type(hid_t grp, const std::string &aname)
std::vector< int > dims
bool h5d_read(hid_t grp, const std::string &aname, T *first, hid_t xfer_plist)
return true, if successful
hid_t get_h5_datatype(const T &)
map C types to hdf5 native types bool is explicit removed due to the fact that it is implementation-d...
bool checkShapeConsistency(hid_t grp, const std::string &aname, int rank, hsize_t *dims)
free function to check dimension
bool h5d_write(hid_t grp, const std::string &aname, hsize_t ndims, const hsize_t *dims, const T *first, hid_t xfer_plist)