QMCPACK
RunTimeManager.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2017 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 /** @file RunTimeManager.cpp
14  * @brief Class for determining elapsed run time enabling simulations to adjust to time limits.
15 
16  */
17 #include "RunTimeManager.h"
18 #include <sstream>
19 #include <fstream>
20 #include <iomanip>
21 #include <cstdio>
22 
23 namespace qmcplusplus
24 {
26 
27 template class RunTimeManager<ChronoClock>;
28 template class RunTimeManager<FakeChronoClock>;
29 
30 template<class CLOCK>
31 LoopTimer<CLOCK>::LoopTimer() : nloop(0), ticking(false), total_time(0.0)
32 {}
33 
34 template<class CLOCK>
36 {
37  if (ticking)
38  throw std::runtime_error("LoopTimer started already!");
39  start_time = CLOCK::now();
40  ticking = true;
41 }
42 
43 template<class CLOCK>
45 {
46  if (!ticking)
47  throw std::runtime_error("LoopTimer didn't start but called stop!");
48  nloop++;
49  std::chrono::duration<double> elapsed = CLOCK::now() - start_time;
50  total_time += elapsed.count();
51 
52  ticking = false;
53 }
54 
55 template<class CLOCK>
57 {
58  if (nloop > 0)
59  return total_time / nloop;
60  return 0.0;
61 }
62 
63 template class LoopTimer<ChronoClock>;
64 template class LoopTimer<FakeChronoClock>;
65 
66 template<class CLOCK>
68  int maxCPUSecs,
69  const std::string& stop_file_prefix,
70  bool cleanup)
71  : MaxCPUSecs(maxCPUSecs),
72  runtimeManager(rm),
73  stop_filename_(stop_file_prefix + ".STOP"),
74  stop_status_(StopStatus::CONTINUE)
75 {
76  if (stop_file_prefix.empty())
77  throw std::runtime_error("Stop control filename prefix must not be empty!");
78 
79  if (cleanup)
80  {
81  std::remove(stop_filename_.c_str());
82  if (std::ifstream(stop_filename_.c_str()))
83  throw std::runtime_error("Failed to delete the existing stop control file \"" + stop_filename_ +
84  "\", cannot continue!");
85  }
86 
87  m_runtime_safety_padding = 30.0; // generous 30 seconds to allow for shut down?
88  m_loop_margin = 1.1; // 10% margin on average loop time?
89 }
90 
91 template<class CLOCK>
93 {
94  m_loop_time = loop_timer.get_time_per_iteration();
95  m_elapsed = runtimeManager.elapsed();
96 
97  if (m_elapsed >= MaxCPUSecs)
98  {
99  stop_status_ = StopStatus::MAX_SECONDS_PASSED;
100  return false;
101  }
102 
103  m_remaining = MaxCPUSecs - m_elapsed;
104  bool enough_time = true;
105  if ((m_loop_margin * m_loop_time + m_runtime_safety_padding) > m_remaining)
106  enough_time = false;
107 
108  stop_status_ = StopStatus::NOT_ENOUGH_TIME;
109  return enough_time;
110 }
111 
112 template<class CLOCK>
114 {
115  if (std::ifstream(stop_filename_.c_str()))
116  {
117  stop_status_ = StopStatus::STOP_FILE;
118  return true;
119  }
120  else
121  return false;
122 }
123 
124 template<class CLOCK>
126 {
127  bool need_to_stop = false;
128  need_to_stop |= !enough_time_for_next_iteration(loop_timer);
129  need_to_stop |= stop_file_requested();
130  return need_to_stop;
131 }
132 
133 template<class CLOCK>
134 std::string RunTimeControl<CLOCK>::generateProgressMessage(const std::string& driverName,
135  int block,
136  int num_blocks) const
137 {
138  std::stringstream log;
139  if (block == 0 || block + 1 == num_blocks / 4 || block + 1 == num_blocks / 2 || block + 1 == (num_blocks * 3) / 4 ||
140  block + 1 == num_blocks)
141  {
142  log << "Completed block " << std::setw(4) << block + 1 << " of " << num_blocks << " average "
143  << std::setprecision(4) << m_loop_time << " secs/block after " << std::setprecision(4) << m_elapsed << " secs"
144  << std::endl;
145  }
146  return log.str();
147 }
148 
149 template<class CLOCK>
150 std::string RunTimeControl<CLOCK>::generateStopMessage(const std::string& driverName, int block) const
151 {
152  std::stringstream log;
153  log << "RunTimeControl takes action in " << driverName << " driver." << std::endl;
154  if (stop_status_ == StopStatus::MAX_SECONDS_PASSED)
155  log << "Time limit reached. Stopping after block " << block << std::endl
156  << "Hard limit (seconds) " << MaxCPUSecs << ", elapsed (seconds) " << m_elapsed << std::endl;
157  else if (stop_status_ == StopStatus::NOT_ENOUGH_TIME)
158  {
159  log << "Insufficient time for next block. Stopping after block " << block << std::endl;
160  log << " Iteration time per " << driverName << " block (seconds) = " << m_loop_time << std::endl;
161  log << " Elapsed time (seconds) = " << m_elapsed << std::endl;
162  log << " Remaining time (seconds) = " << m_remaining << std::endl;
163  }
164  else if (stop_status_ == StopStatus::STOP_FILE)
165  log << "Stop requested from the control file \"" + stop_filename_ + "\", stopping after block " << block
166  << std::endl;
167  else
168  throw std::runtime_error("Unidentified stop status!");
169 
170  return log.str();
171 }
172 
173 template class RunTimeControl<ChronoClock>;
174 template class RunTimeControl<FakeChronoClock>;
175 
176 } // namespace qmcplusplus
std::string generateProgressMessage(const std::string &driverName, int block, int num_blocks) const
generate terse progress messages
RunTimeControl(RunTimeManager< CLOCK > &rm, int maxCPUSecs, const std::string &stop_file_prefix, bool cleanup=true)
constructor
helper functions for EinsplineSetBuilder
Definition: Configuration.h:43
RunTimeManager< ChronoClock > run_time_manager
const char num_blocks[]
Definition: HDFVersion.h:44
std::string generateStopMessage(const std::string &driverName, int block) const
generate stop message explaining why
const std::string stop_filename_
the name of the stop file (stop_file_prefix + ".STOP")
bool enough_time_for_next_iteration(LoopTimer< CLOCK > &loop_timer)
MakeReturn< UnaryNode< FnLog, typename CreateLeaf< Vector< T1, C1 > >::Leaf_t > >::Expression_t log(const Vector< T1, C1 > &l)
Class for determining elapsed run time enabling simulations to adjust to time limits.
bool checkStop(LoopTimer< CLOCK > &loop_timer)
check if the run needs to stop because of walltime or stop control file.
double get_time_per_iteration() const