Octopus
walltimer.F90
Go to the documentation of this file.
1!! Copyright (C) 2019 M. Lueders, Heiko Appel
2!! Copyright (C) 2021 I-Te Lu
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
25
26#include "global.h"
27
28module walltimer_oct_m
29 use debug_oct_m
30 use global_oct_m
31 use io_oct_m
32 use, intrinsic :: iso_fortran_env
33 use loct_oct_m
35 use mpi_oct_m
37 use parser_oct_m
38
39 implicit none
40
41 private
42
! Module-private timer state. All times are in seconds; walltimer_init
! converts the user input (given in minutes) before storing it.
real(real64) :: start_time       !< clock reading taken when the timer was started
real(real64) :: last_tap         !< clock reading of the most recent walltimer_tap call
real(real64) :: iteration_time   !< time elapsed between the two most recent taps
real(real64) :: margin           !< safety margin subtracted from the deadline (see set_margin)
real(real64) :: duration         !< total allowed run time (see set_alarm)
real(real64) :: restart_last_tap !< clock reading of the last periodic-restart trigger
real(real64) :: restart_duration !< period between periodic restart writes (see set_restart_alarm)

logical :: active                !< .true. if the walltime alarm is armed
logical :: auto_tap              !< .true. if walltimer_alarm should call walltimer_tap itself
logical :: restart_active        !< .true. if the periodic restart alarm is armed

! Everything else (set_alarm, set_restart_alarm, set_margin, start) stays private.
public ::                         &
  walltimer_init,                 &
  walltimer_end,                  &
  walltimer_tap,                  &
  walltimer_alarm,                &
  restart_walltime_period_alarm,  &
  walltimer_get_start_time

63contains
!> Initialize the timer: reset the module state, read the user input
!! (WalltimeSlurm, Walltime, RestartWriteTime, RestartWallTimePeriod) and
!! start the clock.
!!
!! @param auto  if present and .false., walltimer_alarm will not call
!!              walltimer_tap on its own; defaults to .true.
subroutine walltimer_init(auto)
  logical, optional, intent(in) :: auto

  real(real64) :: alarm_time, write_time, restart_alarm_time, slurm_start_time, slurm_end_time, slurm_time
  character(len=128) :: env_start, env_end
  integer :: status_start, status_end
  logical :: walltime_slurm

  push_sub(walltimer_init)

  start_time = m_zero
  last_tap = m_zero
  iteration_time = m_zero
  margin = m_zero

  active = .false.
  auto_tap = optional_default(auto, .true.)

  ! The following have to be moved to the right place, after the names for the variables have been confirmed:

  !%Variable WalltimeSlurm
  !%Type logical
  !%Default yes
  !%Section Execution::IO
  !%Description
  !% Parse environment variables SLURM_JOB_START_TIME and SLURM_JOB_END_TIME to get the slurm job
  !% length. Set this as default for the variable Walltime, i.e., always activate the walltimer
  !% for slurm jobs. This prevents jobs from running into the timelimit.
  !%End
  call parse_variable(global_namespace, 'WalltimeSlurm', .true., walltime_slurm)

  ! slurm_time (in minutes) is the default for Walltime; zero leaves the timer disabled.
  slurm_time = m_zero
  if (walltime_slurm) then
    ! Only consult the slurm environment when the user has not opted out with
    ! WalltimeSlurm = no; otherwise the default must stay at zero.
    call get_environment_variable("SLURM_JOB_START_TIME", env_start, status=status_start)
    call get_environment_variable("SLURM_JOB_END_TIME", env_end, status=status_end)
    if (status_start == 0 .and. status_end == 0) then
      read(env_start, *, iostat=status_start) slurm_start_time
      read(env_end, *, iostat=status_end) slurm_end_time
      if (status_start == 0 .and. status_end == 0) then
        ! only set the time if the environment variables are defined and could be read
        ! (the difference is divided by 60, i.e. the values are presumably in seconds)
        slurm_time = (slurm_end_time - slurm_start_time) / 60.0_real64
      end if
    end if
  end if

  !%Variable Walltime
  !%Type float
  !%Default 0 (or given by the slurm time limit)
  !%Section Execution::IO
  !%Description
  !% Time in minutes before which the restart file will be written. This is to make sure that at least one restart
  !% file can be written before the code might be killed due to exceeding the given CPU time.
  !% If a finite time (in minutes) is specified, the code will write the restart file when the next
  !% iteration (plus the RestartWriteTime) would exceed the given time.
  !% A value less than 1 second (1/60 minutes) will disable the timer.
  !% For slurm jobs, this is set by default to the time limit of the job (can be deactivated by setting
  !% WalltimeSlurm = no).
  !%End
  call parse_variable(global_namespace, 'Walltime', slurm_time, alarm_time)
  ! input is in minutes, the timer works in seconds
  call set_alarm(alarm_time*60.0_real64)

  !%Variable RestartWriteTime
  !%Type float
  !%Default 5
  !%Section Execution::IO
  !%Description
  !% The RestartWriteTime (in minutes) will be subtracted from the WallTime to allow time for writing the restart file.
  !% In huge calculations, this value should be increased.
  !%End
  call parse_variable(global_namespace, 'RestartWriteTime', 5.0_real64, write_time)
  ! never reserve more than a quarter of the total walltime for writing the restart
  if (write_time > alarm_time/m_four) write_time = alarm_time/m_four
  call set_margin(write_time*60.0_real64)

  !%Variable RestartWallTimePeriod
  !%Type float
  !%Default 120
  !%Section Execution::IO
  !%Description
  !% Period Time (in minutes) at which the restart file will be written.
  !% If a finite time (in minutes) is specified, the code will write the restart file every period.
  !%End
  call parse_variable(global_namespace, 'RestartWallTimePeriod', 120.0_real64, restart_alarm_time)
  call set_restart_alarm(restart_alarm_time*60.0_real64)

  ! start() only ever switches the flag on, so it has to be cleared first
  restart_active = .false.
  call start()

  pop_sub(walltimer_init)
end subroutine walltimer_init
152
!> Shut the timer down: disarm the walltime alarm.
subroutine walltimer_end()

  push_sub(walltimer_end)

  active = .false.

  ! every push_sub needs its matching pop_sub, as in the other procedures of this module
  pop_sub(walltimer_end)
end subroutine walltimer_end
162
!> Set the total allowed run time.
!!
!! @param time  alarm interval in seconds (walltimer_init passes minutes*60)
subroutine set_alarm(time)
  ! intent(in): the argument is only read, and callers pass expressions
  real(real64), intent(in) :: time

  push_sub(set_alarm)

  duration = time

  pop_sub(set_alarm)
end subroutine set_alarm
173
!> Set the period of the periodic restart alarm.
!!
!! @param time  restart interval in seconds (walltimer_init passes minutes*60)
subroutine set_restart_alarm(time)
  ! intent(in): the argument is only read, and callers pass expressions
  real(real64), intent(in) :: time

  push_sub(set_restart_alarm)

  restart_duration = time

  pop_sub(set_restart_alarm)
end subroutine set_restart_alarm
184
!> Set the safety margin that walltimer_alarm subtracts from the deadline.
!!
!! @param time  margin in seconds (walltimer_init passes minutes*60)
subroutine set_margin(time)
  ! intent(in): the argument is only read, and callers pass expressions
  real(real64), intent(in) :: time

  push_sub(set_margin)

  margin = time

  pop_sub(set_margin)
end subroutine set_margin
196
!> Start the timer: record the starting time and arm whichever alarms
!! have a long enough interval configured.
subroutine start()

  push_sub(start)

  ! a single clock reading seeds all three reference times
  start_time = loct_clock()
  restart_last_tap = start_time
  last_tap = start_time

  ! the periodic restart alarm needs a period of more than one minute
  if (restart_duration > 60.0_real64) then
    restart_active = .true.
  end if

  ! the walltime alarm needs a duration of more than one second
  if (duration > m_one) then
    active = .true.
  end if

  pop_sub(start)
end subroutine start
213
!> Measure the time of one iteration: store the time elapsed since the
!! previous tap in iteration_time and remember the current clock reading.
!!
!! @param print  if present and .true., write the timer state via messages_info
subroutine walltimer_tap(print)
  logical, optional, intent(in) :: print

  real(real64) :: now

  push_sub(walltimer_tap)

  now = loct_clock()

  iteration_time = now - last_tap
  last_tap = now

  if (optional_default(print, .false.)) then
    ! The values are in seconds, so the fields must hold multi-hour runs:
    ! the former F6.2/F10.5 turned into asterisks from 1000 s / 10000 s on.
    write(message(1), '("Walltimer_tap: elapsed time = ",F12.2," (", 3F16.5, "), active = ",L1 )') &
      now - start_time, duration, iteration_time, margin, active
    call messages_info(1, all_nodes=.true.)
  end if

  pop_sub(walltimer_tap)
end subroutine walltimer_tap
235
!> Indicate whether time is up, i.e. whether one more iteration plus the
!! safety margin would no longer fit into the allowed duration.
!!
!! With MPI the local decisions are combined with a logical OR over comm,
!! so every process of the communicator returns the same answer.
!!
!! @param comm   communicator over which the decision is agreed
!! @param print  if present and .true., write the timer state via messages_info
!! @return .true. if the walltime alarm is armed and has fired
logical function walltimer_alarm(comm, print)
  type(mpi_comm), intent(in) :: comm
  logical, optional, intent(in) :: print

#ifdef HAVE_MPI
  logical :: alarm
#endif
  real(real64) :: now

  push_sub(walltimer_alarm)

  now = loct_clock()

  if (optional_default(print, .false.)) then
    ! values are in seconds; fields are wide enough for multi-hour runs
    write(message(1), '("Walltimer_alarm: elapsed time = ",F12.2," (", 3F16.5, "), active = ",L1 )') &
      now - start_time, duration, iteration_time, margin, active
    call messages_info(1, all_nodes=.true.)
  end if

  ! refresh iteration_time before it enters the deadline estimate
  if (auto_tap) call walltimer_tap()

  walltimer_alarm = active .and. (now > start_time + duration - iteration_time - margin)

  ! All processes need to know that the walltimer was triggered.
#ifdef HAVE_MPI
  call mpi_debug_in(comm, c_mpi_allreduce)
  call mpi_allreduce(walltimer_alarm, alarm, 1, mpi_logical, mpi_lor, comm)
  call mpi_debug_out(comm, c_mpi_allreduce)
  ! adopt the reduced value; without this the reduction has no effect and
  ! processes could disagree on whether to stop
  walltimer_alarm = alarm
#endif

  if (walltimer_alarm) then
    write(message(1), '("Walltimer stopping execution after = ",F10.2," minutes.")') (now - start_time)/60.0_real64
    ! actually emit the message that was just composed
    call messages_info(1)

    ! Switch status to indicate that the walltimer aborted the calculation.
    ! This can be used to communicate the status of octopus to a queuing system and to daisy-chain jobs.
    call messages_switch_status('walltimer-aborted')
  end if

  pop_sub(walltimer_alarm)
end function walltimer_alarm
278
!> Indicate whether the periodic restart interval has elapsed since the
!! last periodic restart trigger. When it has, the reference time is reset
!! so that the next period starts now.
!!
!! With MPI the local decisions are combined with a logical OR over comm,
!! so every process of the communicator returns the same answer.
!!
!! @param comm  communicator over which the decision is agreed
!! @return .true. if the periodic restart alarm is armed and has fired
logical function restart_walltime_period_alarm(comm)
  type(mpi_comm), intent(in) :: comm

#ifdef HAVE_MPI
  logical :: alarm
#endif
  real(real64) :: now

  push_sub(restart_walltime_period_alarm)

  now = loct_clock()

  restart_walltime_period_alarm = restart_active .and. (now > restart_last_tap + restart_duration)

  ! All processes need to know that the walltimer was triggered.
#ifdef HAVE_MPI
  call mpi_debug_in(comm, c_mpi_allreduce)
  call mpi_allreduce(restart_walltime_period_alarm, alarm, 1, mpi_logical, mpi_lor, comm)
  call mpi_debug_out(comm, c_mpi_allreduce)
  ! adopt the reduced value so that all processes take the same branch below
  restart_walltime_period_alarm = alarm
#endif

  if (restart_walltime_period_alarm) then
    ! restart the period from the current time
    restart_last_tap = now
    write(message(1), '(a)') "Restart walltime period is reached: writing restart data"
    call messages_info(1)
  end if

  pop_sub(restart_walltime_period_alarm)
end function restart_walltime_period_alarm
309
!> Return the walltimer start time, i.e. the clock reading stored by start().
real(real64) function walltimer_get_start_time()
  walltimer_get_start_time = start_time
end function walltimer_get_start_time
315end module walltimer_oct_m
316
317!! Local Variables:
318!! mode: f90
319!! coding: utf-8
320!! End:
unsigned int alarm(unsigned int __seconds) __attribute__((__nothrow__
real(real64), parameter, public m_zero
Definition: global.F90:190
real(real64), parameter, public m_four
Definition: global.F90:194
real(real64), parameter, public m_one
Definition: global.F90:191
Definition: io.F90:116
subroutine, public messages_switch_status(status)
create status file for asynchronous communication
Definition: messages.F90:651
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:600
type(namespace_t), public global_namespace
Definition: namespace.F90:134
This module provides a simple timer class which can be used to trigger the writing of a restart file ...
Definition: walltimer.F90:123
subroutine, public walltimer_init(auto)
initialize the timer
Definition: walltimer.F90:161
subroutine, public walltimer_end()
destructor
Definition: walltimer.F90:250
subroutine set_alarm(time)
set alarm interval in seconds
Definition: walltimer.F90:260
logical function, public walltimer_alarm(comm, print)
indicate whether time is up
Definition: walltimer.F90:333
real(real64) function, public walltimer_get_start_time()
Return the walltimer start time.
Definition: walltimer.F90:407
logical function, public restart_walltime_period_alarm(comm)
Definition: walltimer.F90:375
subroutine start()
start the timer (save starting time)
Definition: walltimer.F90:294
subroutine, public walltimer_tap(print)
measure time of one iteration
Definition: walltimer.F90:311
subroutine set_restart_alarm(time)
set restart alarm interval in seconds
Definition: walltimer.F90:271
subroutine set_margin(time)
set safety margin in seconds
Definition: walltimer.F90:282
int true(void)