Octopus
debug.F90
Go to the documentation of this file.
1!! Copyright (C) 2016 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21module debug_oct_m
22 use global_oct_m
24 use mpi_oct_m
26 use loct_oct_m
27 use parser_oct_m
28
29 implicit none
30
31 private
32 public :: &
33 debug_t, &
34 debug_init, &
39 debug, &
44
45 type debug_t
46 private
47 logical, public :: info
48 logical, public :: trace
49 logical, public :: trace_term
50 logical, public :: trace_file
51 logical :: extra_checks
52 logical, public :: interaction_graph
53 logical, public :: interaction_graph_full
54 logical, public :: propagation_graph
55 integer :: bits
56 end type debug_t
57
58 type(debug_t), save :: debug
59
61 integer, parameter :: unit_offset = 1000
62
63contains
64
65 subroutine debug_init(this, namespace)
66 type(debug_t), intent(out) :: this
67 type(namespace_t), intent(in) :: namespace
68
69 character(len=256) :: node_hook
70 logical :: file_exists, mpi_debug_hook
71 integer :: sec, usec
72
73 !%Variable Debug
74 !%Type flag
75 !%Default no
76 !%Section Execution::Debug
77 !%Description
78 !% This variable controls the amount of debugging information
79 !% generated by Octopus. You can use include more than one option
80 !% with the + operator.
81 !%Option no 0
82 !% (default) <tt>Octopus</tt> does not enter debug mode.
83 !%Option info 1
84 !% Octopus prints additional information to the terminal.
85 !%Option trace 2
86 !% Octopus generates a stack trace as it enters end exits
87 !% subroutines. This information is reported if Octopus stops with
88 !% an error.
89 !%Option trace_term 4
90 !% The trace is printed to the terminal as Octopus enters or exits subroutines. This slows down execution considerably.
91 !%Option trace_file 8
92 !% The trace is written to files in the <tt>debug</tt>
93 !% directory. For each node (when running in parallel) there is a file called
94 !% <tt>debug_trace.&lt;rank&gt;</tt>. Writing these files slows down the code by a huge factor and
95 !% it is usually only necessary for parallel runs.
96 !%Option extra_checks 16
97 !% This enables Octopus to perform some extra checks, to ensure
98 !% code correctness, that might be too costly for regular runs.
99 !%Option interaction_graph 32
100 !% Octopus generates a dot file containing the graph for a multisystem run.
101 !%Option interaction_graph_full 64
102 !% Octopus generates a dot file containing the graph for a multisystem run including ghost interactions.
103 !%Option propagation_graph 128
104 !% Octopus generates a file with information for the propagation diagram.
105 !%End
106 call parse_variable(namespace, 'Debug', option__debug__no, this%bits)
107
108 call from_bits(this)
109
110 call mpi_debug_init(mpi_world%rank, this%info)
111
112 if (this%info) then
113 !%Variable MPIDebugHook
114 !%Type logical
115 !%Default no
116 !%Section Execution::Debug
117 !%Description
118 !% When debugging the code in parallel it is usually difficult to find the origin
119 !% of race conditions that appear in MPI communications. This variable introduces
120 !% a facility to control separate MPI processes. If set to yes, all nodes will
121 !% start up, but will get trapped in an endless loop. In every cycle of the loop
122 !% each node is sleeping for one second and is then checking if a file with the
123 !% name <tt>node_hook.xxx</tt> (where <tt>xxx</tt> denotes the node number) exists. A given node can
124 !% only be released from the loop if the corresponding file is created. This allows
125 !% to selectively run, <i>e.g.</i>, a compute node first followed by the master node. Or, by
126 !% reversing the file creation of the node hooks, to run the master first followed
127 !% by a compute node.
128 !%End
129 call parse_variable(global_namespace, 'MPIDebugHook', .false., mpi_debug_hook)
130 if (mpi_debug_hook) then
131 call loct_gettimeofday(sec, usec)
132 call epoch_time_diff(sec,usec)
133 write(stdout,'(a,i6,a,i6.6,20x,a)') '* I ',sec,'.',usec,' | MPI debug hook'
134
135 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' In debug hook'
136 write(node_hook,'(i3.3)') mpi_world%rank
137 file_exists = .false.
139 do while (.not. file_exists)
140 inquire(file='node_hook.'//node_hook, exist=file_exists)
142 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, &
143 ' - still sleeping. To release me touch: node_hook.'//trim(node_hook)
144 end do
146 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' Leaving debug hook'
147 ! remove possible debug hooks
148 call loct_rm('node_hook.'//trim(node_hook))
149
150 call loct_gettimeofday(sec, usec)
151 call epoch_time_diff(sec,usec)
152 write(stdout,'(a,i6,a,i6.6,20x,a)') '* O ', sec, '.', usec,' | MPI debug hook'
153 end if
154 end if
155
156 end subroutine debug_init
157
158 !--------------------------------------------------
159
160 subroutine debug_enable(this)
161 type(debug_t), intent(inout) :: this
162
163 this%info = .true.
164 this%trace = .true.
165 this%trace_term = .true.
166 this%trace_file = .true.
167 this%interaction_graph = .true.
168 this%interaction_graph_full = .true.
169 this%propagation_graph = .true.
170
171 end subroutine debug_enable
172
173 !--------------------------------------------------
174
175 subroutine debug_disable(this)
176 type(debug_t), intent(inout) :: this
177
178 call from_bits(this)
179
180 end subroutine debug_disable
181
182 !--------------------------------------------------
183
184 subroutine debug_delete_trace()
185
186 integer :: iunit
187 character(len=6) :: filenum
188
189 iunit = mpi_world%rank + unit_offset
190 write(filenum, '(i6.6)') iunit - unit_offset
191 call loct_mkdir('debug')
192 call loct_rm('debug/debug_trace.node.'//filenum)
193
194 end subroutine debug_delete_trace
195
196 ! ---------------------------------------------------------
197
198 subroutine debug_open_trace(iunit)
199 integer, intent(out) :: iunit
200
201 character(len=6) :: filenum
202
203 iunit = mpi_world%rank + unit_offset
204 write(filenum, '(i6.6)') iunit - unit_offset
205 call loct_mkdir('debug')
206 open(iunit, file = 'debug/debug_trace.node.'//filenum, &
207 action='write', status='unknown', position='append')
208
209 end subroutine debug_open_trace
210
211 ! ---------------------------------------------------------
212
213 subroutine from_bits(this)
214 type(debug_t), intent(inout) :: this
215
216 this%info = (bitand(this%bits, option__debug__info) /= 0)
217 this%trace_term = (bitand(this%bits, option__debug__trace_term) /= 0)
218 this%trace_file = (bitand(this%bits, option__debug__trace_file) /= 0)
219 this%trace = (bitand(this%bits, option__debug__trace) /= 0) .or. this%trace_term .or. this%trace_file
220 this%extra_checks = (bitand(this%bits, option__debug__extra_checks) /= 0) .or. this%trace_term .or. this%trace_file
221 this%interaction_graph = (bitand(this%bits, option__debug__interaction_graph) /= 0)
222 this%interaction_graph_full = (bitand(this%bits, option__debug__interaction_graph_full) /= 0)
223 this%propagation_graph = (bitand(this%bits, option__debug__propagation_graph) /= 0)
224
225 end subroutine from_bits
226
227
228 ! ---------------------------------------------------------
229 subroutine epoch_time_diff(sec, usec)
230 integer, intent(inout) :: sec
231 integer, intent(inout) :: usec
232
233 ! this is called by push/pop so there cannot be a push/pop in this routine
234
235 call time_diff(s_epoch_sec, s_epoch_usec, sec, usec)
236 end subroutine epoch_time_diff
237
238
239 ! ---------------------------------------------------------
242 subroutine time_diff(sec1, usec1, sec2, usec2)
243 integer, intent(in) :: sec1
244 integer, intent(in) :: usec1
245 integer, intent(inout) :: sec2
246 integer, intent(inout) :: usec2
247
248 ! this is called by push/pop so there cannot be a push/pop in this routine
249
250 ! Correct overflow.
251 if (usec2 - usec1 < 0) then
252 usec2 = 1000000 + usec2
253 if (sec2 >= sec1) then
254 sec2 = sec2 - 1
255 end if
256 end if
257
258 ! Replace values.
259 if (sec2 >= sec1) then
260 sec2 = sec2 - sec1
261 end if
262 usec2 = usec2 - usec1
263
264 end subroutine time_diff
265
266
267#ifndef NDEBUG
268 ! ---------------------------------------------------------
270 subroutine debug_push_sub(sub_name)
271 character(len=*), intent(in) :: sub_name
272
273 integer, parameter :: max_recursion_level = 50
274 integer iunit, sec, usec
275
276 if (.not. debug%trace) return
278 call loct_gettimeofday(sec, usec)
279 call epoch_time_diff(sec, usec)
280
282 if (no_sub_stack >= max_recursion_level) then
283 sub_stack(max_recursion_level) = 'debug_push_sub'
284 write(stderr, '(a,i3,a)') 'Too many recursion levels in debug trace (max=', max_recursion_level, ')'
285 call mpi_world%abort()
286 stop
287 end if
288
289 sub_stack(no_sub_stack) = trim(debug_clean_path(sub_name))
292 if (debug%trace_file) then
293 call debug_open_trace(iunit)
294 call push_sub_write(iunit)
295 ! close file to ensure flushing
296 close(iunit)
297 end if
298
299 if (debug%trace_term .and. mpi_grp_is_root(mpi_world)) then
300 ! write to stderr if we are node 0
301 call push_sub_write(stderr)
302 end if
303
304 contains
305
306 subroutine push_sub_write(iunit_out)
307 integer, intent(in) :: iunit_out
308
309 integer :: ii
310 character(len=1000) :: tmpstr
311
312 write(tmpstr,'(a,i6,a,i6.6,f20.6,i8,a)') "* I ", &
313 sec, '.', usec, &
314 loct_clock(), &
315 loct_get_memory_usage() / 1024, " | "
316 do ii = no_sub_stack - 1, 1, -1
317 write(tmpstr, '(2a)') trim(tmpstr), "..|"
318 end do
319 write(tmpstr, '(2a)') trim(tmpstr), trim(debug_clean_path(sub_name))
320 write(iunit_out, '(a)') trim(tmpstr)
321
322 end subroutine push_sub_write
323
324 end subroutine debug_push_sub
325
326 ! ---------------------------------------------------------
328 subroutine debug_pop_sub(sub_name)
329 character(len=*), intent(in) :: sub_name
330
331 character(len=80) :: sub_name_short
332 integer iunit, sec, usec
333
334 if (.not. debug%trace) return
336 call loct_gettimeofday(sec, usec)
337 call epoch_time_diff(sec, usec)
338
339 if (no_sub_stack <= 0) then
340 no_sub_stack = 1
341 sub_stack(1) = 'pop_sub'
342 write(stderr, '(a)') 'Too few recursion levels in debug trace'
343 call mpi_world%abort()
344 stop
345 end if
346
347 ! the name might be truncated in sub_stack, so we copy to a string
348 ! of the same size
349 sub_name_short = trim(debug_clean_path(sub_name))
350
351 if (sub_name_short /= sub_stack(no_sub_stack)) then
352 write(stderr, '(a)') 'Wrong sub name on pop_sub :'
353 write(stderr, '(2a)') ' got : ', sub_name_short
354 write(stderr, '(2a)') ' expected : ', sub_stack(no_sub_stack)
355 call mpi_world%abort()
356 stop
357 end if
358
359 if (debug%trace_file) then
360 call debug_open_trace(iunit)
361 call pop_sub_write(iunit)
362 ! close file to ensure flushing
363 close(iunit)
364 end if
365
366 if (debug%trace_term .and. mpi_grp_is_root(mpi_world)) then
367 ! write to stderr if we are node 0
368 call pop_sub_write(stderr)
369 end if
370
372
373 contains
374
375 subroutine pop_sub_write(iunit_out)
376 integer, intent(in) :: iunit_out
377
378 integer :: ii
379 character(len=1000) :: tmpstr
380
381 write(tmpstr,'(a,i6,a,i6.6,f20.6,i8, a)') "* O ", &
382 sec, '.', usec, &
384 loct_get_memory_usage() / 1024, " | "
385 do ii = no_sub_stack - 1, 1, -1
386 write(tmpstr,'(2a)') trim(tmpstr), "..|"
387 end do
388 write(tmpstr,'(2a)') trim(tmpstr), trim(sub_stack(no_sub_stack))
389
390 write(iunit_out, '(a)') trim(tmpstr)
391
392 end subroutine pop_sub_write
393
394 end subroutine debug_pop_sub
395#endif
396
397 ! -----------------------------------------------------------
399 character(len=MAX_PATH_LEN) function debug_clean_path(filename) result(clean_path)
400 character(len=*), intent(in) :: filename
401
402 integer :: pos
403
404 pos = index(filename, 'src/', back = .true.)
405 if (pos == 0) then
406 ! 'src/' does not occur
407 clean_path = filename
408 else
409 ! remove 'src/'
410 clean_path = filename(pos+4:)
411 end if
412
413 end function debug_clean_path
414
415end module debug_oct_m
416
417!! Local Variables:
418!! mode: f90
419!! coding: utf-8
420!! End:
subroutine pop_sub_write(iunit_out)
Definition: debug.F90:469
subroutine push_sub_write(iunit_out)
Definition: debug.F90:400
File-handling.
Definition: loct.F90:216
character(len=max_path_len) function, public debug_clean_path(filename)
Prune a filename path to only include subdirectories of the "src" directory.
Definition: debug.F90:493
subroutine, public debug_enable(this)
Definition: debug.F90:254
type(debug_t), save, public debug
Definition: debug.F90:151
subroutine, public debug_pop_sub(sub_name)
Pop a routine from the debug trace.
Definition: debug.F90:422
subroutine, public debug_open_trace(iunit)
Definition: debug.F90:292
subroutine, public epoch_time_diff(sec, usec)
Definition: debug.F90:323
subroutine from_bits(this)
Definition: debug.F90:307
subroutine, public debug_init(this, namespace)
Definition: debug.F90:159
subroutine, public debug_disable(this)
Definition: debug.F90:269
subroutine time_diff(sec1, usec1, sec2, usec2)
Computes t2 <- t2-t1. sec1,2 and usec1,2 are seconds,microseconds of t1,2.
Definition: debug.F90:336
subroutine, public debug_push_sub(sub_name)
Push a routine to the debug trace.
Definition: debug.F90:364
subroutine, public debug_delete_trace()
Definition: debug.F90:278
integer, public s_epoch_sec
global epoch time (time at startup)
Definition: global.F90:232
integer, public no_sub_stack
Definition: global.F90:237
real(real64), dimension(50), public time_stack
Definition: global.F90:236
character(len=80), dimension(50), public sub_stack
The stack.
Definition: global.F90:235
integer, public s_epoch_usec
Definition: global.F90:232
subroutine, public mpi_debug_init(rank, info)
Definition: mpi_debug.F90:182
logical function mpi_grp_is_root(grp)
Definition: mpi.F90:425
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:262
type(namespace_t), public global_namespace
Definition: namespace.F90:132
int true(void)