Octopus
debug.F90
Go to the documentation of this file.
1!! Copyright (C) 2016 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21module debug_oct_m
22 use global_oct_m
24 use mpi_oct_m
26 use loct_oct_m
27 use parser_oct_m
28
29 implicit none
30
31 private
32 public :: &
33 debug_t, &
34 debug_init, &
39 debug, &
41
42 type debug_t
43 private
44 logical, public :: info
45 logical, public :: trace
46 logical, public :: trace_term
47 logical, public :: trace_file
48 logical :: extra_checks
49 logical, public :: interaction_graph
50 logical, public :: interaction_graph_full
51 logical, public :: propagation_graph
52 integer :: bits
53 end type debug_t
54
55 type(debug_t), save :: debug
56
58 integer, parameter :: unit_offset = 1000
59
60contains
61
62 subroutine debug_init(this, namespace)
63 type(debug_t), intent(out) :: this
64 type(namespace_t), intent(in) :: namespace
65
66 character(len=256) :: node_hook
67 logical :: file_exists, mpi_debug_hook
68 integer :: sec, usec
69
70 !%Variable Debug
71 !%Type flag
72 !%Default no
73 !%Section Execution::Debug
74 !%Description
75 !% This variable controls the amount of debugging information
76 !% generated by Octopus. You can use include more than one option
77 !% with the + operator.
78 !%Option no 0
79 !% (default) <tt>Octopus</tt> does not enter debug mode.
80 !%Option info 1
81 !% Octopus prints additional information to the terminal.
82 !%Option trace 2
83 !% Octopus generates a stack trace as it enters end exits
84 !% subroutines. This information is reported if Octopus stops with
85 !% an error.
86 !%Option trace_term 4
87 !% The trace is printed to the terminal as Octopus enters or exits subroutines. This slows down execution considerably.
88 !%Option trace_file 8
89 !% The trace is written to files in the <tt>debug</tt>
90 !% directory. For each node (when running in parallel) there is a file called
91 !% <tt>debug_trace.&lt;rank&gt;</tt>. Writing these files slows down the code by a huge factor and
92 !% it is usually only necessary for parallel runs.
93 !%Option extra_checks 16
94 !% This enables Octopus to perform some extra checks, to ensure
95 !% code correctness, that might be too costly for regular runs.
96 !%Option interaction_graph 32
97 !% Octopus generates a dot file containing the graph for a multisystem run.
98 !%Option interaction_graph_full 64
99 !% Octopus generates a dot file containing the graph for a multisystem run including ghost interactions.
100 !%Option propagation_graph 128
101 !% Octopus generates a file with information for the propagation diagram.
102 !%End
103 call parse_variable(namespace, 'Debug', option__debug__no, this%bits)
104
105 call from_bits(this)
107 call mpi_debug_init(mpi_world%rank, this%info)
108
109 if (this%info) then
110 !%Variable MPIDebugHook
111 !%Type logical
112 !%Default no
113 !%Section Execution::Debug
114 !%Description
115 !% When debugging the code in parallel it is usually difficult to find the origin
116 !% of race conditions that appear in MPI communications. This variable introduces
117 !% a facility to control separate MPI processes. If set to yes, all nodes will
118 !% start up, but will get trapped in an endless loop. In every cycle of the loop
119 !% each node is sleeping for one second and is then checking if a file with the
120 !% name <tt>node_hook.xxx</tt> (where <tt>xxx</tt> denotes the node number) exists. A given node can
121 !% only be released from the loop if the corresponding file is created. This allows
122 !% to selectively run, <i>e.g.</i>, a compute node first followed by the master node. Or, by
123 !% reversing the file creation of the node hooks, to run the master first followed
124 !% by a compute node.
125 !%End
126 call parse_variable(global_namespace, 'MPIDebugHook', .false., mpi_debug_hook)
127 if (mpi_debug_hook) then
128 call loct_gettimeofday(sec, usec)
129 call epoch_time_diff(sec,usec)
130 write(stdout,'(a,i6,a,i6.6,20x,a)') '* I ',sec,'.',usec,' | MPI debug hook'
132 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' In debug hook'
133 write(node_hook,'(i3.3)') mpi_world%rank
134 file_exists = .false.
136 do while (.not. file_exists)
137 inquire(file='node_hook.'//node_hook, exist=file_exists)
138 call loct_nanosleep(1,0)
139 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, &
140 ' - still sleeping. To release me touch: node_hook.'//trim(node_hook)
141 end do
142
143 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' Leaving debug hook'
144 ! remove possible debug hooks
145 call loct_rm('node_hook.'//trim(node_hook))
146
147 call loct_gettimeofday(sec, usec)
148 call epoch_time_diff(sec,usec)
149 write(stdout,'(a,i6,a,i6.6,20x,a)') '* O ', sec, '.', usec,' | MPI debug hook'
150 end if
151 end if
152
153 end subroutine debug_init
154
155 !--------------------------------------------------
156
157 subroutine debug_enable(this)
158 type(debug_t), intent(inout) :: this
159
160 this%info = .true.
161 this%trace = .true.
162 this%trace_term = .true.
163 this%trace_file = .true.
164 this%interaction_graph = .true.
165 this%interaction_graph_full = .true.
166 this%propagation_graph = .true.
167
168 end subroutine debug_enable
169
170 !--------------------------------------------------
171
172 subroutine debug_disable(this)
173 type(debug_t), intent(inout) :: this
174
175 call from_bits(this)
176
177 end subroutine debug_disable
178
179 !--------------------------------------------------
180
181 subroutine debug_delete_trace()
182
183 integer :: iunit
184 character(len=6) :: filenum
185
186 iunit = mpi_world%rank + unit_offset
187 write(filenum, '(i6.6)') iunit - unit_offset
188 call loct_mkdir('debug')
189 call loct_rm('debug/debug_trace.node.'//filenum)
190
191 end subroutine debug_delete_trace
192
193 ! ---------------------------------------------------------
194
195 subroutine debug_open_trace(iunit)
196 integer, intent(out) :: iunit
197
198 character(len=6) :: filenum
199
200 iunit = mpi_world%rank + unit_offset
201 write(filenum, '(i6.6)') iunit - unit_offset
202 call loct_mkdir('debug')
203 open(iunit, file = 'debug/debug_trace.node.'//filenum, &
204 action='write', status='unknown', position='append')
205
206 end subroutine debug_open_trace
207
208 ! ---------------------------------------------------------
209
210 subroutine from_bits(this)
211 type(debug_t), intent(inout) :: this
212
213 this%info = (bitand(this%bits, option__debug__info) /= 0)
214 this%trace_term = (bitand(this%bits, option__debug__trace_term) /= 0)
215 this%trace_file = (bitand(this%bits, option__debug__trace_file) /= 0)
216 this%trace = (bitand(this%bits, option__debug__trace) /= 0) .or. this%trace_term .or. this%trace_file
217 this%extra_checks = (bitand(this%bits, option__debug__extra_checks) /= 0) .or. this%trace_term .or. this%trace_file
218 this%interaction_graph = (bitand(this%bits, option__debug__interaction_graph) /= 0)
219 this%interaction_graph_full = (bitand(this%bits, option__debug__interaction_graph_full) /= 0)
220 this%propagation_graph = (bitand(this%bits, option__debug__propagation_graph) /= 0)
221
222 end subroutine from_bits
223
224
225 ! ---------------------------------------------------------
226 subroutine epoch_time_diff(sec, usec)
227 integer, intent(inout) :: sec
228 integer, intent(inout) :: usec
229
230 ! this is called by push/pop so there cannot be a push/pop in this routine
231
232 call time_diff(s_epoch_sec, s_epoch_usec, sec, usec)
233 end subroutine epoch_time_diff
234
235
236 ! ---------------------------------------------------------
239 subroutine time_diff(sec1, usec1, sec2, usec2)
240 integer, intent(in) :: sec1
241 integer, intent(in) :: usec1
242 integer, intent(inout) :: sec2
243 integer, intent(inout) :: usec2
244
245 ! this is called by push/pop so there cannot be a push/pop in this routine
246
247 ! Correct overflow.
248 if (usec2 - usec1 < 0) then
249 usec2 = 1000000 + usec2
250 if (sec2 >= sec1) then
251 sec2 = sec2 - 1
252 end if
253 end if
254
255 ! Replace values.
256 if (sec2 >= sec1) then
257 sec2 = sec2 - sec1
258 end if
259 usec2 = usec2 - usec1
260
261 end subroutine time_diff
262
263end module debug_oct_m
264
265!! Local Variables:
266!! mode: f90
267!! coding: utf-8
268!! End:
File-handling.
Definition: loct.F90:208
subroutine, public debug_enable(this)
Definition: debug.F90:243
type(debug_t), save, public debug
Definition: debug.F90:140
subroutine, public debug_open_trace(iunit)
Definition: debug.F90:281
subroutine, public epoch_time_diff(sec, usec)
Definition: debug.F90:312
subroutine from_bits(this)
Definition: debug.F90:296
subroutine, public debug_init(this, namespace)
Definition: debug.F90:148
subroutine, public debug_disable(this)
Definition: debug.F90:258
subroutine time_diff(sec1, usec1, sec2, usec2)
Computes t2 <- t2-t1. sec1,2 and usec1,2 are seconds,microseconds of t1,2.
Definition: debug.F90:325
subroutine, public debug_delete_trace()
Definition: debug.F90:267
integer, public s_epoch_sec
global epoch time (time at startup)
Definition: global.F90:209
integer, public s_epoch_usec
Definition: global.F90:209
subroutine, public mpi_debug_init(rank, info)
Definition: mpi_debug.F90:177
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:247
type(namespace_t), public global_namespace
Definition: namespace.F90:135
int true(void)