Octopus
profiling.F90
Go to the documentation of this file.
1!! Copyright (C) 2005-2009 Heiko Appel, Florian Lorenzen, Xavier Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
54 !*/
55module profiling_oct_m
56 use debug_oct_m
57 use global_oct_m
58 use io_oct_m
59 use loct_oct_m
61 use mpi_oct_m
62 use parser_oct_m
64 use nvtx_oct_m
65 use sort_oct_m
66 use string_oct_m
67 use types_oct_m
69
70 implicit none
71 private
72
73 public :: &
74 profile_t, &
87
88 integer, parameter :: &
89 LABEL_LENGTH = 25, & !< Max. number of characters of tag label.
90 max_profiles = 200
91
92 type profile_t
93 private
94 character(LABEL_LENGTH) :: label
95 float :: entry_time
96 float :: total_time
97 float :: min_time
98 float :: self_time
99 float :: op_count_current
100 float :: op_count
101 float :: op_count_child
102 float :: op_count_child_current
103 float :: tr_count_current
104 float :: tr_count
105 float :: tr_count_child
106 float :: tr_count_child_current
107 type(profile_t), pointer :: parent
108 integer :: count
109 logical :: initialized = .false.
110 logical :: active = .false.
111 logical :: exclude
112 integer :: index
113 logical :: has_child(MAX_PROFILES)
114 float :: timings(max_profiles)
115 end type profile_t
116
118 private
119 type(profile_t), pointer :: p
120 end type profile_pointer_t
121
123 module procedure &
131 module procedure &
139 end interface profiling_count_transfers
140
142 module procedure iprofiling_count_operations
143 module procedure rprofiling_count_operations
144 module procedure dprofiling_count_operations
146
147 integer, parameter, public :: &
154 integer, parameter :: max_memory_vars = 25
157 private
158 integer, public :: mode
160 type(profile_pointer_t) :: current
161 type(profile_pointer_t) :: profile_list(max_profiles)
162 integer :: last_profile
164 integer(i8) :: alloc_count
165 integer(i8) :: dealloc_count
167 integer(i8) :: memory_limit
168 integer(i8) :: total_memory
169 integer(i8) :: max_memory
170 character(len=256) :: max_memory_location
171
172 integer(i8) :: large_vars_size(max_memory_vars)
173 character(len=256) :: large_vars(max_memory_vars)
174
175 float :: start_time
176 integer :: mem_iunit
177
178 character(len=256) :: output_dir
179 character(len=6) :: file_number
180
181 logical :: all_nodes
182
183 logical :: output_yaml
184 logical :: output_tree
185 end type profile_vars_t
186
187 type(profile_vars_t), target, public :: prof_vars
188
192
193 type(profile_t), save, public :: C_PROFILING_COMPLETE_RUN
195contains
196
197 ! ---------------------------------------------------------
199 subroutine profiling_init(namespace)
200 type(namespace_t), intent(in) :: namespace
201
202 integer :: ii
203
205
206 ! FIXME: nothing is thread-safe here!
208 !%Variable ProfilingMode
209 !%Default no
210 !%Type integer
211 !%Section Execution::Optimization
212 !%Description
213 !% Use this variable to run <tt>Octopus</tt> in profiling mode. In this mode
214 !% <tt>Octopus</tt> records the time spent in certain areas of the code and
215 !% the number of times this code is executed. These numbers
216 !% are written in <tt>./profiling.NNN/profiling.nnn</tt> with <tt>nnn</tt> being the
217 !% node number (<tt>000</tt> in serial) and <tt>NNN</tt> the number of processors.
218 !% This is mainly for development purposes. Note, however, that
219 !% <tt>Octopus</tt> should be compiled with <tt>--disable-debug</tt> to do proper
220 !% profiling. Warning: you may encounter strange results with OpenMP.
221 !%Option no 0
222 !% No profiling information is generated.
223 !%Option prof_time 1
224 !% Profile the time spent in defined profiling regions.
225 !%Option prof_memory 2
226 !% As well as the time, summary information on memory usage and the largest arrays are reported.
227 !%Option prof_memory_full 4
228 !% As well as the time and summary memory information, a
229 !% log is reported of every allocation and deallocation.
230 !%Option likwid 8
231 !% Enable instrumentation using LIKWID.
232 !%Option prof_io 16
233 !% Count the number of file open and close.
234 !%End
235
236 call parse_variable(namespace, 'ProfilingMode', 0, prof_vars%mode)
237 if (.not. varinfo_valid_option('ProfilingMode', prof_vars%mode)) then
238 call messages_input_error(namespace, 'ProfilingMode')
239 end if
241 in_profiling_mode = (prof_vars%mode > 0)
242 if (.not. in_profiling_mode) then
244 return
245 end if
247 !%Variable ProfilingAllNodes
248 !%Default no
249 !%Type logical
250 !%Section Execution::Optimization
251 !%Description
252 !% This variable controls whether all nodes print the time
253 !% profiling output. If set to no, the default, only the root node
254 !% will write the profile. If set to yes, all nodes will print it.
255 !%End
256
257 call parse_variable(namespace, 'ProfilingAllNodes', .false., prof_vars%all_nodes)
258
259 call get_output_dir()
260
261 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
262 prof_vars%mode = ior(prof_vars%mode, profiling_memory)
263 end if
264
265 ! initialize memory profiling
266 if (bitand(prof_vars%mode, profiling_memory) /= 0) then
267 prof_vars%alloc_count = 0
268 prof_vars%dealloc_count = 0
269
270 prof_vars%total_memory = 0
271 prof_vars%max_memory = 0
272 prof_vars%max_memory_location = ''
273 prof_vars%start_time = loct_clock()
274
275 prof_vars%large_vars_size(:) = 0
276 prof_vars%large_vars(:) = ''
277
278 !%Variable MemoryLimit
279 !%Default -1
280 !%Type integer
281 !%Section Execution::Optimization
282 !%Description
283 !% If positive, <tt>Octopus</tt> will stop if more memory than <tt>MemoryLimit</tt>
284 !% is requested (in kb). Note that this variable only works when
285 !% <tt>ProfilingMode = prof_memory(_full)</tt>.
286 !%End
287 call parse_variable(namespace, 'MemoryLimit', -1, ii)
288 prof_vars%memory_limit = int(ii, 8)*1024
289 end if
290
291 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
292 ! make sure output directory is available before other processes try to write there
293 call mpi_world%barrier()
294
295 prof_vars%mem_iunit = io_open(trim(prof_vars%output_dir)//'/memory.'//prof_vars%file_number, &
296 namespace, action='write')
297 write(prof_vars%mem_iunit, '(5a16,a70)') 'Elapsed Time', 'Alloc/Dealloc', 'Size (words)', 'Prof Mem', &
298 'Sys Mem', 'Variable Name(Filename:Line)'
299 end if
300
301 ! initialize time profiling
302 prof_vars%last_profile = 0
303 nullify(prof_vars%current%p)
304
305 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
306#ifdef HAVE_LIKWID
307 call likwid_markerinit()
308#endif
309 end if
310
311 !%Variable ProfilingOutputYAML
312 !%Default no
313 !%Type logical
314 !%Section Execution::Optimization
315 !%Description
316 !% This variable controls whether the profiling output is additionally
317 !% written to a YAML file.
318 !%End
319 call parse_variable(namespace, 'ProfilingOutputYAML', .false., prof_vars%output_yaml)
320
321 !%Variable ProfilingOutputTree
322 !%Default yes
323 !%Type logical
324 !%Section Execution::Optimization
325 !%Description
326 !% This variable controls whether the profiling output is additionally
327 !% written as a tree.
328 !%End
329 call parse_variable(namespace, 'ProfilingOutputTree', .true., prof_vars%output_tree)
330
331 call profiling_in(c_profiling_complete_run, 'COMPLETE_RUN')
332
334
335 contains
336
337 ! ---------------------------------------------------------
338 subroutine get_output_dir()
339
341
342 write(prof_vars%file_number, '(i6.6)') mpi_world%rank
343
344 prof_vars%output_dir = 'profiling'
345
346 if (mpi_grp_is_root(mpi_world)) call io_mkdir(trim(prof_vars%output_dir), namespace)
347
349 end subroutine get_output_dir
350
351 end subroutine profiling_init
352
353
354 ! ---------------------------------------------------------
355 subroutine profiling_end(namespace)
356 type(namespace_t), intent(in) :: namespace
357 integer :: ii
358 float, parameter :: megabyte = cnst(1048576.0)
359 integer(i8) :: io_open_count, io_close_count
360 integer(i8) :: io_open_count_red, io_close_count_red
361
362 if (.not. in_profiling_mode) return
364
365 call profiling_out(c_profiling_complete_run)
366 call profiling_output(namespace)
367
368 do ii = 1, prof_vars%last_profile
369 prof_vars%profile_list(ii)%p%initialized = .false.
370 end do
371
372 if (bitand(prof_vars%mode, profiling_memory) /= 0) then
373 call messages_print_with_emphasis(msg="Memory profiling information", namespace=namespace)
374 write(message(1), '(a,i10)') 'Number of allocations = ', prof_vars%alloc_count
375 write(message(2), '(a,i10)') 'Number of deallocations = ', prof_vars%dealloc_count
376 write(message(3), '(a,f18.3,a)') 'Maximum total memory allocated = ', prof_vars%max_memory/megabyte, ' Mbytes'
377 write(message(4), '(2x,a,a)') 'at ', trim(prof_vars%max_memory_location)
378 call messages_info(4)
379
380 message(1) = ''
381 message(2) = 'Largest variables allocated:'
382 call messages_info(2)
383 do ii = 1, max_memory_vars
384 write(message(1),'(i2,f18.3,2a)') ii, prof_vars%large_vars_size(ii)/megabyte, ' Mbytes ', trim(prof_vars%large_vars(ii))
385 call messages_info(1)
386 end do
387
388 call messages_print_with_emphasis(namespace=namespace)
389
390 if (prof_vars%alloc_count /= prof_vars%dealloc_count) then
391 write(message(1),'(a,i10,a,i10,a)') "Not all memory was deallocated: ", prof_vars%alloc_count, &
392 ' allocations and ', prof_vars%dealloc_count, ' deallocations'
393 call messages_warning(1, all_nodes = .true.)
394 end if
395 if (prof_vars%total_memory > 0) then
396 write(message(1),'(a,f18.3,a,f18.3,a)') "Remaining allocated memory: ", prof_vars%total_memory/megabyte, &
397 ' Mbytes (out of maximum ', prof_vars%max_memory/megabyte, ' Mbytes)'
398 call messages_warning(1, all_nodes = .true.)
399 end if
400 end if
401
402 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
403 call io_close(prof_vars%mem_iunit)
404 end if
405
406 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
407#ifdef HAVE_LIKWID
408 call likwid_markerclose()
409#endif
410 end if
411
412 if (bitand(prof_vars%mode, profiling_io) /= 0) then
413 call messages_print_with_emphasis(msg="IO profiling information", namespace=namespace)
416 write(message(1), '(a,i10)') 'Number of file open = ', io_open_count
417 write(message(2), '(a,i10)') 'Number of file close = ', io_close_count
418 call mpi_world%allreduce(io_open_count, io_open_count_red, 1, mpi_integer8, mpi_sum)
419 call mpi_world%allreduce(io_close_count, io_close_count_red, 1, mpi_integer8, mpi_sum)
420 write(message(3), '(a,i10)') 'Global number of file open = ', io_open_count_red
421 write(message(4), '(a,i10)') 'Global number of file close = ', io_close_count_red
422 call messages_info(4)
423 call messages_print_with_emphasis(namespace=namespace)
424 end if
425
427 end subroutine profiling_end
428
429
430 ! ---------------------------------------------------------
432 subroutine profile_init(this, label)
433 type(profile_t), target, intent(out) :: this
434 character(*), intent(in) :: label
435
436 integer :: iprofile
437
439
440 if(len(label) > label_length) then
441 message(1) = "Label " // trim(label) // " is too long for the internal profiler"
442 call messages_fatal(1)
443 end if
444
445 this%label = label
446 this%total_time = m_zero
447 this%min_time = m_huge
448 this%self_time = m_zero
449 this%entry_time = huge(this%entry_time)
450 this%count = 0
451 this%op_count_current = m_zero
452 this%op_count = m_zero
453 this%op_count_child = m_zero
454 this%tr_count_current = m_zero
455 this%tr_count = m_zero
456 this%tr_count_child = m_zero
457 this%active = .false.
458 nullify(this%parent)
459 this%has_child = .false.
460 this%timings = m_zero
461 this%index = 0
462
463 if (.not. in_profiling_mode) then
465 return
466 end if
467
468 prof_vars%last_profile = prof_vars%last_profile + 1
469
470 assert(prof_vars%last_profile <= max_profiles)
471
472 prof_vars%profile_list(prof_vars%last_profile)%p => this
473 this%index = prof_vars%last_profile
474 this%initialized = .true.
475
476 ! print out a warning if a name is used more than once
477 do iprofile = 1, prof_vars%last_profile - 1
478 if (prof_vars%profile_list(iprofile)%p%label == this%label) then
479 message(1) = "Label "//label//" used more than once."
480 call messages_fatal(1)
481 exit
482 end if
483 end do
484
486 end subroutine profile_init
487
488
489 ! ---------------------------------------------------------
490 logical function profile_is_initialized(this)
491 type(profile_t), intent(in) :: this
492
494 profile_is_initialized = this%initialized
495
497 end function profile_is_initialized
498
499
500 ! ---------------------------------------------------------
503 subroutine profiling_in(this, label, exclude)
504 type(profile_t), target, intent(inout) :: this
505 character(*), intent(in) :: label
506 logical, optional, intent(in) :: exclude
508
509 float :: now
510
511 if (.not. in_profiling_mode) return
512 if (.not. not_in_openmp()) return
513
514 ! no PUSH_SUB, called too often
515
516 if (.not. this%initialized) then
517 call profile_init(this, label)
518 end if
519
520 assert(.not. this%active)
521 this%active = .true.
522#if defined(HAVE_MPI)
523 now = mpi_wtime()
524#else
525 now = loct_clock()
526#endif
527 if (associated(prof_vars%current%p)) then
528 !keep a pointer to the parent
529 this%parent => prof_vars%current%p
530 this%parent%has_child(this%index) = .true.
531 else
532 !we are orphans
533 nullify(this%parent)
534 end if
535
536 this%op_count_current = m_zero
537 this%tr_count_current = m_zero
538 this%op_count_child_current = m_zero
539 this%tr_count_child_current = m_zero
540
541 prof_vars%current%p => this
542 this%entry_time = now
544 this%exclude = optional_default(exclude, .false.)
545
546 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
547#ifdef HAVE_LIKWID
548 call likwid_markerstartregion(trim(label))
549#endif
550 end if
551
552#ifdef HAVE_NVTX
553 call nvtx_range_push(trim(label), this%index)
554#endif
555
556 end subroutine profiling_in
557
558
559 ! ---------------------------------------------------------
563 subroutine profiling_out(this)
564 type(profile_t), intent(inout) :: this
565
566 float :: now, time_spent
567
568 if (.not. in_profiling_mode) return
569 if (.not. not_in_openmp()) return
570
571 ! no PUSH_SUB, called too often
572
573 assert(this%initialized)
574 assert(this%active)
575 this%active = .false.
576#if defined(HAVE_MPI)
577 now = mpi_wtime()
578#else
579 now = loct_clock()
580#endif
581
582 time_spent = now - this%entry_time
583 this%total_time = this%total_time + time_spent
584 this%self_time = this%self_time + time_spent
585 this%count = this%count + 1
586 if (time_spent < this%min_time) then
587 this%min_time = time_spent
588 end if
589
590 this%op_count = this%op_count + this%op_count_current
591 this%tr_count = this%tr_count + this%tr_count_current
592 this%op_count_child = this%op_count_child + this%op_count_child_current
593 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
594
595 if (associated(this%parent)) then
596 !remove the spent from the self time of our parent
597 this%parent%self_time = this%parent%self_time - time_spent
598 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
599
600 ! add the operations to the parent
601 this%parent%op_count_child_current = this%parent%op_count_child_current &
602 + this%op_count_current + this%op_count_child_current
603 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
604 + this%tr_count_current + this%tr_count_child_current
605
606 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
607
608 !and set parent as current
609 prof_vars%current%p => this%parent
610
611 else
612 nullify(prof_vars%current%p)
613 end if
614
615 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
616#ifdef HAVE_LIKWID
617 call likwid_markerstopregion(trim(this%label))
618#endif
619 end if
620
621#ifdef HAVE_NVTX
622 call nvtx_range_pop()
623#endif
624
625 end subroutine profiling_out
626
627
628 ! ---------------------------------------------------------
629
630 subroutine iprofiling_count_operations(ops)
631 integer, intent(in) :: ops
632
633 if (.not. in_profiling_mode) return
634 ! no PUSH_SUB, called too often
635
636 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + tofloat(ops)
637 end subroutine iprofiling_count_operations
638
639
640 ! ---------------------------------------------------------
641
642 subroutine rprofiling_count_operations(ops)
643 real(4), intent(in) :: ops
644
645 if (.not. in_profiling_mode) return
646 ! no PUSH_SUB, called too often
647
648 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + tofloat(ops)
649 end subroutine rprofiling_count_operations
650
651
652 ! ---------------------------------------------------------
653
654 subroutine dprofiling_count_operations(ops)
655 real(r8), intent(in) :: ops
656
657 if (.not. in_profiling_mode) return
658 ! no PUSH_SUB, called too often
659
660 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + ops
661
662 end subroutine dprofiling_count_operations
663
664
665 ! ---------------------------------------------------------
666
667 subroutine profiling_count_tran_int_l(trf, type)
668 integer(i8), intent(in) :: trf
669 integer, intent(in) :: type
670
671 if (.not. in_profiling_mode) return
672 ! no PUSH_SUB, called too often
673
674 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
675 end subroutine profiling_count_tran_int_l
676
677
678 ! ---------------------------------------------------------
679
680 subroutine profiling_count_tran_int_8_l(trf, type)
681 integer(i8), intent(in) :: trf
682 integer(i8), intent(in) :: type
684 if (.not. in_profiling_mode) return
685 ! no PUSH_SUB, called too often
686
687 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
688 end subroutine profiling_count_tran_int_8_l
689
690
691 ! ---------------------------------------------------------
692
693 subroutine profiling_count_tran_real_4_l(trf, type)
694 integer(i8), intent(in) :: trf
695 real(4), intent(in) :: type
696
697 if (.not. in_profiling_mode) return
698 ! no PUSH_SUB, called too often
699
700 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
701
702 end subroutine profiling_count_tran_real_4_l
703
704
705 ! ---------------------------------------------------------
706
707 subroutine profiling_count_tran_real_8_l(trf, type)
708 integer(i8), intent(in) :: trf
709 real(r8), intent(in) :: type
710
711 if (.not. in_profiling_mode) return
712 ! no PUSH_SUB, called too often
713
714 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
715
716 end subroutine profiling_count_tran_real_8_l
717
718
719 ! ---------------------------------------------------------
721 subroutine profiling_count_tran_complex_4_l(trf, type)
722 integer(i8), intent(in) :: trf
723 complex(4), intent(in) :: type
724
725 if (.not. in_profiling_mode) return
726 ! no PUSH_SUB, called too often
727
728 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
729
731
732
733 ! ---------------------------------------------------------
734
735 subroutine profiling_count_tran_complex_8_l(trf, type)
736 integer(i8), intent(in) :: trf
737 complex(r8), intent(in) :: type
738
739 if (.not. in_profiling_mode) return
740 ! no PUSH_SUB, called too often
741
742 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 16*tofloat(trf)
743
745
747 ! ---------------------------------------------------------
748
749 subroutine profiling_count_tran_type_l(trf, type)
750 integer(i8), intent(in) :: trf
751 type(type_t), intent(in) :: type
752
753 if (.not. in_profiling_mode) return
754 ! no PUSH_SUB, called too often
755
756 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + tofloat(trf)*types_get_size(type)
757
758 end subroutine profiling_count_tran_type_l
759
761 ! ---------------------------------------------------------
762
763 subroutine profiling_count_tran_int(trf, type)
764 integer, intent(in) :: trf
765 integer, intent(in) :: type
766
767 if (.not. in_profiling_mode) return
768 ! no PUSH_SUB, called too often
769
770 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
771 end subroutine profiling_count_tran_int
772
773
774 ! ---------------------------------------------------------
775
776 subroutine profiling_count_tran_int_8(trf, type)
777 integer, intent(in) :: trf
778 integer(i8), intent(in) :: type
779
780 if (.not. in_profiling_mode) return
781 ! no PUSH_SUB, called too often
782
783 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
784 end subroutine profiling_count_tran_int_8
785
786
787 ! ---------------------------------------------------------
789 subroutine profiling_count_tran_real_4(trf, type)
790 integer, intent(in) :: trf
791 real(4), intent(in) :: type
792
793 if (.not. in_profiling_mode) return
794 ! no PUSH_SUB, called too often
795
796 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
797
798 end subroutine profiling_count_tran_real_4
799
800
801 ! ---------------------------------------------------------
803 subroutine profiling_count_tran_real_8(trf, type)
804 integer, intent(in) :: trf
805 real(r8), intent(in) :: type
806
807 if (.not. in_profiling_mode) return
808 ! no PUSH_SUB, called too often
809
810 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
811
812 end subroutine profiling_count_tran_real_8
813
814
815 ! ---------------------------------------------------------
817 subroutine profiling_count_tran_complex_4(trf, type)
818 integer, intent(in) :: trf
819 complex(4), intent(in) :: type
820
821 if (.not. in_profiling_mode) return
822 ! no PUSH_SUB, called too often
823
824 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
825
826 end subroutine profiling_count_tran_complex_4
827
828
829 ! ---------------------------------------------------------
830
831 subroutine profiling_count_tran_complex_8(trf, type)
832 integer, intent(in) :: trf
833 complex(r8), intent(in) :: type
834
835 if (.not. in_profiling_mode) return
836 ! no PUSH_SUB, called too often
837
838 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 16*tofloat(trf)
839
840 end subroutine profiling_count_tran_complex_8
841
843 ! ---------------------------------------------------------
844
845 subroutine profiling_count_tran_type(trf, type)
846 integer, intent(in) :: trf
847 type(type_t), intent(in) :: type
848
849 if (.not. in_profiling_mode) return
850 ! no PUSH_SUB, called too often
851
852 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + tofloat(trf)*types_get_size(type)
853
854 end subroutine profiling_count_tran_type
855
856 ! ---------------------------------------------------------
857 float function profile_total_time(this)
858 type(profile_t), intent(in) :: this
859
861 profile_total_time = this%total_time
862
864 end function profile_total_time
865
866
867 ! ---------------------------------------------------------
868 float function profile_self_time(this)
869 type(profile_t), intent(in) :: this
872 profile_self_time = this%self_time
873
875 end function profile_self_time
876
877
878 ! ---------------------------------------------------------
879 float function profile_total_time_per_call(this)
880 type(profile_t), intent(in) :: this
881
883 profile_total_time_per_call = this%total_time / tofloat(this%count)
886 end function profile_total_time_per_call
887
888
889 ! ---------------------------------------------------------
890 float function profile_min_time(this)
891 type(profile_t), intent(in) :: this
892
894 profile_min_time = this%min_time
895
897 end function profile_min_time
899
900 ! ---------------------------------------------------------
901 float function profile_self_time_per_call(this)
902 type(profile_t), intent(in) :: this
903
905 profile_self_time_per_call = this%self_time / tofloat(this%count)
906
908 end function profile_self_time_per_call
909
911 ! ---------------------------------------------------------
912 real(r8) function profile_total_throughput(this)
913 type(profile_t), intent(in) :: this
914
915 push_sub(profile_throughput)
916
917 if (this%total_time > epsilon(this%total_time)) then
918 profile_total_throughput = (this%op_count + this%op_count_child)/this%total_time*cnst(1.0e-6)
919 else
920 profile_total_throughput = cnst(0.0)
921 end if
922
923 pop_sub(profile_throughput)
924 end function profile_total_throughput
925
926
927 ! ---------------------------------------------------------
928
929 float function profile_total_bandwidth(this)
930 type(profile_t), intent(in) :: this
931
932 push_sub(profile_bandwidth)
933
934 if (this%total_time > epsilon(this%total_time)) then
935 profile_total_bandwidth = (this%tr_count + this%tr_count_child)/(this%total_time*cnst(1024.0)**2)
936 else
937 profile_total_bandwidth = cnst(0.0)
938 end if
939
940 pop_sub(profile_bandwidth)
941 end function profile_total_bandwidth
942
943 ! ---------------------------------------------------------
944
945 float function profile_self_throughput(this)
946 type(profile_t), intent(in) :: this
947
948 push_sub(profile_throughput)
949
950 if (this%self_time > epsilon(this%self_time)) then
951 profile_self_throughput = this%op_count/this%self_time*cnst(1.0e-6)
952 else
953 profile_self_throughput = cnst(0.0)
954 end if
955
956 pop_sub(profile_throughput)
957 end function profile_self_throughput
958
959 ! ---------------------------------------------------------
960
961 float function profile_self_bandwidth(this)
962 type(profile_t), intent(in) :: this
963
964 push_sub(profile_bandwidth)
966 if (this%self_time > epsilon(this%self_time)) then
967 profile_self_bandwidth = this%tr_count/(this%self_time*cnst(1024.0)**2)
968 else
969 profile_self_bandwidth = cnst(0.0)
970 end if
971
972 pop_sub(profile_bandwidth)
973 end function profile_self_bandwidth
974
975
976 ! ---------------------------------------------------------
977 integer function profile_num_calls(this)
978 type(profile_t), intent(in) :: this
979
980 push_sub(profile_num_calls)
981 profile_num_calls = this%count
983 pop_sub(profile_num_calls)
984 end function profile_num_calls
985
986
987 ! ---------------------------------------------------------
988 character(LABEL_LENGTH) function profile_label(this)
989 type(profile_t), intent(in) :: this
990
991 push_sub(profile_label)
992 profile_label = this%label
993
994 pop_sub(profile_label)
995 end function profile_label
996
997
998 ! ---------------------------------------------------------
1005 subroutine profiling_output(namespace)
1006 type(namespace_t), intent(in) :: namespace
1007
1008 integer :: ii
1009 integer :: iunit
1010 float :: total_time
1011 type(profile_t), pointer :: prof
1012 character(len=256) :: filename
1013 float, allocatable :: selftime(:)
1014 integer, allocatable :: position(:)
1015
1016 if (.not. in_profiling_mode) return
1017 push_sub(profiling_output)
1018
1019 call mpi_world%barrier()
1020
1021 if (.not. prof_vars%all_nodes .and. .not. mpi_grp_is_root(mpi_world)) then
1022 pop_sub(profiling_output)
1023 return
1024 end if
1025
1026 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number
1027 iunit = io_open(trim(filename), namespace, action='write')
1028 if (iunit < 0) then
1029 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1030 call messages_warning(1)
1031 pop_sub(profiling_output)
1032 return
1033 end if
1034
1035 write(iunit, '(2a)') &
1036 ' CUMULATIVE TIME ', &
1037 ' | SELF TIME'
1038 write(iunit, '(2a)') &
1039 ' ----------------------------------------------------------', &
1040 '----------------|-------------------------------------------------------------'
1041 write(iunit, '(2a)') &
1042 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1043 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1044 write(iunit, '(2a)') &
1045 '===================================================================================================', &
1046 '=================|============================================================='
1047
1049
1050 safe_allocate(selftime(1:prof_vars%last_profile))
1051 safe_allocate(position(1:prof_vars%last_profile))
1052
1053 do ii = 1, prof_vars%last_profile
1054 selftime(ii) = -profile_self_time(prof_vars%profile_list(ii)%p)
1055 position(ii) = ii
1056 end do
1057
1058 call sort(selftime, position)
1059
1060 do ii = 1, prof_vars%last_profile
1061 prof => prof_vars%profile_list(position(ii))%p
1062 if (.not. prof%initialized) then
1063 write(message(1),'(a,i6,a)') "Internal error: Profile number ", position(ii), " is not initialized."
1064 call messages_fatal(1)
1065 end if
1066 if (prof%active) then
1067 write(message(1),'(a)') "Internal error: Profile '" // trim(profile_label(prof)) // &
1068 "' is active, i.e. profiling_out was not called."
1069 call messages_warning(1)
1070 end if
1071
1072 if (profile_num_calls(prof) == 0) cycle
1073
1074 write(iunit, '(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1075 profile_label(prof), &
1076 profile_num_calls(prof), &
1077 profile_total_time(prof), &
1079 profile_min_time(prof), &
1082 profile_total_time(prof)/total_time*cnst(100.0), &
1083 ' | ', &
1084 profile_self_time(prof), &
1087 profile_self_bandwidth(prof), &
1088 profile_self_time(prof)/total_time*cnst(100.0)
1089 end do
1090
1091 call io_close(iunit)
1092
1093 if (prof_vars%output_yaml) then
1094 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number//'.yaml'
1095 iunit = io_open(trim(filename), namespace, action='write')
1096 if (iunit < 0) then
1097 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1098 call messages_warning(1)
1099 pop_sub(profiling_output)
1100 return
1101 end if
1102 write(iunit, '(2a)') 'schema: [num_calls, total_time, total_throughput, ', &
1103 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1104 write(iunit, '(a)') 'data:'
1105
1106 do ii = 1, prof_vars%last_profile
1107 prof => prof_vars%profile_list(position(ii))%p
1108 if (profile_num_calls(prof) == 0) cycle
1109 write(iunit, '(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1110 ' ', profile_label(prof), ': [', &
1111 profile_num_calls(prof), ', ', &
1112 profile_total_time(prof), ', ', &
1113 profile_total_throughput(prof), ', ', &
1114 profile_total_bandwidth(prof), ', ', &
1115 profile_self_time(prof), ', ', &
1116 profile_self_throughput(prof), ', ', &
1117 profile_self_bandwidth(prof), ']'
1118 end do
1119
1120 call io_close(iunit)
1121 end if
1122
1123 safe_deallocate_a(selftime)
1124 safe_deallocate_a(position)
1125
1126 if (prof_vars%output_tree) then
1127 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number//'.tree'
1128 iunit = io_open(trim(filename), namespace, action='write')
1129 if (iunit < 0) then
1130 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1131 call messages_warning(1)
1132 pop_sub(profiling_output)
1133 return
1134 end if
1135 write(iunit, '(a40,a11,a11,a12)') &
1136 "Tree level, region ", &
1137 "% of total ", "% of parent", &
1138 " Full time"
1139
1140 ! output of top-level node
1141 write(iunit, '(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1142 repeat('-', 0) // '| ', &
1144 repeat(' ', 15-0-2), &
1145 100.0, "% ", &
1146 100.0, "% ", &
1147 total_time
1148 call output_tree_level(c_profiling_complete_run, 1, total_time, iunit)
1149 write(iunit, '(a)') "// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1150 write(iunit, '(a)') "// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1151 call io_close(iunit)
1152 end if
1153
1154 pop_sub(profiling_output)
1155 contains
1156 ! Traverse the tree depth-first, pre-order
1157 recursive subroutine output_tree_level(profile, level, total_time, iunit)
1158 type(profile_t), intent(in) :: profile
1159 integer, intent(in) :: level
1160 float, intent(in) :: total_time
1161 integer, intent(in) :: iunit
1162
1163 integer :: ichild, width
1164
1166 width = 15
1167 ! loop over children
1168 do ichild = 1, max_profiles
1169 if (profile%has_child(ichild)) then
1170 ! print out information on current child with the first marker
1171 ! placed according to the level of the tree
1172 write(iunit, '(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1173 repeat('-', level) // '| ', &
1174 profile_label(prof_vars%profile_list(ichild)%p), &
1175 repeat(' ', width-level-2), &
1176 profile%timings(ichild)/total_time * 100, "% ", &
1177 profile%timings(ichild)/profile%total_time * 100, "% ", &
1178 profile%timings(ichild)
1179 call output_tree_level(prof_vars%profile_list(ichild)%p, &
1180 level+1, total_time, iunit)
1181 end if
1182 end do
1184 end subroutine output_tree_level
1185 end subroutine profiling_output
1186
1187
1188 ! ---------------------------------------------------------
1189 subroutine profiling_make_position_str(var, file, line, str)
1190 character(len=*), intent(in) :: var
1191 character(len=*), intent(in) :: file
1192 integer, intent(in) :: line
1193 character(len=*), intent(out) :: str
1194
1195 integer :: ii, jj, nn
1196
1197 ! no push_sub, called too many times
1198
1199 jj = len(var)
1200 if (var(jj:jj) == ')') then
1201 nn = 1
1202 do ii = len(var)-1, 1, -1
1203 jj = ii - 1
1204 if (var(ii:ii) == ')') nn = nn + 1
1205 if (var(ii:ii) == '(') nn = nn - 1
1206 if (nn == 0) exit
1207 end do
1208 if (jj == 0) then
1209 message(1) = "Internal Error in profiling_memory_log"
1210 call messages_fatal(1)
1211 end if
1212 end if
1213 ii = 1
1214 do while (file(ii:ii+2) == "../")
1215 ii = ii + 3
1216 end do
1217 write(str, '(4a,i5,a)') var(1:jj), "(", trim(file(ii:len(file))), ":", line, ")"
1218 call compact(str)
1219
1220 end subroutine profiling_make_position_str
1221
1222
1223 ! ---------------------------------------------------------
1224 subroutine profiling_memory_log(type, var, file, line, size)
1225 character(len=*), intent(in) :: type
1226 character(len=*), intent(in) :: var
1227 character(len=*), intent(in) :: file
1228 integer, intent(in) :: line
1229 integer(i8), intent(in) :: size
1230
1231 character(len=256) :: str
1232 integer(i8) :: mem
1233
1234 ! no push_sub, called too many times
1235
1236 call profiling_make_position_str(var, file, line, str)
1237
1238 ! get number of pages
1239 mem = loct_get_memory_usage()
1240
1241 write(prof_vars%mem_iunit, '(f16.6,a16,3i16,a70)') loct_clock() - prof_vars%start_time, &
1242 trim(type), size, prof_vars%total_memory, mem, trim(str)
1243
1244 end subroutine profiling_memory_log
1245
1246
1247 !-----------------------------------------------------
1248 subroutine profiling_memory_allocate(var, file, line, size_)
1249 character(len=*), intent(in) :: var
1250 character(len=*), intent(in) :: file
1251 integer, intent(in) :: line
1252 integer(i8), intent(in) :: size_
1253
1254 integer :: ii, jj
1255 integer(i8) :: size
1256 character(len=256) :: str
1257
1258 ! no push_sub, called too many times
1259
1260 size = size_ ! make a copy that we can change
1261
1262 prof_vars%alloc_count = prof_vars%alloc_count + 1
1263 prof_vars%total_memory = prof_vars%total_memory + size
1264
1265 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
1266 call profiling_memory_log('A ', var, file, line, size)
1267 end if
1268
1269 if (prof_vars%memory_limit > 0) then
1270 if (prof_vars%total_memory > prof_vars%memory_limit) then
1271 message(1) = "Memory limit set in the input file was passed"
1272 call messages_fatal(1)
1273 end if
1274 end if
1275
1276 if (prof_vars%total_memory > prof_vars%max_memory) then
1277 prof_vars%max_memory = prof_vars%total_memory
1278 call profiling_make_position_str(var, file, line, prof_vars%max_memory_location)
1279 end if
1280
1281 call profiling_make_position_str(var, file, line, str)
1282
1283 ! check if variable is already in stack
1284 do ii = 1, max_memory_vars
1285 if (str == prof_vars%large_vars(ii)) then
1286 if (size > prof_vars%large_vars_size(ii)) then
1287 ! delete variable by moving stack up
1288 do jj = ii, max_memory_vars - 1
1289 prof_vars%large_vars(jj) = prof_vars%large_vars(jj + 1)
1290 prof_vars%large_vars_size(jj) = prof_vars%large_vars_size(jj + 1)
1291 end do
1292 prof_vars%large_vars(max_memory_vars) = ""
1293 prof_vars%large_vars_size(max_memory_vars) = 0
1294 else
1295 ! do not consider this variable any longer
1296 size = -1
1297 end if
1298 exit
1299 end if
1300 end do
1302 do ii = 1, max_memory_vars
1303 if (size > prof_vars%large_vars_size(ii)) then
1304 ! move the stack one position down
1305 do jj = max_memory_vars, ii + 1, -1
1306 prof_vars%large_vars(jj) = prof_vars%large_vars(jj - 1)
1307 prof_vars%large_vars_size(jj) = prof_vars%large_vars_size(jj - 1)
1308 end do
1309 prof_vars%large_vars_size(ii) = size
1310 prof_vars%large_vars(ii) = str
1311 exit
1312 end if
1313 end do
1314
1315 end subroutine profiling_memory_allocate
1316
1317
1318 !-----------------------------------------------------
1319 subroutine profiling_memory_deallocate(var, file, line, size)
1320 character(len=*), intent(in) :: var
1321 character(len=*), intent(in) :: file
1322 integer, intent(in) :: line
1323 integer(i8), intent(in) :: size
1324
1325 ! no push_sub, called too many times
1326
1327 prof_vars%dealloc_count = prof_vars%dealloc_count + 1
1328 prof_vars%total_memory = prof_vars%total_memory - size
1329
1330 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
1331 call profiling_memory_log('D ', var, file, line, -size)
1332 end if
1333
1334 end subroutine profiling_memory_deallocate
1335
1336
1337end module profiling_oct_m
1338
1339!! Local Variables:
1340!! mode: f90
1341!! coding: utf-8
1342!! End:
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
real(8), parameter, public m_huge
Definition: global.F90:188
logical function, public not_in_openmp()
Definition: global.F90:399
logical, public in_profiling_mode
Same for profiling mode.
Definition: global.F90:217
real(8), parameter, public m_zero
Definition: global.F90:170
Definition: io.F90:106
subroutine, public io_close(iunit, grp)
Definition: io.F90:460
integer(i8) pure function, public io_get_open_count()
Definition: io.F90:686
integer(i8), save io_open_count
Definition: io.F90:148
subroutine, public io_mkdir(fname, namespace, parents)
Definition: io.F90:346
integer(i8) pure function, public io_get_close_count()
Definition: io.F90:693
integer(i8), save io_close_count
Definition: io.F90:149
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
Definition: io.F90:387
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
Definition: messages.F90:918
character(len=512), private msg
Definition: messages.F90:162
subroutine, public push_sub(sub_name)
Definition: messages.F90:1046
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:530
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
Definition: messages.F90:611
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:157
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:406
subroutine, public pop_sub(sub_name)
Definition: messages.F90:1101
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:710
logical function mpi_grp_is_root(grp)
Definition: mpi.F90:374
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:247
real(8) function profile_total_time_per_call(this)
Definition: profiling.F90:933
subroutine, public profiling_in(this, label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:557
subroutine, public profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
Definition: profiling.F90:1059
subroutine profiling_count_tran_int(trf, type)
Definition: profiling.F90:817
subroutine profiling_count_tran_int_8_l(trf, type)
Definition: profiling.F90:734
subroutine, public profiling_end(namespace)
Definition: profiling.F90:409
real(8) function profile_min_time(this)
Definition: profiling.F90:944
type(profile_t), save, public c_profiling_complete_run
For the moment we will have the profiler objects here, but they should be moved to their respective m...
Definition: profiling.F90:246
integer, parameter max_memory_vars
Definition: profiling.F90:207
subroutine iprofiling_count_operations(ops)
Definition: profiling.F90:684
integer, parameter, public profiling_memory
Definition: profiling.F90:200
subroutine profiling_memory_log(type, var, file, line, size)
Definition: profiling.F90:1278
real(8) function profile_self_throughput(this)
Definition: profiling.F90:999
integer, parameter, public profiling_likwid
Definition: profiling.F90:200
real(8) function profile_self_time_per_call(this)
Definition: profiling.F90:955
integer, parameter, public profiling_memory_full
Definition: profiling.F90:200
real(8) function profile_self_time(this)
Definition: profiling.F90:922
subroutine profiling_count_tran_real_8(trf, type)
Definition: profiling.F90:857
subroutine, public profiling_out(this)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:617
subroutine profiling_count_tran_real_8_l(trf, type)
Definition: profiling.F90:761
subroutine profiling_count_tran_int_8(trf, type)
Definition: profiling.F90:830
subroutine rprofiling_count_operations(ops)
Definition: profiling.F90:696
type(profile_vars_t), target, public prof_vars
Definition: profiling.F90:240
integer, parameter, public profiling_io
Definition: profiling.F90:200
subroutine profiling_count_tran_complex_8_l(trf, type)
Definition: profiling.F90:789
real(8) function profile_self_bandwidth(this)
Definition: profiling.F90:1015
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
Definition: profiling.F90:486
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
Definition: profiling.F90:253
subroutine profiling_count_tran_real_4_l(trf, type)
Definition: profiling.F90:747
subroutine dprofiling_count_operations(ops)
Definition: profiling.F90:708
integer, parameter max_profiles
Max. number of tags.
Definition: profiling.F90:141
subroutine profiling_count_tran_int_l(trf, type)
Definition: profiling.F90:721
subroutine profiling_count_tran_complex_4_l(trf, type)
Definition: profiling.F90:775
subroutine profiling_count_tran_real_4(trf, type)
Definition: profiling.F90:843
integer, parameter, public profiling_time
Definition: profiling.F90:200
integer function profile_num_calls(this)
Definition: profiling.F90:1031
subroutine profiling_count_tran_type(trf, type)
Definition: profiling.F90:899
subroutine, public profiling_memory_deallocate(var, file, line, size)
Definition: profiling.F90:1373
character(label_length) function profile_label(this)
Definition: profiling.F90:1042
subroutine, public profiling_memory_allocate(var, file, line, size_)
Definition: profiling.F90:1302
subroutine profiling_count_tran_type_l(trf, type)
Definition: profiling.F90:803
subroutine profiling_make_position_str(var, file, line, str)
Definition: profiling.F90:1243
subroutine profiling_count_tran_complex_8(trf, type)
Definition: profiling.F90:885
subroutine profiling_count_tran_complex_4(trf, type)
Definition: profiling.F90:871
real(r8) function profile_total_throughput(this)
Definition: profiling.F90:966
real(8) function profile_total_bandwidth(this)
Definition: profiling.F90:983
logical function, public profile_is_initialized(this)
Definition: profiling.F90:544
real(8) function profile_total_time(this)
Definition: profiling.F90:911
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:109
integer pure function, public types_get_size(this)
Definition: types.F90:144
subroutine get_output_dir()
Definition: profiling.F90:392
recursive subroutine output_tree_level(profile, level, total_time, iunit)
Definition: profiling.F90:1211
int true(void)