Octopus
profiling.F90
Go to the documentation of this file.
1!! Copyright (C) 2005-2009 Heiko Appel, Florian Lorenzen, Xavier Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
49 !*/
50module profiling_oct_m
51 use debug_oct_m
52 use global_oct_m
53 use io_oct_m
54 use, intrinsic :: iso_fortran_env
55 use loct_oct_m
57 use mpi_oct_m
58 use parser_oct_m
60 use nvtx_oct_m
61 use sort_oct_m
62 use sphash_oct_m
63 use string_oct_m
64 use types_oct_m
66
67 implicit none
68 private
69
70 public :: &
80
81 integer, parameter :: &
82 LABEL_LENGTH = 40, & !< Max. number of characters of tag label.
83 max_profiles = 300
84
!> Bookkeeping record for one named profiling region.
!! Instances are filled by profile_init and updated by profiling_in / profiling_out.
85 type profile_t
86 private
87 character(LABEL_LENGTH) :: label !< name of the region, as passed to profile_init
88 real(real64) :: entry_time !< clock value stored by the most recent profiling_in
89 real(real64) :: total_time !< sum of the time spent over all completed activations
90 real(real64) :: min_time !< shortest single activation seen so far
91 real(real64) :: self_time !< total_time minus the time spent in child regions
92 real(real64) :: op_count_current !< operations counted during the current activation
93 real(real64) :: op_count !< operations accumulated over completed activations
94 real(real64) :: op_count_child !< operations accumulated from child regions
95 real(real64) :: op_count_child_current !< operations of child regions during the current activation
96 real(real64) :: tr_count_current !< transfer count of the current activation
97 real(real64) :: tr_count !< transfer count accumulated over completed activations
98 real(real64) :: tr_count_child !< transfer count accumulated from child regions
99 real(real64) :: tr_count_child_current !< transfer count of child regions during the current activation
100 type(profile_t), pointer :: parent !< region that was current when this one was entered (null if none)
101 integer :: count !< number of completed activations
102 logical :: initialized = .false. !< set by profile_init once the profile is registered
103 logical :: active = .false. !< .true. between profiling_in and profiling_out
104 logical :: exclude !< if .true., profiling_out also subtracts the time from the parent's total_time
105 integer :: index !< position of this profile in prof_vars%profile_list
106 logical :: has_child(MAX_PROFILES) !< has_child(i) is set when the profile with index i is entered below this one
107 real(real64) :: timings(MAX_PROFILES) !< timings(i): time spent in the profile with index i while it was a child of this one
108 end type profile_t
109
111 private
112 type(profile_t), pointer :: p
113 end type profile_pointer_t
114
116 module procedure &
124 module procedure &
132 end interface profiling_count_transfers
133
135 module procedure iprofiling_count_operations
136 module procedure rprofiling_count_operations
137 module procedure dprofiling_count_operations
138 end interface profiling_count_operations
139
140 integer, parameter, public :: &
141 PROFILING_TIME = 1, &
142 profiling_memory = 2, &
144 profiling_likwid = 8, &
145 profiling_io = 16
146
147 integer, parameter :: MAX_MEMORY_VARS = 25
148
150 private
151 integer, public :: mode
152
153 type(profile_pointer_t) :: current
154 type(profile_pointer_t) :: profile_list(MAX_PROFILES)
155 integer :: last_profile
157 integer(int64) :: alloc_count
158 integer(int64) :: dealloc_count
160 integer(int64) :: memory_limit = -1
161 integer(int64) :: total_memory
162 integer(int64) :: max_memory
163 character(len=256) :: max_memory_location
165 integer(int64) :: large_vars_size(max_memory_vars)
166 character(len=256) :: large_vars(max_memory_vars)
168 real(real64) :: start_time
169 integer :: mem_iunit
171 character(len=256) :: output_dir
172 character(len=6) :: file_number
174 logical :: all_nodes
176 logical :: output_yaml
177 logical :: output_tree
179
180 type(profile_vars_t), target, save, public :: prof_vars
181
182 type(sphash_t), save :: profiling_map
184contains
185
186 ! ---------------------------------------------------------
!> Read the profiling-related input variables and set up the profiler state in prof_vars.
!!
!! Visible steps: parse and validate ProfilingMode, return early when in_profiling_mode is
!! false, parse ProfilingAllNodes, initialise the memory-profiling counters and MemoryLimit,
!! open the memory log when profiling_memory_full is set, reset the list of time profiles,
!! parse ProfilingOutputYAML / ProfilingOutputTree and finally enter the 'COMPLETE_RUN' region.
!!
!! NOTE(review): the embedded line numbers show that some statements of this routine are
!! absent from this listing (e.g. 229-230, 245, 248, 250, 320), so this summary may be incomplete.
!!
!! @param[in] namespace  namespace handed to the parser, message and I/O routines
188 subroutine profiling_init(namespace)
189 type(namespace_t), intent(in) :: namespace
190
191 integer :: ii
192
193 push_sub(profiling_init)
194
195 ! FIXME: nothing is thread-safe here!
196
197 !%Variable ProfilingMode
198 !%Default no
199 !%Type integer
200 !%Section Execution::Optimization
201 !%Description
202 !% Use this variable to run <tt>Octopus</tt> in profiling mode. In this mode
203 !% <tt>Octopus</tt> records the time spent in certain areas of the code and
204 !% the number of times this code is executed. These numbers
205 !% are written in <tt>./profiling.NNN/profiling.nnn</tt> with <tt>nnn</tt> being the
206 !% node number (<tt>000</tt> in serial) and <tt>NNN</tt> the number of processors.
207 !% This is mainly for development purposes. Note, however, that
208 !% <tt>Octopus</tt> should be compiled with <tt>--disable-debug</tt> to do proper
209 !% profiling. Warning: you may encounter strange results with OpenMP.
210 !%Option no 0
211 !% No profiling information is generated.
212 !%Option prof_time 1
213 !% Profile the time spent in defined profiling regions.
214 !%Option prof_memory 2
215 !% As well as the time, summary information on memory usage and the largest arrays are reported.
216 !%Option prof_memory_full 4
217 !% As well as the time and summary memory information, a
218 !% log is reported of every allocation and deallocation.
219 !%Option likwid 8
220 !% Enable instrumentation using LIKWID.
221 !%Option prof_io 16
222 !% Count the number of file open and close.
223 !%End
224
225 call parse_variable(namespace, 'ProfilingMode', 0, prof_vars%mode)
226 if (.not. varinfo_valid_option('ProfilingMode', prof_vars%mode)) then
227 call messages_input_error(namespace, 'ProfilingMode')
228 end if
! nothing else to set up when profiling is switched off
231 if (.not. in_profiling_mode) then
232 pop_sub(profiling_init)
233 return
234 end if
235
236 !%Variable ProfilingAllNodes
237 !%Default no
238 !%Type logical
239 !%Section Execution::Optimization
240 !%Description
241 !% This variable controls whether all nodes print the time
242 !% profiling output. If set to no, the default, only the root node
243 !% will write the profile. If set to yes, all nodes will print it.
244 !%End
246 call parse_variable(namespace, 'ProfilingAllNodes', .false., prof_vars%all_nodes)
247
249
! NOTE(review): the `if` statement that guards the next line and owns the following
! `end if` is not visible in this listing.
251 prof_vars%mode = ior(prof_vars%mode, profiling_memory)
252 end if
253
254 ! initialize memory profiling
255 if (bitand(prof_vars%mode, profiling_memory) /= 0) then
256 prof_vars%alloc_count = 0
257 prof_vars%dealloc_count = 0
258
259 prof_vars%total_memory = 0
260 prof_vars%max_memory = 0
261 prof_vars%max_memory_location = ''
262 prof_vars%start_time = loct_clock()
263
264 prof_vars%large_vars_size(:) = 0
265 prof_vars%large_vars(:) = ''
266
267 !%Variable MemoryLimit
268 !%Default -1
269 !%Type integer
270 !%Section Execution::Optimization
271 !%Description
272 !% If positive, <tt>Octopus</tt> will stop if more memory than <tt>MemoryLimit</tt>
273 !% is requested (in kb). Note that this variable only works when
274 !% <tt>ProfilingMode = prof_memory(_full)</tt>.
275 !%End
276 call parse_variable(namespace, 'MemoryLimit', -1, ii)
! the input value is given in kb; it is stored multiplied by 1024 as a 64-bit integer
277 prof_vars%memory_limit = int(ii, int64)*1024
278 end if
279
280 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
281 ! make sure output directory is available before other processes try to write there
282 call mpi_world%barrier()
283
! one memory log per process: <output_dir>/memory.<file_number>, starting with a column header
284 prof_vars%mem_iunit = io_open(trim(prof_vars%output_dir)//'/memory.'//prof_vars%file_number, &
285 namespace, action='write')
286 write(prof_vars%mem_iunit, '(5a16,a70)') 'Elapsed Time', 'Alloc/Dealloc', 'Size (words)', 'Prof Mem', &
287 'Sys Mem', 'Variable Name(Filename:Line)'
288 end if
289
290 ! initialize time profiling
291 prof_vars%last_profile = 0
292 nullify(prof_vars%current%p)
293
294 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
295#ifdef HAVE_LIKWID
296 call likwid_markerinit()
297#endif
298 end if
299
300 !%Variable ProfilingOutputYAML
301 !%Default no
302 !%Type logical
303 !%Section Execution::Optimization
304 !%Description
305 !% This variable controls whether the profiling output is additionally
306 !% written to a YAML file.
307 !%End
308 call parse_variable(namespace, 'ProfilingOutputYAML', .false., prof_vars%output_yaml)
309
310 !%Variable ProfilingOutputTree
311 !%Default yes
312 !%Type logical
313 !%Section Execution::Optimization
314 !%Description
315 !% This variable controls whether the profiling output is additionally
316 !% written as a tree.
317 !%End
318 call parse_variable(namespace, 'ProfilingOutputTree', .true., prof_vars%output_tree)
319
321
! open the outermost region; profiling_end closes it again
322 call profiling_in('COMPLETE_RUN')
323
324 pop_sub(profiling_init)
325
326 contains
327
328 ! ---------------------------------------------------------
!> Set prof_vars%file_number to the MPI world rank written with format i6.6 and
!! prof_vars%output_dir to 'profiling'; the root process creates that directory.
!! NOTE(review): the call to this routine is not visible in this listing.
329 subroutine get_output_dir()
330
332
333 write(prof_vars%file_number, '(i6.6)') mpi_world%rank
334
335 prof_vars%output_dir = 'profiling'
336
337 if (mpi_world%is_root()) call io_mkdir(trim(prof_vars%output_dir), namespace)
338
340 end subroutine get_output_dir
341
342 end subroutine profiling_init
343
344
345 ! ---------------------------------------------------------
!> Close the 'COMPLETE_RUN' region, write the profiling results and print the summaries
!! selected by the bits of prof_vars%mode (memory, LIKWID, I/O). Does nothing when
!! in_profiling_mode is false.
!!
!! NOTE(review): the embedded line numbers show statements missing from this listing
!! (363 and 407-408), so this summary may be incomplete.
!!
!! @param[in] namespace  namespace handed to the output and message routines
346 subroutine profiling_end(namespace)
347 type(namespace_t), intent(in) :: namespace
348 integer :: ii
349 real(real64), parameter :: megabyte = 1048576.0_real64
350 integer(int64) :: io_open_count, io_close_count
351 integer(int64) :: io_open_count_red, io_close_count_red
352
353 if (.not. in_profiling_mode) return
354 push_sub(profiling_end)
355
356 call profiling_out('COMPLETE_RUN')
357 call profiling_output(namespace)
358
! mark every registered profile as uninitialized again
359 do ii = 1, prof_vars%last_profile
360 prof_vars%profile_list(ii)%p%initialized = .false.
361 end do
362
364
365 if (bitand(prof_vars%mode, profiling_memory) /= 0) then
366 call messages_print_with_emphasis(msg="Memory profiling information", namespace=namespace)
367 write(message(1), '(a,i10)') 'Number of allocations = ', prof_vars%alloc_count
368 write(message(2), '(a,i10)') 'Number of deallocations = ', prof_vars%dealloc_count
369 write(message(3), '(a,f18.3,a)') 'Maximum total memory allocated = ', prof_vars%max_memory/megabyte, ' Mbytes'
370 write(message(4), '(2x,a,a)') 'at ', trim(prof_vars%max_memory_location)
371 call messages_info(4)
372
373 message(1) = ''
374 message(2) = 'Largest variables allocated:'
375 call messages_info(2)
376 do ii = 1, max_memory_vars
377 write(message(1),'(i2,f18.3,2a)') ii, prof_vars%large_vars_size(ii)/megabyte, ' Mbytes ', trim(prof_vars%large_vars(ii))
378 call messages_info(1)
379 end do
380
381 call messages_print_with_emphasis(namespace=namespace)
382
! warn about unbalanced allocation counts or memory that is still accounted as allocated
383 if (prof_vars%alloc_count /= prof_vars%dealloc_count) then
384 write(message(1),'(a,i10,a,i10,a)') "Not all memory was deallocated: ", prof_vars%alloc_count, &
385 ' allocations and ', prof_vars%dealloc_count, ' deallocations'
386 call messages_warning(1, all_nodes = .true.)
387 end if
388 if (prof_vars%total_memory > 0) then
389 write(message(1),'(a,f18.3,a,f18.3,a)') "Remaining allocated memory: ", prof_vars%total_memory/megabyte, &
390 ' Mbytes (out of maximum ', prof_vars%max_memory/megabyte, ' Mbytes)'
391 call messages_warning(1, all_nodes = .true.)
392 end if
393 end if
394
! the memory log is only opened (in profiling_init) when profiling_memory_full is set
395 if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
396 call io_close(prof_vars%mem_iunit)
397 end if
398
399 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
400#ifdef HAVE_LIKWID
401 call likwid_markerclose()
402#endif
403 end if
404
405 if (bitand(prof_vars%mode, profiling_io) /= 0) then
406 call messages_print_with_emphasis(msg="IO profiling information", namespace=namespace)
! NOTE(review): no assignment to io_open_count / io_close_count is visible in this listing;
! presumably they are set in the missing lines 407-408 - confirm against the full file.
409 write(message(1), '(a,i10)') 'Number of file open = ', io_open_count
410 write(message(2), '(a,i10)') 'Number of file close = ', io_close_count
! sum the per-process counters over mpi_world
411 call mpi_world%allreduce(io_open_count, io_open_count_red, 1, mpi_integer8, mpi_sum)
412 call mpi_world%allreduce(io_close_count, io_close_count_red, 1, mpi_integer8, mpi_sum)
413 write(message(3), '(a,i10)') 'Global number of file open = ', io_open_count_red
414 write(message(4), '(a,i10)') 'Global number of file close = ', io_close_count_red
415 call messages_info(4)
416 call messages_print_with_emphasis(namespace=namespace)
417 end if
418
419 pop_sub(profiling_end)
420 end subroutine profiling_end
421
422
423 ! ---------------------------------------------------------
!> @brief Reset a profile and, when profiling is enabled, register it in prof_vars%profile_list.
!!
!! Every counter and timer of `this` is given a defined start value. When in_profiling_mode
!! is false the routine stops there; otherwise the profile is appended to the global list,
!! receives its index and is flagged as initialized. Registering the same label twice is a
!! fatal error.
!!
!! @param[out] this   profile to set up
!! @param[in]  label  name of the region; at most LABEL_LENGTH significant characters
subroutine profile_init(this, label)
  type(profile_t), target, intent(out) :: this
  character(*), intent(in) :: label

  integer :: iprofile

  push_sub(profile_init)

  ! Only the significant characters matter: trailing blanks are dropped by the
  ! assignment to this%label below, so they must not trigger the length error.
  if (len_trim(label) > label_length) then
    message(1) = "Label " // trim(label) // " is too long for the internal profiler"
    call messages_fatal(1)
  end if

  this%label = label
  this%total_time = m_zero
  this%min_time = m_huge
  this%self_time = m_zero
  this%entry_time = huge(this%entry_time)
  this%count = 0
  this%op_count_current = m_zero
  this%op_count = m_zero
  this%op_count_child = m_zero
  this%op_count_child_current = m_zero
  this%tr_count_current = m_zero
  this%tr_count = m_zero
  this%tr_count_child = m_zero
  this%tr_count_child_current = m_zero
  this%active = .false.
  ! intent(out) leaves components without default initialization undefined,
  ! so give the remaining ones a definite value as well
  this%exclude = .false.
  nullify(this%parent)
  this%has_child = .false.
  this%timings = m_zero
  this%index = 0

  if (.not. in_profiling_mode) then
    pop_sub(profile_init)
    return
  end if

  prof_vars%last_profile = prof_vars%last_profile + 1

  assert(prof_vars%last_profile <= max_profiles)

  prof_vars%profile_list(prof_vars%last_profile)%p => this
  this%index = prof_vars%last_profile
  this%initialized = .true.

  ! abort if the same name has already been registered by another profile
  do iprofile = 1, prof_vars%last_profile - 1
    if (prof_vars%profile_list(iprofile)%p%label == this%label) then
      message(1) = "Label "//trim(label)//" used more than once."
      call messages_fatal(1)
      exit
    end if
  end do

  pop_sub(profile_init)
end subroutine profile_init
480
481
482 ! ---------------------------------------------------------
!> @brief Enter the profiling region named `label`.
!!
!! The label is passed through to_upper and looked up in profiling_map; a profile is
!! inserted and initialised on first use. The region is marked active, its entry time is
!! recorded, the previously current region (if any) becomes its parent, and it becomes the
!! current region. Entering a region that is already active is a fatal error.
!! Returns immediately when in_profiling_mode is false or not_in_openmp() is false.
!!
!! @param[in] label    name of the region
!! @param[in] exclude  stored in the profile (default .false.); profiling_out uses it to
!!                     subtract the time of this region from the parent's total_time
subroutine profiling_in(label, exclude)
  character(*), intent(in) :: label
  logical, optional, intent(in) :: exclude

  character(len=len(label)) :: key
  class(*), pointer :: slot
  type(profile_t), allocatable :: blank
  logical :: known
  real(real64) :: t_entry

  if (.not. in_profiling_mode) return
  if (.not. not_in_openmp()) return

  ! no PUSH_SUB, called too often

  key = to_upper(trim(label))

  slot => sphash_lookup(profiling_map, trim(key), known)
  if (.not. known) then
    ! first use of this label: insert a new profile (clone=.true.) and look it up again
    allocate(blank)
    call sphash_insert(profiling_map, trim(key), blank, clone=.true.)
    deallocate(blank)
    slot => sphash_lookup(profiling_map, trim(key), known)
  end if

  select type(this => slot)
  type is (profile_t)
    if (.not. this%initialized) call profile_init(this, key)

    if (this%active) then
      message(1) = " The region with label "//trim(label)//" is already active."
      message(2) = " This is likely caused by a missing or incorrect profiling_out call."
      call messages_fatal(2)
    end if
    assert(.not. this%active)

    this%active = .true.
    t_entry = mpi_get_wtime()

    if (.not. associated(prof_vars%current%p)) then
      ! no region is open: this one has no parent
      nullify(this%parent)
    else
      ! remember the enclosing region and tell it about this child
      this%parent => prof_vars%current%p
      this%parent%has_child(this%index) = .true.
    end if

    ! counters of this activation start from zero
    this%op_count_current = m_zero
    this%op_count_child_current = m_zero
    this%tr_count_current = m_zero
    this%tr_count_child_current = m_zero

    prof_vars%current%p => this
    this%entry_time = t_entry

    this%exclude = optional_default(exclude, .false.)

#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
    call nvtx_range_push(string_f_to_c(trim(key)), this%index)
#endif
  end select

  if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
#ifdef HAVE_LIKWID
    call likwid_markerstartregion(trim(key))
#endif
  end if

end subroutine profiling_in
556
557
558 ! ---------------------------------------------------------
!> Leave the profiling region named `label` and update its statistics.
!!
!! The elapsed time since the matching profiling_in is added to the region's total and self
!! time and subtracted from the parent's self time; operation and transfer counts are
!! accumulated and propagated to the parent, which becomes the current region again.
!! A label that is not found in profiling_map is a fatal error. Returns immediately when
!! in_profiling_mode is false or not_in_openmp() is false.
!!
!! @param[in] label  name of the region; it is passed through to_upper before the lookup
562 subroutine profiling_out(label)
563 character(*), intent(in) :: label
564
565 real(real64) :: now, time_spent
566 class(*), pointer :: profile_p
567 logical :: found
568
569 if (.not. in_profiling_mode) return
570 if (.not. not_in_openmp()) return
571
572 ! no PUSH_SUB, called too often
573 profile_p => sphash_lookup(profiling_map, to_upper(trim(label)), found)
574
575 if (.not. found) then
576 message(1) = " No profiling region with label "//trim(label)//" found."
577 message(2) = " This is likely caused by a missing or incorrect profiling_in call."
578 call messages_fatal(2)
579 end if
580 assert(found)
581
582 select type(this => profile_p)
583 type is (profile_t)
584 assert(this%initialized)
585 assert(this%active)
586 this%active = .false.
587 now = mpi_get_wtime()
588
! duration of this activation
589 time_spent = now - this%entry_time
590 this%total_time = this%total_time + time_spent
591 this%self_time = this%self_time + time_spent
592 this%count = this%count + 1
593 if (time_spent < this%min_time) then
594 this%min_time = time_spent
595 end if
596
! fold the counters of this activation into the accumulated ones
597 this%op_count = this%op_count + this%op_count_current
598 this%tr_count = this%tr_count + this%tr_count_current
599 this%op_count_child = this%op_count_child + this%op_count_child_current
600 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
601
602 if (associated(this%parent)) then
603 !remove the spent from the self time of our parent
604 this%parent%self_time = this%parent%self_time - time_spent
! an excluded region is also removed from the parent's total time
605 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
606
607 ! add the operations to the parent
608 this%parent%op_count_child_current = this%parent%op_count_child_current &
609 + this%op_count_current + this%op_count_child_current
610 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
611 + this%tr_count_current + this%tr_count_child_current
612
! per-child time kept by the parent, indexed by this profile's position in the global list
613 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
614
615 !and set parent as current
616 prof_vars%current%p => this%parent
617
618 else
! no enclosing region: nothing is current any more
619 nullify(prof_vars%current%p)
620 end if
621
622 if (bitand(prof_vars%mode, profiling_likwid) /= 0) then
623#ifdef HAVE_LIKWID
624 call likwid_markerstopregion(trim(this%label))
625#endif
626 end if
627 end select
628
! NOTE(review): the body of the following preprocessor block (line 630) is not visible in this listing.
629#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
631#endif
632
633 end subroutine profiling_out
634
635
636 ! ---------------------------------------------------------
637
!> Add `ops` operations to the operation counter of the current profiling region.
!! Does nothing when in_profiling_mode is false.
!!
!! @param[in] ops  number of operations, given as a default integer
subroutine iprofiling_count_operations(ops)
  integer, intent(in) :: ops

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%op_count_current = region%op_count_current + real(ops, real64)
    end associate
  end if
end subroutine iprofiling_count_operations
646
647
648 ! ---------------------------------------------------------
649
!> Add `ops` operations to the operation counter of the current profiling region.
!! Does nothing when in_profiling_mode is false.
!!
!! @param[in] ops  number of operations, given as real(4); converted to real64 before adding
subroutine rprofiling_count_operations(ops)
  real(4), intent(in) :: ops

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%op_count_current = region%op_count_current + real(ops, real64)
    end associate
  end if
end subroutine rprofiling_count_operations
658
659
660 ! ---------------------------------------------------------
661
!> Add `ops` operations to the operation counter of the current profiling region.
!! Does nothing when in_profiling_mode is false.
!!
!! @param[in] ops  number of operations, given as real64
subroutine dprofiling_count_operations(ops)
  real(real64), intent(in) :: ops

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%op_count_current = region%op_count_current + ops
    end associate
  end if

end subroutine dprofiling_count_operations
671
672
673 ! ---------------------------------------------------------
674
!> Add a transfer of `trf` default-integer elements, counted as 4 per element, to the
!! transfer counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (64-bit count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_int_l(trf, type)
  integer(int64), intent(in) :: trf
  integer, intent(in) :: type

  integer, parameter :: size_per_element = 4

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if
end subroutine profiling_count_tran_int_l
684
685
686 ! ---------------------------------------------------------
687
!> Add a transfer of `trf` 64-bit integer elements to the transfer counter of the current
!! profiling region. No-op when in_profiling_mode is false.
!!
!! Each element is counted as 8, the same weight that profiling_count_tran_int_8 applies
!! to integer(int64) data (the previous factor of 4 under-counted these transfers).
!!
!! @param[in] trf   number of elements (64-bit count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_int_8_l(trf, type)
  integer(int64), intent(in) :: trf
  integer(int64), intent(in) :: type

  if (.not. in_profiling_mode) return
  ! no PUSH_SUB, called too often

  prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*real(trf, real64)
end subroutine profiling_count_tran_int_8_l
697
698
699 ! ---------------------------------------------------------
700
!> Add a transfer of `trf` real(4) elements, counted as 4 per element, to the transfer
!! counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (64-bit count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_real_4_l(trf, type)
  integer(int64), intent(in) :: trf
  real(4), intent(in) :: type

  integer, parameter :: size_per_element = 4

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_real_4_l
711
712
713 ! ---------------------------------------------------------
714
!> Add a transfer of `trf` real64 elements, counted as 8 per element, to the transfer
!! counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (64-bit count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_real_8_l(trf, type)
  integer(int64), intent(in) :: trf
  real(real64), intent(in) :: type

  integer, parameter :: size_per_element = 8

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_real_8_l
725
726
727 ! ---------------------------------------------------------
728
!> Add a transfer of `trf` complex(4) elements, counted as 8 per element, to the transfer
!! counter of the current profiling region. Returns at once when in_profiling_mode is false.
!! `type` is not read in the body; presumably it only selects this specific procedure of the
!! generic interface.
!! NOTE(review): the closing `end subroutine` line (738) is not visible in this listing.
729 subroutine profiling_count_tran_complex_4_l(trf, type)
730 integer(int64), intent(in) :: trf
731 complex(4), intent(in) :: type
732
733 if (.not. in_profiling_mode) return
734 ! no PUSH_SUB, called too often
735
736 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*real(trf, real64)
737
739
740
741 ! ---------------------------------------------------------
742
! NOTE(review): the subroutine header (line 743) and the closing line (752) are not visible
! in this listing; by analogy with the neighbouring routines this is presumably
! profiling_count_tran_complex_8_l(trf, type) - confirm against the full file.
! The body adds 16 per complex(real64) element to the transfer counter of the current
! profiling region and returns at once when in_profiling_mode is false.
744 integer(int64), intent(in) :: trf
745 complex(real64), intent(in) :: type
746
747 if (.not. in_profiling_mode) return
748 ! no PUSH_SUB, called too often
749
750 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 16*real(trf, real64)
751
753
754
755 ! ---------------------------------------------------------
!> Add a transfer of `trf` elements of the run-time type `type` to the transfer counter of
!! the current profiling region; each element is weighted with types_get_size(type).
!! No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (64-bit count)
!! @param[in] type  type descriptor whose size is obtained from types_get_size
subroutine profiling_count_tran_type_l(trf, type)
  integer(int64), intent(in) :: trf
  type(type_t), intent(in) :: type

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + real(trf, real64) *types_get_size(type)
    end associate
  end if

end subroutine profiling_count_tran_type_l
767
768
769 ! ---------------------------------------------------------
770
!> Add a transfer of `trf` default-integer elements, counted as 4 per element, to the
!! transfer counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_int(trf, type)
  integer, intent(in) :: trf
  integer, intent(in) :: type

  integer, parameter :: size_per_element = 4

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if
end subroutine profiling_count_tran_int
780
781
782 ! ---------------------------------------------------------
!> Add a transfer of `trf` 64-bit integer elements, counted as 8 per element, to the
!! transfer counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_int_8(trf, type)
  integer, intent(in) :: trf
  integer(int64), intent(in) :: type

  integer, parameter :: size_per_element = 8

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if
end subroutine profiling_count_tran_int_8
793
794
795 ! ---------------------------------------------------------
796
!> Add a transfer of `trf` real(4) elements, counted as 4 per element, to the transfer
!! counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_real_4(trf, type)
  integer, intent(in) :: trf
  real(4), intent(in) :: type

  integer, parameter :: size_per_element = 4

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_real_4
807
808
809 ! ---------------------------------------------------------
810
!> Add a transfer of `trf` real64 elements, counted as 8 per element, to the transfer
!! counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_real_8(trf, type)
  integer, intent(in) :: trf
  real(real64), intent(in) :: type

  integer, parameter :: size_per_element = 8

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_real_8
821
822
823 ! ---------------------------------------------------------
824
!> Add a transfer of `trf` complex(4) elements, counted as 8 per element, to the transfer
!! counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_complex_4(trf, type)
  integer, intent(in) :: trf
  complex(4), intent(in) :: type

  integer, parameter :: size_per_element = 8

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_complex_4
835
836
837 ! ---------------------------------------------------------
838
!> Add a transfer of `trf` complex(real64) elements, counted as 16 per element, to the
!! transfer counter of the current profiling region. No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  not read in the body; presumably it only selects this specific
!!                  procedure of the generic interface
subroutine profiling_count_tran_complex_8(trf, type)
  integer, intent(in) :: trf
  complex(real64), intent(in) :: type

  integer, parameter :: size_per_element = 16

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + size_per_element*real(trf, real64)
    end associate
  end if

end subroutine profiling_count_tran_complex_8
849
850
851 ! ---------------------------------------------------------
!> Add a transfer of `trf` elements of the run-time type `type` to the transfer counter of
!! the current profiling region; each element is weighted with types_get_size(type).
!! No-op when in_profiling_mode is false.
!!
!! @param[in] trf   number of elements (default integer count)
!! @param[in] type  type descriptor whose size is obtained from types_get_size
subroutine profiling_count_tran_type(trf, type)
  integer, intent(in) :: trf
  type(type_t), intent(in) :: type

  ! no PUSH_SUB, called too often
  if (in_profiling_mode) then
    associate(region => prof_vars%current%p)
      region%tr_count_current = region%tr_count_current + real(trf, real64) *types_get_size(type)
    end associate
  end if

end subroutine profiling_count_tran_type
863
864 ! ---------------------------------------------------------
!> Accumulated time of a profile over all completed activations.
!!
!! @param[in] this  profile to query
!! @return the stored total_time
function profile_total_time(this) result(elapsed)
  type(profile_t), intent(in) :: this
  real(real64) :: elapsed

  push_sub(profile_total_time)

  elapsed = this%total_time

  pop_sub(profile_total_time)
end function profile_total_time
873
874
875 ! ---------------------------------------------------------
!> Time spent in a profile itself, i.e. with the time of its child regions removed.
!!
!! @param[in] this  profile to query
!! @return the stored self_time
real(real64) function profile_self_time(this)
  type(profile_t), intent(in) :: this

  ! every pop_sub needs a matching push_sub on the debug call stack
  push_sub(profile_self_time)
  profile_self_time = this%self_time

  pop_sub(profile_self_time)
end function profile_self_time
884
885
886 ! ---------------------------------------------------------
!> Average total time per completed activation of a profile.
!!
!! @param[in] this  profile to query
!! @return total_time divided by the number of completed activations, or zero when the
!!         profile has not completed any activation yet (avoids a division by zero)
real(real64) function profile_total_time_per_call(this)
  type(profile_t), intent(in) :: this

  if (this%count > 0) then
    profile_total_time_per_call = this%total_time / real(this%count, real64)
  else
    profile_total_time_per_call = 0.0_real64
  end if

end function profile_total_time_per_call
895
896
897 ! ---------------------------------------------------------
!> Shortest single activation recorded for a profile.
!!
!! @param[in] this  profile to query
!! @return the stored min_time
real(real64) function profile_min_time(this)
  type(profile_t), intent(in) :: this

  ! use this function's own name on the debug call stack (it was copied from profile_self_time)
  push_sub(profile_min_time)
  profile_min_time = this%min_time

  pop_sub(profile_min_time)
end function profile_min_time
906
908 ! ---------------------------------------------------------
!> Average self time per completed activation of a profile.
!!
!! @param[in] this  profile to query
!! @return self_time divided by the number of completed activations, or zero when the
!!         profile has not completed any activation yet (avoids a division by zero)
real(real64) function profile_self_time_per_call(this)
  type(profile_t), intent(in) :: this

  if (this%count > 0) then
    profile_self_time_per_call = this%self_time / real(this%count, real64)
  else
    profile_self_time_per_call = 0.0_real64
  end if

end function profile_self_time_per_call
917
918
919 ! ---------------------------------------------------------
!> Operation rate of a profile including its children.
!!
!! @param[in] this  profile to query
!! @return (op_count + op_count_child) / total_time scaled by 1.0e-6, or zero when
!!         total_time does not exceed epsilon
real(real64) function profile_total_throughput(this)
  type(profile_t), intent(in) :: this

  ! the debug call stack label now names this function (it used to read profile_throughput)
  push_sub(profile_total_throughput)

  if (this%total_time > epsilon(this%total_time)) then
    profile_total_throughput = (this%op_count + this%op_count_child)/this%total_time*1.0e-6_real64
  else
    profile_total_throughput = 0.0_real64
  end if

  pop_sub(profile_total_throughput)
end function profile_total_throughput
934
935 ! ---------------------------------------------------------
936
!> Transfer rate of a profile including its children.
!!
!! @param[in] this  profile to query
!! @return (tr_count + tr_count_child) / (total_time * 1024**2), or zero when total_time
!!         does not exceed epsilon
real(real64) function profile_total_bandwidth(this)
  type(profile_t), intent(in) :: this

  ! the debug call stack label now names this function (it used to read profile_bandwidth)
  push_sub(profile_total_bandwidth)

  if (this%total_time > epsilon(this%total_time)) then
    profile_total_bandwidth = (this%tr_count + this%tr_count_child)/(this%total_time*1024.0_real64**2)
  else
    profile_total_bandwidth = 0.0_real64
  end if

  pop_sub(profile_total_bandwidth)
end function profile_total_bandwidth
950
951 ! ---------------------------------------------------------
952
!> Operation rate of a profile excluding its children.
!!
!! @param[in] this  profile to query
!! @return op_count / self_time scaled by 1.0e-6, or zero when self_time does not
!!         exceed epsilon
real(real64) function profile_self_throughput(this)
  type(profile_t), intent(in) :: this

  ! the debug call stack label now names this function (it used to read profile_throughput)
  push_sub(profile_self_throughput)

  if (this%self_time > epsilon(this%self_time)) then
    profile_self_throughput = this%op_count/this%self_time*1.0e-6_real64
  else
    profile_self_throughput = 0.0_real64
  end if

  pop_sub(profile_self_throughput)
end function profile_self_throughput
967 ! ---------------------------------------------------------
968
!> Transfer rate of a profile excluding its children.
!!
!! @param[in] this  profile to query
!! @return tr_count / (self_time * 1024**2), or zero when self_time does not exceed epsilon
real(real64) function profile_self_bandwidth(this)
  type(profile_t), intent(in) :: this

  ! the debug call stack label now names this function (it used to read profile_bandwidth)
  push_sub(profile_self_bandwidth)

  if (this%self_time > epsilon(this%self_time)) then
    profile_self_bandwidth = this%tr_count/(this%self_time*1024.0_real64**2)
  else
    ! the result must be defined on this path too; zero matches the other rate functions
    profile_self_bandwidth = 0.0_real64
  end if

  pop_sub(profile_self_bandwidth)
end function profile_self_bandwidth
982
983
984 ! ---------------------------------------------------------
!> Number of completed activations of a profile.
!!
!! @param[in] this  profile to query
!! @return the stored count
integer function profile_num_calls(this)
  type(profile_t), intent(in) :: this

  ! every pop_sub needs a matching push_sub on the debug call stack
  push_sub(profile_num_calls)
  profile_num_calls = this%count

  pop_sub(profile_num_calls)
end function profile_num_calls
993
994
995 ! ---------------------------------------------------------
!> Name of a profile.
!!
!! @param[in] this  profile to query
!! @return the stored label, blank-padded to LABEL_LENGTH characters
function profile_label(this) result(name)
  type(profile_t), intent(in) :: this
  character(LABEL_LENGTH) :: name

  push_sub(profile_label)

  name = this%label

  pop_sub(profile_label)
end function profile_label
1004
1006 ! ---------------------------------------------------------
1013 subroutine profiling_output(namespace)
1014 type(namespace_t), intent(in) :: namespace
1015
1016 integer :: ii
1017 integer :: iunit
1018 real(real64) :: total_time
1019 type(profile_t), pointer :: prof
1020 character(len=256) :: filename
1021 real(real64), allocatable :: selftime(:)
1022 integer, allocatable :: position(:)
1023 class(*), pointer :: profile_p
1024 logical :: found
1025
1026 if (.not. in_profiling_mode) return
1027 push_sub(profiling_output)
1028
1029 call mpi_world%barrier()
1030
1031 if (.not. prof_vars%all_nodes .and. .not. mpi_world%is_root()) then
1032 pop_sub(profiling_output)
1033 return
1034 end if
1035
1036 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number
1037 iunit = io_open(trim(filename), namespace, action='write', die=.false.)
1038 if (iunit == -1) then
1039 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1040 call messages_warning(1)
1041 pop_sub(profiling_output)
1042 return
1043 end if
1044
1045 write(iunit, '(2a)') &
1046 ' CUMULATIVE TIME ', &
1047 ' | SELF TIME'
1048 write(iunit, '(2a)') &
1049 ' ----------------------------------------------------------', &
1050 '----------------|-------------------------------------------------------------'
1051 write(iunit, '(2a)') &
1052 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1053 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1054 write(iunit, '(2a)') &
1055 '===================================================================================================', &
1056 '=================|============================================================='
1057
1058 profile_p => sphash_lookup(profiling_map, "COMPLETE_RUN", found)
1059 assert(found)
1060 select type(complete_run => profile_p)
1061 type is (profile_t)
1062 total_time = profile_total_time(complete_run)
1063 end select
1065 safe_allocate(selftime(1:prof_vars%last_profile))
1066 safe_allocate(position(1:prof_vars%last_profile))
1067
1068 do ii = 1, prof_vars%last_profile
1069 selftime(ii) = -profile_self_time(prof_vars%profile_list(ii)%p)
1070 position(ii) = ii
1071 end do
1072
1073 call sort(selftime, position)
1074
1075 do ii = 1, prof_vars%last_profile
1076 prof => prof_vars%profile_list(position(ii))%p
1077 if (.not. prof%initialized) then
1078 write(message(1),'(a,i6,a)') "Internal error: Profile number ", position(ii), " is not initialized."
1079 call messages_fatal(1)
1080 end if
1081 if (prof%active) then
1082 write(message(1),'(a)') "Internal error: Profile '" // trim(profile_label(prof)) // &
1083 "' is active, i.e. profiling_out was not called."
1084 call messages_warning(1)
1085 end if
1086
1087 if (profile_num_calls(prof) == 0) cycle
1088
1089 write(iunit, '(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1090 profile_label(prof), &
1091 profile_num_calls(prof), &
1092 profile_total_time(prof), &
1094 profile_min_time(prof), &
1097 profile_total_time(prof)/total_time*100.0_real64, &
1098 ' | ', &
1099 profile_self_time(prof), &
1102 profile_self_bandwidth(prof), &
1103 profile_self_time(prof)/total_time*100.0_real64
1104 end do
1105
1106 call io_close(iunit)
1107
1108 if (prof_vars%output_yaml) then
1109 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number//'.yaml'
1110 iunit = io_open(trim(filename), namespace, action='write', die=.false.)
1111 if (iunit == -1) then
1112 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1113 call messages_warning(1)
1114 pop_sub(profiling_output)
1115 return
1116 end if
1117 write(iunit, '(2a)') 'schema: [num_calls, total_time, total_throughput, ', &
1118 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1119 write(iunit, '(a)') 'data:'
1120
1121 do ii = 1, prof_vars%last_profile
1122 prof => prof_vars%profile_list(position(ii))%p
1123 if (profile_num_calls(prof) == 0) cycle
1124 write(iunit, '(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1125 ' ', profile_label(prof), ': [', &
1126 profile_num_calls(prof), ', ', &
1127 profile_total_time(prof), ', ', &
1128 profile_total_throughput(prof), ', ', &
1129 profile_total_bandwidth(prof), ', ', &
1130 profile_self_time(prof), ', ', &
1131 profile_self_throughput(prof), ', ', &
1132 profile_self_bandwidth(prof), ']'
1133 end do
1134
1135 call io_close(iunit)
1136 end if
1137
1138 safe_deallocate_a(selftime)
1139 safe_deallocate_a(position)
1140
1141 if (prof_vars%output_tree) then
1142 filename = trim(prof_vars%output_dir)//'/time.'//prof_vars%file_number//'.tree'
1143 iunit = io_open(trim(filename), namespace, action='write', die=.false.)
1144 if (iunit == -1) then
1145 message(1) = 'Failed to open file ' // trim(filename) // ' to write profiling results.'
1146 call messages_warning(1)
1147 pop_sub(profiling_output)
1148 return
1149 end if
1150 write(iunit, '(a40,a11,a11,a12)') &
1151 "Tree level, region ", &
1152 "% of total ", "% of parent", &
1153 " Full time"
1154
1155 select type(complete_run => profile_p)
1156 type is (profile_t)
1157 ! output of top-level node
1158 write(iunit, '(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1159 repeat('-', 0) // '| ', &
1160 profile_label(complete_run), &
1161 repeat(' ', 15-0-2), &
1162 100.0, "% ", &
1163 100.0, "% ", &
1164 total_time
1165 call output_tree_level(complete_run, 1, total_time, iunit)
1166 end select
1167 write(iunit, '(a)') "// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1168 write(iunit, '(a)') "// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1169 call io_close(iunit)
1170 end if
1171
1172 pop_sub(profiling_output)
1173 contains
1174 ! Traverse the tree depth-first, pre-order
!> Print the part of the call tree below `profile` to `iunit`,
!! depth-first and pre-order.
!!
!! For every index flagged in `profile%has_child` one line is written with:
!! the tree marker (`level` dashes followed by '| '), the label of
!! `prof_vars%profile_list(ichild)%p`, the share of `profile%timings(ichild)`
!! in `total_time` and in `profile%total_time` (both in percent), and
!! `profile%timings(ichild)` itself. The routine then recurses into that
!! child with `level + 1`.
!!
!! profile:    node whose children are printed
!! level:      nesting depth of the children (number of leading dashes)
!! total_time: reference time used for the "% of total" column
!! iunit:      unit the lines are written to
recursive subroutine output_tree_level(profile, level, total_time, iunit)
  type(profile_t), intent(in) :: profile
  integer,         intent(in) :: level
  real(real64),    intent(in) :: total_time
  integer,         intent(in) :: iunit

  ! Number of columns shared by the tree marker and the padding after the label
  integer, parameter :: width = 15
  integer :: ichild, padding

  ! The padding shrinks as the marker grows. It must never become negative,
  ! because repeat() requires a non-negative number of copies; for deeply
  ! nested regions (level > width - 2) the padding is simply dropped.
  padding = max(0, width - level - 2)

  ! loop over children
  do ichild = 1, max_profiles
    if (.not. profile%has_child(ichild)) cycle

    ! print out information on current child with the first marker
    ! placed according to the level of the tree
    write(iunit, '(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
      repeat('-', level) // '| ', &
      profile_label(prof_vars%profile_list(ichild)%p), &
      repeat(' ', padding), &
      profile%timings(ichild)/total_time * 100, "% ", &
      profile%timings(ichild)/profile%total_time * 100, "% ", &
      profile%timings(ichild)

    ! descend before moving on to the next sibling (pre-order traversal)
    call output_tree_level(prof_vars%profile_list(ichild)%p, &
      level+1, total_time, iunit)
  end do
end subroutine output_tree_level
1203 end subroutine profiling_output
1204
1205
1206 ! ---------------------------------------------------------
!> Build a compact "name(file:line)" tag identifying an allocation site.
!!
!! If `var` ends with ')', the trailing parenthesised group (matched by
!! counting brackets from the right) is cut off, so that only the part in
!! front of it is kept. From `file` only the part after the last '/' is used.
!! The assembled text is written to `str` and passed through compact().
!!
!! var:  variable expression as it appears at the call site
!! file: source file name, possibly with a directory prefix
!! line: source line number
!! str:  receives the resulting tag
subroutine profiling_make_position_str(var, file, line, str)
  character(len=*), intent(in)  :: var
  character(len=*), intent(in)  :: file
  integer,          intent(in)  :: line
  character(len=*), intent(out) :: str

  integer :: ii, jj, nn

  ! no push_sub, called too many times

  ! jj is the index of the last character of var that is kept
  jj = len(var)

  ! An empty var has no last character to inspect: var(0:0) would be an
  ! invalid substring reference, so the bracket handling is skipped entirely.
  if (jj > 0) then
    if (var(jj:jj) == ')') then
      ! nn counts the brackets that are still open while scanning backwards
      nn = 1
      do ii = len(var)-1, 1, -1
        jj = ii - 1
        if (var(ii:ii) == ')') nn = nn + 1
        if (var(ii:ii) == '(') nn = nn - 1
        if (nn == 0) exit
      end do
      ! Nothing is left in front of the bracket group (or the scan reached
      ! the first character): treat this as an internal error.
      if (jj == 0) then
        message(1) = "Internal Error in profiling_make_position_str"
        call messages_fatal(1)
      end if
    end if
  end if

  ! first character after the last '/' (1 if file contains no '/')
  ii = index(file, '/', back=.true.)+1

  write(str, '(4a,i5,a)') var(1:jj), "(", trim(file(ii:len(file))), ":", line, ")"
  call compact(str)

end subroutine profiling_make_position_str
1237
1238
1239 ! ---------------------------------------------------------
!> Append one record to the memory log on unit `prof_vars%mem_iunit`.
!!
!! The record holds the time elapsed since `prof_vars%start_time`, the event
!! tag `type`, the signed `size` of the event, the running total
!! `prof_vars%total_memory`, the value returned by loct_get_memory_usage()
!! and the location tag built from `var`, `file` and `line`.
subroutine profiling_memory_log(type, var, file, line, size)
  character(len=*), intent(in) :: type
  character(len=*), intent(in) :: var
  character(len=*), intent(in) :: file
  integer,          intent(in) :: line
  integer(int64),   intent(in) :: size

  character(len=256) :: location
  integer(int64)     :: mem_usage

  ! no push_sub, called too many times

  ! tag identifying where the event happened
  call profiling_make_position_str(var, file, line, location)

  ! current memory usage as reported by loct_get_memory_usage
  mem_usage = loct_get_memory_usage()

  write(prof_vars%mem_iunit, '(f16.6,a16,3i16,a70)') &
    loct_clock() - prof_vars%start_time, &
    trim(type), &
    size, &
    prof_vars%total_memory, &
    mem_usage, &
    trim(location)

end subroutine profiling_memory_log
1261
1262
1263 !-----------------------------------------------------
!> Register an allocation of `size_` made for `var` at `file`:`line`.
!!
!! The routine
!!  - increments `prof_vars%alloc_count` and adds `size_` to
!!    `prof_vars%total_memory`,
!!  - writes a record to the memory log if the profiling_memory_full bit is
!!    set in `prof_vars%mode`,
!!  - stops with a fatal error if a positive `prof_vars%memory_limit` is
!!    exceeded,
!!  - records a new maximum of the total memory together with its location,
!!  - updates the stack of the `max_memory_vars` largest variables
!!    (`prof_vars%large_vars` / `prof_vars%large_vars_size`).
subroutine profiling_memory_allocate(var, file, line, size_)
  character(len=*), intent(in) :: var
  character(len=*), intent(in) :: file
  integer,          intent(in) :: line
  integer(int64),   intent(in) :: size_

  integer            :: ii, jj
  integer(int64)     :: size
  character(len=256) :: str

  ! no push_sub, called too many times
  size = size_ ! make a copy that we can change

  prof_vars%alloc_count  = prof_vars%alloc_count + 1
  prof_vars%total_memory = prof_vars%total_memory + size

  if (bitand(prof_vars%mode, profiling_memory_full) /= 0) then
    call profiling_memory_log('A ', var, file, line, size)
  end if

  if (prof_vars%memory_limit > 0) then
    if (prof_vars%total_memory > prof_vars%memory_limit) then
      message(1) = "Memory limit set in the input file was passed"
      call messages_fatal(1)
    end if
  end if

  if (prof_vars%total_memory > prof_vars%max_memory) then
    prof_vars%max_memory = prof_vars%total_memory
    call profiling_make_position_str(var, file, line, prof_vars%max_memory_location)
  end if

  call profiling_make_position_str(var, file, line, str)

  ! check if variable is already in stack
  do ii = 1, max_memory_vars
    if (str == prof_vars%large_vars(ii)) then
      if (size > prof_vars%large_vars_size(ii)) then
        ! delete variable by moving stack up
        do jj = ii, max_memory_vars - 1
          prof_vars%large_vars(jj)      = prof_vars%large_vars(jj + 1)
          prof_vars%large_vars_size(jj) = prof_vars%large_vars_size(jj + 1)
        end do
        ! The last slot is now free: reset both its label and its size.
        ! Leaving the old label behind would keep a stale duplicate of the
        ! entry that has just been moved to position max_memory_vars - 1.
        prof_vars%large_vars(max_memory_vars)      = ''
        prof_vars%large_vars_size(max_memory_vars) = 0
      else
        ! do not consider this variable any longer
        size = -1
      end if
      exit
    end if
  end do

  ! insert in front of the first entry that is smaller than the new one
  do ii = 1, max_memory_vars
    if (size > prof_vars%large_vars_size(ii)) then
      ! move the stack one position down
      do jj = max_memory_vars, ii + 1, -1
        prof_vars%large_vars(jj)      = prof_vars%large_vars(jj - 1)
        prof_vars%large_vars_size(jj) = prof_vars%large_vars_size(jj - 1)
      end do
      prof_vars%large_vars_size(ii) = size
      prof_vars%large_vars(ii)      = str
      exit
    end if
  end do

end subroutine profiling_memory_allocate
1333
1334 !-----------------------------------------------------
!> Register a deallocation of `size` made for `var` at `file`:`line`.
!!
!! Subtracts `size` from `prof_vars%total_memory`, increments
!! `prof_vars%dealloc_count` and, if the profiling_memory_full bit is set in
!! `prof_vars%mode`, writes a 'D' record with the negated size to the
!! memory log.
subroutine profiling_memory_deallocate(var, file, line, size)
  character(len=*), intent(in) :: var
  character(len=*), intent(in) :: file
  integer,          intent(in) :: line
  integer(int64),   intent(in) :: size

  ! no push_sub, called too many times

  ! bookkeeping first, so that the log record shows the updated total
  prof_vars%total_memory  = prof_vars%total_memory - size
  prof_vars%dealloc_count = prof_vars%dealloc_count + 1

  ! nothing more to do unless full memory logging is switched on
  if (bitand(prof_vars%mode, profiling_memory_full) == 0) return

  call profiling_memory_log('D ', var, file, line, -size)

end subroutine profiling_memory_deallocate
1351
1352
! String handling functions from the Fortran stdlib, licensed under MIT,
! available at https://github.com/fortran-lang/stdlib
! Returns the corresponding uppercase letter if `c_in` is a lowercase
! ASCII character, otherwise `c_in` itself.
pure function char_to_upper(c_in) result(c_out)
  character(len=1), intent(in) :: c_in
  character(len=1)             :: c_out

  ! ASCII codes delimiting the lowercase letters and the distance
  ! between a lowercase letter and its uppercase counterpart
  integer, parameter :: code_a = iachar('a')
  integer, parameter :: code_z = iachar('z')
  integer, parameter :: case_shift = iachar('a') - iachar('A')
  integer :: code

  ! no push_sub/pop_sub, called too often
  code = iachar(c_in)

  select case (code)
  case (code_a:code_z)
    ! lowercase letter: shift it into the uppercase range
    c_out = achar(code - case_shift)
  case default
    ! everything else is mapped back through its own code
    c_out = achar(code)
  end select
end function char_to_upper
1371
1372 ! Convert string to upper case
pure function to_upper(string) result(upper_string)
  character(len=*), intent(in) :: string
  character(len=len(string))   :: upper_string

  integer :: pos, nchars

  ! no push_sub/pop_sub, called too often

  ! Convert character by character; the result has the same length as the
  ! input, so every position of upper_string is assigned exactly once.
  nchars = len(string)
  do pos = 1, nchars
    upper_string(pos:pos) = char_to_upper(string(pos:pos))
  end do
end function to_upper
1384end module profiling_oct_m
1385
1386!! Local Variables:
1387!! mode: f90
1388!! coding: utf-8
1389!! End:
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or an ampersand plus a new line !These macros should be used in macros that span several lines They should be !put immediately before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess.pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in non-debug mode (NDEBUG ! is defined). Otherwise it is merely a logical assertion that
real(real64), parameter, public m_huge
Definition: global.F90:209
real(real64), parameter, public m_zero
Definition: global.F90:191
logical pure function, public not_in_openmp()
Definition: global.F90:493
logical, public in_profiling_mode
Same for profiling mode.
Definition: global.F90:257
Definition: io.F90:116
integer(int64), save io_open_count
Definition: io.F90:155
subroutine, public io_close(iunit, grp)
Definition: io.F90:467
integer(int64) pure function, public io_get_close_count()
Definition: io.F90:668
integer(int64), save io_close_count
Definition: io.F90:156
subroutine, public io_mkdir(fname, namespace, parents)
Definition: io.F90:361
integer(int64) pure function, public io_get_open_count()
Definition: io.F90:661
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
Definition: io.F90:402
System information (time, memory, sysname)
Definition: loct.F90:117
character(kind=c_char, len=1) function, dimension(len_trim(f_string)+1), private string_f_to_c(f_string)
convert a Fortran string to a C string
Definition: loct.F90:240
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
Definition: messages.F90:898
character(len=512), private msg
Definition: messages.F90:167
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:525
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
real(real64) function, public mpi_get_wtime()
. Returns an elapsed time on the calling processor.
Definition: mpi.F90:548
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:272
subroutine profiling_count_tran_int(trf, type)
Definition: profiling.F90:840
subroutine profiling_count_tran_int_8_l(trf, type)
Definition: profiling.F90:757
subroutine, public profiling_end(namespace)
Definition: profiling.F90:415
type(profile_vars_t), target, save, public prof_vars
Definition: profiling.F90:248
real(real64) function profile_self_bandwidth(this)
Definition: profiling.F90:1038
real(real64) function profile_self_time(this)
Definition: profiling.F90:945
pure character(len=1) function char_to_upper(c_in)
Definition: profiling.F90:1426
integer, parameter max_memory_vars
Definition: profiling.F90:215
subroutine iprofiling_count_operations(ops)
Definition: profiling.F90:707
integer, parameter, public profiling_memory
Definition: profiling.F90:208
subroutine profiling_memory_log(type, var, file, line, size)
Definition: profiling.F90:1309
integer, parameter, public profiling_likwid
Definition: profiling.F90:208
subroutine profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
Definition: profiling.F90:1082
integer, parameter, public profiling_memory_full
Definition: profiling.F90:208
subroutine profiling_count_tran_real_8(trf, type)
Definition: profiling.F90:880
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine profiling_count_tran_real_8_l(trf, type)
Definition: profiling.F90:784
subroutine profiling_count_tran_int_8(trf, type)
Definition: profiling.F90:853
subroutine rprofiling_count_operations(ops)
Definition: profiling.F90:719
integer, parameter, public profiling_io
Definition: profiling.F90:208
real(real64) function profile_self_throughput(this)
Definition: profiling.F90:1022
subroutine profiling_count_tran_complex_8_l(trf, type)
Definition: profiling.F90:812
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
Definition: profiling.F90:494
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
real(real64) function profile_total_throughput(this)
Definition: profiling.F90:989
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
Definition: profiling.F90:257
subroutine profiling_count_tran_real_4_l(trf, type)
Definition: profiling.F90:770
subroutine dprofiling_count_operations(ops)
Definition: profiling.F90:731
pure character(len=len(string)) function to_upper(string)
Definition: profiling.F90:1442
real(real64) function profile_total_time_per_call(this)
Definition: profiling.F90:956
integer, parameter max_profiles
Max. number of tags.
Definition: profiling.F90:149
subroutine profiling_count_tran_int_l(trf, type)
Definition: profiling.F90:744
subroutine profiling_count_tran_complex_4_l(trf, type)
Definition: profiling.F90:798
subroutine profiling_count_tran_real_4(trf, type)
Definition: profiling.F90:866
type(sphash_t), save profiling_map
Definition: profiling.F90:250
integer function profile_num_calls(this)
Definition: profiling.F90:1054
subroutine profiling_count_tran_type(trf, type)
Definition: profiling.F90:922
real(real64) function profile_min_time(this)
Definition: profiling.F90:967
subroutine, public profiling_memory_deallocate(var, file, line, size)
Definition: profiling.F90:1404
character(label_length) function profile_label(this)
Definition: profiling.F90:1065
real(real64) function profile_self_time_per_call(this)
Definition: profiling.F90:978
subroutine, public profiling_memory_allocate(var, file, line, size_)
Definition: profiling.F90:1333
subroutine profiling_count_tran_type_l(trf, type)
Definition: profiling.F90:826
subroutine profiling_make_position_str(var, file, line, str)
Definition: profiling.F90:1276
subroutine profiling_count_tran_complex_8(trf, type)
Definition: profiling.F90:908
subroutine profiling_count_tran_complex_4(trf, type)
Definition: profiling.F90:894
real(real64) function profile_total_bandwidth(this)
Definition: profiling.F90:1006
real(real64) function profile_total_time(this)
Definition: profiling.F90:934
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:119
This module implements a simple hash table for string valued keys and integer values using the C++ ST...
Definition: sphash.F90:120
subroutine, public sphash_init(h)
Initialize a hash table h with size entries. Since we use separate chaining, the number of entries in...
Definition: sphash.F90:224
subroutine, public sphash_insert(h, key, val, clone)
Insert a (key, val) pair into the hash table h. If clone=.true., the object will be copied.
Definition: sphash.F90:293
subroutine, public sphash_end(h)
Free a hash table.
Definition: sphash.F90:250
class(*) function, pointer, public sphash_lookup(h, key, found)
Look up a value in the hash table h. If found is present, it indicates if key could be found in the t...
Definition: sphash.F90:325
integer pure function, public types_get_size(this)
Definition: types.F90:154
subroutine get_output_dir()
Definition: profiling.F90:398
recursive subroutine output_tree_level(profile, level, total_time, iunit)
Definition: profiling.F90:1244
int true(void)