54 use,
intrinsic :: iso_fortran_env
81 integer,
parameter :: &
82 LABEL_LENGTH = 40, & !< Max. number of characters of tag label.
87 character(LABEL_LENGTH) :: label
88 real(real64) :: entry_time
89 real(real64) :: total_time
90 real(real64) :: min_time
91 real(real64) :: self_time
92 real(real64) :: op_count_current
93 real(real64) :: op_count
94 real(real64) :: op_count_child
95 real(real64) :: op_count_child_current
96 real(real64) :: tr_count_current
97 real(real64) :: tr_count
98 real(real64) :: tr_count_child
99 real(real64) :: tr_count_child_current
100 type(profile_t),
pointer :: parent
102 logical :: initialized = .false.
103 logical :: active = .false.
106 logical :: has_child(MAX_PROFILES)
107 real(real64) :: timings(MAX_PROFILES)
112 type(profile_t),
pointer :: p
140 integer,
parameter,
public :: &
141 PROFILING_TIME = 1, &
151 integer,
public :: mode
155 integer :: last_profile
157 integer(int64) :: alloc_count
158 integer(int64) :: dealloc_count
160 integer(int64) :: memory_limit = -1
161 integer(int64) :: total_memory
162 integer(int64) :: max_memory
163 character(len=256) :: max_memory_location
168 real(real64) :: start_time
171 character(len=256) :: output_dir
172 character(len=6) :: file_number
176 logical :: output_yaml
177 logical :: output_tree
182 type(sphash_t),
save :: profiling_map
189 type(namespace_t),
intent(in) :: namespace
277 prof_vars%memory_limit = int(ii, int64)*1024
285 namespace, action=
'write')
286 write(
prof_vars%mem_iunit,
'(5a16,a70)')
'Elapsed Time',
'Alloc/Dealloc',
'Size (words)',
'Prof Mem', &
287 'Sys Mem',
'Variable Name(Filename:Line)'
296 call likwid_markerinit()
349 real(real64),
parameter :: megabyte = 1048576.0_real64
351 integer(int64) :: io_open_count_red, io_close_count_red
360 prof_vars%profile_list(ii)%p%initialized = .false.
367 write(
message(1),
'(a,i10)')
'Number of allocations = ',
prof_vars%alloc_count
368 write(
message(2),
'(a,i10)')
'Number of deallocations = ',
prof_vars%dealloc_count
369 write(
message(3),
'(a,f18.3,a)')
'Maximum total memory allocated = ',
prof_vars%max_memory/megabyte,
' Mbytes'
374 message(2) =
'Largest variables allocated:'
384 write(
message(1),
'(a,i10,a,i10,a)')
"Not all memory was deallocated: ",
prof_vars%alloc_count, &
385 ' allocations and ',
prof_vars%dealloc_count,
' deallocations'
389 write(
message(1),
'(a,f18.3,a,f18.3,a)')
"Remaining allocated memory: ",
prof_vars%total_memory/megabyte, &
390 ' Mbytes (out of maximum ',
prof_vars%max_memory/megabyte,
' Mbytes)'
401 call likwid_markerclose()
413 write(
message(3),
'(a,i10)')
'Global number of file open = ', io_open_count_red
414 write(
message(4),
'(a,i10)')
'Global number of file close = ', io_close_count_red
426 type(
profile_t),
target,
intent(out) :: this
427 character(*),
intent(in) :: label
433 if(len(label) > label_length)
then
434 message(1) =
"Label " // trim(label) //
" is too long for the internal profiler"
442 this%entry_time = huge(this%entry_time)
444 this%op_count_current =
m_zero
446 this%op_count_child =
m_zero
447 this%tr_count_current =
m_zero
449 this%tr_count_child =
m_zero
450 this%active = .false.
452 this%has_child = .false.
467 this%initialized = .
true.
470 do iprofile = 1,
prof_vars%last_profile - 1
471 if (
prof_vars%profile_list(iprofile)%p%label == this%label)
then
472 message(1) =
"Label "//label//
" used more than once."
486 character(*),
intent(in) :: label
487 logical,
optional,
intent(in) :: exclude
491 class(*),
pointer :: profile_p
494 character(len=len(label)) :: label_
502 profile_p =>
sphash_lookup(profiling_map, trim(label_), found)
503 if (.not. found)
then
507 profile_p =>
sphash_lookup(profiling_map, trim(label_), found)
509 select type(this => profile_p)
511 if (.not. this%initialized)
then
515 assert(.not. this%active)
519 if (
associated(
prof_vars%current%p))
then
522 this%parent%has_child(this%index) = .
true.
528 this%op_count_current =
m_zero
529 this%tr_count_current =
m_zero
530 this%op_count_child_current =
m_zero
531 this%tr_count_child_current =
m_zero
534 this%entry_time = now
545 call likwid_markerstartregion(trim(label_))
557 character(*),
intent(in) :: label
559 real(real64) :: now, time_spent
560 class(*),
pointer :: profile_p
570 select type(this => profile_p)
572 assert(this%initialized)
574 this%active = .false.
577 time_spent = now - this%entry_time
578 this%total_time = this%total_time + time_spent
579 this%self_time = this%self_time + time_spent
580 this%count = this%count + 1
581 if (time_spent < this%min_time)
then
582 this%min_time = time_spent
585 this%op_count = this%op_count + this%op_count_current
586 this%tr_count = this%tr_count + this%tr_count_current
587 this%op_count_child = this%op_count_child + this%op_count_child_current
588 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
590 if (
associated(this%parent))
then
592 this%parent%self_time = this%parent%self_time - time_spent
593 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
596 this%parent%op_count_child_current = this%parent%op_count_child_current &
597 + this%op_count_current + this%op_count_child_current
598 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
599 + this%tr_count_current + this%tr_count_child_current
601 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
612 call likwid_markerstopregion(trim(this%label))
627 integer,
intent(in) :: ops
632 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
639 real(4),
intent(in) :: ops
644 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
651 real(real64),
intent(in) :: ops
664 integer(int64),
intent(in) :: trf
665 integer,
intent(in) :: type
670 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
677 integer(int64),
intent(in) :: trf
678 integer(int64),
intent(in) :: type
683 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
690 integer(int64),
intent(in) :: trf
691 real(4),
intent(in) :: type
696 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
704 integer(int64),
intent(in) :: trf
705 real(real64),
intent(in) :: type
710 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
718 integer(int64),
intent(in) :: trf
719 complex(4),
intent(in) :: type
724 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
732 integer(int64),
intent(in) :: trf
733 complex(real64),
intent(in) :: type
738 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
746 integer(int64),
intent(in) :: trf
747 type(
type_t),
intent(in) :: type
760 integer,
intent(in) :: trf
761 integer,
intent(in) :: type
766 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
773 integer,
intent(in) :: trf
774 integer(int64),
intent(in) :: type
779 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
786 integer,
intent(in) :: trf
787 real(4),
intent(in) :: type
792 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
800 integer,
intent(in) :: trf
801 real(real64),
intent(in) :: type
806 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
814 integer,
intent(in) :: trf
815 complex(4),
intent(in) :: type
820 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
828 integer,
intent(in) :: trf
829 complex(real64),
intent(in) :: type
834 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
842 integer,
intent(in) :: trf
843 type(
type_t),
intent(in) :: type
853 real(real64) function profile_total_time(this)
856 push_sub(profile_total_time)
857 profile_total_time = this%total_time
859 pop_sub(profile_total_time)
864 real(real64) function profile_self_time(this)
875 real(real64) function profile_total_time_per_call(this)
886 real(real64) function profile_min_time(this)
897 real(real64) function profile_self_time_per_call(this)
908 real(real64) function profile_total_throughput(this)
911 push_sub(profile_throughput)
913 if (this%total_time > epsilon(this%total_time))
then
919 pop_sub(profile_throughput)
928 push_sub(profile_bandwidth)
930 if (this%total_time > epsilon(this%total_time))
then
936 pop_sub(profile_bandwidth)
944 push_sub(profile_throughput)
946 if (this%self_time > epsilon(this%self_time))
then
952 pop_sub(profile_throughput)
960 push_sub(profile_bandwidth)
962 if (this%self_time > epsilon(this%self_time))
then
968 pop_sub(profile_bandwidth)
1002 type(namespace_t),
intent(in) :: namespace
1006 real(real64) :: total_time
1008 character(len=256) :: filename
1009 real(real64),
allocatable :: selftime(:)
1010 integer,
allocatable :: position(:)
1011 class(*),
pointer :: profile_p
1014 if (.not. in_profiling_mode)
return
1017 call mpi_world%barrier()
1019 if (.not.
prof_vars%all_nodes .and. .not. mpi_grp_is_root(mpi_world))
then
1025 iunit = io_open(trim(filename), namespace, action=
'write')
1027 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1028 call messages_warning(1)
1033 write(iunit,
'(2a)') &
1034 ' CUMULATIVE TIME ', &
1036 write(iunit,
'(2a)') &
1037 ' ----------------------------------------------------------', &
1038 '----------------|-------------------------------------------------------------'
1039 write(iunit,
'(2a)') &
1040 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1041 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1042 write(iunit,
'(2a)') &
1043 '===================================================================================================', &
1044 '=================|============================================================='
1046 profile_p => sphash_lookup(
profiling_map,
"COMPLETE_RUN", found)
1048 select type(complete_run => profile_p)
1053 safe_allocate(selftime(1:
prof_vars%last_profile))
1054 safe_allocate(position(1:
prof_vars%last_profile))
1061 call sort(selftime, position)
1064 prof =>
prof_vars%profile_list(position(ii))%p
1065 if (.not. prof%initialized)
then
1066 write(message(1),
'(a,i6,a)')
"Internal error: Profile number ", position(ii),
" is not initialized."
1067 call messages_fatal(1)
1069 if (prof%active)
then
1070 write(message(1),
'(a)')
"Internal error: Profile '" // trim(
profile_label(prof)) // &
1071 "' is active, i.e. profiling_out was not called."
1072 call messages_warning(1)
1077 write(iunit,
'(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1094 call io_close(iunit)
1098 iunit = io_open(trim(filename), namespace, action=
'write')
1100 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1101 call messages_warning(1)
1105 write(iunit,
'(2a)')
'schema: [num_calls, total_time, total_throughput, ', &
1106 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1107 write(iunit,
'(a)')
'data:'
1110 prof =>
prof_vars%profile_list(position(ii))%p
1112 write(iunit,
'(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1123 call io_close(iunit)
1126 safe_deallocate_a(selftime)
1127 safe_deallocate_a(position)
1131 iunit = io_open(trim(filename), namespace, action=
'write')
1133 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1134 call messages_warning(1)
1138 write(iunit,
'(a40,a11,a11,a12)') &
1139 "Tree level, region ", &
1140 "% of total ",
"% of parent", &
1143 select type(complete_run => profile_p)
1146 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1147 repeat(
'-', 0) //
'| ', &
1149 repeat(
' ', 15-0-2), &
1155 write(iunit,
'(a)')
"// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1156 write(iunit,
'(a)')
"// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1157 call io_close(iunit)
1165 integer,
intent(in) :: level
1166 real(real64),
intent(in) :: total_time
1167 integer,
intent(in) :: iunit
1169 integer :: ichild, width
1175 if (profile%has_child(ichild))
then
1178 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1179 repeat(
'-', level) //
'| ', &
1181 repeat(
' ', width-level-2), &
1182 profile%timings(ichild)/total_time * 100,
"% ", &
1183 profile%timings(ichild)/profile%total_time * 100,
"% ", &
1184 profile%timings(ichild)
1186 level+1, total_time, iunit)
1196 character(len=*),
intent(in) :: var
1197 character(len=*),
intent(in) :: file
1198 integer,
intent(in) :: line
1199 character(len=*),
intent(out) :: str
1201 integer :: ii, jj, nn
1206 if (var(jj:jj) ==
')')
then
1208 do ii = len(var)-1, 1, -1
1210 if (var(ii:ii) ==
')') nn = nn + 1
1211 if (var(ii:ii) ==
'(') nn = nn - 1
1215 message(1) =
"Internal Error in profiling_memory_log"
1216 call messages_fatal(1)
1220 do while (file(ii:ii+2) ==
"../")
1223 write(str,
'(4a,i5,a)') var(1:jj),
"(", trim(file(ii:len(file))),
":", line,
")"
1231 character(len=*),
intent(in) :: type
1232 character(len=*),
intent(in) :: var
1233 character(len=*),
intent(in) :: file
1234 integer,
intent(in) :: line
1235 integer(int64),
intent(in) :: size
1237 character(len=256) :: str
1238 integer(int64) :: mem
1245 mem = loct_get_memory_usage()
1247 write(
prof_vars%mem_iunit,
'(f16.6,a16,3i16,a70)') loct_clock() -
prof_vars%start_time, &
1248 trim(type),
size,
prof_vars%total_memory, mem, trim(str)
1255 character(len=*),
intent(in) :: var
1256 character(len=*),
intent(in) :: file
1257 integer,
intent(in) :: line
1258 integer(int64),
intent(in) :: size_
1261 integer(int64) :: size
1262 character(len=256) :: str
1277 message(1) =
"Memory limit set in the input file was passed"
1278 call messages_fatal(1)
1291 if (str ==
prof_vars%large_vars(ii))
then
1292 if (
size >
prof_vars%large_vars_size(ii))
then
1309 if (
size >
prof_vars%large_vars_size(ii))
then
1326 character(len=*),
intent(in) :: var
1327 character(len=*),
intent(in) :: file
1328 integer,
intent(in) :: line
1329 integer(int64),
intent(in) :: size
1348 character(len=1),
intent(in) :: c_in
1349 character(len=1) :: c_out
1351 integer,
parameter :: difference=iachar(
'a')-iachar(
'A'), la=iachar(
'a'), lz=iachar(
'z')
1352 integer :: ascii_decimal
1355 ascii_decimal = iachar(c_in)
1356 if (ascii_decimal >= la .and. ascii_decimal <= lz)
then
1357 ascii_decimal = ascii_decimal - difference
1359 c_out = achar(ascii_decimal)
1363 pure function to_upper(string)
result(upper_string)
1364 character(len=*),
intent(in) :: string
1365 character(len=len(string)) :: upper_string
1370 do i = 1, len(string)
real(real64), parameter, public m_huge
real(real64), parameter, public m_zero
logical pure function, public not_in_openmp()
logical, public in_profiling_mode
Same for profiling mode.
integer(int64), save io_open_count
subroutine, public io_close(iunit, grp)
integer(int64) pure function, public io_get_close_count()
integer(int64), save io_close_count
subroutine, public io_mkdir(fname, namespace, parents)
integer(int64) pure function, public io_get_open_count()
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
character(len=512), private msg
subroutine, public messages_warning(no_lines, all_nodes, namespace)
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public messages_input_error(namespace, var, details, row, column)
real(real64) function, public mpi_get_wtime()
. Returns an elapsed time on the calling processor.
logical function mpi_grp_is_root(grp)
Is the current MPI process of grpcomm, root.
type(mpi_grp_t), public mpi_world
subroutine profiling_count_tran_int(trf, type)
subroutine profiling_count_tran_int_8_l(trf, type)
subroutine, public profiling_end(namespace)
type(profile_vars_t), target, save, public prof_vars
real(real64) function profile_self_bandwidth(this)
real(real64) function profile_self_time(this)
pure character(len=1) function char_to_upper(c_in)
integer, parameter max_memory_vars
subroutine iprofiling_count_operations(ops)
integer, parameter, public profiling_memory
subroutine profiling_memory_log(type, var, file, line, size)
integer, parameter, public profiling_likwid
subroutine profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
integer, parameter, public profiling_memory_full
subroutine profiling_count_tran_real_8(trf, type)
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
subroutine profiling_count_tran_real_8_l(trf, type)
subroutine profiling_count_tran_int_8(trf, type)
subroutine rprofiling_count_operations(ops)
integer, parameter, public profiling_io
real(real64) function profile_self_throughput(this)
subroutine profiling_count_tran_complex_8_l(trf, type)
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
real(real64) function profile_total_throughput(this)
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
subroutine profiling_count_tran_real_4_l(trf, type)
subroutine dprofiling_count_operations(ops)
pure character(len=len(string)) function to_upper(string)
real(real64) function profile_total_time_per_call(this)
integer, parameter max_profiles
Max. number of tags.
subroutine profiling_count_tran_int_l(trf, type)
subroutine profiling_count_tran_complex_4_l(trf, type)
subroutine profiling_count_tran_real_4(trf, type)
type(sphash_t), save profiling_map
integer function profile_num_calls(this)
subroutine profiling_count_tran_type(trf, type)
real(real64) function profile_min_time(this)
subroutine, public profiling_memory_deallocate(var, file, line, size)
character(label_length) function profile_label(this)
real(real64) function profile_self_time_per_call(this)
subroutine, public profiling_memory_allocate(var, file, line, size_)
subroutine profiling_count_tran_type_l(trf, type)
subroutine profiling_make_position_str(var, file, line, str)
subroutine profiling_count_tran_complex_8(trf, type)
subroutine profiling_count_tran_complex_4(trf, type)
real(real64) function profile_total_bandwidth(this)
real(real64) function profile_total_time(this)
This module is intended to contain "only mathematical" functions and procedures.
This module implements a simple hash table for string valued keys and integer values using the C++ ST...
subroutine, public sphash_init(h)
Initialize a hash table h with size entries. Since we use separate chaining, the number of entries in...
subroutine, public sphash_insert(h, key, val, clone)
Insert a (key, val) pair into the hash table h. If clone=.true., the object will be copied.
subroutine, public sphash_end(h)
Free a hash table.
class(*) function, pointer, public sphash_lookup(h, key, found)
Look up a value in the hash table h. If found is present, it indicates if key could be found in the t...
integer pure function, public types_get_size(this)
subroutine get_output_dir()
recursive subroutine output_tree_level(profile, level, total_time, iunit)