54 use,
intrinsic :: iso_fortran_env
81 integer,
parameter :: &
82 LABEL_LENGTH = 40, & !< Max. number of characters of tag label.
87 character(LABEL_LENGTH) :: label
88 real(real64) :: entry_time
89 real(real64) :: total_time
90 real(real64) :: min_time
91 real(real64) :: self_time
92 real(real64) :: op_count_current
93 real(real64) :: op_count
94 real(real64) :: op_count_child
95 real(real64) :: op_count_child_current
96 real(real64) :: tr_count_current
97 real(real64) :: tr_count
98 real(real64) :: tr_count_child
99 real(real64) :: tr_count_child_current
100 type(profile_t),
pointer :: parent
102 logical :: initialized = .false.
103 logical :: active = .false.
106 logical :: has_child(MAX_PROFILES)
107 real(real64) :: timings(MAX_PROFILES)
112 type(profile_t),
pointer :: p
140 integer,
parameter,
public :: &
141 PROFILING_TIME = 1, &
147 integer,
parameter :: MAX_MEMORY_VARS = 25
151 integer,
public ::
mode
153 type(profile_pointer_t) :: current
154 type(profile_pointer_t) :: profile_list(MAX_PROFILES)
155 integer :: last_profile
157 integer(int64) :: alloc_count
158 integer(int64) :: dealloc_count
160 integer(int64) :: memory_limit = -1
161 integer(int64) :: total_memory
162 integer(int64) :: max_memory
163 character(len=256) :: max_memory_location
165 integer(int64) :: large_vars_size(max_memory_vars)
166 character(len=256) :: large_vars(max_memory_vars)
168 real(real64) :: start_time
171 character(len=256) :: output_dir
172 character(len=6) :: file_number
176 logical :: output_yaml
177 logical :: output_tree
189 type(namespace_t),
intent(in) :: namespace
277 prof_vars%memory_limit = int(ii, int64)*1024
285 namespace, action=
'write')
286 write(
prof_vars%mem_iunit,
'(5a16,a70)')
'Elapsed Time',
'Alloc/Dealloc',
'Size (words)',
'Prof Mem', &
287 'Sys Mem',
'Variable Name(Filename:Line)'
296 call likwid_markerinit()
349 real(real64),
parameter :: megabyte = 1048576.0_real64
351 integer(int64) :: io_open_count_red, io_close_count_red
360 prof_vars%profile_list(ii)%p%initialized = .false.
367 write(
message(1),
'(a,i10)')
'Number of allocations = ',
prof_vars%alloc_count
368 write(
message(2),
'(a,i10)')
'Number of deallocations = ',
prof_vars%dealloc_count
369 write(
message(3),
'(a,f18.3,a)')
'Maximum total memory allocated = ',
prof_vars%max_memory/megabyte,
' Mbytes'
374 message(2) =
'Largest variables allocated:'
376 do ii = 1, max_memory_vars
384 write(
message(1),
'(a,i10,a,i10,a)')
"Not all memory was deallocated: ",
prof_vars%alloc_count, &
385 ' allocations and ',
prof_vars%dealloc_count,
' deallocations'
389 write(
message(1),
'(a,f18.3,a,f18.3,a)')
"Remaining allocated memory: ",
prof_vars%total_memory/megabyte, &
390 ' Mbytes (out of maximum ',
prof_vars%max_memory/megabyte,
' Mbytes)'
401 call likwid_markerclose()
413 write(
message(3),
'(a,i10)')
'Global number of file open = ', io_open_count_red
414 write(
message(4),
'(a,i10)')
'Global number of file close = ', io_close_count_red
426 type(
profile_t),
target,
intent(out) :: this
427 character(*),
intent(in) :: label
433 if(len(label) > label_length)
then
434 message(1) =
"Label " // trim(label) //
" is too long for the internal profiler"
442 this%entry_time = huge(this%entry_time)
444 this%op_count_current =
m_zero
446 this%op_count_child =
m_zero
447 this%tr_count_current =
m_zero
449 this%tr_count_child =
m_zero
450 this%active = .false.
452 this%has_child = .false.
467 this%initialized = .
true.
470 do iprofile = 1,
prof_vars%last_profile - 1
471 if (
prof_vars%profile_list(iprofile)%p%label == this%label)
then
472 message(1) =
"Label "//label//
" used more than once."
486 character(*),
intent(in) :: label
487 logical,
optional,
intent(in) :: exclude
491 class(*),
pointer :: profile_p
494 character(len=len(label)) :: label_
503 if (.not. found)
then
509 select type(this => profile_p)
511 if (.not. this%initialized)
then
515 if (this%active)
then
516 message(1) =
" The region with label "//trim(label)//
" is already active."
517 message(2) =
" This is likely caused by a missing or incorrect profiling_out call."
520 assert(.not. this%active)
525 if (
associated(
prof_vars%current%p))
then
528 this%parent%has_child(this%index) = .
true.
534 this%op_count_current =
m_zero
535 this%tr_count_current =
m_zero
536 this%op_count_child_current =
m_zero
537 this%tr_count_child_current =
m_zero
540 this%entry_time = now
544#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
551 call likwid_markerstartregion(trim(label_))
563 character(*),
intent(in) :: label
565 real(real64) :: now, time_spent
566 class(*),
pointer :: profile_p
575 if (.not. found)
then
576 message(1) =
" No profiling region with label "//trim(label)//
" found."
577 message(2) =
" This is likely caused by a missing or incorrect profiling_in call."
582 select type(this => profile_p)
584 assert(this%initialized)
586 this%active = .false.
589 time_spent = now - this%entry_time
590 this%total_time = this%total_time + time_spent
591 this%self_time = this%self_time + time_spent
592 this%count = this%count + 1
593 if (time_spent < this%min_time)
then
594 this%min_time = time_spent
597 this%op_count = this%op_count + this%op_count_current
598 this%tr_count = this%tr_count + this%tr_count_current
599 this%op_count_child = this%op_count_child + this%op_count_child_current
600 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
602 if (
associated(this%parent))
then
604 this%parent%self_time = this%parent%self_time - time_spent
605 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
608 this%parent%op_count_child_current = this%parent%op_count_child_current &
609 + this%op_count_current + this%op_count_child_current
610 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
611 + this%tr_count_current + this%tr_count_child_current
613 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
624 call likwid_markerstopregion(trim(this%label))
629#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
639 integer,
intent(in) :: ops
644 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
651 real(4),
intent(in) :: ops
656 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
663 real(real64),
intent(in) :: ops
676 integer(int64),
intent(in) :: trf
677 integer,
intent(in) :: type
682 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
689 integer(int64),
intent(in) :: trf
690 integer(int64),
intent(in) :: type
695 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
702 integer(int64),
intent(in) :: trf
703 real(4),
intent(in) :: type
708 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
716 integer(int64),
intent(in) :: trf
717 real(real64),
intent(in) :: type
722 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
730 integer(int64),
intent(in) :: trf
731 complex(4),
intent(in) :: type
736 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
744 integer(int64),
intent(in) :: trf
745 complex(real64),
intent(in) :: type
750 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
758 integer(int64),
intent(in) :: trf
759 type(
type_t),
intent(in) :: type
772 integer,
intent(in) :: trf
773 integer,
intent(in) :: type
778 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
785 integer,
intent(in) :: trf
786 integer(int64),
intent(in) :: type
791 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
798 integer,
intent(in) :: trf
799 real(4),
intent(in) :: type
804 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
812 integer,
intent(in) :: trf
813 real(real64),
intent(in) :: type
818 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
826 integer,
intent(in) :: trf
827 complex(4),
intent(in) :: type
832 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
840 integer,
intent(in) :: trf
841 complex(real64),
intent(in) :: type
846 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
854 integer,
intent(in) :: trf
855 type(
type_t),
intent(in) :: type
865 real(real64) function profile_total_time(this)
868 push_sub(profile_total_time)
869 profile_total_time = this%total_time
871 pop_sub(profile_total_time)
876 real(real64) function profile_self_time(this)
887 real(real64) function profile_total_time_per_call(this)
898 real(real64) function profile_min_time(this)
909 real(real64) function profile_self_time_per_call(this)
920 real(real64) function profile_total_throughput(this)
923 push_sub(profile_throughput)
925 if (this%total_time > epsilon(this%total_time))
then
931 pop_sub(profile_throughput)
940 push_sub(profile_bandwidth)
942 if (this%total_time > epsilon(this%total_time))
then
948 pop_sub(profile_bandwidth)
956 push_sub(profile_throughput)
958 if (this%self_time > epsilon(this%self_time))
then
964 pop_sub(profile_throughput)
972 push_sub(profile_bandwidth)
974 if (this%self_time > epsilon(this%self_time))
then
980 pop_sub(profile_bandwidth)
1014 type(namespace_t),
intent(in) :: namespace
1018 real(real64) :: total_time
1020 character(len=256) :: filename
1021 real(real64),
allocatable :: selftime(:)
1022 integer,
allocatable :: position(:)
1023 class(*),
pointer :: profile_p
1026 if (.not. in_profiling_mode)
return
1029 call mpi_world%barrier()
1031 if (.not.
prof_vars%all_nodes .and. .not. mpi_world%is_root())
then
1037 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1038 if (iunit == -1)
then
1039 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1040 call messages_warning(1)
1045 write(iunit,
'(2a)') &
1046 ' CUMULATIVE TIME ', &
1048 write(iunit,
'(2a)') &
1049 ' ----------------------------------------------------------', &
1050 '----------------|-------------------------------------------------------------'
1051 write(iunit,
'(2a)') &
1052 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1053 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1054 write(iunit,
'(2a)') &
1055 '===================================================================================================', &
1056 '=================|============================================================='
1058 profile_p => sphash_lookup(
profiling_map,
"COMPLETE_RUN", found)
1060 select type(complete_run => profile_p)
1065 safe_allocate(selftime(1:
prof_vars%last_profile))
1066 safe_allocate(position(1:
prof_vars%last_profile))
1073 call sort(selftime, position)
1076 prof =>
prof_vars%profile_list(position(ii))%p
1077 if (.not. prof%initialized)
then
1078 write(message(1),
'(a,i6,a)')
"Internal error: Profile number ", position(ii),
" is not initialized."
1079 call messages_fatal(1)
1081 if (prof%active)
then
1082 write(message(1),
'(a)')
"Internal error: Profile '" // trim(
profile_label(prof)) // &
1083 "' is active, i.e. profiling_out was not called."
1084 call messages_warning(1)
1089 write(iunit,
'(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1106 call io_close(iunit)
1110 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1111 if (iunit == -1)
then
1112 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1113 call messages_warning(1)
1117 write(iunit,
'(2a)')
'schema: [num_calls, total_time, total_throughput, ', &
1118 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1119 write(iunit,
'(a)')
'data:'
1122 prof =>
prof_vars%profile_list(position(ii))%p
1124 write(iunit,
'(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1135 call io_close(iunit)
1138 safe_deallocate_a(selftime)
1139 safe_deallocate_a(position)
1143 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1144 if (iunit == -1)
then
1145 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1146 call messages_warning(1)
1150 write(iunit,
'(a40,a11,a11,a12)') &
1151 "Tree level, region ", &
1152 "% of total ",
"% of parent", &
1155 select type(complete_run => profile_p)
1158 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1159 repeat(
'-', 0) //
'| ', &
1161 repeat(
' ', 15-0-2), &
1167 write(iunit,
'(a)')
"// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1168 write(iunit,
'(a)')
"// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1169 call io_close(iunit)
1177 integer,
intent(in) :: level
1178 real(real64),
intent(in) :: total_time
1179 integer,
intent(in) :: iunit
1181 integer :: ichild, width
1187 if (profile%has_child(ichild))
then
1190 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1191 repeat(
'-', level) //
'| ', &
1193 repeat(
' ', width-level-2), &
1194 profile%timings(ichild)/total_time * 100,
"% ", &
1195 profile%timings(ichild)/profile%total_time * 100,
"% ", &
1196 profile%timings(ichild)
1198 level+1, total_time, iunit)
1208 character(len=*),
intent(in) :: var
1209 character(len=*),
intent(in) :: file
1210 integer,
intent(in) :: line
1211 character(len=*),
intent(out) :: str
1213 integer :: ii, jj, nn
1218 if (var(jj:jj) ==
')')
then
1220 do ii = len(var)-1, 1, -1
1222 if (var(ii:ii) ==
')') nn = nn + 1
1223 if (var(ii:ii) ==
'(') nn = nn - 1
1227 message(1) =
"Internal Error in profiling_memory_log"
1228 call messages_fatal(1)
1231 ii = index(file,
'/', back=.
true.)+1
1233 write(str,
'(4a,i5,a)') var(1:jj),
"(", trim(file(ii:len(file))),
":", line,
")"
1241 character(len=*),
intent(in) :: type
1242 character(len=*),
intent(in) :: var
1243 character(len=*),
intent(in) :: file
1244 integer,
intent(in) :: line
1245 integer(int64),
intent(in) :: size
1247 character(len=256) :: str
1248 integer(int64) :: mem
1255 mem = loct_get_memory_usage()
1257 write(
prof_vars%mem_iunit,
'(f16.6,a16,3i16,a70)') loct_clock() -
prof_vars%start_time, &
1258 trim(type),
size,
prof_vars%total_memory, mem, trim(str)
1265 character(len=*),
intent(in) :: var
1266 character(len=*),
intent(in) :: file
1267 integer,
intent(in) :: line
1268 integer(int64),
intent(in) :: size_
1271 integer(int64) :: size
1272 character(len=256) :: str
1287 message(1) =
"Memory limit set in the input file was passed"
1288 call messages_fatal(1)
1301 if (str ==
prof_vars%large_vars(ii))
then
1302 if (
size >
prof_vars%large_vars_size(ii))
then
1319 if (
size >
prof_vars%large_vars_size(ii))
then
1336 character(len=*),
intent(in) :: var
1337 character(len=*),
intent(in) :: file
1338 integer,
intent(in) :: line
1339 integer(int64),
intent(in) :: size
1358 character(len=1),
intent(in) :: c_in
1359 character(len=1) :: c_out
1361 integer,
parameter :: difference=iachar(
'a')-iachar(
'A'), la=iachar(
'a'), lz=iachar(
'z')
1362 integer :: ascii_decimal
1365 ascii_decimal = iachar(c_in)
1366 if (ascii_decimal >= la .and. ascii_decimal <= lz)
then
1367 ascii_decimal = ascii_decimal - difference
1369 c_out = achar(ascii_decimal)
1373 pure function to_upper(string)
result(upper_string)
1374 character(len=*),
intent(in) :: string
1375 character(len=len(string)) :: upper_string
1380 do i = 1, len(string)
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
real(real64), parameter, public m_huge
real(real64), parameter, public m_zero
logical pure function, public not_in_openmp()
logical, public in_profiling_mode
Same for profiling mode.
integer(int64), save io_open_count
subroutine, public io_close(iunit, grp)
integer(int64) pure function, public io_get_close_count()
integer(int64), save io_close_count
subroutine, public io_mkdir(fname, namespace, parents)
integer(int64) pure function, public io_get_open_count()
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
System information (time, memory, sysname)
character(kind=c_char, len=1) function, dimension(len_trim(f_string)+1), private string_f_to_c(f_string)
convert a Fortran string to a C string
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
character(len=512), private msg
subroutine, public messages_warning(no_lines, all_nodes, namespace)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
real(real64) function, public mpi_get_wtime()
. Returns an elapsed time on the calling processor.
type(mpi_grp_t), public mpi_world
subroutine profiling_count_tran_int(trf, type)
subroutine profiling_count_tran_int_8_l(trf, type)
subroutine, public profiling_end(namespace)
type(profile_vars_t), target, save, public prof_vars
real(real64) function profile_self_bandwidth(this)
real(real64) function profile_self_time(this)
pure character(len=1) function char_to_upper(c_in)
integer, parameter max_memory_vars
subroutine iprofiling_count_operations(ops)
integer, parameter, public profiling_memory
subroutine profiling_memory_log(type, var, file, line, size)
integer, parameter, public profiling_likwid
subroutine profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
integer, parameter, public profiling_memory_full
subroutine profiling_count_tran_real_8(trf, type)
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
subroutine profiling_count_tran_real_8_l(trf, type)
subroutine profiling_count_tran_int_8(trf, type)
subroutine rprofiling_count_operations(ops)
integer, parameter, public profiling_io
real(real64) function profile_self_throughput(this)
subroutine profiling_count_tran_complex_8_l(trf, type)
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
real(real64) function profile_total_throughput(this)
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
subroutine profiling_count_tran_real_4_l(trf, type)
subroutine dprofiling_count_operations(ops)
pure character(len=len(string)) function to_upper(string)
real(real64) function profile_total_time_per_call(this)
integer, parameter max_profiles
Max. number of tags.
subroutine profiling_count_tran_int_l(trf, type)
subroutine profiling_count_tran_complex_4_l(trf, type)
subroutine profiling_count_tran_real_4(trf, type)
type(sphash_t), save profiling_map
integer function profile_num_calls(this)
subroutine profiling_count_tran_type(trf, type)
real(real64) function profile_min_time(this)
subroutine, public profiling_memory_deallocate(var, file, line, size)
character(label_length) function profile_label(this)
real(real64) function profile_self_time_per_call(this)
subroutine, public profiling_memory_allocate(var, file, line, size_)
subroutine profiling_count_tran_type_l(trf, type)
subroutine profiling_make_position_str(var, file, line, str)
subroutine profiling_count_tran_complex_8(trf, type)
subroutine profiling_count_tran_complex_4(trf, type)
real(real64) function profile_total_bandwidth(this)
real(real64) function profile_total_time(this)
This module is intended to contain "only mathematical" functions and procedures.
This module implements a simple hash table for string valued keys and integer values using the C++ ST...
subroutine, public sphash_init(h)
Initialize a hash table h with size entries. Since we use separate chaining, the number of entries in...
subroutine, public sphash_insert(h, key, val, clone)
Insert a (key, val) pair into the hash table h. If clone=.true., the object will be copied.
subroutine, public sphash_end(h)
Free a hash table.
class(*) function, pointer, public sphash_lookup(h, key, found)
Look up a value in the hash table h. If found is present, it indicates if key could be found in the t...
integer pure function, public types_get_size(this)
subroutine get_output_dir()
recursive subroutine output_tree_level(profile, level, total_time, iunit)