54 use,
intrinsic :: iso_fortran_env
81 integer,
parameter :: &
82 LABEL_LENGTH = 40, & !< Max. number of characters of tag label.
87 character(LABEL_LENGTH) :: label
88 real(real64) :: entry_time
89 real(real64) :: total_time
90 real(real64) :: min_time
91 real(real64) :: self_time
92 real(real64) :: op_count_current
93 real(real64) :: op_count
94 real(real64) :: op_count_child
95 real(real64) :: op_count_child_current
96 real(real64) :: tr_count_current
97 real(real64) :: tr_count
98 real(real64) :: tr_count_child
99 real(real64) :: tr_count_child_current
100 type(profile_t),
pointer :: parent
102 logical :: initialized = .false.
103 logical :: active = .false.
106 logical :: has_child(MAX_PROFILES)
107 real(real64) :: timings(MAX_PROFILES)
112 type(profile_t),
pointer :: p
140 integer,
parameter,
public :: &
141 PROFILING_TIME = 1, &
147 integer,
parameter :: MAX_MEMORY_VARS = 25
151 integer,
public ::
mode
153 type(profile_pointer_t) :: current
154 type(profile_pointer_t) :: profile_list(MAX_PROFILES)
155 integer :: last_profile
157 integer(int64) :: alloc_count
158 integer(int64) :: dealloc_count
160 integer(int64) :: memory_limit = -1
161 integer(int64) :: total_memory
162 integer(int64) :: max_memory
163 character(len=256) :: max_memory_location
165 integer(int64) :: large_vars_size(max_memory_vars)
166 character(len=256) :: large_vars(max_memory_vars)
168 real(real64) :: start_time
171 character(len=256) :: output_dir
172 character(len=6) :: file_number
176 logical :: output_yaml
177 logical :: output_tree
189 type(namespace_t),
intent(in) :: namespace
277 prof_vars%memory_limit = int(ii, int64)*1024
285 namespace, action=
'write')
286 write(
prof_vars%mem_iunit,
'(5a16,a70)')
'Elapsed Time',
'Alloc/Dealloc',
'Size (words)',
'Prof Mem', &
287 'Sys Mem',
'Variable Name(Filename:Line)'
296 call likwid_markerinit()
349 real(real64),
parameter :: megabyte = 1048576.0_real64
351 integer(int64) :: io_open_count_red, io_close_count_red
360 prof_vars%profile_list(ii)%p%initialized = .false.
367 write(
message(1),
'(a,i10)')
'Number of allocations = ',
prof_vars%alloc_count
368 write(
message(2),
'(a,i10)')
'Number of deallocations = ',
prof_vars%dealloc_count
369 write(
message(3),
'(a,f18.3,a)')
'Maximum total memory allocated = ',
prof_vars%max_memory/megabyte,
' Mbytes'
374 message(2) =
'Largest variables allocated:'
376 do ii = 1, max_memory_vars
384 write(
message(1),
'(a,i10,a,i10,a)')
"Not all memory was deallocated: ",
prof_vars%alloc_count, &
385 ' allocations and ',
prof_vars%dealloc_count,
' deallocations'
389 write(
message(1),
'(a,f18.3,a,f18.3,a)')
"Remaining allocated memory: ",
prof_vars%total_memory/megabyte, &
390 ' Mbytes (out of maximum ',
prof_vars%max_memory/megabyte,
' Mbytes)'
401 call likwid_markerclose()
413 write(
message(3),
'(a,i10)')
'Global number of file open = ', io_open_count_red
414 write(
message(4),
'(a,i10)')
'Global number of file close = ', io_close_count_red
426 type(
profile_t),
target,
intent(out) :: this
427 character(*),
intent(in) :: label
433 if(len(label) > label_length)
then
434 message(1) =
"Label " // trim(label) //
" is too long for the internal profiler"
442 this%entry_time = huge(this%entry_time)
444 this%op_count_current =
m_zero
446 this%op_count_child =
m_zero
447 this%tr_count_current =
m_zero
449 this%tr_count_child =
m_zero
450 this%active = .false.
452 this%has_child = .false.
467 this%initialized = .
true.
470 do iprofile = 1,
prof_vars%last_profile - 1
471 if (
prof_vars%profile_list(iprofile)%p%label == this%label)
then
472 message(1) =
"Label "//label//
" used more than once."
486 character(*),
intent(in) :: label
487 logical,
optional,
intent(in) :: exclude
491 class(*),
pointer :: profile_p
494 character(len=len(label)) :: label_
503 if (.not. found)
then
509 select type(this => profile_p)
511 if (.not. this%initialized)
then
515 assert(.not. this%active)
519 if (
associated(
prof_vars%current%p))
then
522 this%parent%has_child(this%index) = .
true.
528 this%op_count_current =
m_zero
529 this%tr_count_current =
m_zero
530 this%op_count_child_current =
m_zero
531 this%tr_count_child_current =
m_zero
534 this%entry_time = now
538#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
545 call likwid_markerstartregion(trim(label_))
557 character(*),
intent(in) :: label
559 real(real64) :: now, time_spent
560 class(*),
pointer :: profile_p
570 select type(this => profile_p)
572 assert(this%initialized)
574 this%active = .false.
577 time_spent = now - this%entry_time
578 this%total_time = this%total_time + time_spent
579 this%self_time = this%self_time + time_spent
580 this%count = this%count + 1
581 if (time_spent < this%min_time)
then
582 this%min_time = time_spent
585 this%op_count = this%op_count + this%op_count_current
586 this%tr_count = this%tr_count + this%tr_count_current
587 this%op_count_child = this%op_count_child + this%op_count_child_current
588 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
590 if (
associated(this%parent))
then
592 this%parent%self_time = this%parent%self_time - time_spent
593 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
596 this%parent%op_count_child_current = this%parent%op_count_child_current &
597 + this%op_count_current + this%op_count_child_current
598 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
599 + this%tr_count_current + this%tr_count_child_current
601 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
612 call likwid_markerstopregion(trim(this%label))
617#if defined(HAVE_NVTX) || (defined(HAVE_HIP) && defined(__HIP_PLATFORM_AMD__))
627 integer,
intent(in) :: ops
632 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
639 real(4),
intent(in) :: ops
644 prof_vars%current%p%op_count_current =
prof_vars%current%p%op_count_current + real(ops, real64)
651 real(real64),
intent(in) :: ops
664 integer(int64),
intent(in) :: trf
665 integer,
intent(in) :: type
670 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
677 integer(int64),
intent(in) :: trf
678 integer(int64),
intent(in) :: type
683 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
690 integer(int64),
intent(in) :: trf
691 real(4),
intent(in) :: type
696 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
704 integer(int64),
intent(in) :: trf
705 real(real64),
intent(in) :: type
710 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
718 integer(int64),
intent(in) :: trf
719 complex(4),
intent(in) :: type
724 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
732 integer(int64),
intent(in) :: trf
733 complex(real64),
intent(in) :: type
738 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
746 integer(int64),
intent(in) :: trf
747 type(
type_t),
intent(in) :: type
760 integer,
intent(in) :: trf
761 integer,
intent(in) :: type
766 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
773 integer,
intent(in) :: trf
774 integer(int64),
intent(in) :: type
779 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
786 integer,
intent(in) :: trf
787 real(4),
intent(in) :: type
792 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 4*real(trf, real64)
800 integer,
intent(in) :: trf
801 real(real64),
intent(in) :: type
806 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
814 integer,
intent(in) :: trf
815 complex(4),
intent(in) :: type
820 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 8*real(trf, real64)
828 integer,
intent(in) :: trf
829 complex(real64),
intent(in) :: type
834 prof_vars%current%p%tr_count_current =
prof_vars%current%p%tr_count_current + 16*real(trf, real64)
842 integer,
intent(in) :: trf
843 type(
type_t),
intent(in) :: type
853 real(real64) function profile_total_time(this)
856 push_sub(profile_total_time)
857 profile_total_time = this%total_time
859 pop_sub(profile_total_time)
864 real(real64) function profile_self_time(this)
875 real(real64) function profile_total_time_per_call(this)
886 real(real64) function profile_min_time(this)
897 real(real64) function profile_self_time_per_call(this)
908 real(real64) function profile_total_throughput(this)
911 push_sub(profile_throughput)
913 if (this%total_time > epsilon(this%total_time))
then
919 pop_sub(profile_throughput)
928 push_sub(profile_bandwidth)
930 if (this%total_time > epsilon(this%total_time))
then
936 pop_sub(profile_bandwidth)
944 push_sub(profile_throughput)
946 if (this%self_time > epsilon(this%self_time))
then
952 pop_sub(profile_throughput)
960 push_sub(profile_bandwidth)
962 if (this%self_time > epsilon(this%self_time))
then
968 pop_sub(profile_bandwidth)
1002 type(namespace_t),
intent(in) :: namespace
1006 real(real64) :: total_time
1008 character(len=256) :: filename
1009 real(real64),
allocatable :: selftime(:)
1010 integer,
allocatable :: position(:)
1011 class(*),
pointer :: profile_p
1014 if (.not. in_profiling_mode)
return
1017 call mpi_world%barrier()
1019 if (.not.
prof_vars%all_nodes .and. .not. mpi_world%is_root())
then
1025 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1026 if (iunit == -1)
then
1027 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1028 call messages_warning(1)
1033 write(iunit,
'(2a)') &
1034 ' CUMULATIVE TIME ', &
1036 write(iunit,
'(2a)') &
1037 ' ----------------------------------------------------------', &
1038 '----------------|-------------------------------------------------------------'
1039 write(iunit,
'(2a)') &
1040 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1041 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1042 write(iunit,
'(2a)') &
1043 '===================================================================================================', &
1044 '=================|============================================================='
1046 profile_p => sphash_lookup(
profiling_map,
"COMPLETE_RUN", found)
1048 select type(complete_run => profile_p)
1053 safe_allocate(selftime(1:
prof_vars%last_profile))
1054 safe_allocate(position(1:
prof_vars%last_profile))
1061 call sort(selftime, position)
1064 prof =>
prof_vars%profile_list(position(ii))%p
1065 if (.not. prof%initialized)
then
1066 write(message(1),
'(a,i6,a)')
"Internal error: Profile number ", position(ii),
" is not initialized."
1067 call messages_fatal(1)
1069 if (prof%active)
then
1070 write(message(1),
'(a)')
"Internal error: Profile '" // trim(
profile_label(prof)) // &
1071 "' is active, i.e. profiling_out was not called."
1072 call messages_warning(1)
1077 write(iunit,
'(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1094 call io_close(iunit)
1098 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1099 if (iunit == -1)
then
1100 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1101 call messages_warning(1)
1105 write(iunit,
'(2a)')
'schema: [num_calls, total_time, total_throughput, ', &
1106 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1107 write(iunit,
'(a)')
'data:'
1110 prof =>
prof_vars%profile_list(position(ii))%p
1112 write(iunit,
'(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1123 call io_close(iunit)
1126 safe_deallocate_a(selftime)
1127 safe_deallocate_a(position)
1131 iunit = io_open(trim(filename), namespace, action=
'write', die=.false.)
1132 if (iunit == -1)
then
1133 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1134 call messages_warning(1)
1138 write(iunit,
'(a40,a11,a11,a12)') &
1139 "Tree level, region ", &
1140 "% of total ",
"% of parent", &
1143 select type(complete_run => profile_p)
1146 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1147 repeat(
'-', 0) //
'| ', &
1149 repeat(
' ', 15-0-2), &
1155 write(iunit,
'(a)')
"// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1156 write(iunit,
'(a)')
"// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1157 call io_close(iunit)
1165 integer,
intent(in) :: level
1166 real(real64),
intent(in) :: total_time
1167 integer,
intent(in) :: iunit
1169 integer :: ichild, width
1175 if (profile%has_child(ichild))
then
1178 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1179 repeat(
'-', level) //
'| ', &
1181 repeat(
' ', width-level-2), &
1182 profile%timings(ichild)/total_time * 100,
"% ", &
1183 profile%timings(ichild)/profile%total_time * 100,
"% ", &
1184 profile%timings(ichild)
1186 level+1, total_time, iunit)
1196 character(len=*),
intent(in) :: var
1197 character(len=*),
intent(in) :: file
1198 integer,
intent(in) :: line
1199 character(len=*),
intent(out) :: str
1201 integer :: ii, jj, nn
1206 if (var(jj:jj) ==
')')
then
1208 do ii = len(var)-1, 1, -1
1210 if (var(ii:ii) ==
')') nn = nn + 1
1211 if (var(ii:ii) ==
'(') nn = nn - 1
1215 message(1) =
"Internal Error in profiling_memory_log"
1216 call messages_fatal(1)
1219 ii = index(file,
'/', back=.
true.)+1
1221 write(str,
'(4a,i5,a)') var(1:jj),
"(", trim(file(ii:len(file))),
":", line,
")"
1229 character(len=*),
intent(in) :: type
1230 character(len=*),
intent(in) :: var
1231 character(len=*),
intent(in) :: file
1232 integer,
intent(in) :: line
1233 integer(int64),
intent(in) :: size
1235 character(len=256) :: str
1236 integer(int64) :: mem
1243 mem = loct_get_memory_usage()
1245 write(
prof_vars%mem_iunit,
'(f16.6,a16,3i16,a70)') loct_clock() -
prof_vars%start_time, &
1246 trim(type),
size,
prof_vars%total_memory, mem, trim(str)
1253 character(len=*),
intent(in) :: var
1254 character(len=*),
intent(in) :: file
1255 integer,
intent(in) :: line
1256 integer(int64),
intent(in) :: size_
1259 integer(int64) :: size
1260 character(len=256) :: str
1275 message(1) =
"Memory limit set in the input file was passed"
1276 call messages_fatal(1)
1289 if (str ==
prof_vars%large_vars(ii))
then
1290 if (
size >
prof_vars%large_vars_size(ii))
then
1307 if (
size >
prof_vars%large_vars_size(ii))
then
1324 character(len=*),
intent(in) :: var
1325 character(len=*),
intent(in) :: file
1326 integer,
intent(in) :: line
1327 integer(int64),
intent(in) :: size
1346 character(len=1),
intent(in) :: c_in
1347 character(len=1) :: c_out
1349 integer,
parameter :: difference=iachar(
'a')-iachar(
'A'), la=iachar(
'a'), lz=iachar(
'z')
1350 integer :: ascii_decimal
1353 ascii_decimal = iachar(c_in)
1354 if (ascii_decimal >= la .and. ascii_decimal <= lz)
then
1355 ascii_decimal = ascii_decimal - difference
1357 c_out = achar(ascii_decimal)
1361 pure function to_upper(string)
result(upper_string)
1362 character(len=*),
intent(in) :: string
1363 character(len=len(string)) :: upper_string
1368 do i = 1, len(string)
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
real(real64), parameter, public m_huge
real(real64), parameter, public m_zero
logical pure function, public not_in_openmp()
logical, public in_profiling_mode
Same for profiling mode.
integer(int64), save io_open_count
subroutine, public io_close(iunit, grp)
integer(int64) pure function, public io_get_close_count()
integer(int64), save io_close_count
subroutine, public io_mkdir(fname, namespace, parents)
integer(int64) pure function, public io_get_open_count()
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
character(len=512), private msg
subroutine, public messages_warning(no_lines, all_nodes, namespace)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
real(real64) function, public mpi_get_wtime()
. Returns an elapsed time on the calling processor.
type(mpi_grp_t), public mpi_world
subroutine profiling_count_tran_int(trf, type)
subroutine profiling_count_tran_int_8_l(trf, type)
subroutine, public profiling_end(namespace)
type(profile_vars_t), target, save, public prof_vars
real(real64) function profile_self_bandwidth(this)
real(real64) function profile_self_time(this)
pure character(len=1) function char_to_upper(c_in)
integer, parameter max_memory_vars
subroutine iprofiling_count_operations(ops)
integer, parameter, public profiling_memory
subroutine profiling_memory_log(type, var, file, line, size)
integer, parameter, public profiling_likwid
subroutine profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
integer, parameter, public profiling_memory_full
subroutine profiling_count_tran_real_8(trf, type)
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
subroutine profiling_count_tran_real_8_l(trf, type)
subroutine profiling_count_tran_int_8(trf, type)
subroutine rprofiling_count_operations(ops)
integer, parameter, public profiling_io
real(real64) function profile_self_throughput(this)
subroutine profiling_count_tran_complex_8_l(trf, type)
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
real(real64) function profile_total_throughput(this)
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
subroutine profiling_count_tran_real_4_l(trf, type)
subroutine dprofiling_count_operations(ops)
pure character(len=len(string)) function to_upper(string)
real(real64) function profile_total_time_per_call(this)
integer, parameter max_profiles
Max. number of tags.
subroutine profiling_count_tran_int_l(trf, type)
subroutine profiling_count_tran_complex_4_l(trf, type)
subroutine profiling_count_tran_real_4(trf, type)
type(sphash_t), save profiling_map
integer function profile_num_calls(this)
subroutine profiling_count_tran_type(trf, type)
real(real64) function profile_min_time(this)
subroutine, public profiling_memory_deallocate(var, file, line, size)
character(label_length) function profile_label(this)
real(real64) function profile_self_time_per_call(this)
subroutine, public profiling_memory_allocate(var, file, line, size_)
subroutine profiling_count_tran_type_l(trf, type)
subroutine profiling_make_position_str(var, file, line, str)
subroutine profiling_count_tran_complex_8(trf, type)
subroutine profiling_count_tran_complex_4(trf, type)
real(real64) function profile_total_bandwidth(this)
real(real64) function profile_total_time(this)
This module is intended to contain "only mathematical" functions and procedures.
This module implements a simple hash table for string valued keys and integer values using the C++ ST...
subroutine, public sphash_init(h)
Initialize a hash table h with size entries. Since we use separate chaining, the number of entries in...
subroutine, public sphash_insert(h, key, val, clone)
Insert a (key, val) pair into the hash table h. If clone=.true., the object will be copied.
subroutine, public sphash_end(h)
Free a hash table.
class(*) function, pointer, public sphash_lookup(h, key, found)
Look up a value in the hash table h. If found is present, it indicates if key could be found in the t...
integer pure function, public types_get_size(this)
subroutine get_output_dir()
recursive subroutine output_tree_level(profile, level, total_time, iunit)