88 integer,
parameter :: &
89 LABEL_LENGTH = 25, & !< Max. number of characters of tag label.
94 character(LABEL_LENGTH) :: label
99 float :: op_count_current
101 float :: op_count_child
102 float :: op_count_child_current
103 float :: tr_count_current
105 float :: tr_count_child
106 float :: tr_count_child_current
107 type(profile_t),
pointer :: parent
109 logical :: initialized = .false.
110 logical :: active = .false.
113 logical :: has_child(MAX_PROFILES)
114 float :: timings(max_profiles)
119 type(profile_t),
pointer :: p
147 integer,
parameter,
public :: &
162 integer :: last_profile
164 integer(i8) :: alloc_count
165 integer(i8) :: dealloc_count
167 integer(i8) :: memory_limit
168 integer(i8) :: total_memory
169 integer(i8) :: max_memory
170 character(len=256) :: max_memory_location
178 character(len=256) :: output_dir
179 character(len=6) :: file_number
183 logical :: output_yaml
184 logical :: output_tree
187 type(profile_vars_t),
target,
public :: prof_vars
193 type(profile_t),
save,
public :: C_PROFILING_COMPLETE_RUN
257 call parse_variable(namespace,
'ProfilingAllNodes', .false., prof_vars%all_nodes)
267 prof_vars%alloc_count = 0
268 prof_vars%dealloc_count = 0
270 prof_vars%total_memory = 0
271 prof_vars%max_memory = 0
272 prof_vars%max_memory_location =
''
275 prof_vars%large_vars_size(:) = 0
276 prof_vars%large_vars(:) =
''
288 prof_vars%memory_limit = int(ii, 8)*1024
295 prof_vars%mem_iunit =
io_open(trim(prof_vars%output_dir)//
'/memory.'//prof_vars%file_number, &
296 namespace, action=
'write')
297 write(prof_vars%mem_iunit,
'(5a16,a70)')
'Elapsed Time',
'Alloc/Dealloc',
'Size (words)',
'Prof Mem', &
298 'Sys Mem',
'Variable Name(Filename:Line)'
302 prof_vars%last_profile = 0
303 nullify(prof_vars%current%p)
307 call likwid_markerinit()
319 call parse_variable(namespace,
'ProfilingOutputYAML', .false., prof_vars%output_yaml)
331 call profiling_in(c_profiling_complete_run,
'COMPLETE_RUN')
342 write(prof_vars%file_number,
'(i6.6)')
mpi_world%rank
344 prof_vars%output_dir =
'profiling'
358 float,
parameter :: megabyte = cnst(1048576.0)
360 integer(i8) :: io_open_count_red, io_close_count_red
368 do ii = 1, prof_vars%last_profile
369 prof_vars%profile_list(ii)%p%initialized = .false.
374 write(
message(1),
'(a,i10)')
'Number of allocations = ', prof_vars%alloc_count
375 write(
message(2),
'(a,i10)')
'Number of deallocations = ', prof_vars%dealloc_count
376 write(
message(3),
'(a,f18.3,a)')
'Maximum total memory allocated = ', prof_vars%max_memory/megabyte,
' Mbytes'
377 write(
message(4),
'(2x,a,a)')
'at ', trim(prof_vars%max_memory_location)
381 message(2) =
'Largest variables allocated:'
384 write(
message(1),
'(i2,f18.3,2a)') ii, prof_vars%large_vars_size(ii)/megabyte,
' Mbytes ', trim(prof_vars%large_vars(ii))
390 if (prof_vars%alloc_count /= prof_vars%dealloc_count)
then
391 write(
message(1),
'(a,i10,a,i10,a)')
"Not all memory was deallocated: ", prof_vars%alloc_count, &
392 ' allocations and ', prof_vars%dealloc_count,
' deallocations'
395 if (prof_vars%total_memory > 0)
then
396 write(
message(1),
'(a,f18.3,a,f18.3,a)')
"Remaining allocated memory: ", prof_vars%total_memory/megabyte, &
397 ' Mbytes (out of maximum ', prof_vars%max_memory/megabyte,
' Mbytes)'
408 call likwid_markerclose()
420 write(
message(3),
'(a,i10)')
'Global number of file open = ', io_open_count_red
421 write(
message(4),
'(a,i10)')
'Global number of file close = ', io_close_count_red
433 type(
profile_t),
target,
intent(out) :: this
434 character(*),
intent(in) :: label
440 if(len(label) > label_length)
then
441 message(1) =
"Label " // trim(label) //
" is too long for the internal profiler"
449 this%entry_time = huge(this%entry_time)
451 this%op_count_current =
m_zero
453 this%op_count_child =
m_zero
454 this%tr_count_current =
m_zero
456 this%tr_count_child =
m_zero
457 this%active = .false.
459 this%has_child = .false.
468 prof_vars%last_profile = prof_vars%last_profile + 1
472 prof_vars%profile_list(prof_vars%last_profile)%p => this
473 this%index = prof_vars%last_profile
474 this%initialized = .
true.
477 do iprofile = 1, prof_vars%last_profile - 1
478 if (prof_vars%profile_list(iprofile)%p%label == this%label)
then
479 message(1) =
"Label "//label//
" used more than once."
504 type(
profile_t),
target,
intent(inout) :: this
505 character(*),
intent(in) :: label
506 logical,
optional,
intent(in) :: exclude
516 if (.not. this%initialized)
then
520 assert(.not. this%active)
527 if (
associated(prof_vars%current%p))
then
529 this%parent => prof_vars%current%p
530 this%parent%has_child(this%index) = .
true.
536 this%op_count_current =
m_zero
537 this%tr_count_current =
m_zero
538 this%op_count_child_current =
m_zero
539 this%tr_count_child_current =
m_zero
541 prof_vars%current%p => this
542 this%entry_time = now
548 call likwid_markerstartregion(trim(label))
566 float :: now, time_spent
573 assert(this%initialized)
575 this%active = .false.
582 time_spent = now - this%entry_time
583 this%total_time = this%total_time + time_spent
584 this%self_time = this%self_time + time_spent
585 this%count = this%count + 1
586 if (time_spent < this%min_time)
then
587 this%min_time = time_spent
590 this%op_count = this%op_count + this%op_count_current
591 this%tr_count = this%tr_count + this%tr_count_current
592 this%op_count_child = this%op_count_child + this%op_count_child_current
593 this%tr_count_child = this%tr_count_child + this%tr_count_child_current
595 if (
associated(this%parent))
then
597 this%parent%self_time = this%parent%self_time - time_spent
598 if (this%exclude) this%parent%total_time = this%parent%total_time - time_spent
601 this%parent%op_count_child_current = this%parent%op_count_child_current &
602 + this%op_count_current + this%op_count_child_current
603 this%parent%tr_count_child_current = this%parent%tr_count_child_current &
604 + this%tr_count_current + this%tr_count_child_current
606 this%parent%timings(this%index) = this%parent%timings(this%index) + time_spent
609 prof_vars%current%p => this%parent
612 nullify(prof_vars%current%p)
617 call likwid_markerstopregion(trim(this%label))
631 integer,
intent(in) :: ops
636 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + tofloat(ops)
643 real(4),
intent(in) :: ops
648 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + tofloat(ops)
655 real(r8),
intent(in) :: ops
660 prof_vars%current%p%op_count_current = prof_vars%current%p%op_count_current + ops
668 integer(i8),
intent(in) :: trf
669 integer,
intent(in) :: type
674 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
681 integer(i8),
intent(in) :: trf
682 integer(i8),
intent(in) :: type
687 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
694 integer(i8),
intent(in) :: trf
695 real(4),
intent(in) :: type
700 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
708 integer(i8),
intent(in) :: trf
709 real(r8),
intent(in) :: type
714 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
722 integer(i8),
intent(in) :: trf
723 complex(4),
intent(in) :: type
728 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
736 integer(i8),
intent(in) :: trf
737 complex(r8),
intent(in) :: type
742 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 16*tofloat(trf)
750 integer(i8),
intent(in) :: trf
751 type(
type_t),
intent(in) :: type
756 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + tofloat(trf)*
types_get_size(type)
764 integer,
intent(in) :: trf
765 integer,
intent(in) :: type
770 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
777 integer,
intent(in) :: trf
778 integer(i8),
intent(in) :: type
783 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
790 integer,
intent(in) :: trf
791 real(4),
intent(in) :: type
796 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 4*tofloat(trf)
804 integer,
intent(in) :: trf
805 real(r8),
intent(in) :: type
810 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
818 integer,
intent(in) :: trf
819 complex(4),
intent(in) :: type
824 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 8*tofloat(trf)
832 integer,
intent(in) :: trf
833 complex(r8),
intent(in) :: type
838 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + 16*tofloat(trf)
846 integer,
intent(in) :: trf
847 type(
type_t),
intent(in) :: type
852 prof_vars%current%p%tr_count_current = prof_vars%current%p%tr_count_current + tofloat(trf)*
types_get_size(type)
917 if (this%total_time > epsilon(this%total_time))
then
932 push_sub(profile_bandwidth)
934 if (this%total_time > epsilon(this%total_time))
then
940 pop_sub(profile_bandwidth)
948 push_sub(profile_throughput)
950 if (this%self_time > epsilon(this%self_time))
then
956 pop_sub(profile_throughput)
964 push_sub(profile_bandwidth)
966 if (this%self_time > epsilon(this%self_time))
then
972 pop_sub(profile_bandwidth)
1006 type(namespace_t),
intent(in) :: namespace
1012 character(len=256) :: filename
1013 float,
allocatable :: selftime(:)
1014 integer,
allocatable :: position(:)
1016 if (.not. in_profiling_mode)
return
1019 call mpi_world%barrier()
1021 if (.not.
prof_vars%all_nodes .and. .not. mpi_grp_is_root(mpi_world))
then
1027 iunit = io_open(trim(filename), namespace, action=
'write')
1029 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1030 call messages_warning(1)
1035 write(iunit,
'(2a)') &
1036 ' CUMULATIVE TIME ', &
1038 write(iunit,
'(2a)') &
1039 ' ----------------------------------------------------------', &
1040 '----------------|-------------------------------------------------------------'
1041 write(iunit,
'(2a)') &
1042 'TAG NUM_CALLS TOTAL_TIME TIME_PER_CALL MIN_TIME ', &
1043 ' MFLOPS MBYTES/S %TIME | TOTAL_TIME TIME_PER_CALL MFLOPS MBYTES/S %TIME'
1044 write(iunit,
'(2a)') &
1045 '===================================================================================================', &
1046 '=================|============================================================='
1050 safe_allocate(selftime(1:
prof_vars%last_profile))
1051 safe_allocate(position(1:
prof_vars%last_profile))
1058 call sort(selftime, position)
1061 prof =>
prof_vars%profile_list(position(ii))%p
1062 if (.not. prof%initialized)
then
1063 write(message(1),
'(a,i6,a)')
"Internal error: Profile number ", position(ii),
" is not initialized."
1064 call messages_fatal(1)
1066 if (prof%active)
then
1067 write(message(1),
'(a)')
"Internal error: Profile '" // trim(
profile_label(prof)) // &
1068 "' is active, i.e. profiling_out was not called."
1069 call messages_warning(1)
1074 write(iunit,
'(a,i14,3f16.6,2f10.1,f8.1,a,2f16.6,2f10.1,f8.1)') &
1091 call io_close(iunit)
1095 iunit = io_open(trim(filename), namespace, action=
'write')
1097 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1098 call messages_warning(1)
1102 write(iunit,
'(2a)')
'schema: [num_calls, total_time, total_throughput, ', &
1103 'total_bandwidth, self_time, self_throughput, self_bandwidth]'
1104 write(iunit,
'(a)')
'data:'
1107 prof =>
prof_vars%profile_list(position(ii))%p
1109 write(iunit,
'(a,a,a,i6,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a,e10.3,a)') &
1120 call io_close(iunit)
1123 safe_deallocate_a(selftime)
1124 safe_deallocate_a(position)
1128 iunit = io_open(trim(filename), namespace, action=
'write')
1130 message(1) =
'Failed to open file ' // trim(filename) //
' to write profiling results.'
1131 call messages_warning(1)
1135 write(iunit,
'(a40,a11,a11,a12)') &
1136 "Tree level, region ", &
1137 "% of total ",
"% of parent", &
1141 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1142 repeat(
'-', 0) //
'| ', &
1144 repeat(
' ', 15-0-2), &
1149 write(iunit,
'(a)')
"// modeline for vim to enable folding (put in ~/.vimrc: set modeline modelineexpr)"
1150 write(iunit,
'(a)')
"// vim: fdm=expr fde=getline(v\:lnum)=~'.*\|.*'?len(split(getline(v\:lnum))[0])-1\:0"
1151 call io_close(iunit)
1159 integer,
intent(in) :: level
1160 float,
intent(in) :: total_time
1161 integer,
intent(in) :: iunit
1163 integer :: ichild, width
1169 if (profile%has_child(ichild))
then
1172 write(iunit,
'(a,a25,a,f8.2,a,f8.2,a,f12.4)') &
1173 repeat(
'-', level) //
'| ', &
1175 repeat(
' ', width-level-2), &
1176 profile%timings(ichild)/total_time * 100,
"% ", &
1177 profile%timings(ichild)/profile%total_time * 100,
"% ", &
1178 profile%timings(ichild)
1180 level+1, total_time, iunit)
1190 character(len=*),
intent(in) :: var
1191 character(len=*),
intent(in) :: file
1192 integer,
intent(in) :: line
1193 character(len=*),
intent(out) :: str
1195 integer :: ii, jj, nn
1200 if (var(jj:jj) ==
')')
then
1202 do ii = len(var)-1, 1, -1
1204 if (var(ii:ii) ==
')') nn = nn + 1
1205 if (var(ii:ii) ==
'(') nn = nn - 1
1209 message(1) =
"Internal Error in profiling_memory_log"
1210 call messages_fatal(1)
1214 do while (file(ii:ii+2) ==
"../")
1217 write(str,
'(4a,i5,a)') var(1:jj),
"(", trim(file(ii:len(file))),
":", line,
")"
1225 character(len=*),
intent(in) :: type
1226 character(len=*),
intent(in) :: var
1227 character(len=*),
intent(in) :: file
1228 integer,
intent(in) :: line
1229 integer(i8),
intent(in) :: size
1231 character(len=256) :: str
1239 mem = loct_get_memory_usage()
1241 write(
prof_vars%mem_iunit,
'(f16.6,a16,3i16,a70)') loct_clock() -
prof_vars%start_time, &
1249 character(len=*),
intent(in) :: var
1250 character(len=*),
intent(in) :: file
1251 integer,
intent(in) :: line
1252 integer(i8),
intent(in) :: size_
1256 character(len=256) :: str
1271 message(1) =
"Memory limit set in the input file was passed"
1272 call messages_fatal(1)
1285 if (str ==
prof_vars%large_vars(ii))
then
1286 if (
size >
prof_vars%large_vars_size(ii))
then
1303 if (
size >
prof_vars%large_vars_size(ii))
then
1320 character(len=*),
intent(in) :: var
1321 character(len=*),
intent(in) :: file
1322 integer,
intent(in) :: line
1323 integer(i8),
intent(in) :: size
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
real(8), parameter, public m_huge
logical function, public not_in_openmp()
logical, public in_profiling_mode
Same for profiling mode.
real(8), parameter, public m_zero
subroutine, public io_close(iunit, grp)
integer(i8) pure function, public io_get_open_count()
integer(i8), save io_open_count
subroutine, public io_mkdir(fname, namespace, parents)
integer(i8) pure function, public io_get_close_count()
integer(i8), save io_close_count
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
subroutine, public messages_print_with_emphasis(msg, iunit, namespace)
character(len=512), private msg
subroutine, public push_sub(sub_name)
subroutine, public messages_warning(no_lines, all_nodes, namespace)
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public pop_sub(sub_name)
subroutine, public messages_input_error(namespace, var, details, row, column)
logical function mpi_grp_is_root(grp)
type(mpi_grp_t), public mpi_world
real(8) function profile_total_time_per_call(this)
subroutine, public profiling_in(this, label, exclude)
Increment in counter and save entry time.
subroutine, public profiling_output(namespace)
Write profiling results of each node to profiling.NNN/profiling.nnn The format of each line is tag-la...
subroutine profiling_count_tran_int(trf, type)
subroutine profiling_count_tran_int_8_l(trf, type)
subroutine, public profiling_end(namespace)
real(8) function profile_min_time(this)
type(profile_t), save, public c_profiling_complete_run
For the moment we will have the profiler objects here, but they should be moved to their respective m...
integer, parameter max_memory_vars
subroutine iprofiling_count_operations(ops)
integer, parameter, public profiling_memory
subroutine profiling_memory_log(type, var, file, line, size)
real(8) function profile_self_throughput(this)
integer, parameter, public profiling_likwid
real(8) function profile_self_time_per_call(this)
integer, parameter, public profiling_memory_full
real(8) function profile_self_time(this)
subroutine profiling_count_tran_real_8(trf, type)
subroutine, public profiling_out(this)
Increment out counter and sum up difference between entry and exit time.
subroutine profiling_count_tran_real_8_l(trf, type)
subroutine profiling_count_tran_int_8(trf, type)
subroutine rprofiling_count_operations(ops)
type(profile_vars_t), target, public prof_vars
integer, parameter, public profiling_io
subroutine profiling_count_tran_complex_8_l(trf, type)
real(8) function profile_self_bandwidth(this)
subroutine profile_init(this, label)
Initialize a profile object and add it to the list.
subroutine, public profiling_init(namespace)
Create profiling subdirectory.
subroutine profiling_count_tran_real_4_l(trf, type)
subroutine dprofiling_count_operations(ops)
integer, parameter max_profiles
Max. number of tags.
subroutine profiling_count_tran_int_l(trf, type)
subroutine profiling_count_tran_complex_4_l(trf, type)
subroutine profiling_count_tran_real_4(trf, type)
integer, parameter, public profiling_time
integer function profile_num_calls(this)
subroutine profiling_count_tran_type(trf, type)
subroutine, public profiling_memory_deallocate(var, file, line, size)
character(label_length) function profile_label(this)
subroutine, public profiling_memory_allocate(var, file, line, size_)
subroutine profiling_count_tran_type_l(trf, type)
subroutine profiling_make_position_str(var, file, line, str)
subroutine profiling_count_tran_complex_8(trf, type)
subroutine profiling_count_tran_complex_4(trf, type)
real(r8) function profile_total_throughput(this)
real(8) function profile_total_bandwidth(this)
logical function, public profile_is_initialized(this)
real(8) function profile_total_time(this)
This module is intended to contain "only mathematical" functions and procedures.
integer pure function, public types_get_size(this)
subroutine get_output_dir()
recursive subroutine output_tree_level(profile, level, total_time, iunit)