use, intrinsic :: iso_fortran_env

#if defined(HAVE_OPENMP)

integer, public, parameter :: &

integer, public, parameter :: &

integer, parameter :: n_par_types = 4
character(len=11), parameter :: par_types(0:n_par_types) = &

integer, parameter :: MAX_INDEX = 5

integer, public :: par_strategy
integer, allocatable :: group_sizes(:)
integer, allocatable, public :: who_am_i(:)
type(MPI_Comm), allocatable, public :: group_comm(:)
type(MPI_Comm), public :: dom_st_comm
type(MPI_Comm), public :: st_kpt_comm
type(MPI_Comm), public :: dom_st_kpt_comm
type(mpi_grp_t), public :: intranode_grp
type(mpi_grp_t), public :: internode_grp

logical :: have_slaves

type(MPI_Comm) :: full_comm
integer :: full_comm_rank
type(MPI_Comm), public :: master_comm
integer :: master_comm_rank
type(MPI_Comm), public :: slave_intercomm

logical :: reorder_ranks

type(mpi_grp_t) :: grp
integer, allocatable, public :: schedule(:, :)
type(multicomm_all_pairs_t), intent(inout) :: apout
type(multicomm_all_pairs_t), intent(in) :: apin

apout%rounds = apin%rounds
if (allocated(apin%schedule)) then
  safe_allocate(apout%schedule(1:size(apin%schedule, 1), 1:size(apin%schedule, 2)))
  apout%schedule = apin%schedule
subroutine multicomm_init(mc, namespace, base_grp, mode_para, n_node, index_range, min_range)
  type(multicomm_t), intent(out) :: mc
  type(namespace_t), intent(in) :: namespace
  type(mpi_grp_t), intent(in) :: base_grp
  type(calc_mode_par_t), intent(in) :: mode_para
  integer, intent(in) :: n_node
  integer(int64), intent(inout) :: index_range(:)
  integer, intent(in) :: min_range(:)

  integer :: ii, num_slaves, slave_level, ipar
  integer :: parse(1:P_STRATEGY_MAX), default(1:P_STRATEGY_MAX)
  integer :: parallel_mask, default_mask

  parallel_mask = mode_para%parallel_mask()
  default_mask = mode_para%default_parallel_mask()

  call parse_variable(namespace, 'ReorderRanks', .false., mc%reorder_ranks)

  do ipar = 1, p_strategy_max
    if (bitand(default_mask, ibset(0, ipar - 1)) /= 0) then
      default(ipar) = par_auto

  do ipar = 1, p_strategy_max
    if (parse(ipar) == par_no) parse(ipar) = 1

  mc%have_slaves = .false.

  safe_allocate(mc%group_sizes(1:p_strategy_max))

  do ipar = 1, p_strategy_max
      mc%group_sizes(ipar) = parse(ipar)
    else if (parse(ipar) /= 1) then
      call messages_write('Ignoring specification for ' // par_types(ipar))
      call messages_write('This parallelization strategy is not available.')

  call parse_variable(namespace, 'ParallelizationNumberSlaves', 0, num_slaves)

  mc%have_slaves = (num_slaves > 0)

  if (mc%have_slaves) then

  do ii = 1, p_strategy_max
    if (mc%group_sizes(ii) == 1) mc%par_strategy = ibclr(mc%par_strategy, ii - 1)
  if (base_grp%size > 1) then

    do ipar = 1, p_strategy_max
      if (parse(ipar) == par_auto .or. parse(ipar) > 1) then
        mc%par_strategy = ibset(mc%par_strategy, ipar - 1)

    if (mc%par_strategy /= bitand(mc%par_strategy, parallel_mask)) then
      call messages_write('Parallelization strategies unavailable for this run mode are being discarded.')

    mc%par_strategy = bitand(mc%par_strategy, parallel_mask)

      message(1) = "More than one node is available, but this run mode cannot run with the requested parallelization."
      message(2) = "Please select a parallelization strategy compatible with"
      do ii = 1, n_par_types
        if (bitand(parallel_mask, 2**(ii - 1)) /= 0) then
          write(message(jj), '(2a)') " -> ", par_types(ii)

      write(message(jj), '(a,i6)') "mc%par_strategy is : ", mc%par_strategy

#if defined(HAVE_OPENMP)
  mc%nthreads = omp_get_num_threads()

    message(1) = "Info: Octopus will run in *serial*"

    write(message(1), '(a)') 'Info: Octopus will run in *parallel*'
    write(message(3), '(a, i8)') ' Number of processes :', base_grp%size
    write(message(4), '(a, i8)') ' Number of threads per process :', mc%nthreads
    integer :: ii, nn, kk, n_divisors, divisors(1:50)
    integer(int64) :: n_group_max(1:p_strategy_max)

    n_group_max(1:p_strategy_max) = max(index_range(1:p_strategy_max), 1)
    do kk = 1, p_strategy_max

    do kk = 1, p_strategy_max

    do ipar = p_strategy_max, 1, -1

      if (mc%group_sizes(ipar) == par_auto) cycle

      if (mc%group_sizes(ipar) > n_group_max(ipar)) then
        call messages_write('The number of processors specified for '//par_types(ipar)//'(')
        call messages_write('is larger than the degrees of freedom for that level (')

      if (mod(nn, mc%group_sizes(ipar)) /= 0) then
        call messages_write('The number of processors specified for '//par_types(ipar)//'(')
        call messages_write('is not a divisor of the number of processors (')

      nn = nn/mc%group_sizes(ipar)

    do ipar = p_strategy_max, 1, -1

      if (mc%group_sizes(ipar) /= par_auto) cycle

      n_divisors = ubound(divisors, dim = 1)

      mc%group_sizes(ipar) = nn
      do ii = 2, n_divisors
        if (divisors(ii) > n_group_max(ipar)) then
          mc%group_sizes(ipar) = divisors(ii - 1)

      nn = nn/mc%group_sizes(ipar)
    integer(int64) :: jj, n_max
    integer :: real_group_sizes(1:MAX_INDEX)

    if (num_slaves > 0) then

      if (mc%group_sizes(slave_level) < num_slaves + 1) then
        message(1) = 'Too many nodes assigned to task parallelization.'

      write(message(1), '(a,i6)') 'Info: Number of slaves nodes :', &
        num_slaves*product(mc%group_sizes(1:slave_level - 1))

    do kk = p_strategy_max, 1, -1
      real_group_sizes(kk) = mc%group_sizes(kk)

      if (kk == slave_level) real_group_sizes(kk) = real_group_sizes(kk) - num_slaves
      write(message(ii), '(3a,i6,a,i12,a)') 'Info: Number of nodes in ', &
        par_types(kk), ' group:', real_group_sizes(kk), ' (', index_range(kk), ')'

    if (product(mc%group_sizes(1:p_strategy_max)) /= base_grp%size) then
      write(message(1), '(a)') 'Inconsistent number of processors:'
      write(message(2), '(a,i6)') ' MPI processes = ', base_grp%size
      write(message(3), '(a,i6)') ' Required processes = ', product(mc%group_sizes(1:p_strategy_max))

      message(5) = 'You probably have a problem in the ParDomains, ParStates, ParKPoints or ParOther.'

    if (any(real_group_sizes(1:p_strategy_max) > index_range(1:p_strategy_max))) then
      message(1) = "Could not distribute nodes in parallel job. Most likely you are trying to"
      message(2) = "use too many nodes for the job."

    if (any(index_range(1:p_strategy_max) / real_group_sizes(1:p_strategy_max) < min_range(1:p_strategy_max) .and. &
      real_group_sizes(1:p_strategy_max) > 1)) then
      message(1) = "I have fewer elements in a parallel group than recommended."
      message(2) = "Maybe you should reduce the number of nodes."
    ! Estimate the fraction of work lost to load imbalance across the parallelization levels.
    do ii = 1, p_strategy_max
      n_max = ceiling(real(index_range(ii), real64) / real(real_group_sizes(ii), real64))
      jj = n_max*real_group_sizes(ii)
      frac = frac*(m_one - real(jj - index_range(ii), real64) / real(jj, real64))

    write(message(1), '(a,f5.2,a)') "Info: Octopus will waste at least ", &
      (m_one - frac)*100.0_real64, "% of computer time."
    if (frac < 0.8_real64) then
      message(2) = "Usually a number of processors which is a multiple of small primes is best."
    logical :: dim_mask(MAX_INDEX)
    integer :: i_strategy, irank
    logical :: reorder, periodic_mask(MAX_INDEX)
    integer :: coords(MAX_INDEX)
    type(mpi_comm) :: new_comm
    integer :: new_comm_size
    character(len=6) :: node_type

    type(mpi_group) :: base_group, reorder_group
    integer :: ranks(base_grp%size)
    integer :: ii, jj, kk, ll, nn
    type(mpi_comm) :: reorder_comm

    mc%node_type = p_master

    safe_allocate(mc%group_comm(1:p_strategy_max))
    safe_allocate(mc%who_am_i(1:p_strategy_max))

    mc%full_comm = mpi_comm_null
    mc%slave_intercomm = mpi_comm_null

    if (mc%reorder_ranks) then

      call mpi_comm_group(base_grp%comm, base_group, mpi_err)
      if (mpi_err /= mpi_success) then
        message(1) = "Error in getting MPI group!"

      ! Build the rank permutation that reorders processes over the parallelization levels.
      do ii = 1, mc%group_sizes(1)
        do jj = 1, mc%group_sizes(2)
          do kk = 1, mc%group_sizes(3)
            do ll = 1, mc%group_sizes(4)
              ranks(nn) = (ll-1)*mc%group_sizes(3)*mc%group_sizes(2)*mc%group_sizes(1) &
                + (kk-1)*mc%group_sizes(2)*mc%group_sizes(1) &
                + (jj-1)*mc%group_sizes(1) + ii - 1

      call mpi_group_incl(base_group, base_grp%size, ranks, reorder_group, mpi_err)
      if (mpi_err /= mpi_success) then
        message(1) = "Error in creating MPI group!"

      call mpi_comm_create(base_grp%comm, reorder_group, reorder_comm, mpi_err)
      if (mpi_err /= mpi_success) then
        message(1) = "Error in creating reordered communicator!"
    periodic_mask = .false.

    call mpi_cart_create(reorder_grp%comm, p_strategy_max, mc%group_sizes, periodic_mask, reorder, &
      mc%full_comm, mpi_err)

    call mpi_comm_rank(mc%full_comm, mc%full_comm_rank, mpi_err)

    call mpi_cart_coords(mc%full_comm, mc%full_comm_rank, p_strategy_max, coords, mpi_err)

    if (coords(slave_level) >= mc%group_sizes(slave_level) - num_slaves) then

    if (mc%node_type == p_master) then
      mc%group_sizes(slave_level) = mc%group_sizes(slave_level) - num_slaves
      mc%group_sizes(slave_level) = num_slaves

    call mpi_comm_split(mc%full_comm, mc%node_type, mc%full_comm_rank, new_comm, mpi_err)
    assert(new_comm /= mpi_comm_null)
    call mpi_comm_size(new_comm, new_comm_size, mpi_err)

    if (product(mc%group_sizes(:)) /= new_comm_size) then
      write(stderr,*) 'node ', mpi_world%rank, ': mc%group_sizes = ', mc%group_sizes, &
        ' new_comm_size = ', new_comm_size

    assert(product(mc%group_sizes(:)) == new_comm_size)

    call mpi_cart_create(new_comm, p_strategy_max, mc%group_sizes, periodic_mask, reorder, &
      mc%master_comm, mpi_err)
    assert(mc%master_comm /= mpi_comm_null)

    call mpi_comm_free(new_comm, mpi_err)

    call mpi_comm_rank(mc%master_comm, mc%master_comm_rank, mpi_err)

    do i_strategy = 1, p_strategy_max
      dim_mask(i_strategy) = .true.
      call mpi_cart_sub(mc%master_comm, dim_mask, mc%group_comm(i_strategy), mpi_err)
      call mpi_comm_rank(mc%group_comm(i_strategy), mc%who_am_i(i_strategy), mpi_err)

    call mpi_cart_sub(mc%master_comm, dim_mask, mc%dom_st_comm, mpi_err)

    call mpi_cart_sub(mc%master_comm, dim_mask, mc%st_kpt_comm, mpi_err)

    call mpi_cart_sub(mc%master_comm, dim_mask, mc%dom_st_kpt_comm, mpi_err)

    mc%group_comm = base_grp%comm

    mc%master_comm = base_grp%comm
    mc%dom_st_comm = base_grp%comm
    mc%st_kpt_comm = base_grp%comm
    mc%dom_st_kpt_comm = base_grp%comm
    write(message(1), '(a)') 'Debug: MPI Task Assignment to MPI Groups'
    write(message(2), '(5a10)') 'World', 'Domains', 'States', 'K-Points', 'Other'

    if (mc%node_type == p_slave) then
    integer :: remote_leader

    integer :: coords(MAX_INDEX)

    call mpi_cart_coords(mc%full_comm, mc%full_comm_rank, p_strategy_max, coords, mpi_err)

    if (mc%node_type == p_slave) then
      coords(slave_level) = 0
      coords(slave_level) = mc%group_sizes(slave_level)

    call mpi_cart_rank(mc%full_comm, coords, remote_leader, mpi_err)

    call mpi_intercomm_create(mc%group_comm(slave_level), 0, base_grp%comm, remote_leader, tag, &
      mc%slave_intercomm, mpi_err)
    call mpi_comm_free(mc%group_comm(ii), mpi_err)

    call mpi_comm_free(mc%dom_st_comm, mpi_err)
    call mpi_comm_free(mc%st_kpt_comm, mpi_err)
    call mpi_comm_free(mc%dom_st_kpt_comm, mpi_err)
    call mpi_comm_free(mc%full_comm, mpi_err)
    call mpi_comm_free(mc%master_comm, mpi_err)

  safe_deallocate_a(mc%group_sizes)
  safe_deallocate_a(mc%group_comm)
  safe_deallocate_a(mc%who_am_i)
logical pure function multicomm_strategy_is_parallel(mc, level) result(rr)
  integer, intent(in) :: level

  rr = bitand(mc%par_strategy, 2**(level - 1)) /= 0

  type(mpi_grp_t), intent(in) :: mpi_grp

  integer :: grp_size, rounds, ir, in

  push_sub(create_all_pairs)

  grp_size = mpi_grp%size

  if (mod(grp_size, 2) == 0) then
    rounds = grp_size - 1

  safe_allocate(ap%schedule(0:grp_size - 1, 1:rounds))

  do in = 0, grp_size - 1

  pop_sub(create_all_pairs)
  integer, intent(in) :: in, ir

  if (mod(grp_size, 2) == 0) then

  integer, intent(in) :: grp_size, ii, rr

  elseif (ii == rr + 1) then

  pp = modulo(2 * rr - ii + 1, 2 * mm - 1) + 1

  integer, intent(in) :: grp_size, ii, rr

  mm = (grp_size + 1) / 2

  if (pp == 2 * mm - 1) then
  integer, intent(in) :: nobjs
  integer, intent(in) :: nprocs
  integer, intent(out) :: istart(:)
  integer, intent(out) :: ifinal(:)
  integer, optional, intent(out) :: lsize(:)
  logical, optional, intent(in) :: scalapack_compat

  integer :: ii, jj, rank
  logical :: scalapack_compat_

  scalapack_compat_ = optional_default(scalapack_compat, .false.)
#ifndef HAVE_SCALAPACK
  scalapack_compat_ = .false.

  if (scalapack_compat_) then

    if (mod(nobjs, nprocs) /= 0) nbl = nbl + 1

      size = numroc(nobjs, nbl, rank - 1, 0, nprocs)

      if (rank > 1) istart(rank) = ifinal(rank - 1) + 1
      ifinal(rank) = istart(rank) + size - 1

  if (nprocs <= nobjs) then

    do rank = 0, nprocs - 1
      ii = nobjs - jj*nprocs
      if (ii > 0 .and. rank < ii) then
        istart(rank + 1) = rank*jj + 1
        ifinal(rank + 1) = istart(rank + 1) + jj - 1
        ifinal(rank + 1) = nobjs - (nprocs - rank - 1)*jj
        istart(rank + 1) = ifinal(rank + 1) - jj + 1

    if (ii <= nobjs) then

  if (present(lsize)) then
    lsize(1:nprocs) = ifinal(1:nprocs) - istart(1:nprocs) + 1
    assert(sum(lsize(1:nprocs)) == nobjs)
  integer, intent(in) :: nobjs
  integer, intent(out) :: ini
  integer, intent(out) :: nobjs_loc

  integer, allocatable :: istart(:), ifinal(:), lsize(:)

  nthreads = omp_get_num_threads()
  allocate(istart(1:nthreads))
  allocate(ifinal(1:nthreads))
  allocate(lsize(1:nthreads))

  rank = 1 + omp_get_thread_num()

  nobjs_loc = lsize(rank)

  slave = this%node_type == p_slave

  have_slaves = this%have_slaves
Internal procedures:
  subroutine assign_nodes()
  subroutine sanity_check()  - check if a balanced distribution of nodes will be used
  subroutine group_comm_create()
  subroutine create_slave_intercommunicators()
  integer pure function get_partner(in, ir)
  integer pure function get_partner_even(grp_size, ii, rr)
  integer pure function get_partner_odd(grp_size, ii, rr)
      These are from the paper cited above.
Used from other modules:
  This module contains interfaces for BLACS routines. Interfaces are from http:
  This module handles the calculation mode:
    integer, parameter, public :: p_strategy_max
    integer, parameter, public :: p_strategy_kpoints  - parallelization in k-points
    integer, parameter, public :: p_strategy_other    - something else like e-h pairs
    integer, parameter, public :: p_strategy_domains  - parallelization in domains
    integer, parameter, public :: p_strategy_serial   - single domain, all states, k-points on a single processor
    integer, parameter, public :: p_strategy_states   - parallelization in states
  type(debug_t), save, public :: debug
  real(real64), parameter, public :: m_one
  subroutine, public :: messages_print_with_emphasis(msg, iunit, namespace)
  character(len=512), private :: msg
  subroutine, public :: messages_warning(no_lines, all_nodes, namespace)
  subroutine, public :: messages_obsolete_variable(namespace, name, rep)
  subroutine, public :: messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
  subroutine, public :: messages_new_line()
  character(len=256), dimension(max_lines), public :: message  - to be output by fatal, warning
  subroutine, public :: messages_fatal(no_lines, only_root_writes, namespace)
  subroutine, public :: messages_experimental(name, namespace)
  This module contains some common usage patterns of MPI routines:
    subroutine, public :: create_intranode_communicator(base_grp, intranode_grp, internode_grp)
    subroutine :: mpi_grp_copy(mpi_grp_out, mpi_grp_in)
    type(mpi_comm), parameter, public :: mpi_comm_undefined  - used to indicate a communicator has not been initialized
    type(mpi_grp_t), public :: mpi_world
    subroutine :: mpi_grp_init(grp, comm)  - Initialize MPI group instance.
    integer, public :: mpi_err  - used to store return values of MPI calls
This module handles the communicators for the various parallelization strategies. Public interface:

  integer, parameter, public :: p_slave
  logical pure function, public :: multicomm_strategy_is_parallel(mc, level)
      Tests bit level-1 of mc%par_strategy (a sketch of the bit-mask convention follows this entry).
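The par_strategy field is treated as a bit mask with one bit per parallelization level (bit level-1), which is what the ibset/ibclr/bitand calls in the code above rely on. A minimal standalone sketch of that convention, using the standard iand intrinsic in place of the module's bitand wrapper and purely illustrative level numbers:

  program par_strategy_mask_demo
    implicit none
    ! Illustrative level numbers only; the real constants live in the calculation-mode module.
    integer, parameter :: lvl_domains = 1, lvl_states = 2, lvl_kpoints = 3, lvl_other = 4
    integer :: par_strategy, level
    logical :: is_parallel

    par_strategy = 0
    par_strategy = ibset(par_strategy, lvl_domains - 1)  ! switch domain parallelization on
    par_strategy = ibset(par_strategy, lvl_states - 1)   ! switch state parallelization on
    par_strategy = ibclr(par_strategy, lvl_states - 1)   ! ... and off again

    do level = lvl_domains, lvl_other
      is_parallel = iand(par_strategy, 2**(level - 1)) /= 0  ! same test as multicomm_strategy_is_parallel
      print '(a,i0,a,l1)', 'level ', level, ' parallel? ', is_parallel
    end do
  end program par_strategy_mask_demo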
  subroutine, public :: multicomm_divide_range(nobjs, nprocs, istart, ifinal, lsize, scalapack_compat)
      Divide the range of numbers [1, nobjs] between nprocs processors (a sketch of the distribution follows this entry).
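As an illustration of that even block distribution, here is a minimal standalone sketch; it is not the module's own implementation (which, with scalapack_compat, instead derives the block size through ScaLAPACK's numroc), and simply gives the first mod(nobjs, nprocs) processes one extra object:

  program divide_range_demo
    implicit none
    integer, parameter :: nobjs = 10, nprocs = 4
    integer :: istart(nprocs), ifinal(nprocs), lsize(nprocs)
    integer :: rank, base, extra

    base = nobjs / nprocs       ! minimum number of objects per process
    extra = mod(nobjs, nprocs)  ! the first 'extra' processes receive one object more

    do rank = 1, nprocs
      lsize(rank) = base
      if (rank <= extra) lsize(rank) = lsize(rank) + 1
      if (rank == 1) then
        istart(rank) = 1
      else
        istart(rank) = ifinal(rank - 1) + 1
      end if
      ifinal(rank) = istart(rank) + lsize(rank) - 1
      print '(a,i2,a,i3,a,i3,a,i3)', 'rank ', rank, ':', istart(rank), ' ..', ifinal(rank), ', size', lsize(rank)
    end do
  end program divide_range_demo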
  subroutine, public :: multicomm_all_pairs_copy(apout, apin)
  subroutine, public :: multicomm_end(mc)
  integer, parameter, public :: par_no
  logical pure function, public :: multicomm_have_slaves(this)
  logical pure function, public :: multicomm_is_slave(this)
  subroutine, public :: multicomm_init(mc, namespace, base_grp, mode_para, n_node, index_range, min_range)
      Create index and domain communicators.
  subroutine, public :: multicomm_create_all_pairs(mpi_grp, ap)
      This routine uses the one-factorization (or near-one-factorization) of a complete graph to construct an
      all-pairs communication schedule (a standalone sketch of the pairing follows this entry).
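The schedule can be pictured with the classic circle (round-robin) construction: with an even number of ranks, one rank stays fixed while the others rotate, so after grp_size - 1 rounds every pair has met exactly once; for an odd group size the near-one-factorization leaves one rank idle in each round. A standalone sketch of the even case, illustrative only and not the module's get_partner_even/get_partner_odd logic:

  program all_pairs_demo
    implicit none
    integer, parameter :: n = 6        ! number of ranks (even case)
    integer :: schedule(0:n-1, 1:n-1)  ! schedule(rank, round) = partner of rank in that round
    integer :: r, i, m

    m = n - 1  ! modulus of the circle method
    do r = 0, m - 1
      do i = 0, n - 1
        if (i == n - 1) then
          ! the fixed rank meets the rank i that solves 2*i == r (mod m)
          schedule(i, r + 1) = modulo(r*(m + 1)/2, m)
        else if (modulo(2*i, m) == r) then
          schedule(i, r + 1) = n - 1
        else
          schedule(i, r + 1) = modulo(r - i, m)
        end if
      end do
    end do

    do r = 1, m
      write(*, '(a,i2,a,10i3)') 'round ', r, ': partners = ', (schedule(i, r), i = 0, n - 1)
    end do
  end program all_pairs_demo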
  subroutine, public :: multicomm_divide_range_omp(nobjs, ini, nobjs_loc)
      Divide the range of numbers from 1 to nobjs between all available threads with OpenMP
      (a standalone sketch follows at the end of this listing).

From the general-purpose utility module (simple general-purpose utility functions and procedures):
  subroutine, public :: get_divisors(nn, n_divisors, divisors)

  This is defined even when running serial.
  An all-pairs communication schedule for a given group (multicomm_all_pairs_t).
  Stores all communicators and groups (multicomm_t).
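Finally, a minimal standalone sketch of the per-thread range division that multicomm_divide_range_omp provides. It is illustrative only: it reuses the 1-based thread rank convention visible in the code above and the same "first ranks get one extra object" split as the process version, and it must be compiled with OpenMP enabled (e.g. -fopenmp):

  program divide_range_omp_demo
    use omp_lib
    implicit none
    integer, parameter :: nobjs = 10
    integer :: nthreads, rank, base, extra, ini, nobjs_loc

    !$omp parallel private(nthreads, rank, base, extra, ini, nobjs_loc)
    nthreads = omp_get_num_threads()
    rank = 1 + omp_get_thread_num()  ! 1-based thread index, as in the module
    base = nobjs / nthreads
    extra = mod(nobjs, nthreads)
    if (rank <= extra) then
      nobjs_loc = base + 1
      ini = (rank - 1)*(base + 1) + 1
    else
      nobjs_loc = base
      ini = extra*(base + 1) + (rank - 1 - extra)*base + 1
    end if
    !$omp critical
    print '(a,i2,a,i3,a,i3)', 'thread ', rank, ': first object ', ini, ', count ', nobjs_loc
    !$omp end critical
    !$omp end parallel
  end program divide_range_omp_demo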