Octopus
partition.F90
Go to the documentation of this file.
1!! Copyright (C) 2013 M. Oliveira
2!! Copyright (C) 2021 S. Ohlmann
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module partition_oct_m
23 use debug_oct_m
24 use global_oct_m
25 use io_oct_m
28 use mpi_oct_m
31
32 implicit none
33
34 private
35
36 public :: &
50
51
71 !
72 type partition_t
73 private
74
75 ! The following components are the same for all processes:
76 type(mpi_grp_t) :: mpi_grp
77 integer(int64) :: np_global
78 integer :: npart = 1
79 integer, allocatable :: np_local_vec(:)
80 integer(int64), allocatable :: istart_vec(:)
81
82 ! The following components are process-dependent:
83 integer :: partno
84 integer :: np_local
85 integer(int64) :: istart
86 integer, allocatable :: part(:)
87 end type partition_t
88
92
93
94contains
95
96 !---------------------------------------------------------
98 !
99 subroutine partition_init(partition, np_global, mpi_grp)
100 type(partition_t), intent(out) :: partition
101 integer(int64), intent(in) :: np_global
102 type(mpi_grp_t), intent(in) :: mpi_grp
103
104 integer :: ipart
105 integer(int64) :: iend
106
107 push_sub(partition_init)
108
109 !Global variables
110 partition%mpi_grp = mpi_grp
111 partition%np_global = np_global
112 partition%npart = mpi_grp%size
113 partition%partno = mpi_grp%rank + 1
114
115 safe_allocate(partition%np_local_vec(1:partition%npart))
116 safe_allocate(partition%istart_vec(1:partition%npart))
117
118 ! use block data decomposition
119 do ipart = 1, partition%npart
120 ! according to Fortran 2008 standard, Sect. 7.1.9.3, point 4, the kind parameter of an
121 ! operation is the one with a larger decimal range, i.e., everything is promoted to i8
122 partition%istart_vec(ipart) = (ipart-1) * np_global/partition%npart + 1
123 iend = ipart * np_global/partition%npart
124 partition%np_local_vec(ipart) = i8_to_i4(iend - partition%istart_vec(ipart) + 1)
125 end do
126 partition%istart = partition%istart_vec(partition%partno)
127 partition%np_local = partition%np_local_vec(partition%partno)
128
129 !Allocate memory for the partition
130 safe_allocate(partition%part(1:partition%np_local))
131
132 pop_sub(partition_init)
133 end subroutine partition_init
134
135 ! ---------------------------------------------------------
136 subroutine partition_end(partition)
137 type(partition_t), intent(inout) :: partition
138
139 push_sub(partition_end)
140
141 safe_deallocate_a(partition%part)
142
143 pop_sub(partition_end)
144 end subroutine partition_end
145
146 ! ---------------------------------------------------------
147 subroutine partition_set(partition, part)
148 type(partition_t), intent(inout) :: partition
149 integer, intent(in) :: part(:)
150
151 push_sub(partition_set)
152
153 partition%part(1:partition%np_local) = part(1:partition%np_local)
154
155 pop_sub(partition_set)
156 end subroutine partition_set
157
158 ! ---------------------------------------------------------
163 !
164 subroutine partition_dump(partition, dir, filename, ierr)
165 type(partition_t), intent(in) :: partition
166 character(len=*), intent(in) :: dir
167 character(len=*), intent(in) :: filename
168 integer, intent(out) :: ierr
170 integer :: err
171 character(len=MAX_PATH_LEN) :: full_filename
174
175 ierr = 0
176 full_filename = trim(dir)//'/'//trim(filename)
178 ! Write the header (root only) and wait
179 if (mpi_grp_is_root(partition%mpi_grp)) then
180 call iwrite_header(full_filename, partition%np_global, err)
181 if (err /= 0) ierr = ierr + 1
182 end if
183 call partition%mpi_grp%bcast(ierr, 1, mpi_integer, 0)
184
185 assert(all(partition%part(:) > 0))
186
187 ! Each process writes a portion of the partition
188 if (ierr == 0) then
189 call mpi_debug_in(partition%mpi_grp%comm, c_mpi_file_write)
190 ! Only one rank per partition group should write the partition restart information
191 ! Otherwise, more than once is trying to be written data
192 if (mod(mpi_world%rank, mpi_world%size/partition%mpi_grp%size) == 0) then
193 call io_binary_write_parallel(full_filename, partition%mpi_grp%comm, partition%istart, &
194 partition%np_local, partition%part, err)
195 call mpi_debug_out(partition%mpi_grp%comm, c_mpi_file_write)
196 if (err /= 0) ierr = ierr + 2
197 end if
198 end if
199
200 pop_sub(partition_dump)
201 end subroutine partition_dump
202
203 ! ---------------------------------------------------------
208 !
209 subroutine partition_load(partition, dir, filename, ierr)
210 use iso_c_binding, only: c_sizeof
211 type(partition_t), intent(inout) :: partition
212 character(len=*), intent(in) :: dir
213 character(len=*), intent(in) :: filename
214 integer, intent(out) :: ierr
215
216 integer :: err
217 integer(int64) :: np, file_size
218 integer, allocatable :: scounts(:)
219 integer(int64), allocatable :: sdispls(:)
220 character(len=MAX_PATH_LEN) :: full_filename
221
222 push_sub(partition_load)
223
224 ierr = 0
225 full_filename = trim(dir)//'/'//trim(filename)
226
227 ! This is a writing to avoid an optimization of gfortran with -O3
228 write(message(1),'(a,i8)') "Info: number of points in the partition (in root process) =", size(partition%part)
230
231 ! Check if the file exists and has the proper size (only world root)
232 if (mpi_world%rank == 0) then
233 call io_binary_get_info(full_filename, np, file_size, err)
234 assert(np == partition%np_global)
235 end if
236
237 ! All nodes need to know the result
238 call mpi_world%bcast(err, 1, mpi_integer, 0)
239 call mpi_world%bcast(file_size, 1, mpi_integer8, 0)
241 if (err /= 0) then
242 ierr = ierr + 1
243 pop_sub(partition_load)
244 return
245 end if
246 ! The size of the file is not as big as np_global
247 if (file_size - 64 /= partition%np_global * c_sizeof(int(0))) then
248 ierr = ierr + 2
249 pop_sub(partition_load)
250 return
251 end if
252
253 ! Calculate displacements for reading
254 safe_allocate(scounts(1:partition%npart))
255 safe_allocate(sdispls(1:partition%npart))
256
257 scounts(:) = partition%np_local_vec(:)
258 sdispls(:) = partition%istart_vec(:) - 1
259
260 assert(sum(scounts(:)) == partition%np_global)
261
262#ifdef HAVE_MPI
263 call mpi_debug_in(partition%mpi_grp%comm, c_mpi_file_read)
264 call io_binary_read_parallel(full_filename, partition%mpi_grp%comm, partition%istart, &
265 partition%np_local, partition%part, err)
266 call mpi_debug_out(partition%mpi_grp%comm, c_mpi_file_read)
267 if (err /= 0) ierr = ierr + 4
268#endif
269
270 if (any(partition%part(:) <= 0)) then
271 write(message(1),'(a)') 'Internal error: some elements of partition are <= 0.'
272 write(message(2),*) 'filename = ', full_filename
273 write(message(3),*) 'scounts = ', scounts(:)
274 write(message(4),*) 'sdispls = ', sdispls(:)
275 call messages_fatal(6)
276 end if
277
278 safe_deallocate_a(scounts)
279 safe_deallocate_a(sdispls)
280
281 pop_sub(partition_load)
282 end subroutine partition_load
283
284 ! ---------------------------------------------------------
285 subroutine partition_get_local_size(partition, istart, np_local)
286 type(partition_t), intent(in) :: partition
287 integer(int64), intent(out) :: istart
288 integer, intent(out) :: np_local
289
291
292 istart = partition%istart
293 np_local = partition%np_local
294
296 end subroutine partition_get_local_size
297
298 ! ---------------------------------------------------------
301 subroutine partition_get_global(partition, part_global, root)
302 type(partition_t), intent(in) :: partition
303 integer, intent(out) :: part_global(:)
304 integer, optional, intent(in) :: root
305
306 integer(int64), allocatable :: rdispls(:)
307 integer, allocatable :: rcounts(:)
308
309 push_sub(partition_get_global)
310
311 safe_allocate(rdispls(1:partition%npart))
312 safe_allocate(rcounts(1:partition%npart))
313
314 rcounts(:) = partition%np_local_vec(:)
315 rdispls(:) = partition%istart_vec(:) - 1
316
317 assert(all(partition%part(1:partition%np_local) > 0))
318
319 if (present(root)) then
320 call partition%mpi_grp%gatherv(partition%part, partition%np_local, mpi_integer, &
321 part_global, rcounts, rdispls, mpi_integer, root)
322 else
323 call partition%mpi_grp%allgatherv(partition%part, partition%np_local, mpi_integer, &
324 part_global, rcounts, rdispls, mpi_integer)
325 end if
326
327 if (.not. present(root) .or. partition%mpi_grp%rank == 0) then
328 assert(all(part_global(:) > 0))
329 end if
330
331#ifndef HAVE_MPI
332 part_global = partition%part
333#endif
334
335 safe_deallocate_a(rdispls)
336 safe_deallocate_a(rcounts)
337
338 pop_sub(partition_get_global)
339 end subroutine partition_get_global
340
341 ! ---------------------------------------------------------
346 subroutine partition_get_partition_number_4(partition, np, points, partno)
347 type(partition_t), intent(in) :: partition
348 integer, intent(in) :: np
349 integer(int64), intent(in) :: points(:)
350 integer, intent(out) :: partno(:)
351
352 integer :: ip, nproc, rnp
353 integer(int64), allocatable :: sbuffer(:), rbuffer(:)
354 integer, allocatable :: scounts(:), rcounts(:)
355 integer, allocatable :: sdispls(:), rdispls(:)
356 integer, allocatable :: ipos(:), order(:)
357
359
360 safe_allocate(scounts(1:partition%npart))
361 safe_allocate(rcounts(1:partition%npart))
362 safe_allocate(sdispls(1:partition%npart))
363 safe_allocate(rdispls(1:partition%npart))
364
365 ! How many points will we have to send/receive from each process?
366 scounts = 0
367 do ip = 1, np
368 ! Who knows where points(ip) is stored?
369 nproc = partition_get_number(partition, points(ip))
370 ! We increase the respective counter
371 scounts(nproc) = scounts(nproc) + 1
372 end do
373
374
375 !Tell each process how many points we will need from it
376 call partition%mpi_grp%alltoall(scounts, 1, mpi_integer, &
377 rcounts, 1, mpi_integer)
379 !Build displacement arrays
380 sdispls(1) = 0
381 rdispls(1) = 0
382 do ip = 2, partition%npart
383 sdispls(ip) = sdispls(ip-1) + scounts(ip-1)
384 rdispls(ip) = rdispls(ip-1) + rcounts(ip-1)
385 end do
386
387
388 rnp = sum(rcounts)
389 safe_allocate(sbuffer(1:np))
390 safe_allocate(rbuffer(1:rnp))
391
392 !Put points in correct order for sending
393 safe_allocate(ipos(1:partition%npart))
394 safe_allocate(order(1:np))
395 ipos = 0
396 do ip = 1, np
397 ! Who knows where points(ip) is stored?
398 nproc = partition_get_number(partition, points(ip))
399
400 !We increase the respective counter
401 ipos(nproc) = ipos(nproc) + 1
402
403 !Put the point in the correct place
404 order(ip) = sdispls(nproc) + ipos(nproc)
405 sbuffer(order(ip)) = points(ip) ! global index of the point
406 end do
407 safe_deallocate_a(ipos)
408
409 !Send the global indices of the points to the process that knows what is the corresponding partition
410 call partition%mpi_grp%alltoallv(sbuffer, scounts, sdispls, mpi_integer8, &
411 rbuffer, rcounts, rdispls, mpi_integer8)
412
413 !We get the partition number from the global index. This will be send back.
414 do ip = 1, rnp
415 if (rbuffer(ip) == 0) cycle
416 rbuffer(ip) = partition%part(rbuffer(ip) - partition%istart + 1)
417 end do
418
419 !Now we send the information backwards
420 call partition%mpi_grp%alltoallv(rbuffer, rcounts, rdispls, mpi_integer8, &
421 sbuffer, scounts, sdispls, mpi_integer8)
422
423 !Reorder the points
424 do ip = 1, np
425 partno(ip) = i8_to_i4(sbuffer(order(ip)))
426 end do
427
428 !Deallocate memory
429 safe_deallocate_a(order)
430 safe_deallocate_a(sbuffer)
431 safe_deallocate_a(scounts)
432 safe_deallocate_a(sdispls)
433 safe_deallocate_a(rbuffer)
434 safe_deallocate_a(rcounts)
435 safe_deallocate_a(rdispls)
436
440 !---------------------------------------------------------
441 subroutine partition_get_partition_number_8(partition, np, points, partno)
442 type(partition_t), intent(in) :: partition
443 integer(int64), intent(in) :: np
444 integer(int64), intent(in) :: points(:)
445 integer, intent(out) :: partno(:)
446
447 integer(int64) :: rounds, offset, iround
448
450
451 rounds = np/huge(0_int32)
452 do iround = 1, rounds
453 offset = (iround - 1)*huge(0_int32) + 1
454 call partition_get_partition_number(partition, huge(0_int32), points(offset:offset+huge(0_int32)), &
455 partno(offset:offset+huge(0_int32)))
456 end do
457 offset = rounds*huge(0_int32) + 1
458 call partition_get_partition_number(partition, i8_to_i4(np-offset+1), points(offset:np), &
459 partno(offset:np))
460
463
464 !---------------------------------------------------------
467 subroutine partition_get_np_local(partition, np_local_vec)
468 type(partition_t), intent(in) :: partition
469 integer, contiguous, intent(inout) :: np_local_vec(:)
470
471 integer, allocatable :: np_local_vec_tmp(:)
472 integer :: ip
473
474 push_sub(partition_get_np_local)
475
476 assert(ubound(np_local_vec, 1) >= partition%npart)
477 assert(partition%npart > 0)
478 assert(all(partition%part(:) > 0))
479 safe_allocate(np_local_vec_tmp(1:partition%npart))
480 np_local_vec_tmp = 0
481
482 ! Calculate locally the local points of each partition
483 do ip = 1, partition%np_local
484 np_local_vec_tmp(partition%part(ip)) = np_local_vec_tmp(partition%part(ip)) + 1
485 end do
486
487 ! Collect all the local points
488 call partition%mpi_grp%allreduce(np_local_vec_tmp, np_local_vec, partition%npart, mpi_integer, mpi_sum)
489 safe_deallocate_a(np_local_vec_tmp)
490
492 end subroutine partition_get_np_local
493
494 !---------------------------------------------------------
496 pure integer function partition_get_npart(partition) result(npart)
497 type(partition_t), intent(in) :: partition
498 npart = partition%npart
499 end function partition_get_npart
500
501 !---------------------------------------------------------
503 pure integer function partition_get_part(partition, local_point) result(part)
504 type(partition_t), intent(in) :: partition
505 integer, intent(in) :: local_point
506 part = partition%part(local_point)
507 end function partition_get_part
508
509 !---------------------------------------------------------
512 pure integer function partition_get_number(partition, global_point) result(part)
513 type(partition_t), intent(in) :: partition
514 integer(int64), intent(in) :: global_point
515
516 if (global_point == 0) then
517 part = partition%partno
518 else
519 part = i8_to_i4((partition%npart*global_point - 1)/partition%np_global + 1)
520 end if
521 end function partition_get_number
522
523 !---------------------------------------------------------
527 subroutine partition_get_local(partition, rbuffer, np_local)
528 type(partition_t), intent(in) :: partition
529 integer(int64), contiguous, intent(inout) :: rbuffer(:)
530 integer, intent(out) :: np_local
531
532 integer :: ip, ipart
533 integer(int64) :: istart
534 integer(int64), allocatable :: sdispls(:), rdispls(:), sbuffer(:)
535 integer, allocatable :: scounts(:), rcounts(:)
536
537 push_sub(partition_get_local)
538
539 safe_allocate(sdispls(1:partition%npart))
540 safe_allocate(scounts(1:partition%npart))
541 safe_allocate(rcounts(1:partition%npart))
542
543 ! Calculate the starting point of the running process
544 istart = partition%istart - 1
545
546 scounts = 0
547 ! Count and store the local points for each partition
548 do ip = 1, partition%np_local
549 ipart = partition%part(ip)
550 scounts(ipart) = scounts(ipart) + 1
551 end do
552
553 ! Create displacements
554 sdispls(1) = 0
555 do ipart = 2, partition%npart
556 sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
557 end do
558
559 ! Allocate and fill the send buffer
560 np_local = sum(scounts)
561 scounts = 0
562 safe_allocate(sbuffer(1:np_local))
563 do ip = 1, np_local
564 ipart = partition%part(ip)
565 scounts(ipart) = scounts(ipart) + 1
566 sbuffer(sdispls(ipart) + scounts(ipart)) = ip + istart
567 end do
568
569 ! Tell each process how many points we will need from it
570 call partition%mpi_grp%alltoall(scounts, 1, mpi_integer, &
571 rcounts, 1, mpi_integer)
572
573 ! Create the displacement vector from the counts vector
574 np_local = sum(rcounts)
575 safe_allocate(rdispls(1:partition%npart))
576 assert(ubound(rbuffer, 1) >= np_local)
577
578 rdispls(1) = 0
579 do ipart = 2, partition%npart
580 rdispls(ipart) = rcounts(ipart-1) + rdispls(ipart-1)
581 end do
582
583 ! Collect the corresponding points to each process
584 call partition%mpi_grp%alltoallv(sbuffer, scounts, sdispls, mpi_integer8, &
585 rbuffer, rcounts, rdispls, mpi_integer8)
586
587 safe_deallocate_a(sdispls)
588 safe_deallocate_a(scounts)
589 safe_deallocate_a(sbuffer)
590 safe_deallocate_a(rcounts)
591 safe_deallocate_a(rdispls)
592
593 pop_sub(partition_get_local)
594 end subroutine partition_get_local
595end module partition_oct_m
597!! Local Variables:
598!! mode: f90
599!! coding: utf-8
600!! End:
subroutine, public io_binary_get_info(fname, np, file_size, ierr)
subroutine, public iwrite_header(fname, np_global, ierr)
Definition: io.F90:114
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:414
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:616
integer, parameter, public c_mpi_file_read
Definition: mpi_debug.F90:137
subroutine, public mpi_debug_in(comm, index)
Definition: mpi_debug.F90:241
subroutine, public mpi_debug_out(comm, index)
Definition: mpi_debug.F90:265
integer, parameter, public c_mpi_file_write
Definition: mpi_debug.F90:137
logical function mpi_grp_is_root(grp)
Is the current MPI process of grpcomm, root.
Definition: mpi.F90:434
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:270
subroutine, public partition_init(partition, np_global, mpi_grp)
initialize the partition table
Definition: partition.F90:193
pure integer function partition_get_number(partition, global_point)
Returns the partition number for a given global index If the index is zero, return local partition.
Definition: partition.F90:606
pure integer function, public partition_get_npart(partition)
Returns the total number of partitions.
Definition: partition.F90:590
subroutine, public partition_set(partition, part)
Definition: partition.F90:241
subroutine partition_get_partition_number_4(partition, np, points, partno)
Given a list of global indices, return the partition number where those points are stored....
Definition: partition.F90:440
subroutine, public partition_get_np_local(partition, np_local_vec)
Given the partition, returns the corresponding number of local points that each partition has.
Definition: partition.F90:561
pure integer function, public partition_get_part(partition, local_point)
Returns the partition of the local point.
Definition: partition.F90:597
subroutine partition_get_partition_number_8(partition, np, points, partno)
Definition: partition.F90:535
subroutine, public partition_dump(partition, dir, filename, ierr)
write the partition data
Definition: partition.F90:258
subroutine, public partition_get_local(partition, rbuffer, np_local)
Calculates the local vector of all partitions in parallel. Local vector stores the global point indic...
Definition: partition.F90:621
subroutine, public partition_end(partition)
Definition: partition.F90:230
subroutine, public partition_load(partition, dir, filename, ierr)
read the partition data
Definition: partition.F90:303
subroutine, public partition_get_local_size(partition, istart, np_local)
Definition: partition.F90:379
subroutine, public partition_get_global(partition, part_global, root)
Returns the global partition. If root is present, the partition is gathered only in that node....
Definition: partition.F90:395
The partition is an array that contains the mapping between some global index and a process,...
Definition: partition.F90:165