Octopus
partition.F90
Go to the documentation of this file.
1!! Copyright (C) 2013 M. Oliveira
2!! Copyright (C) 2021 S. Ohlmann
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module partition_oct_m
23 use debug_oct_m
24 use global_oct_m
25 use io_oct_m
28 use mpi_oct_m
31
32 implicit none
33
34 private
35
36 public :: &
50
51
71 !
72 type partition_t
73 private
74
75 ! The following components are the same for all processes:
76 type(mpi_grp_t) :: mpi_grp
77 integer(int64) :: np_global
78 integer :: npart
79 integer, allocatable :: np_local_vec(:)
80 integer(int64), allocatable :: istart_vec(:)
81
82 ! The following components are process-dependent:
83 integer :: partno
84 integer :: np_local
85 integer(int64) :: istart
86 integer, allocatable :: part(:)
87
88 end type partition_t
89
93
94
95contains
96
97 !---------------------------------------------------------
99 !
100 subroutine partition_init(partition, np_global, mpi_grp)
101 type(partition_t), intent(out) :: partition
102 integer(int64), intent(in) :: np_global
103 type(mpi_grp_t), intent(in) :: mpi_grp
104
105 integer :: ipart
106 integer(int64) :: iend
107
108 push_sub(partition_init)
109
110 !Global variables
111 partition%mpi_grp = mpi_grp
112 partition%np_global = np_global
113 partition%npart = mpi_grp%size
114 partition%partno = mpi_grp%rank + 1
116 safe_allocate(partition%np_local_vec(1:partition%npart))
117 safe_allocate(partition%istart_vec(1:partition%npart))
118
119 ! use block data decomposition
120 do ipart = 1, partition%npart
121 ! according to Fortran 2008 standard, Sect. 7.1.9.3, point 4, the kind parameter of an
122 ! operation is the one with a larger decimal range, i.e., everything is promoted to i8
123 partition%istart_vec(ipart) = (ipart-1) * np_global/partition%npart + 1
124 iend = ipart * np_global/partition%npart
125 partition%np_local_vec(ipart) = i8_to_i4(iend - partition%istart_vec(ipart) + 1)
126 end do
127 partition%istart = partition%istart_vec(partition%partno)
128 partition%np_local = partition%np_local_vec(partition%partno)
129
130 !Allocate memory for the partition
131 safe_allocate(partition%part(1:partition%np_local))
132
133 pop_sub(partition_init)
134 end subroutine partition_init
135
136 ! ---------------------------------------------------------
137 subroutine partition_end(partition)
138 type(partition_t), intent(inout) :: partition
139
140 push_sub(partition_end)
141
142 safe_deallocate_a(partition%part)
143
144 pop_sub(partition_end)
145 end subroutine partition_end
146
147 ! ---------------------------------------------------------
148 subroutine partition_set(partition, part)
149 type(partition_t), intent(inout) :: partition
150 integer, intent(in) :: part(:)
151
152 push_sub(partition_set)
153
154 partition%part(1:partition%np_local) = part(1:partition%np_local)
155
156 pop_sub(partition_set)
157 end subroutine partition_set
158
159 ! ---------------------------------------------------------
164 !
165 subroutine partition_dump(partition, dir, filename, ierr)
166 type(partition_t), intent(in) :: partition
167 character(len=*), intent(in) :: dir
168 character(len=*), intent(in) :: filename
169 integer, intent(out) :: ierr
171 integer :: err
172 character(len=MAX_PATH_LEN) :: full_filename
174 push_sub(partition_dump)
175
176 ierr = 0
177 full_filename = trim(dir)//'/'//trim(filename)
179 ! Write the header (root only) and wait
180 if (mpi_grp_is_root(partition%mpi_grp)) then
181 call iwrite_header(full_filename, partition%np_global, err)
182 if (err /= 0) ierr = ierr + 1
183 end if
184 call partition%mpi_grp%bcast(ierr, 1, mpi_integer, 0)
185
186 assert(all(partition%part(:) > 0))
187
188 ! Each process writes a portion of the partition
189 if (ierr == 0) then
190 call mpi_debug_in(partition%mpi_grp%comm, c_mpi_file_write)
191 ! Only one rank per partition group should write the partition restart information
192 ! Otherwise, more than once is trying to be written data
193 if (mod(mpi_world%rank, mpi_world%size/partition%mpi_grp%size) == 0) then
194 call io_binary_write_parallel(full_filename, partition%mpi_grp%comm, partition%istart, &
195 partition%np_local, partition%part, err)
196 call mpi_debug_out(partition%mpi_grp%comm, c_mpi_file_write)
197 if (err /= 0) ierr = ierr + 2
198 end if
199 end if
200
201 pop_sub(partition_dump)
202 end subroutine partition_dump
203
204 ! ---------------------------------------------------------
209 !
210 subroutine partition_load(partition, dir, filename, ierr)
211 use iso_c_binding, only: c_sizeof
212 type(partition_t), intent(inout) :: partition
213 character(len=*), intent(in) :: dir
214 character(len=*), intent(in) :: filename
215 integer, intent(out) :: ierr
216
217 integer :: err
218 integer(int64) :: np, file_size
219 integer, allocatable :: scounts(:)
220 integer(int64), allocatable :: sdispls(:)
221 character(len=MAX_PATH_LEN) :: full_filename
222
223 push_sub(partition_load)
224
225 ierr = 0
226 full_filename = trim(dir)//'/'//trim(filename)
227
228 ! This is a writing to avoid an optimization of gfortran with -O3
229 write(message(1),'(a,i8)') "Info: number of points in the partition (in root process) =", size(partition%part)
231
232 ! Check if the file exists and has the proper size (only world root)
233 if (mpi_world%rank == 0) then
234 call io_binary_get_info(full_filename, np, file_size, err)
235 assert(np == partition%np_global)
236 end if
237
238 ! All nodes need to know the result
239 call mpi_world%bcast(err, 1, mpi_integer, 0)
240 call mpi_world%bcast(file_size, 1, mpi_integer8, 0)
242 if (err /= 0) then
243 ierr = ierr + 1
244 pop_sub(partition_load)
245 return
246 end if
247 ! The size of the file is not as big as np_global
248 if (file_size - 64 /= partition%np_global * c_sizeof(int(0))) then
249 ierr = ierr + 2
250 pop_sub(partition_load)
251 return
252 end if
253
254 ! Calculate displacements for reading
255 safe_allocate(scounts(1:partition%npart))
256 safe_allocate(sdispls(1:partition%npart))
257
258 scounts = partition%np_local_vec
259 sdispls = partition%istart_vec - 1
260
261 assert(sum(scounts(:)) == partition%np_global)
262
263#ifdef HAVE_MPI
264 call mpi_debug_in(partition%mpi_grp%comm, c_mpi_file_read)
265 call io_binary_read_parallel(full_filename, partition%mpi_grp%comm, partition%istart, &
266 partition%np_local, partition%part, err)
267 call mpi_debug_out(partition%mpi_grp%comm, c_mpi_file_read)
268 if (err /= 0) ierr = ierr + 4
269#endif
270
271 if (any(partition%part(:) <= 0)) then
272 write(message(1),'(a)') 'Internal error: some elements of partition are <= 0.'
273 write(message(2),*) 'filename = ', full_filename
274 write(message(3),*) 'scounts = ', scounts(:)
275 write(message(4),*) 'sdispls = ', sdispls(:)
276 call messages_fatal(6)
277 end if
278
279 safe_deallocate_a(scounts)
280 safe_deallocate_a(sdispls)
281
282 pop_sub(partition_load)
283 end subroutine partition_load
284
285 ! ---------------------------------------------------------
286 subroutine partition_get_local_size(partition, istart, np_local)
287 type(partition_t), intent(in) :: partition
288 integer(int64), intent(out) :: istart
289 integer, intent(out) :: np_local
290
292
293 istart = partition%istart
294 np_local = partition%np_local
295
297 end subroutine partition_get_local_size
298
299 ! ---------------------------------------------------------
302 subroutine partition_get_global(partition, part_global, root)
303 type(partition_t), intent(in) :: partition
304 integer, intent(out) :: part_global(:)
305 integer, optional, intent(in) :: root
306
307 integer(int64), allocatable :: rdispls(:)
308 integer, allocatable :: rcounts(:)
309
310 push_sub(partition_get_global)
311
312 safe_allocate(rdispls(1:partition%npart))
313 safe_allocate(rcounts(1:partition%npart))
314
315 rcounts = partition%np_local_vec
316 rdispls = partition%istart_vec - 1
317
318 assert(all(partition%part(1:partition%np_local) > 0))
319
320 if (present(root)) then
321 call partition%mpi_grp%gatherv(partition%part, partition%np_local, mpi_integer, &
322 part_global, rcounts, rdispls, mpi_integer, root)
323 else
324 call partition%mpi_grp%allgatherv(partition%part, partition%np_local, mpi_integer, &
325 part_global, rcounts, rdispls, mpi_integer)
326 end if
327
328 if (.not. present(root) .or. partition%mpi_grp%rank == 0) then
329 assert(all(part_global(:) > 0))
330 end if
331
332#ifndef HAVE_MPI
333 part_global = partition%part
334#endif
335
336 safe_deallocate_a(rdispls)
337 safe_deallocate_a(rcounts)
338
339 pop_sub(partition_get_global)
340 end subroutine partition_get_global
341
342 ! ---------------------------------------------------------
347 subroutine partition_get_partition_number_4(partition, np, points, partno)
348 type(partition_t), intent(in) :: partition
349 integer, intent(in) :: np
350 integer(int64), intent(in) :: points(:)
351 integer, intent(out) :: partno(:)
352
353 integer :: ip, nproc, rnp
354 integer(int64), allocatable :: sbuffer(:), rbuffer(:)
355 integer, allocatable :: scounts(:), rcounts(:)
356 integer, allocatable :: sdispls(:), rdispls(:)
357 integer, allocatable :: ipos(:), order(:)
358
360
361 safe_allocate(scounts(1:partition%npart))
362 safe_allocate(rcounts(1:partition%npart))
363 safe_allocate(sdispls(1:partition%npart))
364 safe_allocate(rdispls(1:partition%npart))
365
366 ! How many points will we have to send/receive from each process?
367 scounts = 0
368 do ip = 1, np
369 ! Who knows where points(ip) is stored?
370 nproc = partition_get_number(partition, points(ip))
371 ! We increase the respective counter
372 scounts(nproc) = scounts(nproc) + 1
373 end do
374
375
376 !Tell each process how many points we will need from it
377 call partition%mpi_grp%alltoall(scounts, 1, mpi_integer, &
378 rcounts, 1, mpi_integer)
380 !Build displacement arrays
381 sdispls(1) = 0
382 rdispls(1) = 0
383 do ip = 2, partition%npart
384 sdispls(ip) = sdispls(ip-1) + scounts(ip-1)
385 rdispls(ip) = rdispls(ip-1) + rcounts(ip-1)
386 end do
387
388
389 rnp = sum(rcounts)
390 safe_allocate(sbuffer(1:np))
391 safe_allocate(rbuffer(1:rnp))
392
393 !Put points in correct order for sending
394 safe_allocate(ipos(1:partition%npart))
395 safe_allocate(order(1:np))
396 ipos = 0
397 do ip = 1, np
398 ! Who knows where points(ip) is stored?
399 nproc = partition_get_number(partition, points(ip))
400
401 !We increase the respective counter
402 ipos(nproc) = ipos(nproc) + 1
403
404 !Put the point in the correct place
405 order(ip) = sdispls(nproc) + ipos(nproc)
406 sbuffer(order(ip)) = points(ip) ! global index of the point
407 end do
408 safe_deallocate_a(ipos)
409
410 !Send the global indices of the points to the process that knows what is the corresponding partition
411 call partition%mpi_grp%alltoallv(sbuffer, scounts, sdispls, mpi_integer8, &
412 rbuffer, rcounts, rdispls, mpi_integer8)
413
414 !We get the partition number from the global index. This will be send back.
415 do ip = 1, rnp
416 if (rbuffer(ip) == 0) cycle
417 rbuffer(ip) = partition%part(rbuffer(ip) - partition%istart + 1)
418 end do
419
420 !Now we send the information backwards
421 call partition%mpi_grp%alltoallv(rbuffer, rcounts, rdispls, mpi_integer8, &
422 sbuffer, scounts, sdispls, mpi_integer8)
423
424 !Reorder the points
425 do ip = 1, np
426 partno(ip) = i8_to_i4(sbuffer(order(ip)))
427 end do
428
429 !Deallocate memory
430 safe_deallocate_a(order)
431 safe_deallocate_a(sbuffer)
432 safe_deallocate_a(scounts)
433 safe_deallocate_a(sdispls)
434 safe_deallocate_a(rbuffer)
435 safe_deallocate_a(rcounts)
436 safe_deallocate_a(rdispls)
437
441 !---------------------------------------------------------
442 subroutine partition_get_partition_number_8(partition, np, points, partno)
443 type(partition_t), intent(in) :: partition
444 integer(int64), intent(in) :: np
445 integer(int64), intent(in) :: points(:)
446 integer, intent(out) :: partno(:)
447
448 integer(int64) :: rounds, offset, iround
449
451
452 rounds = np/huge(0_int32)
453 do iround = 1, rounds
454 offset = (iround - 1)*huge(0_int32) + 1
455 call partition_get_partition_number(partition, huge(0_int32), points(offset:offset+huge(0_int32)), &
456 partno(offset:offset+huge(0_int32)))
457 end do
458 offset = rounds*huge(0_int32) + 1
459 call partition_get_partition_number(partition, i8_to_i4(np-offset+1), points(offset:np), &
460 partno(offset:np))
461
464
465 !---------------------------------------------------------
468 subroutine partition_get_np_local(partition, np_local_vec)
469 type(partition_t), intent(in) :: partition
470 integer, contiguous, intent(inout) :: np_local_vec(:)
471
472 integer, allocatable :: np_local_vec_tmp(:)
473 integer :: ip
474
475 push_sub(partition_get_np_local)
476
477 assert(ubound(np_local_vec, 1) >= partition%npart)
478 assert(partition%npart > 0)
479 assert(all(partition%part(:) > 0))
480 safe_allocate(np_local_vec_tmp(1:partition%npart))
481 np_local_vec_tmp = 0
482
483 ! Calculate locally the local points of each partition
484 do ip = 1, partition%np_local
485 np_local_vec_tmp(partition%part(ip)) = np_local_vec_tmp(partition%part(ip)) + 1
486 end do
487
488 ! Collect all the local points
489 call partition%mpi_grp%allreduce(np_local_vec_tmp, np_local_vec, partition%npart, mpi_integer, mpi_sum)
490 safe_deallocate_a(np_local_vec_tmp)
491
493 end subroutine partition_get_np_local
494
495 !---------------------------------------------------------
497 pure integer function partition_get_npart(partition) result(npart)
498 type(partition_t), intent(in) :: partition
499 npart = partition%npart
500 end function partition_get_npart
501
502 !---------------------------------------------------------
504 pure integer function partition_get_part(partition, local_point) result(part)
505 type(partition_t), intent(in) :: partition
506 integer, intent(in) :: local_point
507 part = partition%part(local_point)
508 end function partition_get_part
509
510 !---------------------------------------------------------
513 pure integer function partition_get_number(partition, global_point) result(part)
514 type(partition_t), intent(in) :: partition
515 integer(int64), intent(in) :: global_point
516
517 if (global_point == 0) then
518 part = partition%partno
519 else
520 part = i8_to_i4((partition%npart*global_point - 1)/partition%np_global + 1)
521 end if
522 end function partition_get_number
523
524 !---------------------------------------------------------
528 subroutine partition_get_local(partition, rbuffer, np_local)
529 type(partition_t), intent(in) :: partition
530 integer(int64), contiguous, intent(inout) :: rbuffer(:)
531 integer, intent(out) :: np_local
532
533 integer :: ip, ipart
534 integer(int64) :: istart
535 integer(int64), allocatable :: sdispls(:), rdispls(:), sbuffer(:)
536 integer, allocatable :: scounts(:), rcounts(:)
537
538 push_sub(partition_get_local)
539
540 safe_allocate(sdispls(1:partition%npart))
541 safe_allocate(scounts(1:partition%npart))
542 safe_allocate(rcounts(1:partition%npart))
543
544 ! Calculate the starting point of the running process
545 istart = partition%istart - 1
546
547 scounts = 0
548 ! Count and store the local points for each partition
549 do ip = 1, partition%np_local
550 ipart = partition%part(ip)
551 scounts(ipart) = scounts(ipart) + 1
552 end do
553
554 ! Create displacements
555 sdispls(1) = 0
556 do ipart = 2, partition%npart
557 sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
558 end do
559
560 ! Allocate and fill the send buffer
561 np_local = sum(scounts)
562 scounts = 0
563 safe_allocate(sbuffer(1:np_local))
564 do ip = 1, np_local
565 ipart = partition%part(ip)
566 scounts(ipart) = scounts(ipart) + 1
567 sbuffer(sdispls(ipart) + scounts(ipart)) = ip + istart
568 end do
569
570 ! Tell each process how many points we will need from it
571 call partition%mpi_grp%alltoall(scounts, 1, mpi_integer, &
572 rcounts, 1, mpi_integer)
573
574 ! Create the displacement vector from the counts vector
575 np_local = sum(rcounts)
576 safe_allocate(rdispls(1:partition%npart))
577 assert(ubound(rbuffer, 1) >= np_local)
578
579 rdispls(1) = 0
580 do ipart = 2, partition%npart
581 rdispls(ipart) = rcounts(ipart-1) + rdispls(ipart-1)
582 end do
583
584 ! Collect the corresponding points to each process
585 call partition%mpi_grp%alltoallv(sbuffer, scounts, sdispls, mpi_integer8, &
586 rbuffer, rcounts, rdispls, mpi_integer8)
587
588 safe_deallocate_a(sdispls)
589 safe_deallocate_a(scounts)
590 safe_deallocate_a(sbuffer)
591 safe_deallocate_a(rcounts)
592 safe_deallocate_a(rdispls)
593
594 pop_sub(partition_get_local)
595 end subroutine partition_get_local
596end module partition_oct_m
598!! Local Variables:
599!! mode: f90
600!! coding: utf-8
601!! End:
subroutine, public io_binary_get_info(fname, np, file_size, ierr)
subroutine, public iwrite_header(fname, np_global, ierr)
Definition: io.F90:114
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
Definition: messages.F90:624
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:420
integer, parameter, public c_mpi_file_read
Definition: mpi_debug.F90:137
subroutine, public mpi_debug_in(comm, index)
Definition: mpi_debug.F90:241
subroutine, public mpi_debug_out(comm, index)
Definition: mpi_debug.F90:265
integer, parameter, public c_mpi_file_write
Definition: mpi_debug.F90:137
logical function mpi_grp_is_root(grp)
Is the current MPI process of grpcomm, root.
Definition: mpi.F90:430
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:266
subroutine, public partition_init(partition, np_global, mpi_grp)
initialize the partition table
Definition: partition.F90:194
pure integer function partition_get_number(partition, global_point)
Returns the partition number for a given global index If the index is zero, return local partition.
Definition: partition.F90:607
pure integer function, public partition_get_npart(partition)
Returns the total number of partitions.
Definition: partition.F90:591
subroutine, public partition_set(partition, part)
Definition: partition.F90:242
subroutine partition_get_partition_number_4(partition, np, points, partno)
Given a list of global indices, return the partition number where those points are stored....
Definition: partition.F90:441
subroutine, public partition_get_np_local(partition, np_local_vec)
Given the partition, returns the corresponding number of local points that each partition has.
Definition: partition.F90:562
pure integer function, public partition_get_part(partition, local_point)
Returns the partition of the local point.
Definition: partition.F90:598
subroutine partition_get_partition_number_8(partition, np, points, partno)
Definition: partition.F90:536
subroutine, public partition_dump(partition, dir, filename, ierr)
write the partition data
Definition: partition.F90:259
subroutine, public partition_get_local(partition, rbuffer, np_local)
Calculates the local vector of all partitions in parallel. Local vector stores the global point indic...
Definition: partition.F90:622
subroutine, public partition_end(partition)
Definition: partition.F90:231
subroutine, public partition_load(partition, dir, filename, ierr)
read the partition data
Definition: partition.F90:304
subroutine, public partition_get_local_size(partition, istart, np_local)
Definition: partition.F90:380
subroutine, public partition_get_global(partition, part_global, root)
Returns the global partition. If root is present, the partition is gathered only in that node....
Definition: partition.F90:396
The partition is an array that contains the mapping between some global index and a process,...
Definition: partition.F90:165