par_vec.F90
1!! Copyright (C) 2005-2006 Florian Lorenzen, Heiko Appel, J. Alberdi-Rodriguez
2!! Copyright (C) 2021 Sebastian Ohlmann
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
78module par_vec_oct_m
79 use accel_oct_m
80 use debug_oct_m
81 use global_oct_m
82 use iihash_oct_m
83 use index_oct_m
84 use io_oct_m
 85 use messages_oct_m
 86 use mpi_oct_m
 87 use namespace_oct_m
 89 use parser_oct_m
 90 use partition_oct_m
 92 use sort_oct_m
 93 use space_oct_m
 94 use stencil_oct_m
 95 use types_oct_m
96 use utils_oct_m
97
98 implicit none
99
100 private
101
102 public :: &
103 par_vec_t, &
104 par_vec_init, &
105 par_vec_end, &
 106 par_vec_scatter, &
 107 par_vec_gather, &
 108 par_vec_allgather, &
 109 par_vec_global2local, &
 110 par_vec_local2global
 111
 116 !> Parallel information.
117 type par_vec_t
118 ! Components are public by default
119
120 ! The content of these members is process-dependent.
121 integer :: rank
122 integer :: partno
123 integer, allocatable :: ghost_sendpos(:)
124
125 integer, allocatable :: ghost_rdispls(:)
126 integer, allocatable :: ghost_sdispls(:)
127 integer, allocatable :: ghost_rcounts(:)
128 integer, allocatable :: ghost_scounts(:)
129 integer :: ghost_scount
130 integer, allocatable :: ghost_sendmap(:)
131 integer, allocatable :: ghost_recvmap(:)
132 type(accel_mem_t) :: buff_sendmap
133 type(accel_mem_t) :: buff_recvmap
134
 135 ! The following members are set independently of the process.
136 type(mpi_grp_t) :: mpi_grp
137 integer :: npart
138 integer(int64) :: np_global
139
140 integer, allocatable :: np_local_vec(:)
142 integer :: np_local
144 integer(int64), allocatable, private :: xlocal_vec(:)
148 integer(int64) :: xlocal
150
151 integer(int64), allocatable, private :: local(:)
153 integer, allocatable :: recv_count(:)
154 integer, allocatable :: send_count(:)
156 integer, allocatable :: recv_disp(:)
157 integer, allocatable :: send_disp(:)
158 integer(int64), allocatable:: sendmap(:)
159 integer(int64), allocatable:: recvmap(:)
161 integer :: np_bndry
162
163 integer(int64), allocatable, private :: bndry(:)
164
165 type(lihash_t), private :: global
166
167 integer :: np_ghost
168 integer(int64), allocatable, private :: ghost(:)
169 end type par_vec_t
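 ! Note on local indexing (this is the layout implied by par_vec_local2global
 ! below): on every process the local indices cover the local points first
 ! (1 .. np_local), then the ghost points (np_local + 1 .. np_local + np_ghost),
 ! and finally the boundary points (the next np_bndry indices).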
170
 171 interface par_vec_scatter
 172 module procedure dpar_vec_scatter
173 module procedure zpar_vec_scatter
174 module procedure ipar_vec_scatter
175 end interface par_vec_scatter
176
177 interface par_vec_gather
178 module procedure dpar_vec_gather
179 module procedure zpar_vec_gather
180 module procedure ipar_vec_gather
181 end interface par_vec_gather
182
183 interface par_vec_allgather
184 module procedure dpar_vec_allgather
185 module procedure zpar_vec_allgather
186 module procedure ipar_vec_allgather
187 end interface par_vec_allgather
188
189contains
190
 199 !> Initializes a par_vec_type object (parallel vector).
200 subroutine par_vec_init(mpi_grp, np_global, idx, stencil, space, partition, pv, namespace)
201 type(mpi_grp_t), intent(in) :: mpi_grp
202 ! The next seven entries come from the mesh.
203 integer(int64), intent(in) :: np_global
204 type(index_t), intent(in) :: idx
205 type(stencil_t), intent(in) :: stencil
206 class(space_t), intent(in) :: space
207 type(partition_t), intent(in) :: partition
208 type(par_vec_t), intent(inout) :: pv
209 type(namespace_t), intent(in) :: namespace
211 ! Careful: MPI counts process ranks from 0 to numproc-1.
212 ! Partition numbers from METIS range from 1 to numproc.
213 ! For this reason, all ranks are incremented by one.
214 integer :: npart
215 integer(int64) :: gip, index
216 integer :: ip, jp, jj, inode
217 integer :: p1(space%dim)
218 type(lihash_t) :: boundary
219 type(lihash_t) :: ghost
220 integer(int64), allocatable :: boundary_inv(:), ghost_inv(:)
221 integer :: iunit
222 character(len=6) :: filenum
223 logical :: found
225 integer :: idir, ipart, itmp
226 integer(int64) :: tmp
227 integer, allocatable :: points(:), part_ghost(:), part_ghost_tmp(:)
228 integer(int64), allocatable :: indices(:), ghost_tmp(:)
230 push_sub(par_vec_init)
232 ! Shortcuts.
233 npart = mpi_grp%size
234
235 ! Store partition number and rank for later reference.
236 ! Having both variables is a bit redundant but makes the code readable.
237 pv%rank = mpi_grp%rank
238 pv%partno = pv%rank + 1
239
240 call mpi_grp_copy(pv%mpi_grp, mpi_grp)
241 pv%np_global = np_global
242 pv%npart = npart
243
245 safe_allocate(pv%np_local_vec(1:npart))
246 safe_allocate(pv%xlocal_vec(1:npart))
247 safe_allocate(pv%ghost_rcounts(1:npart))
248 safe_allocate(pv%ghost_scounts(1:npart))
250 ! Count number of points for each process.
251 ! Local points.
252 call partition_get_np_local(partition, pv%np_local_vec)
253 pv%np_local = pv%np_local_vec(pv%partno)
255
256 ! Set up local-to-global index table for local points (xlocal_vec, local)
257 pv%xlocal_vec(1) = 1
258 ! Set the starting point of local and boundary points
259 do inode = 2, npart
260 pv%xlocal_vec(inode) = pv%xlocal_vec(inode - 1) + pv%np_local_vec(inode - 1)
261 end do
262 pv%xlocal = pv%xlocal_vec(pv%partno)
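 ! For example, np_local_vec = (/4, 3, 5/) gives xlocal_vec = (/1, 5, 8/):
 ! partition 2 owns entries 5 to 7 of the global list of points.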
263
264 safe_allocate(pv%local(pv%xlocal:pv%xlocal + pv%np_local - 1))
265 ! Calculate the local vector in parallel
266 call partition_get_local(partition, pv%local(pv%xlocal:), itmp)
267
268 ! Create hash table.
269 call lihash_init(pv%global)
270 ! Insert local points.
271 do ip = 1, pv%np_local
272 call lihash_insert(pv%global, pv%local(pv%xlocal + ip - 1), ip)
273 end do
274
 275 ! reorder points if needed
 276 call reorder_points()
 277
278 call lihash_init(boundary)
279 call lihash_init(ghost)
280 safe_allocate(boundary_inv(1:pv%np_local))
281 safe_allocate(ghost_inv(1:pv%np_local))
282 pv%np_ghost = 0
283 pv%np_bndry = 0
284 do gip = pv%xlocal, pv%xlocal + pv%np_local - 1
285 ! Get coordinates of current point.
286 call index_to_coords(idx, pv%local(gip), p1)
287 ! For all points in stencil.
288 do jj = 1, stencil%size
289 ! Get point number of possible ghost point.
290 index = index_from_coords(idx, p1(:) + stencil%points(:, jj))
291 assert(index /= 0)
292 ! check if this point is a local point
293 tmp = lihash_lookup(pv%global, index, found)
294 if (found) cycle
295 ! now check if the point is a potential ghost or boundary point
296 if (index > np_global) then
297 tmp = lihash_lookup(boundary, index, found)
298 if (found) cycle
299 pv%np_bndry = pv%np_bndry + 1
300 call lihash_insert(boundary, index, pv%np_bndry)
301 if (pv%np_bndry >= ubound(boundary_inv, 1)) call make_array_larger(boundary_inv, pv%np_bndry*2)
302 boundary_inv(pv%np_bndry) = index
303 else
304 tmp = lihash_lookup(ghost, index, found)
305 if (found) cycle
306 pv%np_ghost = pv%np_ghost + 1
307 call lihash_insert(ghost, index, pv%np_ghost)
308 if (pv%np_ghost >= ubound(ghost_inv, 1)) call make_array_larger(ghost_inv, pv%np_ghost*2)
309 ghost_inv(pv%np_ghost) = index
310 end if
311 end do
312 end do
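 ! Every stencil neighbour of a local point is now classified: indices larger
 ! than np_global are boundary points (they lie outside the simulation box),
 ! all other non-local neighbours are ghost points owned by another partition.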
313
314 safe_allocate(pv%bndry(1:pv%np_bndry))
315 do ip = 1, pv%np_bndry
316 pv%bndry(ip) = boundary_inv(ip)
317 end do
318
 319 ! first get the temporary array of ghost points; it will be reordered by partition later
320 safe_allocate(ghost_tmp(1:pv%np_ghost))
321 do ip = 1, pv%np_ghost
322 ghost_tmp(ip) = ghost_inv(ip)
323 end do
324 call lihash_end(ghost)
325 call lihash_end(boundary)
326 safe_deallocate_a(ghost_inv)
327 safe_deallocate_a(boundary_inv)
328
329 safe_allocate(part_ghost_tmp(1:pv%np_ghost))
330 call partition_get_partition_number(partition, pv%np_ghost, &
331 ghost_tmp, part_ghost_tmp)
332
333 ! determine parallel distribution (counts, displacements)
334 pv%ghost_rcounts(:) = 0
335 do ip = 1, pv%np_ghost
336 ipart = part_ghost_tmp(ip)
337 pv%ghost_rcounts(ipart) = pv%ghost_rcounts(ipart)+1
338 end do
339 assert(sum(pv%ghost_rcounts) == pv%np_ghost)
340
341 safe_allocate(pv%ghost_rdispls(1:pv%npart))
342 pv%ghost_rdispls(1) = 0
343 do ipart = 2, pv%npart
344 pv%ghost_rdispls(ipart) = pv%ghost_rdispls(ipart - 1) + pv%ghost_rcounts(ipart - 1)
345 end do
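 ! Example: ghost_rcounts = (/2, 0, 3/) yields ghost_rdispls = (/0, 2, 2/),
 ! i.e. the ghost points received from partition 3 start at offset 2 of the
 ! receive buffer.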
346
347 ! reorder points by partition
348 safe_allocate(pv%ghost(1:pv%np_ghost))
349 safe_allocate(part_ghost(1:pv%np_ghost))
350 safe_allocate(points(1:pv%npart))
351 points = 0
352 do ip = 1, pv%np_ghost
353 ipart = part_ghost_tmp(ip)
354 points(ipart) = points(ipart)+1
355 ! jp is the new index, sorted according to partitions
356 jp = pv%ghost_rdispls(ipart) + points(ipart)
357 pv%ghost(jp) = ghost_tmp(ip)
358 part_ghost(jp) = part_ghost_tmp(ip)
359 end do
360 safe_deallocate_a(points)
361 safe_deallocate_a(ghost_tmp)
362 safe_deallocate_a(part_ghost_tmp)
363
364 call pv%mpi_grp%alltoall(pv%ghost_rcounts, 1, mpi_integer, &
365 pv%ghost_scounts, 1, mpi_integer)
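 ! After this alltoall, ghost_scounts(jpart) is the number of points this
 ! process has to send to partition jpart during a ghost update (the mirror
 ! image of ghost_rcounts on the other processes).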
366
367 safe_allocate(pv%ghost_sdispls(1:pv%npart))
368
369 pv%ghost_sdispls(1) = 0
370 do ipart = 2, pv%npart
371 pv%ghost_sdispls(ipart) = pv%ghost_sdispls(ipart - 1) + pv%ghost_scounts(ipart - 1)
372 end do
373 pv%ghost_scount = sum(pv%ghost_scounts)
374
375 safe_allocate(pv%ghost_recvmap(1:pv%np_ghost))
376 safe_allocate(points(1:pv%npart))
377 points = 0
378 do ip = 1, pv%np_ghost
379 ipart = part_ghost(ip)
380 points(ipart) = points(ipart) + 1
381 pv%ghost_recvmap(ip) = pv%ghost_rdispls(ipart) + points(ipart)
382 end do
383 safe_deallocate_a(points)
384
385 safe_allocate(indices(1:pv%np_ghost))
386 do ip = 1, pv%np_ghost
387 indices(pv%ghost_recvmap(ip)) = pv%ghost(ip)
388 end do
389 safe_allocate(pv%ghost_sendmap(1:pv%ghost_scount))
390 safe_allocate(ghost_tmp(1:pv%ghost_scount))
391 call pv%mpi_grp%alltoallv(indices, pv%ghost_rcounts, pv%ghost_rdispls, mpi_integer8, &
392 ghost_tmp, pv%ghost_scounts, pv%ghost_sdispls, mpi_integer8)
393 do ip = 1, pv%ghost_scount
394 ! get local index
395 jp = lihash_lookup(pv%global, ghost_tmp(ip), found)
396 assert(found)
397 pv%ghost_sendmap(ip) = jp
398 end do
399 safe_deallocate_a(indices)
400 safe_deallocate_a(ghost_tmp)
401
402 if (accel_is_enabled()) then
403 ! copy maps to GPU
404 call accel_create_buffer(pv%buff_sendmap, accel_mem_read_only, type_integer, pv%ghost_scount)
405 call accel_write_buffer(pv%buff_sendmap, pv%ghost_scount, pv%ghost_sendmap)
406
407 call accel_create_buffer(pv%buff_recvmap, accel_mem_read_only, type_integer, pv%np_ghost)
408 call accel_write_buffer(pv%buff_recvmap, pv%np_ghost, pv%ghost_recvmap)
409 end if
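 ! Sketch of how these maps can be used for a ghost update (illustrative only;
 ! the actual exchange routines are generated from par_vec_inc.F90, and
 ! sendbuf, recvbuf, v_local are placeholder names):
 ! do ip = 1, pv%ghost_scount
 !   sendbuf(ip) = v_local(pv%ghost_sendmap(ip))
 ! end do
 ! call pv%mpi_grp%alltoallv(sendbuf, pv%ghost_scounts, pv%ghost_sdispls, ..., &
 !   recvbuf, pv%ghost_rcounts, pv%ghost_rdispls, ...)
 ! do ip = 1, pv%np_ghost
 !   v_local(pv%np_local + ip) = recvbuf(pv%ghost_recvmap(ip))
 ! end do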
410
411 if (debug%info) then
 412 ! Write numbers and coordinates of each process's ghost points
413 ! to a single file (like in mesh_partition_init) called
414 ! debug/mesh_partition/ghost_points.###.
415 call io_mkdir('debug/mesh_partition', namespace, parents=.true.)
416
417 write(filenum, '(i6.6)') pv%partno
418 iunit = io_open('debug/mesh_partition/ghost_points.'//filenum, namespace, action='write')
419 do ip = 1, pv%np_ghost
420 call index_to_coords(idx, pv%ghost(ip), p1)
421 write(iunit, '(99i8)') pv%ghost(ip), (p1(idir), idir = 1, space%dim)
422 end do
423
424 call io_close(iunit)
425 end if
426
427 call init_mpi_alltoall()
428
429 ! Insert ghost points.
430 do ip = 1, pv%np_ghost
431 call lihash_insert(pv%global, pv%ghost(ip), ip + pv%np_local)
432 end do
433 ! Insert boundary points.
434 do ip = 1, pv%np_bndry
435 call lihash_insert(pv%global, pv%bndry(ip), ip + pv%np_local + pv%np_ghost)
436 end do
437
438 safe_deallocate_a(part_ghost)
439
440 pop_sub(par_vec_init)
441
442 contains
443 subroutine reorder_points()
444 integer :: ip, nn, direction
445 integer(int64) :: ipg
446 integer :: bsize(space%dim), order, point(space%dim), number_of_blocks(space%dim)
447 integer(int64), allocatable :: reorder_indices(:)
448 integer(int64), allocatable :: reordered(:)
449 integer, allocatable :: global_indices(:)
450 type(block_t) :: blk
451 logical :: increase_with_dimension
452 integer, parameter :: &
453 order_blocks = 1, &
454 order_cube = 3, &
455 order_global = 4
456
458
459 !%Variable MeshLocalOrder
460 !%Default blocks
461 !%Type integer
462 !%Section Execution::Optimization
463 !%Description
464 !% This variable controls how the grid points are mapped to a
465 !% linear array. This influences the performance of the code.
466 !%Option order_blocks 1
467 !% The grid is mapped using small parallelepipedic grids. The size
468 !% of the blocks is controlled by <tt>MeshBlockSize</tt>.
469 !%Option order_cube 3
470 !% The grid is mapped using a full cube, i.e. without blocking.
471 !%Option order_global 4
472 !% Use the ordering from the global mesh
473 !%End
474 call parse_variable(namespace, 'MeshLocalOrder', order_blocks, order)
475 ! no reordering in 1D necessary
476 if (space%dim == 1) then
477 order = order_global
478 end if
479
480 !%Variable MeshLocalBlockDirection
481 !%Type integer
482 !%Section Execution::Optimization
483 !%Description
484 !% Determines the direction in which the dimensions are chosen to compute
485 !% the blocked index for sorting the mesh points (see MeshLocalBlockSize).
 486 !% The default is increase_with_dimension, corresponding to xyz ordering
487 !% in 3D.
488 !%Option increase_with_dimension 1
489 !% The fastest changing index is in the first dimension, i.e., in 3D this
490 !% corresponds to ordering in xyz directions.
491 !%Option decrease_with_dimension 2
492 !% The fastest changing index is in the last dimension, i.e., in 3D this
493 !% corresponds to ordering in zyx directions.
494 !%End
495 call parse_variable(namespace, 'MeshLocalBlockDirection', 1, direction)
496 increase_with_dimension = direction == 1
497 if (direction /= 1 .and. direction /= 2) then
498 call messages_input_error(namespace, 'MeshLocalBlockDirection')
499 end if
500
501 select case (order)
502 case (order_global)
503 ! nothing to do, points are ordered along the global distribution by default
504 case (order_blocks, order_cube)
505 if (order == order_cube) then
506 bsize(1:space%dim) = idx%nr(2, 1:space%dim) - idx%nr(1, 1:space%dim) + 1
507 else
508 !%Variable MeshLocalBlockSize
509 !%Type block
510 !%Section Execution::Optimization
511 !%Description
512 !% To improve memory-access locality when calculating derivatives,
513 !% <tt>Octopus</tt> arranges mesh points in blocks. This variable
 514 !% controls the size of these blocks in the different
515 !% directions. The default is selected according to the value of
516 !% the <tt>StatesBlockSize</tt> variable. (This variable only affects the
517 !% performance of <tt>Octopus</tt> and not the results.)
518 !%End
519 if (conf%target_states_block_size < 16) then
520 bsize(1) = 80 * 4 / abs(conf%target_states_block_size)
521 if (space%dim > 1) bsize(2) = 4
522 if (space%dim > 2) bsize(3:) = 10
523 else
524 bsize(1) = max(4 * 16 / abs(conf%target_states_block_size), 1)
525 if (space%dim > 1) bsize(2) = 15
526 if (space%dim > 2) bsize(3:) = 15
527 end if
528
529 if (parse_block(namespace, 'MeshLocalBlockSize', blk) == 0) then
530 nn = parse_block_cols(blk, 0)
531 if (nn /= space%dim) then
 532 message(1) = "Error: number of entries in MeshLocalBlockSize must match the number of dimensions."
533 call messages_fatal(1, namespace=namespace)
534 end if
535 do idir = 1, nn
536 call parse_block_integer(blk, 0, idir - 1, bsize(idir))
537 end do
538 end if
539 end if
540
541 number_of_blocks(1:space%dim) = idx%ll(1:space%dim)/bsize(1:space%dim) + 1
542
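 ! Illustration: with bsize = (/4, 4, 4/), points belonging to the same 4x4x4
 ! block end up with consecutive local indices before the next block starts,
 ! which improves cache locality for the stencil operations.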
543 ! compute new indices locally
544 safe_allocate(reorder_indices(1:pv%np_local))
545 safe_allocate(global_indices(1:pv%np_local))
546 do ip = 1, pv%np_local
547 ipg = pv%local(ip + pv%xlocal - 1)
548 call index_spatial_to_point(idx, space%dim, idx%grid_to_spatial_global(ipg), point)
549 point = point + idx%offset + idx%enlarge
550 reorder_indices(ip) = get_blocked_index(space%dim, point, bsize, number_of_blocks, increase_with_dimension)
551 end do
552 ! sort the local array
553 call sort(reorder_indices, global_indices)
554 safe_deallocate_a(reorder_indices)
555
556 ! reorder according to new order
557 safe_allocate(reordered(1:pv%np_local))
558 do ip = 1, pv%np_local
559 reordered(ip) = pv%local(global_indices(ip) + pv%xlocal - 1)
560 end do
561 safe_deallocate_a(global_indices)
562
563 ! assign the reordered points
564 do ip = 1, pv%np_local
565 pv%local(ip + pv%xlocal - 1) = reordered(ip)
566 end do
567 safe_deallocate_a(reordered)
568
569 ! Recreate hash table.
570 call lihash_end(pv%global)
571 call lihash_init(pv%global)
572 ! Insert local points.
573 do ip = 1, pv%np_local
574 call lihash_insert(pv%global, pv%local(pv%xlocal + ip - 1), ip)
575 end do
576 end select
577
579 end subroutine reorder_points
580
581 subroutine init_mpi_alltoall()
582 integer, allocatable :: part_local(:)
583
585
586 safe_allocate(part_local(1:pv%np_local))
587 safe_allocate(indices(1:pv%np_local))
588 do ip = 1, pv%np_local
589 indices(ip) = pv%xlocal + ip - 1
590 end do
591 call partition_get_partition_number(partition, pv%np_local, &
592 indices, part_local)
593 safe_deallocate_a(indices)
594
595 safe_allocate(pv%send_count(1:npart))
596 safe_allocate(pv%recv_count(1:npart))
597 safe_allocate(pv%send_disp(1:npart))
598 safe_allocate(pv%recv_disp(1:npart))
599
600 pv%send_count = 0
601 do ip = 1, pv%np_local
602 ipart = part_local(ip)
603 pv%send_count(ipart) = pv%send_count(ipart) + 1
604 end do
605 pv%send_disp(1) = 0
606 do ipart = 2, npart
607 pv%send_disp(ipart) = pv%send_disp(ipart - 1) + pv%send_count(ipart - 1)
608 end do
609
610 call pv%mpi_grp%alltoall(pv%send_count, 1, mpi_integer, &
611 pv%recv_count, 1, mpi_integer)
612
613 pv%recv_disp(1) = 0
614 do ipart = 2, npart
615 pv%recv_disp(ipart) = pv%recv_disp(ipart - 1) + pv%recv_count(ipart - 1)
616 end do
617
618 ! create maps
619 safe_allocate(pv%sendmap(1:pv%np_local))
620 safe_allocate(points(1:pv%npart))
621 points = 0
622 do ip = 1, pv%np_local
623 ipart = part_local(ip)
624 points(ipart) = points(ipart) + 1
625 pv%sendmap(ip) = pv%send_disp(ipart) + points(ipart)
626 end do
627 safe_deallocate_a(points)
628
629 safe_allocate(indices(1:pv%np_local))
630 do ip = 1, pv%np_local
631 indices(pv%sendmap(ip)) = pv%xlocal + ip - 1
632 end do
633 safe_allocate(pv%recvmap(1:sum(pv%recv_count)))
634 call pv%mpi_grp%alltoallv(indices, pv%send_count, pv%send_disp, mpi_integer8, &
635 pv%recvmap, pv%recv_count, pv%recv_disp, mpi_integer8)
636 do ip = 1, sum(pv%recv_count)
637 ! get local index
638 index = lihash_lookup(pv%global, pv%recvmap(ip), found)
639 assert(found)
640 pv%recvmap(ip) = index
641 end do
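 ! After this loop, sendmap(ip) is the position in the packed send buffer of
 ! global point xlocal + ip - 1 (the ip-th point of this process's block of the
 ! natural global ordering), and recvmap(jp) is the local index under which the
 ! jp-th point received in that redistribution is stored.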
642 safe_deallocate_a(indices)
643
645 end subroutine init_mpi_alltoall
646 end subroutine par_vec_init
647
 648 ! ---------------------------------------------------------
 649 !> Deallocate memory used by pv.
 650 subroutine par_vec_end(pv)
651 type(par_vec_t), intent(inout) :: pv
652
653 push_sub(par_vec_end)
654
655 safe_deallocate_a(pv%ghost_rdispls)
656 safe_deallocate_a(pv%ghost_sdispls)
657 safe_deallocate_a(pv%ghost_rcounts)
658 safe_deallocate_a(pv%ghost_scounts)
659 safe_deallocate_a(pv%ghost_sendpos)
660 safe_deallocate_a(pv%ghost_recvmap)
661 safe_deallocate_a(pv%ghost_sendmap)
662 safe_deallocate_a(pv%send_disp)
663 safe_deallocate_a(pv%recv_disp)
664 safe_deallocate_a(pv%np_local_vec)
665 safe_deallocate_a(pv%xlocal_vec)
666 safe_deallocate_a(pv%local)
667 safe_deallocate_a(pv%send_count)
668 safe_deallocate_a(pv%recv_count)
669 safe_deallocate_a(pv%sendmap)
670 safe_deallocate_a(pv%recvmap)
671 safe_deallocate_a(pv%bndry)
672 safe_deallocate_a(pv%ghost)
673
674 call lihash_end(pv%global)
675
676 if (accel_is_enabled()) then
677 call accel_release_buffer(pv%buff_recvmap)
678 call accel_release_buffer(pv%buff_sendmap)
679 end if
680
681 pop_sub(par_vec_end)
682 end subroutine par_vec_end
683
684
 685 ! ---------------------------------------------------------
 686 !> Returns the local number of global point ipg on this process.
 687 !! If the result is zero, the point is not available on this process.
 688 integer function par_vec_global2local(pv, ipg) result(ip)
689 type(par_vec_t), intent(in) :: pv
690 integer(int64), intent(in) :: ipg
691
692 integer :: nn
693 logical :: found
694
695! no push_sub because called too frequently
696
697 if (pv%npart > 1) then
698 ip = 0
699 nn = lihash_lookup(pv%global, ipg, found)
700 if (found) ip = nn
701 else
702 assert(ipg < huge(ip))
703 ip = int(ipg, int32)
704 end if
705 end function par_vec_global2local
706
 707 ! ---------------------------------------------------------
 708 !> Returns the global index of local point ip.
 709 integer(int64) function par_vec_local2global(pv, ip) result(ipg)
710 type(par_vec_t), intent(in) :: pv
711 integer, intent(in) :: ip
712
713! no push_sub because called too frequently
714
715 if (pv%npart == 1) then
716 ipg = ip
717 else
718 if (ip <= pv%np_local) then
719 ipg = pv%local(pv%xlocal + ip - 1)
720 else if (ip <= pv%np_local + pv%np_ghost) then
721 ipg = pv%ghost(ip - pv%np_local)
722 else if (ip <= pv%np_local + pv%np_ghost + pv%np_bndry) then
723 ipg = pv%bndry(ip - pv%np_local - pv%np_ghost)
724 else
725 ipg = 0_int64
726 end if
727 end if
728 end function par_vec_local2global
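 ! Illustration: the two mappings are inverses for every point known on this
 ! process, i.e. the following (sketch) holds:
 ! do ip = 1, pv%np_local + pv%np_ghost + pv%np_bndry
 !   ipg = par_vec_local2global(pv, ip)
 !   ! par_vec_global2local(pv, ipg) returns ip again
 ! end do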
729
730 ! gather all local arrays into a global one on rank root
 731 ! this gives the mapping from the index within each partition to the global mesh index
732 subroutine gather_local_vec(pv, root, local_vec)
733 type(par_vec_t), intent(in) :: pv
734 integer, intent(in) :: root
735 integer(int64), allocatable, intent(inout) :: local_vec(:)
736
737 integer(int64), allocatable :: xlocal_tmp(:)
738
739 push_sub(gather_local_vec)
740
741 safe_allocate(xlocal_tmp(1:pv%npart))
742 xlocal_tmp = pv%xlocal_vec - 1
743 ! Gather all the local vectors in a unique big one
744 call pv%mpi_grp%gatherv(pv%local(pv%xlocal:), pv%np_local, mpi_integer8, &
745 local_vec, pv%np_local_vec, xlocal_tmp, mpi_integer8, root)
746 safe_deallocate_a(xlocal_tmp)
747
748 pop_sub(gather_local_vec)
749 end subroutine gather_local_vec
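 ! Illustration: on the root rank, local_vec needs room for np_global entries;
 ! after the call, local_vec(xlocal_vec(p) : xlocal_vec(p) + np_local_vec(p) - 1)
 ! holds the global indices owned by partition p.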
750
751#include "undef.F90"
752#include "complex.F90"
753#include "par_vec_inc.F90"
754
755#include "undef.F90"
756#include "real.F90"
757#include "par_vec_inc.F90"
758
759#include "undef.F90"
760#include "integer.F90"
761#include "par_vec_inc.F90"
762
763end module par_vec_oct_m
764
765!! Local Variables:
766!! mode: f90
767!! coding: utf-8
768!! End: