30 use,
intrinsic :: iso_fortran_env
60 logical :: parallel_in_domains
61 type(mpi_grp_t) :: mpi_grp
63 integer :: rs_n_global(1:3)
64 integer :: fs_n_global(1:3)
67 integer :: rs_istart(1:3)
68 integer :: fs_istart(1:3)
69 integer :: center(1:3)
71 real(real64),
allocatable :: Lrs(:,:)
72 real(real64),
allocatable :: Lfs(:,:)
74 integer,
allocatable :: np_local(:)
75 integer,
allocatable :: xlocal(:)
76 integer,
allocatable :: local(:,:)
77 integer,
allocatable :: np_local_fs(:)
78 integer,
allocatable :: xlocal_fs(:)
79 integer,
allocatable :: local_fs(:,:)
82 type(fft_t),
allocatable :: fft
83 logical,
private :: has_cube_mapping = .false.
86 real(real64) :: spacing(3)
88 type(lattice_vectors_t),
allocatable :: latt
90 type(mesh_cube_map_t) :: cube_map
91 logical :: cube_map_present = .false.
100 integer :: start_xyz(1:3)
101 integer :: end_xyz(1:3)
107 subroutine cube_init(cube, nn, namespace, space, spacing, coord_system, fft_type, fft_library, dont_optimize, nn_out, &
108 mpi_grp, need_partition, tp_enlarge, blocksize)
109 type(cube_t),
intent(out) :: cube
110 integer,
intent(in) :: nn(:)
111 type(namespace_t),
intent(in) :: namespace
112 class(space_t),
intent(in) :: space
113 real(real64),
intent(in) :: spacing(:)
114 class(coordinate_system_t),
intent(in) :: coord_system
115 integer,
optional,
intent(in) :: fft_type
116 integer,
optional,
intent(in) :: fft_library
117 logical,
optional,
intent(in) :: dont_optimize
118 integer,
optional,
intent(out) :: nn_out(3)
120 type(mpi_grp_t),
optional,
intent(in) :: mpi_grp
121 logical,
optional,
intent(in) :: need_partition
122 real(real64),
optional,
intent(in) :: tp_enlarge(3)
125 integer,
optional,
intent(in) :: blocksize
128 type(MPI_Comm) :: comm
129 integer :: tmp_n(3), fft_type_, optimize_parity(3), fft_library_, nn3d(3)
130 integer :: effdim_fft, my_n(3), idir, idir2
131 logical :: optimize(3)
132 type(mpi_grp_t) :: mpi_grp_
133 real(real64) :: tp_enlarge_(3), lattice_vectors(3, 3)
134 type(space_t) :: cube_space
139 assert(space%dim <= 3)
141 nn3d(1:space%dim) = nn(1:space%dim)
142 nn3d(space%dim+1:3) = 1
144 cube%spacing(1:space%dim) = spacing(1:space%dim)
145 cube%spacing(space%dim+1:3) = -
m_one
149 if (
present(tp_enlarge)) tp_enlarge_(:)=tp_enlarge(:)
151 effdim_fft = min(3, space%dim)
154 if (
present(mpi_grp)) mpi_grp_ = mpi_grp
158 if (
present(fft_library))
then
159 fft_library_ = fft_library
166 write(
message(1),
'(a)')
'You have selected the PFFT for FFT, but it was not linked.'
177 if (
present(blocksize))
then
178 assert(
present(need_partition).and.need_partition)
185 cube%rs_n_global = nn3d
186 cube%fs_n_global = nn3d
187 cube%fs_n = cube%fs_n_global
191 cube%parallel_in_domains = (mpi_grp_%size > 1)
194 cube%rs_n_global = nn3d
195 cube%fs_n_global = nn3d
196 cube%rs_n = cube%rs_n_global
197 cube%fs_n = cube%fs_n_global
201 if (
present(nn_out)) nn_out(1:3) = nn(1:3)
203 safe_allocate(cube%fft)
206 optimize(1:3) = .false.
207 optimize_parity(1:3) = 0
208 optimize(space%periodic_dim + 1:effdim_fft) = .
true.
209 optimize_parity(space%periodic_dim + 1:effdim_fft) = 1
211 if (
present(dont_optimize))
then
212 if (dont_optimize) optimize = .false.
217 call fft_init(cube%fft, tmp_n, space%dim, fft_type_, fft_library_, optimize, optimize_parity, &
218 comm=comm, mpi_grp = mpi_grp_, use_aligned=.
true.)
219 if (
present(nn_out)) nn_out(1:3) = tmp_n(1:3)
221 call fft_get_dims(cube%fft, cube%rs_n_global, cube%fs_n_global, cube%rs_n, cube%fs_n, &
222 cube%rs_istart, cube%fs_istart)
224 if (
present(tp_enlarge))
then
231 call fft_get_dims(cube%fft, cube%rs_n_global, cube%fs_n_global, cube%rs_n, cube%fs_n, &
232 cube%rs_istart, cube%fs_istart)
237 if (.not.
allocated(cube%Lrs))
then
242 cube%center(1:3) = cube%rs_n_global(1:3)/2 + 1
248 if (
present(need_partition) .and. cube%parallel_in_domains)
then
249 cube%has_cube_mapping = need_partition
251 cube%has_cube_mapping = .false.
253 if (cube%has_cube_mapping)
then
259 select type (coord_system)
265 my_n(1:space%periodic_dim) = cube%rs_n_global(1:space%periodic_dim) + 1
266 my_n(space%periodic_dim + 1:space%dim) = cube%rs_n_global(space%periodic_dim + 1:space%dim)
269 do idir = 1, space%dim
270 do idir2 = 1, space%dim
271 lattice_vectors(idir2, idir) = cube%spacing(idir) * (my_n(idir) - 1) * coord_system%basis%vectors(idir2, idir)
274 do idir = space%dim + 1, 3
275 lattice_vectors(idir, idir) =
m_one
279 cube_space%periodic_dim = space%periodic_dim
280 safe_allocate(cube%latt)
283 message(1) =
"The cube only support affine coordinate systems."
292 type(
cube_t),
intent(inout) :: cube
296 if (
allocated(cube%fft))
then
298 safe_deallocate_a(cube%fft)
301 if (cube%has_cube_mapping)
then
302 safe_deallocate_a(cube%np_local)
303 safe_deallocate_a(cube%xlocal)
304 safe_deallocate_a(cube%local)
306 safe_deallocate_a(cube%np_local_fs)
307 safe_deallocate_a(cube%xlocal_fs)
308 safe_deallocate_a(cube%local_fs)
311 if (cube%cube_map_present)
then
315 safe_deallocate_a(cube%Lrs)
316 safe_deallocate_a(cube%Lfs)
318 safe_deallocate_a(cube%latt)
326 type(
cube_t),
intent(inout) :: cube
327 integer,
intent(in) :: fft_library
330 select case (fft_library)
335 cube%fft%nfft%set_defaults = .
true.
336 cube%fft%nfft%guru = .
true.
338 cube%fft%nfft%sigma = 1.1_real64
342 cube%fft%pnfft%set_defaults = .
true.
344 cube%fft%pnfft%sigma = 1.1_real64
356 type(
cube_t),
intent(inout) :: cube
357 real(real64),
intent(in) :: tp_enlarge(3)
358 real(real64),
intent(in) :: spacing(3)
359 integer,
intent(in) :: fft_library
362 integer :: ii, nn(3), maxn, idim
367 nn(1:3) = cube%fs_n_global(1:3)
370 safe_allocate(cube%Lrs(1:maxn, 1:3))
375 if (tp_enlarge(idim) >
m_one)
then
376 do ii = 2, nn(idim) - 1
377 cube%Lrs(ii, idim) = (ii - int(nn(idim)/2) -1) * spacing(idim)
379 cube%Lrs(1, idim) = (-int(nn(idim)/2)) * spacing(idim) * tp_enlarge(idim)
380 cube%Lrs(nn(idim), idim) = (int(nn(idim)/2)) * spacing(idim) * tp_enlarge(idim)
383 cube%Lrs(ii, idim) = (ii - int(nn(idim)/2) -1) * spacing(idim)
392 safe_allocate(cube%Lfs(1:maxn, 1:3))
396 temp =
m_two *
m_pi / (nn(idim) * spacing(idim))
404 cube%Lfs(ii, idim) = (ii - nn(idim)/2 - 1)*temp/tp_enlarge(idim)
406 cube%Lfs(ii, idim) =
pad_feq(ii,nn(idim), .
true.) * temp
420 type(
cube_t),
intent(in) :: cube
421 integer,
intent(in) :: ixyz(3)
422 integer,
intent(out) :: lxyz(3)
424 lxyz(1) = ixyz(1) - cube%rs_istart(1) + 1
425 lxyz(2) = ixyz(2) - cube%rs_istart(2) + 1
426 lxyz(3) = ixyz(3) - cube%rs_istart(3) + 1
427 is_here = lxyz(1) >= 1 .and. lxyz(1) <= cube%rs_n(1) .and. &
428 lxyz(2) >= 1 .and. lxyz(2) <= cube%rs_n(2) .and. &
429 lxyz(3) >= 1 .and. lxyz(3) <= cube%rs_n(3)
440 type(
cube_t),
intent(in) :: cube
442 if (
allocated(cube%fft))
then
443 fft_library = cube%fft%library
452 type(
cube_t),
intent(inout) :: cube
453 logical,
intent(in) :: fs
455 integer :: tmp_local(6), position, process, ix, iy, iz, index
456 integer,
allocatable :: local_sizes(:)
457 integer(int64) :: number_points
463 tmp_local(1) = cube%rs_istart(1)
464 tmp_local(2) = cube%rs_istart(2)
465 tmp_local(3) = cube%rs_istart(3)
466 tmp_local(4) = cube%rs_n(1)
467 tmp_local(5) = cube%rs_n(2)
468 tmp_local(6) = cube%rs_n(3)
470 if (cube%parallel_in_domains)
then
471 safe_allocate(local_sizes(1:6*cube%mpi_grp%size))
473 call cube%mpi_grp%allgather(tmp_local, 6, mpi_integer, local_sizes, 6, mpi_integer)
476 safe_allocate(local_sizes(1:6))
477 local_sizes = tmp_local
482 safe_allocate(cube%xlocal(1:cube%mpi_grp%size))
483 safe_allocate(cube%np_local(1:cube%mpi_grp%size))
485 number_points = cube%rs_n_global(1) * cube%rs_n_global(2)
486 number_points = number_points * cube%rs_n_global(3)
487 if (number_points >= huge(0))
then
488 message(1) =
"Error: too many points for the normal cube. Please try to use a distributed FFT."
491 safe_allocate(cube%local(1:cube%rs_n_global(1)*cube%rs_n_global(2)*cube%rs_n_global(3), 1:3))
494 do process = 1, cube%mpi_grp%size
495 position = ((process-1)*6)+1
496 if (position == 1)
then
498 cube%np_local(1) = local_sizes(4)*local_sizes(5)*local_sizes(6)
501 cube%xlocal(process) = cube%xlocal(process-1) + cube%np_local(process-1)
502 cube%np_local(process) = local_sizes(position+3)*local_sizes(position+4)*local_sizes(position+5)
507 do iz = local_sizes(position+2), local_sizes(position+2)+local_sizes(position+5)-1
508 do iy = local_sizes(position+1), local_sizes(position+1)+local_sizes(position+4)-1
509 do ix = local_sizes(position), local_sizes(position)+local_sizes(position+3)-1
510 cube%local(index, 1) = ix
511 cube%local(index, 2) = iy
512 cube%local(index, 3) = iz
523 tmp_local(1) = cube%fs_istart(1)
524 tmp_local(2) = cube%fs_istart(2)
525 tmp_local(3) = cube%fs_istart(3)
526 tmp_local(4) = cube%fs_n(1)
527 tmp_local(5) = cube%fs_n(2)
528 tmp_local(6) = cube%fs_n(3)
531 if (cube%parallel_in_domains)
then
533 call cube%mpi_grp%allgather(tmp_local, 6, mpi_integer, local_sizes, 6, mpi_integer)
536 local_sizes = tmp_local
541 safe_allocate(cube%xlocal_fs(1:cube%mpi_grp%size))
542 safe_allocate(cube%np_local_fs(1:cube%mpi_grp%size))
544 number_points = cube%fs_n_global(1) * cube%fs_n_global(2)
545 number_points = number_points * cube%fs_n_global(3)
546 if (number_points >= huge(0))
then
547 message(1) =
"Error: too many points for the normal cube. Please try to use a distributed FFT."
550 safe_allocate(cube%local_fs(1:cube%fs_n_global(1)*cube%fs_n_global(2)*cube%fs_n_global(3), 1:3))
553 do process = 1, cube%mpi_grp%size
554 position = ((process-1)*6)+1
555 if (position == 1)
then
556 cube%xlocal_fs(1) = 1
557 cube%np_local_fs(1) = local_sizes(4)*local_sizes(5)*local_sizes(6)
560 cube%xlocal_fs(process) = cube%xlocal_fs(process-1) + cube%np_local_fs(process-1)
561 cube%np_local_fs(process) = local_sizes(position+3)*local_sizes(position+4)*local_sizes(position+5)
566 do iz = local_sizes(position+2), local_sizes(position+2)+local_sizes(position+5)-1
567 do iy = local_sizes(position+1), local_sizes(position+1)+local_sizes(position+4)-1
568 do ix = local_sizes(position), local_sizes(position)+local_sizes(position+3)-1
569 cube%local_fs(index, 1) = ix
570 cube%local_fs(index, 2) = iy
571 cube%local_fs(index, 3) = iz
584 safe_deallocate_a(local_sizes)
592 integer pure function cube_point_to_process(xyz, part) result(process)
593 integer,
intent(in) :: xyz(1:3)
604 if (all(xyz >= part(proc)%start_xyz) .and. all(xyz <= part(proc)%end_xyz))
then
612 if (.not. found)
then
622 integer,
intent(in) :: rs_n_global(1:3)
623 integer,
intent(in) :: blocksize
624 integer,
intent(in) :: rank
625 integer,
intent(out) :: rs_n(1:3)
626 integer,
intent(out) :: rs_istart(1:3)
628 integer :: imin, imax
633 imin = min(blocksize * rank, rs_n_global(3))
634 imax = min(imin + blocksize, rs_n_global(3))
635 rs_istart(3) = 1 + imin
636 rs_n(3) = imax - imin
641 type(
cube_t),
intent(in) :: cube
644 integer :: tmp_local(6), position, process
645 integer,
allocatable :: local_sizes(:)
650 tmp_local(1) = cube%rs_istart(1)
651 tmp_local(2) = cube%rs_istart(2)
652 tmp_local(3) = cube%rs_istart(3)
653 tmp_local(4) = cube%rs_n(1)
654 tmp_local(5) = cube%rs_n(2)
655 tmp_local(6) = cube%rs_n(3)
657 if (cube%parallel_in_domains)
then
658 safe_allocate(local_sizes(1:6*cube%mpi_grp%size))
659 call cube%mpi_grp%allgather(tmp_local, 6, mpi_integer, local_sizes, 6, mpi_integer)
661 safe_allocate(local_sizes(1:6))
662 local_sizes = tmp_local
665 do process = 1, cube%mpi_grp%size
666 position = ((process-1)*6)+1
668 part(process)%start_xyz(1) = local_sizes(position)
669 part(process)%start_xyz(2) = local_sizes(position+1)
670 part(process)%start_xyz(3) = local_sizes(position+2)
671 part(process)%end_xyz(1) = local_sizes(position)+local_sizes(position+3)-1
672 part(process)%end_xyz(2) = local_sizes(position+1)+local_sizes(position+4)-1
673 part(process)%end_xyz(3) = local_sizes(position+2)+local_sizes(position+5)-1
682 type(
cube_t),
intent(in) :: cube
683 type(namespace_t),
intent(in) :: namespace
685 integer :: nn, ii, jj, kk
689 character(len=3) :: filenum
695 safe_allocate(part(1:cube%mpi_grp%size))
698 if (mpi_grp_is_root(mpi_world))
then
699 call io_mkdir(
'debug/cube_partition', namespace)
700 npart = cube%mpi_grp%size
705 write(filenum,
'(i3.3)') nn
707 iunit = io_open(
'debug/cube_partition/cube_partition.'//filenum, &
708 namespace, action=
'write')
709 do kk = 1, cube%rs_n_global(3)
710 do jj = 1, cube%rs_n_global(2)
711 do ii = 1, cube%rs_n_global(1)
716 write(iunit,
'(3i8)') ii, jj, kk
727 safe_deallocate_a(part)
730 call mpi_world%barrier()
737 type(
cube_t),
intent(inout) :: cube
738 class(mesh_t),
intent(in) :: mesh
742 call mesh_cube_map_init(cube%cube_map, mesh, mesh%np)
743 cube%cube_map_present = .
true.
subroutine cube_set_blocksize(rs_n_global, blocksize, rank, rs_n, rs_istart)
subroutine, public cube_init(cube, nn, namespace, space, spacing, coord_system, fft_type, fft_library, dont_optimize, nn_out, mpi_grp, need_partition, tp_enlarge, blocksize)
subroutine cube_do_mapping(cube, fs)
do the mapping between global and local points of the cube
subroutine, public cube_end(cube)
logical function, public cube_global2local(cube, ixyz, lxyz)
True if global coordinates belong to this process. On output lxyz contains the local coordinates.
subroutine cube_tp_fft_defaults(cube, fft_library)
integer function, public cube_getfftlibrary(cube)
Returns the FFT library of the cube. Possible values are FFTLIB_NONE, FFTLIB_FFTW,...
subroutine cube_init_coords(cube, tp_enlarge, spacing, fft_library)
subroutine, public cube_partition(cube, part)
integer pure function, public cube_point_to_process(xyz, part)
subroutine, public cube_init_cube_map(cube, mesh)
subroutine cube_partition_messages_debug(cube, namespace)
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
integer, parameter, public fft_none
global constants
subroutine, public fft_end(this)
integer, public fft_default_lib
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
integer, parameter, public fftlib_nfft
integer, parameter, public fftlib_none
integer, parameter, public fftlib_pnfft
integer, parameter, public fftlib_pfft
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
real(real64), parameter, public m_two
real(real64), parameter, public m_zero
real(real64), parameter, public m_pi
some mathematical constants
real(real64), parameter, public m_one
subroutine, public mesh_cube_map_end(this)
This module defines the meshes, which are used in Octopus.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
type(mpi_grp_t), public mpi_world
subroutine mpi_grp_init(grp, comm)
Initialize MPI group instance.
integer, parameter, public nfft_pre_psi
The low level module to work with the PFFT library. http:
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
It is intended to be used within a vector.