23 use,
intrinsic :: iso_fortran_env
51 integer,
parameter :: SERIAL = 1
52 integer,
parameter :: WORLD = 2
53 integer,
parameter :: DOMAIN = 3
54 integer,
parameter :: N_CNF = 3
60 real(real64),
allocatable :: kernel(:, :, :)
61 integer :: nfft1, nfft2, nfft3
62 type(mpi_grp_t) :: mpi_grp
68 type(MPI_Comm) :: all_nodes_comm
69 type(isf_cnf_t) :: cnf(1:N_CNF)
72 integer,
parameter :: order_scaling_function = 8
78 subroutine poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world)
79 type(poisson_isf_t),
intent(out) :: this
80 type(namespace_t),
target,
intent(in) :: namespace
81 type(mesh_t),
intent(in) :: mesh
82 type(cube_t),
intent(inout) :: cube
83 type(MPI_Comm),
intent(in) :: all_nodes_comm
84 logical,
optional,
intent(in) :: init_world
88 integer :: m1, m2, m3, md1, md2, md3
90 logical :: init_world_
91 integer :: default_nodes
93 integer,
allocatable :: ranks(:)
99 type(MPI_Group) :: world_grp, poisson_grp
105 if (
present(init_world)) init_world_ = init_world
107 if (.not. mesh%parallel_in_domains)
then
112 n1 = this%cnf(
serial)%nfft1/2 + 1
113 n2 = this%cnf(
serial)%nfft2/2 + 1
114 n3 = this%cnf(
serial)%nfft3/2 + 1
116 safe_allocate(this%cnf(
serial)%kernel(1:n1, 1:n2, 1:n3))
118 call build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
120 real(cube%spacing(1), real64), order_scaling_function, this%cnf(SERIAL)%kernel)
123#if !defined(HAVE_MPI)
132 this%cnf(
domain)%mpi_grp = mesh%mpi_grp
146 call parse_variable(namespace,
'PoissonSolverNodes', default_nodes, nodes)
148 this%all_nodes_comm = all_nodes_comm
151 call mpi_comm_size(all_nodes_comm, world_size, mpi_err)
154 if (nodes <= 0 .or. nodes > world_size) nodes = world_size
155 this%cnf(
world)%all_nodes = (nodes == mpi_world%size)
157 safe_allocate(ranks(1:nodes))
166 call mpi_comm_group(all_nodes_comm, world_grp, ierr)
167 call mpi_group_incl(world_grp, nodes, ranks, poisson_grp, ierr)
168 call mpi_comm_create(mpi_world%comm, poisson_grp, this%cnf(
world)%mpi_grp%comm, ierr)
171 safe_deallocate_a(ranks)
175 if (this%cnf(
world)%mpi_grp%comm /= mpi_comm_null)
then
176 call mpi_comm_rank(this%cnf(
world)%mpi_grp%comm, this%cnf(
world)%mpi_grp%rank, ierr)
177 call mpi_comm_size(this%cnf(
world)%mpi_grp%comm, this%cnf(
world)%mpi_grp%size, ierr)
179 this%cnf(
world)%mpi_grp%rank = -1
180 this%cnf(
world)%mpi_grp%size = -1
188 if ((i_cnf ==
world .and. .not. init_world_) &
189 .or. (i_cnf ==
domain .and. .not. mesh%parallel_in_domains) &
193 if (this%cnf(i_cnf)%mpi_grp%rank /= -1 .or. i_cnf /=
world)
then
195 m1, m2, m3, n1, n2, n3, md1, md2, md3, this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, &
196 this%cnf(i_cnf)%nfft3, this%cnf(i_cnf)%mpi_grp%size)
199 n(1) = this%cnf(i_cnf)%nfft1
200 n(2) = this%cnf(i_cnf)%nfft2
201 n(3) = this%cnf(i_cnf)%nfft3
203 safe_allocate(this%cnf(i_cnf)%kernel(1:n(1), 1:n(2), 1:n(3)/this%cnf(i_cnf)%mpi_grp%size))
205 call par_build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), n1, n2, n3, &
206 this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, this%cnf(i_cnf)%nfft3, &
207 cube%spacing(1), order_scaling_function, &
208 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm, &
209 this%cnf(i_cnf)%kernel)
221 type(mesh_t),
intent(in) :: mesh
222 type(cube_t),
intent(in) :: cube
223 real(real64),
contiguous,
intent(out) :: pot(:)
224 real(real64),
contiguous,
intent(in) :: rho(:)
225 logical,
intent(in) :: all_nodes
226 type(submesh_t),
optional,
intent(in) :: sm
228 integer :: i_cnf, nn(1:3)
229 type(cube_function_t) :: rho_cf
231 integer(int64) :: number_points
236 call dcube_function_alloc_rs(cube, rho_cf)
238 if (
present(sm))
then
239 call dsubmesh_to_cube(sm, rho, cube, rho_cf)
241 call dmesh_to_cube(mesh, rho, cube, rho_cf)
250 else if (mesh%parallel_in_domains)
then
255#if !defined(HAVE_MPI)
261 nn(1) = this%cnf(
serial)%nfft1
262 nn(2) = this%cnf(
serial)%nfft2
263 nn(3) = this%cnf(
serial)%nfft3
264 call psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
265 nn(1), nn(2), nn(3), &
266 real(mesh%spacing(1), real64), this%cnf(
serial)%kernel, rho_cf%drs)
269 nn(1) = this%cnf(i_cnf)%nfft1
270 nn(2) = this%cnf(i_cnf)%nfft2
271 nn(3) = this%cnf(i_cnf)%nfft3
272 if (this%cnf(i_cnf)%mpi_grp%size /= -1 .or. i_cnf /=
world)
then
273 call par_psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
274 nn(1), nn(2), nn(3), &
275 real(mesh%spacing(1), real64), this%cnf(i_cnf)%kernel, rho_cf%drs, &
276 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm)
280 if (i_cnf ==
world .and. .not. this%cnf(
world)%all_nodes)
then
283 number_points = cube%rs_n_global(1) * cube%rs_n_global(2)
284 number_points = number_points * cube%rs_n_global(3)
285 if (number_points >= huge(0))
then
286 message(1) =
"Error: too many points for the normal cube. Please try to use a distributed FFT."
287 call messages_fatal(1)
289 call mpi_bcast(rho_cf%drs(1, 1, 1), cube%rs_n_global(1)*cube%rs_n_global(2)*cube%rs_n_global(3), &
290 mpi_double_precision, 0, this%all_nodes_comm, mpi_err)
295 if (
present(sm))
then
296 call dcube_to_submesh(cube, rho_cf, sm, pot)
298 call dcube_to_mesh(cube, rho_cf, mesh, pot)
301 call dcube_function_free_rs(cube, rho_cf)
318 safe_deallocate_a(this%cnf(i_cnf)%kernel)
320 if (this%cnf(
world)%mpi_grp%comm /= mpi_comm_null)
then
321 call mpi_comm_free(this%cnf(
world)%mpi_grp%comm, mpi_err)
324 safe_deallocate_a(this%cnf(
serial)%kernel)
376 subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot)
377 integer,
intent(in) :: n01
378 integer,
intent(in) :: n02
379 integer,
intent(in) :: n03
380 integer,
intent(in) :: nfft1
381 integer,
intent(in) :: nfft2
382 integer,
intent(in) :: nfft3
383 real(real64),
intent(in) :: hgrid
384 real(real64),
intent(in) :: karray(nfft1/2 + 1,nfft2/2 + 1, nfft3/2 + 1)
385 real(real64),
intent(inout) :: rhopot(n01, n02, n03)
387 real(real64),
allocatable :: zarray(:,:,:)
388 real(real64) :: factor
389 integer :: n1, n2, n3, nd1, nd2, nd3, n1h, nd1h
390 integer :: inzee, i_sign
399 nd1 = n1 + modulo(n1+1,2)
400 nd2 = n2 + modulo(n2+1,2)
401 nd3 = n3 + modulo(n3+1,2)
404 safe_allocate(zarray(1:2, 1:nd1h*nd2*nd3, 1:2))
407 call zarray_in(n01,n02,n03,nd1h,nd2,nd3,rhopot,zarray)
413 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
416 call kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
421 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
425 factor = hgrid**3/(real(n1*n2, real64)*real(n3, real64))
428 call zarray_out(n01, n02, n03, nd1h, nd2, nd3, rhopot, zarray(1, 1, inzee), factor)
430 safe_deallocate_a(zarray)
461 subroutine kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
462 integer,
intent(in) :: n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,inzee
463 real(real64),
intent(in) :: karray(1:nfft1/2 + 1, 1:nfft2/2 + 1, 1:nfft3/2 + 1)
464 real(real64),
intent(inout) :: zarray(1:2, 1:nd1h, 1:nd2, 1:nd3, 1:2)
466 real(real64),
dimension(:),
allocatable :: cos_array,sin_array
467 real(real64) :: a,b,c,d,pi2,g1,cp,sp
468 real(real64) :: rfe,ife,rfo,ifo,rk,ik,rk2,ik2,re,ro,ie,io,rhk,ihk
469 integer :: i1,i2,i3,j1,j2,j3, ouzee,n1h,n2h,n3h
470 integer :: si1,si2,si3
478 safe_allocate(cos_array(1:n1h + 1))
479 safe_allocate(sin_array(1:n1h + 1))
481 pi2 = 8._real64*datan(1._real64)
482 pi2 = pi2/real(n1, real64)
484 cos_array(i1) = dcos(pi2*(i1-1))
485 sin_array(i1) = -dsin(pi2*(i1-1))
507 a = zarray(1,i1,i2,i3,inzee)
508 b = zarray(2,i1,i2,i3,inzee)
509 c = zarray(1,si1,si2,si3,inzee)
510 d = zarray(2,si1,si2,si3,inzee)
511 rfe = .5_real64*(a+c)
512 ife = .5_real64*(b-d)
513 rfo = .5_real64*(a-c)
514 ifo = .5_real64*(b+d)
517 rk = rfe+cp*ifo-sp*rfo
518 ik = ife-cp*rfo-sp*ifo
519 g1 = karray(i1,j2,j3)
523 zarray(1,1,i2,i3,ouzee) = rk2
524 zarray(2,1,i2,i3,ouzee) = ik2
530 a=zarray(1,i1,i2,i3,inzee)
531 b=zarray(2,i1,i2,i3,inzee)
532 c=zarray(1,si1,si2,si3,inzee)
533 d=zarray(2,si1,si2,si3,inzee)
546 zarray(1,i1,i2,i3,ouzee) = rk2
547 zarray(2,i1,i2,i3,ouzee) = ik2
554 a=zarray(1,1,i2,i3,inzee)
555 b=zarray(2,1,i2,i3,inzee)
556 c=zarray(1,si1,si2,si3,inzee)
557 d=zarray(2,si1,si2,si3,inzee)
570 zarray(1,i1,i2,i3,ouzee) = rk2
571 zarray(2,i1,i2,i3,ouzee) = ik2
576 j2=n2h+1-abs(n2h+1-i2)
582 a=zarray(1,i1,i2,i3,inzee)
583 b=zarray(2,i1,i2,i3,inzee)
584 c=zarray(1,si1,si2,si3,inzee)
585 d=zarray(2,si1,si2,si3,inzee)
598 zarray(1,1,i2,i3,ouzee) = rk2
599 zarray(2,1,i2,i3,ouzee) = ik2
605 a=zarray(1,i1,i2,i3,inzee)
606 b=zarray(2,i1,i2,i3,inzee)
607 c=zarray(1,si1,si2,si3,inzee)
608 d=zarray(2,si1,si2,si3,inzee)
621 zarray(1,i1,i2,i3,ouzee) = rk2
622 zarray(2,i1,i2,i3,ouzee) = ik2
629 a=zarray(1,1,i2,i3,inzee)
630 b=zarray(2,1,i2,i3,inzee)
631 c=zarray(1,si1,si2,si3,inzee)
632 d=zarray(2,si1,si2,si3,inzee)
645 zarray(1,i1,i2,i3,ouzee) = rk2
646 zarray(2,i1,i2,i3,ouzee) = ik2
652 j3=n3h+1-abs(n3h+1-i3)
663 a=zarray(1,i1,i2,i3,inzee)
664 b=zarray(2,i1,i2,i3,inzee)
665 c=zarray(1,si1,si2,si3,inzee)
666 d=zarray(2,si1,si2,si3,inzee)
679 zarray(1,1,i2,i3,ouzee) = rk2
680 zarray(2,1,i2,i3,ouzee) = ik2
686 a=zarray(1,i1,i2,i3,inzee)
687 b=zarray(2,i1,i2,i3,inzee)
688 c=zarray(1,si1,si2,si3,inzee)
689 d=zarray(2,si1,si2,si3,inzee)
702 zarray(1,i1,i2,i3,ouzee) = rk2
703 zarray(2,i1,i2,i3,ouzee) = ik2
710 a=zarray(1,1,i2,i3,inzee)
711 b=zarray(2,1,i2,i3,inzee)
712 c=zarray(1,si1,si2,si3,inzee)
713 d=zarray(2,si1,si2,si3,inzee)
726 zarray(1,i1,i2,i3,ouzee) = rk2
727 zarray(2,i1,i2,i3,ouzee) = ik2
732 j2=n2h+1-abs(n2h+1-i2)
738 a=zarray(1,i1,i2,i3,inzee)
739 b=zarray(2,i1,i2,i3,inzee)
740 c=zarray(1,si1,si2,si3,inzee)
741 d=zarray(2,si1,si2,si3,inzee)
754 zarray(1,1,i2,i3,ouzee) = rk2
755 zarray(2,1,i2,i3,ouzee) = ik2
761 a=zarray(1,i1,i2,i3,inzee)
762 b=zarray(2,i1,i2,i3,inzee)
763 c=zarray(1,si1,si2,si3,inzee)
764 d=zarray(2,si1,si2,si3,inzee)
777 zarray(1,i1,i2,i3,ouzee) = rk2
778 zarray(2,i1,i2,i3,ouzee) = ik2
785 a=zarray(1,1,i2,i3,inzee)
786 b=zarray(2,1,i2,i3,inzee)
787 c=zarray(1,si1,si2,si3,inzee)
788 d=zarray(2,si1,si2,si3,inzee)
801 zarray(1,i1,i2,i3,ouzee) = rk2
802 zarray(2,i1,i2,i3,ouzee) = ik2
821 a=zarray(1,i1,i2,i3,ouzee)
822 b=zarray(2,i1,i2,i3,ouzee)
823 c=zarray(1,j1,j2,j3,ouzee)
824 d=-zarray(2,j1,j2,j3,ouzee)
834 zarray(1,i1,i2,i3,inzee)=rhk
835 zarray(2,i1,i2,i3,inzee)=ihk
843 a=zarray(1,i1,i2,i3,ouzee)
844 b=zarray(2,i1,i2,i3,ouzee)
845 c=zarray(1,j1,j2,j3,ouzee)
846 d=-zarray(2,j1,j2,j3,ouzee)
856 zarray(1,i1,i2,i3,inzee)=rhk
857 zarray(2,i1,i2,i3,inzee)=ihk
871 a=zarray(1,i1,i2,i3,ouzee)
872 b=zarray(2,i1,i2,i3,ouzee)
873 c=zarray(1,j1,j2,j3,ouzee)
874 d=-zarray(2,j1,j2,j3,ouzee)
884 zarray(1,i1,i2,i3,inzee)=rhk
885 zarray(2,i1,i2,i3,inzee)=ihk
893 a=zarray(1,i1,i2,i3,ouzee)
894 b=zarray(2,i1,i2,i3,ouzee)
895 c=zarray(1,j1,j2,j3,ouzee)
896 d=-zarray(2,j1,j2,j3,ouzee)
906 zarray(1,i1,i2,i3,inzee)=rhk
907 zarray(2,i1,i2,i3,inzee)=ihk
914 safe_deallocate_a(cos_array)
915 safe_deallocate_a(sin_array)
929 subroutine norm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
930 integer :: nd1,nd2,nd3,i1,i2,i3
950 ind = a1 + nd1 * (a2 - 1) + nd1 * nd2 * (a3 - 1)
964 subroutine symm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
965 integer :: nd1,nd2,nd3,i1,i2,i3
984 ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1)
997 subroutine zarray_in(n01,n02,n03,nd1,nd2,nd3,density,zarray)
998 integer :: n01,n02,n03,nd1,nd2,nd3
999 real(real64),
dimension(n01,n02,n03) :: density
1000 real(real64),
dimension(2,nd1,nd2,nd3) :: zarray
1002 integer :: i1,i2,i3,n01h,nd1hm,nd3hm,nd2hm
1015 zarray(1,i1,i2,i3) = 0.0_8
1016 zarray(2,i1,i2,i3) = 0.0_8
1024 zarray(1,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1-1,i2,i3)
1025 zarray(2,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1,i2,i3)
1029 if (modulo(n01,2) == 1)
then
1032 zarray(1,n01h+1+nd1hm,i2+nd2hm,i3+nd3hm) = density(n01,i2,i3)
1052 subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor)
1053 integer,
intent(in) :: n01,n02,n03,nd1,nd2,nd3
1054 real(real64),
intent(out) :: rhopot(n01,n02,n03)
1055 real(real64),
intent(in) :: zarray(2*nd1,nd2,nd3)
1057 real(real64),
intent(in) :: factor
1066 rhopot(i1, i2, i3) = factor*zarray(i1,i2,i3)
1107 subroutine build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,hgrid,itype_scf,karrayout)
1108 integer,
intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,itype_scf
1109 real(real64),
intent(in) :: hgrid
1110 real(real64),
dimension(nfft1/2+1,nfft2/2+1,nfft3/2+1),
intent(out) :: karrayout
1113 integer,
parameter :: N_GAUSS = 89
1115 integer,
parameter :: n_points = 2**6
1119 real(real64),
parameter :: p0_ref = 1._real64
1120 real(real64),
dimension(N_GAUSS) :: p_gauss,w_gauss
1122 real(real64),
allocatable :: kernel_scf(:), kern_1_scf(:)
1123 real(real64),
allocatable :: x_scf(:), y_scf(:)
1124 real(real64),
allocatable :: karrayhalf(:, :, :)
1126 real(real64) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1127 real(real64) :: factor,factor2,dx,absci,p0gauss,p0_cell
1128 real(real64) :: a1,a2,a3
1129 integer :: nd1,nd2,nd3,n1k,n2k,n3k,n_scf
1130 integer :: i_gauss,n_range,n_cell
1131 integer :: i,n_iter,i1,i2,i3,i_kern
1132 integer :: i01,i02,i03,inkee,n1h,n2h,n3h,nd1h
1137 n_scf=2*itype_scf*n_points
1145 nd1 = nfft1 + modulo(nfft1+1,2)
1146 nd2 = nfft2 + modulo(nfft2+1,2)
1147 nd3 = nfft3 + modulo(nfft3+1,2)
1153 safe_allocate(x_scf(0:n_scf))
1154 safe_allocate(y_scf(0:n_scf))
1157 call scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf)
1159 dx = real(n_range, real64)/real(n_scf, real64)
1161 n_cell = max(n01,n02,n03)
1162 n_range = max(n_cell,n_range)
1165 safe_allocate(kernel_scf(-n_range:n_range))
1166 safe_allocate(kern_1_scf(-n_range:n_range))
1169 a1 = hgrid * real(n01, real64)
1170 a2 = hgrid * real(n02, real64)
1171 a3 = hgrid * real(n03, real64)
1173 x_scf(:) = hgrid * x_scf(:)
1174 y_scf(:) = 1._real64/hgrid * y_scf(:)
1177 p0_cell = p0_ref/(hgrid*hgrid)
1180 call gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1184 a_range =
sqrt(a1*a1+a2*a2+a3*a3)
1185 factor = 1._real64/a_range
1187 factor2 = 1._real64/(a1*a1+a2*a2+a3*a3)
1188 do i_gauss=1,n_gauss
1189 p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1191 do i_gauss=1,n_gauss
1192 w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1195 karrayout(:,:,:) = 0.0_8
1198 loop_gauss:
do i_gauss=n_gauss,1,-1
1200 pgauss = p_gauss(i_gauss)
1203 n_iter = nint((
log(pgauss) -
log(p0_cell))/
log(4._real64))
1204 if (n_iter <= 0)
then
1208 p0gauss = pgauss/4._real64**n_iter
1213 kernel_scf(:) = 0.0_8
1217 absci = x_scf(i) - real(i_kern, real64)*hgrid
1219 kern = kern + y_scf(i)*
exp(-p0gauss*absci)*dx
1221 kernel_scf(i_kern) = kern
1222 kernel_scf(-i_kern) = kern
1223 if (abs(kern) < 1.d-18)
then
1230 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1239 karrayout(i1,i2,i3) = karrayout(i1,i2,i3) + w_gauss(i_gauss)* &
1240 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1247 safe_deallocate_a(kernel_scf)
1248 safe_deallocate_a(kern_1_scf)
1249 safe_deallocate_a(x_scf)
1250 safe_deallocate_a(y_scf)
1253 safe_allocate(karrayhalf(1:2, 1:nd1h*nd2*nd3, 1:2))
1257 call karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1258 karrayout,karrayhalf)
1259 call fft(n1h,nfft2,nfft3,nd1h,nd2,nd3,karrayhalf,1,inkee)
1261 call kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1262 karrayhalf(1,1,inkee),karrayout)
1264 safe_deallocate_a(karrayhalf)
1280 integer,
intent(in) :: n01,n02,n03
1281 integer,
intent(out) :: nfft1,nfft2,nfft3
1283 integer :: i1,i2,i3,l1
1293 call fourier_dim(i1,nfft1)
1294 call fourier_dim(nfft1/2,l1)
1295 if (modulo(nfft1,2) == 0 .and. modulo(l1,2) == 0 .and. 2*l1 == nfft1)
then
1301 call fourier_dim(i2,nfft2)
1302 if (modulo(nfft2,2) == 0)
then
1308 call fourier_dim(i3,nfft3)
1309 if (modulo(nfft3,2) == 0)
then
1333 subroutine karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3, kernel,karrayhalf)
1334 integer,
intent(in) :: n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1335 real(real64),
dimension(n1k,n2k,n3k),
intent(in) :: kernel
1336 real(real64),
dimension(2,(nd1+1)/2,nd2,nd3),
intent(out) :: karrayhalf
1338 real(real64),
dimension(:),
allocatable :: karray
1339 integer :: i1,i2,i3,nd1h,n1h,n2h,n3h
1348 safe_allocate(karray(1:nfft1))
1351 karrayhalf(:,:,:,:) = 0.0_8
1356 karray(i1+n1h) = kernel(i1,i2,i3)
1359 karray(n1h-i1+1+nd1-nfft1) = kernel(i1,i2,i3)
1362 karrayhalf(1,i1,i2+n2h,i3+n3h) = karray(2*i1-1)
1363 karrayhalf(2,i1,i2+n2h,i3+n3h) = karray(2*i1)
1368 karrayhalf(:,i1,n2h-i2+1+nd2-nfft2,i3+n3h) = &
1369 karrayhalf(:,i1,i2+n2h,i3+n3h)
1376 karrayhalf(:,i1,i2,n3h-i3+1+nd3-nfft3) = karrayhalf(:,i1,i2,i3+n3h)
1381 safe_deallocate_a(karray)
1397 subroutine kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,zarray,karray)
1398 integer,
intent(in) :: n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1399 real(real64),
dimension(2,(nd1+1)/2*nd2*nd3),
intent(in) :: zarray
1400 real(real64),
dimension(n1k,n2k,n3k),
intent(out) :: karray
1402 real(real64),
dimension(:),
allocatable :: cos_array,sin_array
1403 integer :: i1,i2,i3,ind1,ind2,nd1h,n1h,n2h,n3h
1404 real(real64) :: rfe,ife,rfo,ifo,cp,sp,rk,ik,a,b,c,d,pi2
1413 pi2=8._real64*datan(1._real64)
1414 pi2=pi2/real(nfft1, real64)
1416 safe_allocate(cos_array(1:nd1h))
1417 safe_allocate(sin_array(1:nd1h))
1420 cos_array(i1)= dcos(pi2*(i1-1))
1421 sin_array(i1)=-dsin(pi2*(i1-1))
1427 call symm_ind(nd1h,nd2,nd3,i1,i2,i3,ind2)
1432 rfe=0.5_real64*(a+c)
1433 ife=0.5_real64*(b-d)
1434 rfo=0.5_real64*(a-c)
1435 ifo=0.5_real64*(b+d)
1438 rk=rfe+cp*ifo-sp*rfo
1439 ik=ife-cp*rfo-sp*ifo
1453 safe_deallocate_a(cos_array)
1454 safe_deallocate_a(sin_array)
1497 subroutine par_calculate_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3, md1,md2,md3,nd1,nd2,nd3,nproc)
1498 integer,
intent(in) :: n01,n02,n03,nproc
1499 integer,
intent(out) :: m1,m2,m3,n1,n2,n3,md1,md2,md3,nd1,nd2,nd3
1523 call fourier_dim(l1,n1)
1525 if (modulo(n1,2) == 0&
1532 call fourier_dim(l2,n2)
1533 if (modulo(n2,2) == 0)
then
1539 call fourier_dim(l3,n3)
1541 if (modulo(n3,2) == 0 &
1554151
if (nproc*(md2/nproc) < n2/2)
then
1566250
if (modulo(nd3,nproc) /= 0)
then
1613 subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm)
1614 integer,
intent(in) :: n01,n02,n03,iproc,nproc
1615 integer,
intent(inout) :: nd1,nd2,nd3
1616 real(real64),
intent(in) :: hgrid
1617 real(real64),
intent(in),
dimension(nd1,nd2,nd3/nproc) :: kernelLOC
1618 real(real64),
intent(inout),
dimension(n01,n02,n03) :: rhopot
1619 type(mpi_comm),
intent(in) :: comm
1621 integer :: m1,m2,m3,n1,n2,n3,md1,md2,md3
1625 call par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
1626 call pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
1669 subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
1670 integer,
intent(in) :: m1,m2,m3,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,iproc,nproc
1671 real(real64),
dimension(nd1,nd2,nd3/nproc),
intent(in) :: kernelloc
1672 real(real64),
dimension(m1,m3,m2),
intent(inout) :: rhopot
1673 real(real64),
intent(in) :: hgrid
1674 type(mpi_comm),
intent(in) :: comm
1677 integer :: istart,iend,jend,jproc
1678 real(real64) :: scal
1679 real(real64),
dimension(:,:,:),
allocatable :: zf, lrhopot(:, :, :)
1680 integer,
dimension(:),
allocatable :: counts, displs
1685 scal=hgrid**3/(real(n1*n2, real64)*real(n3, real64))
1687 safe_allocate(zf(1:md1, 1:md3, 1:md2/nproc))
1688 safe_allocate(counts(0:nproc-1))
1689 safe_allocate(displs(0:nproc-1))
1692 call enterdensity(rhopot(1,1,1), m1, m2, m3, md1, md2, md3, iproc, nproc, zf(1,1,1))
1695 call convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, kernelloc, zf, scal, comm)
1699 do jproc = 0, nproc - 1
1700 istart = min(jproc*(md2/nproc),m2-1)
1701 jend = max(min(md2/nproc, m2 - md2/nproc*jproc), 0)
1702 counts(jproc) = m1*m3*jend
1703 displs(jproc) = istart*m1*m3
1707 istart=min(iproc*(md2/nproc),m2-1)
1708 jend=max(min(md2/nproc,m2-md2/nproc*iproc),0)
1711 if (jend == 0) jend = 1
1713 safe_allocate(lrhopot(1:m1, 1:m3, 1:jend))
1715 lrhopot(1:m1, 1:m3, 1:jend) = zf(1:m1, 1:m3, 1:jend)
1717 call profiling_in(
"ISF_GATHER")
1718#if defined(HAVE_MPI)
1719 call mpi_allgatherv(lrhopot(1, 1, 1), counts(iproc), mpi_double_precision, rhopot(1, 1, 1), counts,&
1720 displs, mpi_double_precision, comm, mpi_err)
1722 call profiling_out(
"ISF_GATHER")
1724 safe_deallocate_a(zf)
1725 safe_deallocate_a(lrhopot)
1726 safe_deallocate_a(counts)
1727 safe_deallocate_a(displs)
1751 subroutine enterdensity(rhopot,m1,m2,m3,md1,md2,md3,iproc,nproc,zf)
1752 integer,
intent(in) :: m1,m2,m3,md1,md2,md3,iproc,nproc
1753 real(real64),
dimension(0:md1-1,0:md3-1,0:md2/nproc-1),
intent(out) :: zf
1754 real(real64),
dimension(0:m1-1,0:m3-1,0:m2-1),
intent(in) :: rhopot
1756 integer :: j1,j2,j3,jp2
1761 do jp2=0,md2/nproc-1
1762 j2=iproc*(md2/nproc)+jp2
1763 if (j2 <= m2-1)
then
1766 zf(j1,j3,jp2)=rhopot(j1,j3,j2)
1820 subroutine par_build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,hgrid,itype_scf, &
1821 iproc,nproc,comm,karrayoutLOC)
1822 integer,
intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,itype_scf,iproc,nproc
1823 real(real64),
intent(in) :: hgrid
1824 real(real64),
intent(out) :: karrayoutLOC(1:n1k, 1:n2k, 1:n3k/nproc)
1825 type(mpi_comm),
intent(in) :: comm
1828 integer,
parameter :: N_GAUSS = 89
1830 integer,
parameter :: N_POINTS = 2**6
1834 real(real64),
parameter :: p0_ref = 1._real64
1835 real(real64) :: p_gauss(N_GAUSS), w_gauss(N_GAUSS)
1837 real(real64),
dimension(:),
allocatable :: kernel_scf,kern_1_scf
1838 real(real64),
dimension(:),
allocatable :: x_scf ,y_scf
1839 real(real64),
dimension(:,:,:,:),
allocatable :: karrayfour
1840 real(real64),
dimension(:,:,:),
allocatable :: karray
1842 real(real64) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1843 real(real64) :: factor,factor2,dx,absci,p0gauss,p0_cell
1844 real(real64) :: a1,a2,a3
1845 integer :: n_scf,nker1,nker2,nker3
1846 integer :: i_gauss,n_range,n_cell,istart,iend,istart1,istart2,iend1,iend2
1847 integer :: i,n_iter,i1,i2,i3,i_kern
1848 integer :: i01,i02,i03,n1h,n2h,n3h
1853 n_scf=2*itype_scf*n_points
1872 if (modulo(nker2,nproc) == 0)
exit
1876 if (modulo(nker3,nproc) == 0)
exit
1881 safe_allocate(karray(1:nker1,1:nfft3,1:nker2/nproc))
1886 istart=iproc*nker2/nproc+1
1887 iend=min((iproc+1)*nker2/nproc,n2h+n03)
1890 if (iproc == 0) istart1=n2h-n03+2
1896 if (istart > n2h)
then
1900 if (iend <= n2h)
then
1913 safe_allocate(x_scf(0:n_scf))
1914 safe_allocate(y_scf(0:n_scf))
1917 call scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
1919 dx = real(n_range, real64)/real(n_scf, real64)
1921 n_cell = max(n01,n02,n03)
1922 n_range = max(n_cell,n_range)
1925 safe_allocate(kernel_scf(-n_range:n_range))
1926 safe_allocate(kern_1_scf(-n_range:n_range))
1929 a1 = hgrid * real(n01, real64)
1930 a2 = hgrid * real(n02, real64)
1931 a3 = hgrid * real(n03, real64)
1933 x_scf(:) = hgrid * x_scf(:)
1934 y_scf(:) = 1._real64/hgrid * y_scf(:)
1937 p0_cell = p0_ref/(hgrid*hgrid)
1940 call gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1944 a_range =
sqrt(a1*a1+a2*a2+a3*a3)
1945 factor = 1._real64/a_range
1947 factor2 = 1._real64/(a1*a1+a2*a2+a3*a3)
1948 do i_gauss=1,n_gauss
1949 p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1951 do i_gauss=1,n_gauss
1952 w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1955 karray(:,:,:) = 0.0_8
1957 loop_gauss:
do i_gauss = n_gauss, 1, -1
1959 pgauss = p_gauss(i_gauss)
1962 n_iter = nint((
log(pgauss) -
log(p0_cell))/
log(4._real64))
1963 if (n_iter <= 0)
then
1967 p0gauss = pgauss/4._real64**n_iter
1972 kernel_scf(:) = 0.0_8
1976 absci = x_scf(i) - real(i_kern, real64)*hgrid
1978 kern = kern + y_scf(i)*
exp(-p0gauss*absci)*dx
1980 kernel_scf(i_kern) = kern
1981 kernel_scf(-i_kern) = kern
1982 if (abs(kern) < 1.d-18)
then
1989 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1999 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
2000 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
2010 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
2011 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
2024 karray(n1h+2-i1,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1)
2029 karray(i1,n3h+2-i2,i3-istart+1) = karray(i1,i2+n3h,i3-istart+1)
2036 safe_deallocate_a(kernel_scf)
2037 safe_deallocate_a(kern_1_scf)
2038 safe_deallocate_a(x_scf)
2039 safe_deallocate_a(y_scf)
2043 safe_allocate(karrayfour(1:2, 1:nker1, 1:nker2, 1:nker3/nproc))
2047 call kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,nproc,iproc,karray,karrayfour,comm)
2053 karrayoutloc(i1,i2,i3)=karrayfour(1,i1,i2,i3)
2059 safe_deallocate_a(karray)
2060 safe_deallocate_a(karrayfour)
2067 subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
2068 integer,
intent(in) :: n_gauss
2069 real(real64),
intent(out) :: p_gauss(:)
2070 real(real64),
intent(out) :: w_gauss(:)
2071 real(real64),
intent(out) :: ur_gauss
2072 real(real64),
intent(out) :: dr_gauss
2073 real(real64),
intent(out) :: acc_gauss
2075 integer :: iunit, i, idx
2080 dr_gauss = 1.0e-08_8
2081 acc_gauss = 1.0e-08_8
2083 iunit = io_open(trim(conf%share)//
'/gequad.data', action =
'read', status =
'old', die = .
true.)
2086 read(iunit, *) idx, p_gauss(i), w_gauss(i)
2089 call io_close(iunit)
double log(double __x) __attribute__((__nothrow__
double exp(double __x) __attribute__((__nothrow__
double sqrt(double __x) __attribute__((__nothrow__
This module defines the meshes, which are used in Octopus.
subroutine symm_ind(nd1, nd2, nd3, i1, i2, i3, ind)
subroutine zarray_in(n01, n02, n03, nd1, nd2, nd3, density, zarray)
subroutine karrayhalf_in(n01, n02, n03, n1k, n2k, n3k, nfft1, nfft2, nfft3, nd1, nd2, nd3, kernel, karrayhalf)
subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
subroutine, public poisson_isf_end(this)
subroutine build_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, itype_scf, karrayout)
integer, parameter serial
subroutine norm_ind(nd1, nd2, nd3, i1, i2, i3, ind)
subroutine enterdensity(rhopot, m1, m2, m3, md1, md2, md3, iproc, nproc, zf)
subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm)
subroutine kernel_application(n1, n2, n3, nd1h, nd2, nd3, nfft1, nfft2, nfft3, zarray, karray, inzee)
subroutine, public poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world)
subroutine, public poisson_isf_solve(this, mesh, cube, pot, rho, all_nodes, sm)
subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
subroutine calculate_dimensions(n01, n02, n03, nfft1, nfft2, nfft3)
subroutine par_build_kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, hgrid, itype_scf, iproc, nproc, comm, karrayoutLOC)
integer, parameter domain
subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor)
subroutine par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot)
subroutine kernel_recon(n1k, n2k, n3k, nfft1, nfft2, nfft3, nd1, nd2, nd3, zarray, karray)
These routines are part of the ISF poisson solver, eventually they will be integrated with the other ...