Octopus
poisson_isf.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2011 M. Marques, A. Castro, X. Andrade, J. Alberdi-Rodriguez, M. Oliveira
2!! Copyright (C) Luigi Genovese, Thierry Deutsch, CEA Grenoble, 2006
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
23 use, intrinsic :: iso_fortran_env
25 use cube_oct_m
26 use debug_oct_m
27 use global_oct_m
28 use io_oct_m
30 use mesh_oct_m
31 use mpi_oct_m
33 use parser_oct_m
36 use sgfft_oct_m
38
39 implicit none
40
41 private
42
43 public :: &
45 isf_cnf_t, &
49
50 ! Indices for the cnf array
51 integer, parameter :: SERIAL = 1
52 integer, parameter :: WORLD = 2
53 integer, parameter :: DOMAIN = 3
54 integer, parameter :: N_CNF = 3
55
56 ! Datatype to store kernel values to solve Poisson equation
57 ! on different communicators (configurations).
58 type isf_cnf_t
59 private
60 real(real64), allocatable :: kernel(:, :, :)
61 integer :: nfft1, nfft2, nfft3
62 type(mpi_grp_t) :: mpi_grp
63 logical :: all_nodes
64 end type isf_cnf_t
65
66 type poisson_isf_t
67 private
68 type(MPI_Comm) :: all_nodes_comm
69 type(isf_cnf_t) :: cnf(1:N_CNF)
70 end type poisson_isf_t
71
72 integer, parameter :: order_scaling_function = 8
73
74
75contains
76
77 ! ---------------------------------------------------------
78 subroutine poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world)
79 type(poisson_isf_t), intent(out) :: this
80 type(namespace_t), target, intent(in) :: namespace
81 type(mesh_t), intent(in) :: mesh
82 type(cube_t), intent(inout) :: cube
83 type(MPI_Comm), intent(in) :: all_nodes_comm
84 logical, optional, intent(in) :: init_world
85
86 integer :: n1, n2, n3
87 integer :: i_cnf
88 integer :: m1, m2, m3, md1, md2, md3
89 integer :: n(3)
90 logical :: init_world_
91 integer :: default_nodes
92 integer :: ii
93 integer, allocatable :: ranks(:)
94 !data ranks /0, 1/
95 integer :: world_size
96 integer :: nodes
97#ifdef HAVE_MPI
98 integer :: ierr
99 type(MPI_Group) :: world_grp, poisson_grp
100#endif
101
102 push_sub(poisson_isf_init)
103
104 init_world_ = .true.
105 if (present(init_world)) init_world_ = init_world
106
107 if (.not. mesh%parallel_in_domains) then
108 ! The serial version is always needed (as used, e.g., in the casida runmode)
109 call calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
110 this%cnf(serial)%nfft1, this%cnf(serial)%nfft2, this%cnf(serial)%nfft3)
111
112 n1 = this%cnf(serial)%nfft1/2 + 1
113 n2 = this%cnf(serial)%nfft2/2 + 1
114 n3 = this%cnf(serial)%nfft3/2 + 1
116 safe_allocate(this%cnf(serial)%kernel(1:n1, 1:n2, 1:n3))
117
118 call build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
119 this%cnf(serial)%nfft1, this%cnf(serial)%nfft2, this%cnf(serial)%nfft3, &
120 real(cube%spacing(1), real64), order_scaling_function, this%cnf(SERIAL)%kernel)
121 end if
122
123#if !defined(HAVE_MPI)
124 pop_sub(poisson_isf_init)
125 return
126#endif
127
128 ! Allocate to configurations. The initialisation, especially the kernel,
129 ! depends on the number of nodes used for the calculations. To avoid
130 ! recalculating the kernel on each call of poisson_isf_solve depending on
131 ! the all_nodes argument, both kernels are calculated.
132 this%cnf(domain)%mpi_grp = mesh%mpi_grp
133
134 ! For the world configuration we build a new communicator
135
136 default_nodes = 0 !All nodes
137
138 !%Variable PoissonSolverNodes
139 !%Type integer
140 !%Section Hamiltonian::Poisson
141 !%Default 0
142 !%Description
143 !% How many nodes to use to solve the Poisson equation. A value of
144 !% 0, the default, implies that all available nodes are used.
145 !%End
146 call parse_variable(namespace, 'PoissonSolverNodes', default_nodes, nodes)
148 this%all_nodes_comm = all_nodes_comm
149
150#if defined(HAVE_MPI)
151 call mpi_comm_size(all_nodes_comm, world_size, mpi_err)
152#endif
154 if (nodes <= 0 .or. nodes > world_size) nodes = world_size
155 this%cnf(world)%all_nodes = (nodes == mpi_world%size)
157 safe_allocate(ranks(1:nodes))
158
159 do ii = 1, nodes
160 ranks(ii) = ii - 1
161 end do
163 !create a new communicator
164 !Extract the original group handle and create new comm.
165#if defined(HAVE_MPI)
166 call mpi_comm_group(all_nodes_comm, world_grp, ierr)
167 call mpi_group_incl(world_grp, nodes, ranks, poisson_grp, ierr)
168 call mpi_comm_create(mpi_world%comm, poisson_grp, this%cnf(world)%mpi_grp%comm, ierr)
169#endif
170
171 safe_deallocate_a(ranks)
172
173 !Fill the new data structure, for all nodes
174#if defined(HAVE_MPI)
175 if (this%cnf(world)%mpi_grp%comm /= mpi_comm_null) then
176 call mpi_comm_rank(this%cnf(world)%mpi_grp%comm, this%cnf(world)%mpi_grp%rank, ierr)
177 call mpi_comm_size(this%cnf(world)%mpi_grp%comm, this%cnf(world)%mpi_grp%size, ierr)
178 else
179 this%cnf(world)%mpi_grp%rank = -1
180 this%cnf(world)%mpi_grp%size = -1
181 end if
182#endif
183
184 ! Build the kernel for all configurations. At the moment, this is
185 ! solving the poisson equation with all nodes (i_cnf == WORLD) and
186 ! with the domain nodes only (i_cnf == DOMAIN).
187 do i_cnf = 2, n_cnf
188 if ((i_cnf == world .and. .not. init_world_) & ! world is disabled
189 .or. (i_cnf == domain .and. .not. mesh%parallel_in_domains) & ! not parallel in domains
190 ) then
191 cycle
192 end if
193 if (this%cnf(i_cnf)%mpi_grp%rank /= -1 .or. i_cnf /= world) then
194 call par_calculate_dimensions(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
195 m1, m2, m3, n1, n2, n3, md1, md2, md3, this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, &
196 this%cnf(i_cnf)%nfft3, this%cnf(i_cnf)%mpi_grp%size)
197
198 ! Shortcuts to avoid to "line too long" errors.
199 n(1) = this%cnf(i_cnf)%nfft1
200 n(2) = this%cnf(i_cnf)%nfft2
201 n(3) = this%cnf(i_cnf)%nfft3
202
203 safe_allocate(this%cnf(i_cnf)%kernel(1:n(1), 1:n(2), 1:n(3)/this%cnf(i_cnf)%mpi_grp%size))
204
205 call par_build_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), n1, n2, n3, &
206 this%cnf(i_cnf)%nfft1, this%cnf(i_cnf)%nfft2, this%cnf(i_cnf)%nfft3, &
207 cube%spacing(1), order_scaling_function, &
208 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm, &
209 this%cnf(i_cnf)%kernel)
210 else
211 cycle
212 end if
213 end do
214
215 pop_sub(poisson_isf_init)
216 end subroutine poisson_isf_init
217
218 ! ---------------------------------------------------------
219 subroutine poisson_isf_solve(this, mesh, cube, pot, rho, all_nodes, sm)
220 type(poisson_isf_t), intent(in) :: this
221 type(mesh_t), intent(in) :: mesh
222 type(cube_t), intent(in) :: cube
223 real(real64), contiguous, intent(out) :: pot(:)
224 real(real64), contiguous, intent(in) :: rho(:)
225 logical, intent(in) :: all_nodes
226 type(submesh_t), optional, intent(in) :: sm
227
228 integer :: i_cnf, nn(1:3)
229 type(cube_function_t) :: rho_cf
230#if defined(HAVE_MPI)
231 integer(int64) :: number_points
232#endif
233
234 push_sub(poisson_isf_solve)
235
236 call dcube_function_alloc_rs(cube, rho_cf)
237
238 if (present(sm)) then
239 call dsubmesh_to_cube(sm, rho, cube, rho_cf)
240 else
241 call dmesh_to_cube(mesh, rho, cube, rho_cf)
242 end if
243
244 ! Choose configuration.
245 i_cnf = serial
246
247#if defined(HAVE_MPI)
248 if (all_nodes) then
249 i_cnf = world
250 else if (mesh%parallel_in_domains) then
251 i_cnf = domain
252 end if
253#endif
254
255#if !defined(HAVE_MPI)
256 assert(i_cnf == serial)
257#endif
258
259 if (i_cnf == serial) then
260
261 nn(1) = this%cnf(serial)%nfft1
262 nn(2) = this%cnf(serial)%nfft2
263 nn(3) = this%cnf(serial)%nfft3
264 call psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
265 nn(1), nn(2), nn(3), &
266 real(mesh%spacing(1), real64), this%cnf(serial)%kernel, rho_cf%drs)
267
268 else
269 nn(1) = this%cnf(i_cnf)%nfft1
270 nn(2) = this%cnf(i_cnf)%nfft2
271 nn(3) = this%cnf(i_cnf)%nfft3
272 if (this%cnf(i_cnf)%mpi_grp%size /= -1 .or. i_cnf /= world) then
273 call par_psolver_kernel(cube%rs_n_global(1), cube%rs_n_global(2), cube%rs_n_global(3), &
274 nn(1), nn(2), nn(3), &
275 real(mesh%spacing(1), real64), this%cnf(i_cnf)%kernel, rho_cf%drs, &
276 this%cnf(i_cnf)%mpi_grp%rank, this%cnf(i_cnf)%mpi_grp%size, this%cnf(i_cnf)%mpi_grp%comm)
277 end if
278 ! we need to be sure that the root of every domain-partition has a copy of the potential
279 ! for the moment we broadcast to all nodes, but this is more than what we really need
280 if (i_cnf == world .and. .not. this%cnf(world)%all_nodes) then
281#if defined(HAVE_MPI)
282 ! make sure we do not run into integer overflow here
283 number_points = cube%rs_n_global(1) * cube%rs_n_global(2)
284 number_points = number_points * cube%rs_n_global(3)
285 if (number_points >= huge(0)) then
286 message(1) = "Error: too many points for the normal cube. Please try to use a distributed FFT."
287 call messages_fatal(1)
288 end if
289 call mpi_bcast(rho_cf%drs(1, 1, 1), cube%rs_n_global(1)*cube%rs_n_global(2)*cube%rs_n_global(3), &
290 mpi_double_precision, 0, this%all_nodes_comm, mpi_err)
291#endif
292 end if
293 end if
294
295 if (present(sm)) then
296 call dcube_to_submesh(cube, rho_cf, sm, pot)
297 else
298 call dcube_to_mesh(cube, rho_cf, mesh, pot)
299 end if
300
301 call dcube_function_free_rs(cube, rho_cf)
302
303 pop_sub(poisson_isf_solve)
304 end subroutine poisson_isf_solve
305
306 ! ---------------------------------------------------------
307 subroutine poisson_isf_end(this)
308 type(poisson_isf_t), intent(inout) :: this
309
310#if defined(HAVE_MPI)
311 integer :: i_cnf
312#endif
313
314 push_sub(poisson_isf_end)
315
316#if defined(HAVE_MPI)
317 do i_cnf = 1, n_cnf
318 safe_deallocate_a(this%cnf(i_cnf)%kernel)
319 end do
320 if (this%cnf(world)%mpi_grp%comm /= mpi_comm_null) then
321 call mpi_comm_free(this%cnf(world)%mpi_grp%comm, mpi_err)
322 end if
323#else
324 safe_deallocate_a(this%cnf(serial)%kernel)
325#endif
326
327 pop_sub(poisson_isf_end)
328 end subroutine poisson_isf_end
329
330 ! --------------------------------------------------------------
331
332 !!****h* BigDFT/psolver_kernel
333 !! NAME
334 !! psolver_kernel
335 !!
336 !! FUNCTION
337 !! Solver of Poisson equation applying a kernel
338 !!
339 !! SYNOPSIS
340 !! Poisson solver applying a kernel and
341 !! using Fourier transform for the convolution.
342 !! rhopot : input -> the density
343 !! output -> the Hartree potential + pot_ion
344 !! The potential pot_ion is ADDED in the array rhopot.
345 !! Calculate also the Hartree potential
346 !!
347 !! Replaces the charge density contained in rhopot
348 !! by the Hartree stored as well in rhopot.
349 !! If xc_on is true, it also adds the XC potential and
350 !! ionic potential pot_ion
351 !!
352 !! We double the size of the mesh except in one dimension
353 !! in order to use the property of the density to be real.
354 !! WARNING
355 !! For the use of FFT routine
356 !! inzee=1: first part of Z is data (output) array,
357 !! second part work array
358 !! inzee=2: first part of Z is work array, second part data array
359 !! real(F(i1,i2,i3))=Z(1,i1,i2,i3,inzee)
360 !! imag(F(i1,i2,i3))=Z(2,i1,i2,i3,inzee)
361 !! inzee on output is in general different from inzee on input
362 !!
363 !! AUTHOR
364 !! Thierry Deutsch, Luigi Genovese
365 !! COPYRIGHT
366 !! Copyright (C) 2005 CEA
367 !! CREATION DATE
368 !! 13/07/2005
369 !!
370 !! MODIFICATION HISTORY
371 !! 12/2005 Kernel stored into memory
372 !! 12/2005 Real Kernel FFT and use less memory
373 !!
374 !! SOURCE
375 !!
376 subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot)
377 integer, intent(in) :: n01
378 integer, intent(in) :: n02
379 integer, intent(in) :: n03
380 integer, intent(in) :: nfft1
381 integer, intent(in) :: nfft2
382 integer, intent(in) :: nfft3
383 real(real64), intent(in) :: hgrid
384 real(real64), intent(in) :: karray(nfft1/2 + 1,nfft2/2 + 1, nfft3/2 + 1)
385 real(real64), intent(inout) :: rhopot(n01, n02, n03)
386
387 real(real64), allocatable :: zarray(:,:,:)
388 real(real64) :: factor
389 integer :: n1, n2, n3, nd1, nd2, nd3, n1h, nd1h
390 integer :: inzee, i_sign
391
392 push_sub(psolver_kernel)
393
394 !Dimension of the FFT
395 call calculate_dimensions(n01, n02, n03, n1, n2, n3)
396
397 !Half size of nd1
398 n1h=n1/2
399 nd1 = n1 + modulo(n1+1,2)
400 nd2 = n2 + modulo(n2+1,2)
401 nd3 = n3 + modulo(n3+1,2)
402 nd1h=(nd1+1)/2
403
404 safe_allocate(zarray(1:2, 1:nd1h*nd2*nd3, 1:2))
405
406 !Set zarray
407 call zarray_in(n01,n02,n03,nd1h,nd2,nd3,rhopot,zarray)
408
409 !FFT
410 !print *,"Do a 3D HalFFT for the density"
411 i_sign=1
412 inzee=1
413 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
414
415 !print *, "Apply the kernel"
416 call kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
417
418 !Inverse FFT
419 i_sign=-1
420 !print *,"Do a 3D inverse HalFFT"
421 call fft(n1h,n2,n3,nd1h,nd2,nd3,zarray,i_sign,inzee)
422
423 !Recollect the result
424 !We have to multiply by a factor
425 factor = hgrid**3/(real(n1*n2, real64)*real(n3, real64))
426
427 ! Calling this routine gives only the Hartree potential
428 call zarray_out(n01, n02, n03, nd1h, nd2, nd3, rhopot, zarray(1, 1, inzee), factor)
429
430 safe_deallocate_a(zarray)
431 pop_sub(psolver_kernel)
432 end subroutine psolver_kernel
433 !!***
434
435 !!****h* BigDFT/kernel_application
436 !! NAME
437 !! kernel_application
438 !!
439 !! FUNCTION
440 !! Multiply the FFT of the density by the FFT of the kernel
441 !!
442 !! SYNOPSIS
443 !! zarray(:,:,:,:,inzee) : IN -> FFT of the density with the x dimension divided by two
444 !! (HalFFT), OUT -> FFT of the potential
445 !! karray : kernel FFT (real, 1/8 of the total grid)
446 !! n1h,n2,n3 : dimension of the FFT grid for zarray
447 !! nd1h,nd2,nd3 : dimensions of zarray
448 !! nfft1,nfft2,nfft3 : original FFT grid dimensions, to be used for karray dimensions
449 !!
450 !! WARNING
451 !! We use all the zarray vector, storing the auxiliary part using ouzee=3-inzee
452 !! All the loop are unrolled such to avoid different conditions
453 !! the "min" functions are substituted by kink computed with absolute values
454 !! AUTHOR
455 !! Luigi Genovese
456 !! CREATION DATE
457 !! March 2006
458 !!
459 !! SOURCE
460 !!
461 subroutine kernel_application(n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,zarray,karray,inzee)
462 integer, intent(in) :: n1,n2,n3,nd1h,nd2,nd3,nfft1,nfft2,nfft3,inzee
463 real(real64), intent(in) :: karray(1:nfft1/2 + 1, 1:nfft2/2 + 1, 1:nfft3/2 + 1)
464 real(real64), intent(inout) :: zarray(1:2, 1:nd1h, 1:nd2, 1:nd3, 1:2)
465
466 real(real64), dimension(:), allocatable :: cos_array,sin_array
467 real(real64) :: a,b,c,d,pi2,g1,cp,sp
468 real(real64) :: rfe,ife,rfo,ifo,rk,ik,rk2,ik2,re,ro,ie,io,rhk,ihk
469 integer :: i1,i2,i3,j1,j2,j3, ouzee,n1h,n2h,n3h
470 integer :: si1,si2,si3
471
472 push_sub(kernel_application)
473
474 n1h = n1/2
475 n2h = n2/2
476 n3h = n3/2
477
478 safe_allocate(cos_array(1:n1h + 1))
479 safe_allocate(sin_array(1:n1h + 1))
480
481 pi2 = 8._real64*datan(1._real64)
482 pi2 = pi2/real(n1, real64)
483 do i1 = 1,n1h+1
484 cos_array(i1) = dcos(pi2*(i1-1))
485 sin_array(i1) = -dsin(pi2*(i1-1))
486 end do
487
488 ouzee = 3-inzee
489
490 !--------------------------------------------!
491 !--- Starting reconstruction half -> full ---!
492 !--------------------------------------------!
493
494 !-------------Case i3 = 1
495 i3 = 1
496 j3 = 1
497 si3 = 1
498
499 !-------------Case i2 = 1, i3 = 1
500 i2 = 1
501 j2 = 1
502 si2 = 1
503
504 !Case i1 == 1
505 i1 = 1
506 si1 = 1
507 a = zarray(1,i1,i2,i3,inzee)
508 b = zarray(2,i1,i2,i3,inzee)
509 c = zarray(1,si1,si2,si3,inzee)
510 d = zarray(2,si1,si2,si3,inzee)
511 rfe = .5_real64*(a+c)
512 ife = .5_real64*(b-d)
513 rfo = .5_real64*(a-c)
514 ifo = .5_real64*(b+d)
515 cp = cos_array(i1)
516 sp = sin_array(i1)
517 rk = rfe+cp*ifo-sp*rfo
518 ik = ife-cp*rfo-sp*ifo
519 g1 = karray(i1,j2,j3)
520 rk2 = rk*g1
521 ik2 = ik*g1
522
523 zarray(1,1,i2,i3,ouzee) = rk2
524 zarray(2,1,i2,i3,ouzee) = ik2
525
526 !Case i1=2,n1h
527 do i1=2,n1h
528 si1=n1h+2-i1
529
530 a=zarray(1,i1,i2,i3,inzee)
531 b=zarray(2,i1,i2,i3,inzee)
532 c=zarray(1,si1,si2,si3,inzee)
533 d=zarray(2,si1,si2,si3,inzee)
534 rfe=.5_real64*(a+c)
535 ife=.5_real64*(b-d)
536 rfo=.5_real64*(a-c)
537 ifo=.5_real64*(b+d)
538 cp=cos_array(i1)
539 sp=sin_array(i1)
540 rk=rfe+cp*ifo-sp*rfo
541 ik=ife-cp*rfo-sp*ifo
542 g1=karray(i1,j2,j3)
543 rk2=rk*g1
544 ik2=ik*g1
545
546 zarray(1,i1,i2,i3,ouzee) = rk2
547 zarray(2,i1,i2,i3,ouzee) = ik2
548 end do
549
550 !Case i1=n1h+1
551 i1=n1h+1
552 si1=n1h+2-i1
553
554 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
555 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
556 c=zarray(1,si1,si2,si3,inzee)
557 d=zarray(2,si1,si2,si3,inzee)
558 rfe=.5_real64*(a+c)
559 ife=.5_real64*(b-d)
560 rfo=.5_real64*(a-c)
561 ifo=.5_real64*(b+d)
562 cp=cos_array(i1)
563 sp=sin_array(i1)
564 rk=rfe+cp*ifo-sp*rfo
565 ik=ife-cp*rfo-sp*ifo
566 g1=karray(i1,j2,j3)
567 rk2=rk*g1
568 ik2=ik*g1
569
570 zarray(1,i1,i2,i3,ouzee) = rk2
571 zarray(2,i1,i2,i3,ouzee) = ik2
572 !-------------END case i2 = 1 , i3=1
573
574 !case i2 >=2
575 do i2 = 2, n2
576 j2=n2h+1-abs(n2h+1-i2)
577 si2=n2+2-i2 !if i2 /=1, otherwise si2=1
578
579 !Case i1 == 1
580 i1=1
581 si1=1
582 a=zarray(1,i1,i2,i3,inzee)
583 b=zarray(2,i1,i2,i3,inzee)
584 c=zarray(1,si1,si2,si3,inzee)
585 d=zarray(2,si1,si2,si3,inzee)
586 rfe=.5_real64*(a+c)
587 ife=.5_real64*(b-d)
588 rfo=.5_real64*(a-c)
589 ifo=.5_real64*(b+d)
590 cp=cos_array(i1)
591 sp=sin_array(i1)
592 rk=rfe+cp*ifo-sp*rfo
593 ik=ife-cp*rfo-sp*ifo
594 g1=karray(i1,j2,j3)
595 rk2=rk*g1
596 ik2=ik*g1
597
598 zarray(1,1,i2,i3,ouzee) = rk2
599 zarray(2,1,i2,i3,ouzee) = ik2
600
601 !Case i1=2,n1h
602 do i1=2,n1h
603 si1=n1h+2-i1
604
605 a=zarray(1,i1,i2,i3,inzee)
606 b=zarray(2,i1,i2,i3,inzee)
607 c=zarray(1,si1,si2,si3,inzee)
608 d=zarray(2,si1,si2,si3,inzee)
609 rfe=.5_real64*(a+c)
610 ife=.5_real64*(b-d)
611 rfo=.5_real64*(a-c)
612 ifo=.5_real64*(b+d)
613 cp=cos_array(i1)
614 sp=sin_array(i1)
615 rk=rfe+cp*ifo-sp*rfo
616 ik=ife-cp*rfo-sp*ifo
617 g1=karray(i1,j2,j3)
618 rk2=rk*g1
619 ik2=ik*g1
620
621 zarray(1,i1,i2,i3,ouzee) = rk2
622 zarray(2,i1,i2,i3,ouzee) = ik2
623 end do
624
625 !Case i1=n1h+1
626 i1=n1h+1
627 si1=n1h+2-i1
628
629 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
630 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
631 c=zarray(1,si1,si2,si3,inzee)
632 d=zarray(2,si1,si2,si3,inzee)
633 rfe=.5_real64*(a+c)
634 ife=.5_real64*(b-d)
635 rfo=.5_real64*(a-c)
636 ifo=.5_real64*(b+d)
637 cp=cos_array(i1)
638 sp=sin_array(i1)
639 rk=rfe+cp*ifo-sp*rfo
640 ik=ife-cp*rfo-sp*ifo
641 g1=karray(i1,j2,j3)
642 rk2=rk*g1
643 ik2=ik*g1
644
645 zarray(1,i1,i2,i3,ouzee) = rk2
646 zarray(2,i1,i2,i3,ouzee) = ik2
647 end do
648 !-------------END Case i3 = 1
649
650 !case i3 >=2
651 do i3=2,n3
652 j3=n3h+1-abs(n3h+1-i3)
653 si3=n3+2-i3 !if i3 /=1, otherwise si3=1
654
655 !-------------Case i2 = 1
656 i2=1
657 j2=1
658 si2=1
659
660 !Case i1 == 1
661 i1=1
662 si1=1
663 a=zarray(1,i1,i2,i3,inzee)
664 b=zarray(2,i1,i2,i3,inzee)
665 c=zarray(1,si1,si2,si3,inzee)
666 d=zarray(2,si1,si2,si3,inzee)
667 rfe=.5_real64*(a+c)
668 ife=.5_real64*(b-d)
669 rfo=.5_real64*(a-c)
670 ifo=.5_real64*(b+d)
671 cp=cos_array(i1)
672 sp=sin_array(i1)
673 rk=rfe+cp*ifo-sp*rfo
674 ik=ife-cp*rfo-sp*ifo
675 g1=karray(i1,j2,j3)
676 rk2=rk*g1
677 ik2=ik*g1
678
679 zarray(1,1,i2,i3,ouzee) = rk2
680 zarray(2,1,i2,i3,ouzee) = ik2
681
682 !Case i1=2,n1h
683 do i1=2,n1h
684 si1=n1h+2-i1
685
686 a=zarray(1,i1,i2,i3,inzee)
687 b=zarray(2,i1,i2,i3,inzee)
688 c=zarray(1,si1,si2,si3,inzee)
689 d=zarray(2,si1,si2,si3,inzee)
690 rfe=.5_real64*(a+c)
691 ife=.5_real64*(b-d)
692 rfo=.5_real64*(a-c)
693 ifo=.5_real64*(b+d)
694 cp=cos_array(i1)
695 sp=sin_array(i1)
696 rk=rfe+cp*ifo-sp*rfo
697 ik=ife-cp*rfo-sp*ifo
698 g1=karray(i1,j2,j3)
699 rk2=rk*g1
700 ik2=ik*g1
701
702 zarray(1,i1,i2,i3,ouzee) = rk2
703 zarray(2,i1,i2,i3,ouzee) = ik2
704 end do
705
706 !Case i1=n1h+1
707 i1=n1h+1
708 si1=n1h+2-i1
709
710 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
711 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
712 c=zarray(1,si1,si2,si3,inzee)
713 d=zarray(2,si1,si2,si3,inzee)
714 rfe=.5_real64*(a+c)
715 ife=.5_real64*(b-d)
716 rfo=.5_real64*(a-c)
717 ifo=.5_real64*(b+d)
718 cp=cos_array(i1)
719 sp=sin_array(i1)
720 rk=rfe+cp*ifo-sp*rfo
721 ik=ife-cp*rfo-sp*ifo
722 g1=karray(i1,j2,j3)
723 rk2=rk*g1
724 ik2=ik*g1
725
726 zarray(1,i1,i2,i3,ouzee) = rk2
727 zarray(2,i1,i2,i3,ouzee) = ik2
728 !-------------END case i2 = 1
729
730 !case i2 >=2
731 do i2=2,n2
732 j2=n2h+1-abs(n2h+1-i2)
733 si2=n2+2-i2 !if i2 /=1, otherwise si2=1
734
735 !Case i1 == 1
736 i1=1
737 si1=1
738 a=zarray(1,i1,i2,i3,inzee)
739 b=zarray(2,i1,i2,i3,inzee)
740 c=zarray(1,si1,si2,si3,inzee)
741 d=zarray(2,si1,si2,si3,inzee)
742 rfe=.5_real64*(a+c)
743 ife=.5_real64*(b-d)
744 rfo=.5_real64*(a-c)
745 ifo=.5_real64*(b+d)
746 cp=cos_array(i1)
747 sp=sin_array(i1)
748 rk=rfe+cp*ifo-sp*rfo
749 ik=ife-cp*rfo-sp*ifo
750 g1=karray(i1,j2,j3)
751 rk2=rk*g1
752 ik2=ik*g1
753
754 zarray(1,1,i2,i3,ouzee) = rk2
755 zarray(2,1,i2,i3,ouzee) = ik2
756
757 !Case i1=2,n1h
758 do i1=2,n1h
759 si1=n1h+2-i1
760
761 a=zarray(1,i1,i2,i3,inzee)
762 b=zarray(2,i1,i2,i3,inzee)
763 c=zarray(1,si1,si2,si3,inzee)
764 d=zarray(2,si1,si2,si3,inzee)
765 rfe=.5_real64*(a+c)
766 ife=.5_real64*(b-d)
767 rfo=.5_real64*(a-c)
768 ifo=.5_real64*(b+d)
769 cp=cos_array(i1)
770 sp=sin_array(i1)
771 rk=rfe+cp*ifo-sp*rfo
772 ik=ife-cp*rfo-sp*ifo
773 g1=karray(i1,j2,j3)
774 rk2=rk*g1
775 ik2=ik*g1
776
777 zarray(1,i1,i2,i3,ouzee) = rk2
778 zarray(2,i1,i2,i3,ouzee) = ik2
779 end do
780
781 !Case i1=n1h+1
782 i1=n1h+1
783 si1=n1h+2-i1
784
785 a=zarray(1,1,i2,i3,inzee) !beware here i1 -> 1
786 b=zarray(2,1,i2,i3,inzee) !beware here i1 -> 1
787 c=zarray(1,si1,si2,si3,inzee)
788 d=zarray(2,si1,si2,si3,inzee)
789 rfe=.5_real64*(a+c)
790 ife=.5_real64*(b-d)
791 rfo=.5_real64*(a-c)
792 ifo=.5_real64*(b+d)
793 cp=cos_array(i1)
794 sp=sin_array(i1)
795 rk=rfe+cp*ifo-sp*rfo
796 ik=ife-cp*rfo-sp*ifo
797 g1=karray(i1,j2,j3)
798 rk2=rk*g1
799 ik2=ik*g1
800
801 zarray(1,i1,i2,i3,ouzee) = rk2
802 zarray(2,i1,i2,i3,ouzee) = ik2
803 end do
804
805 end do
806
807
808 !--------------------------------------------!
809 !--- Starting reconstruction full -> half ---!
810 !--------------------------------------------!
811
812 !case i3=1
813 i3=1
814 j3=1
815 !case i2=1
816 i2=1
817 j2=1
818 do i1 = 1,n1h
819 j1=n1h+2-i1
820
821 a=zarray(1,i1,i2,i3,ouzee)
822 b=zarray(2,i1,i2,i3,ouzee)
823 c=zarray(1,j1,j2,j3,ouzee)
824 d=-zarray(2,j1,j2,j3,ouzee)
825 cp=cos_array(i1)
826 sp=sin_array(i1)
827 re=(a+c)
828 ie=(b+d)
829 ro=(a-c)*cp-(b-d)*sp
830 io=(a-c)*sp+(b-d)*cp
831 rhk=re-io
832 ihk=ie+ro
833
834 zarray(1,i1,i2,i3,inzee)=rhk
835 zarray(2,i1,i2,i3,inzee)=ihk
836 end do
837 !case i2 >= 2
838 do i2=2,n2
839 j2=nd2+1-i2
840 do i1 = 1,n1h
841 j1=n1h+2-i1
842
843 a=zarray(1,i1,i2,i3,ouzee)
844 b=zarray(2,i1,i2,i3,ouzee)
845 c=zarray(1,j1,j2,j3,ouzee)
846 d=-zarray(2,j1,j2,j3,ouzee)
847 cp=cos_array(i1)
848 sp=sin_array(i1)
849 re=(a+c)
850 ie=(b+d)
851 ro=(a-c)*cp-(b-d)*sp
852 io=(a-c)*sp+(b-d)*cp
853 rhk=re-io
854 ihk=ie+ro
855
856 zarray(1,i1,i2,i3,inzee)=rhk
857 zarray(2,i1,i2,i3,inzee)=ihk
858 end do
859 end do
860
861
862 !case i3 >=2
863 do i3=2,n3
864 j3=nd3+1-i3
865 !case i2=1
866 i2=1
867 j2=1
868 do i1 = 1,n1h
869 j1=n1h+2-i1
870
871 a=zarray(1,i1,i2,i3,ouzee)
872 b=zarray(2,i1,i2,i3,ouzee)
873 c=zarray(1,j1,j2,j3,ouzee)
874 d=-zarray(2,j1,j2,j3,ouzee)
875 cp=cos_array(i1)
876 sp=sin_array(i1)
877 re=(a+c)
878 ie=(b+d)
879 ro=(a-c)*cp-(b-d)*sp
880 io=(a-c)*sp+(b-d)*cp
881 rhk=re-io
882 ihk=ie+ro
883
884 zarray(1,i1,i2,i3,inzee)=rhk
885 zarray(2,i1,i2,i3,inzee)=ihk
886 end do
887 !case i2 >= 2
888 do i2=2,n2
889 j2=nd2+1-i2
890 do i1 = 1,n1h
891 j1=n1h+2-i1
892
893 a=zarray(1,i1,i2,i3,ouzee)
894 b=zarray(2,i1,i2,i3,ouzee)
895 c=zarray(1,j1,j2,j3,ouzee)
896 d=-zarray(2,j1,j2,j3,ouzee)
897 cp=cos_array(i1)
898 sp=sin_array(i1)
899 re=(a+c)
900 ie=(b+d)
901 ro=(a-c)*cp-(b-d)*sp
902 io=(a-c)*sp+(b-d)*cp
903 rhk=re-io
904 ihk=ie+ro
905
906 zarray(1,i1,i2,i3,inzee)=rhk
907 zarray(2,i1,i2,i3,inzee)=ihk
908 end do
909 end do
910
911 end do
912
913 !De-allocations
914 safe_deallocate_a(cos_array)
915 safe_deallocate_a(sin_array)
916
917 pop_sub(kernel_application)
918 end subroutine kernel_application
919
920 !!****h* BigDFT/norm_ind
921 !! NAME
922 !! norm_ind
923 !!
924 !! FUNCTION
925 !! Index in zarray
926 !!
927 !! SOURCE
928 !!
929 subroutine norm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
930 integer :: nd1,nd2,nd3,i1,i2,i3
931 integer :: ind
932
933 !Local variables
934 integer :: a1,a2,a3
935 if (i1 == nd1) then
936 a1 = 1
937 else
938 a1 = i1
939 end if
940 if (i2 == nd2) then
941 a2 = 1
942 else
943 a2 = i2
944 end if
945 if (i3 == nd3) then
946 a3 = 1
947 else
948 a3 = i3
949 end if
950 ind = a1 + nd1 * (a2 - 1) + nd1 * nd2 * (a3 - 1)
951 end subroutine norm_ind
952 !!***
953
954
955 !!****h* BigDFT/symm_ind
956 !! NAME
957 !! symm_ind
958 !!
959 !! FUNCTION
960 !! Index in zarray for -g vector
961 !!
962 !! SOURCE
963 !!
964 subroutine symm_ind(nd1,nd2,nd3,i1,i2,i3,ind)
965 integer :: nd1,nd2,nd3,i1,i2,i3
966 integer :: ind
967
968 integer :: a1,a2,a3
969 if (i1 /= 1) then
970 a1=nd1+1-i1
971 else
972 a1=i1
973 end if
974 if (i2 /= 1) then
975 a2=nd2+1-i2
976 else
977 a2=i2
978 end if
979 if (i3 /= 1) then
980 a3=nd3+1-i3
981 else
982 a3=i3
983 end if
984 ind=a1+nd1*(a2-1)+nd1*nd2*(a3-1)
985 end subroutine symm_ind
986 !!***
987
988 !!****h* BigDFT/zarray_in
989 !! NAME
990 !! zarray_in
991 !!
992 !! FUNCTION
993 !! Put the density into zarray
994 !!
995 !! SOURCE
996 !!
997 subroutine zarray_in(n01,n02,n03,nd1,nd2,nd3,density,zarray)
998 integer :: n01,n02,n03,nd1,nd2,nd3
999 real(real64), dimension(n01,n02,n03) :: density
1000 real(real64), dimension(2,nd1,nd2,nd3) :: zarray
1001
1002 integer :: i1,i2,i3,n01h,nd1hm,nd3hm,nd2hm
1003
1004 push_sub(zarray_in)
1005
1006 !Half the size of n01
1007 n01h=n01/2
1008 nd1hm=(nd1-1)/2
1009 nd2hm=(nd2-1)/2
1010 nd3hm=(nd3-1)/2
1011 !Set to zero
1012 do i3 = 1,nd3
1013 do i2 = 1,nd2
1014 do i1 = 1,nd1
1015 zarray(1,i1,i2,i3) = 0.0_8
1016 zarray(2,i1,i2,i3) = 0.0_8
1017 end do
1018 end do
1019 end do
1020 !Set zarray
1021 do i3 = 1,n03
1022 do i2 = 1,n02
1023 do i1 = 1,n01h
1024 zarray(1,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1-1,i2,i3)
1025 zarray(2,i1+nd1hm,i2+nd2hm,i3+nd3hm) = density(2*i1,i2,i3)
1026 end do
1027 end do
1028 end do
1029 if (modulo(n01,2) == 1) then
1030 do i3 = 1,n03
1031 do i2 = 1,n02
1032 zarray(1,n01h+1+nd1hm,i2+nd2hm,i3+nd3hm) = density(n01,i2,i3)
1033 end do
1034 end do
1035 end if
1036
1037 pop_sub(zarray_in)
1038 end subroutine zarray_in
1039 !!***
1040
1041
1042 !!****h* BigDFT/zarray_out
1043 !! NAME
1044 !! zarray_out
1045 !!
1046 !! FUNCTION
1047 !! Set the potential (rhopot) from zarray
1048 !! Calculate the Hartree energy.
1049 !!
1050 !! SOURCE
1051 !!
1052 subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor)
1053 integer, intent(in) :: n01,n02,n03,nd1,nd2,nd3
1054 real(real64), intent(out) :: rhopot(n01,n02,n03)
1055 real(real64), intent(in) :: zarray(2*nd1,nd2,nd3) ! Convert zarray(2,nd1,nd2,nd3) -> zarray(2*nd1,nd2,nd3) to use i1=1,n01
1056 ! ! instead of i1=1,n1h + special case for modulo(n01,2)
1057 real(real64), intent(in) :: factor
1058
1059 integer :: i1,i2,i3
1060
1061 push_sub(zarray_out)
1062
1063 do i3 = 1,n03
1064 do i2 = 1,n02
1065 do i1 = 1,n01
1066 rhopot(i1, i2, i3) = factor*zarray(i1,i2,i3)
1067 end do
1068 end do
1069 end do
1070
1071 pop_sub(zarray_out)
1072 end subroutine zarray_out
1073 !!***
1074
1075 !!****h* BigDFT/build_kernel
1076 !! NAME
1077 !! build_kernel
1078 !!
1079 !! FUNCTION
1080 !! Build the kernel of a gaussian function
1081 !! for interpolating scaling functions.
1082 !!
1083 !! SYNOPSIS
1084 !! Build the kernel (karrayout) of a gaussian function
1085 !! for interpolating scaling functions
1086 !! $$ K(j) = \int \int \phi(x) g(x`-x) \delta(x`- j) dx dx` $$
1087 !!
1088 !! n01,n02,n03 Mesh dimensions of the density
1089 !! n1k,n2k,n3k Dimensions of the kernel
1090 !! hgrid Mesh step
1091 !! itype_scf Order of the scaling function (8,14,16)
1092 !!
1093 !! AUTHORS
1094 !! T. Deutsch, L. Genovese
1095 !! COPYRIGHT
1096 !! Copyright (C) 2005 CEA
1097 !! CREATION DATE
1098 !! 13/07/2005
1099 !!
1100 !! MODIFICATION HISTORY
1101 !! 13/09/2005 Use hgrid instead of acell
1102 !! 09/12/2005 Real kernel, stocked only half components
1103 !! 13/12/2005 Add routines to simplify the port into Stefan`s code
1104 !!
1105 !! SOURCE
1106 !!
1107 subroutine build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,hgrid,itype_scf,karrayout)
1108 integer, intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,itype_scf
1109 real(real64), intent(in) :: hgrid
1110 real(real64), dimension(nfft1/2+1,nfft2/2+1,nfft3/2+1), intent(out) :: karrayout
1111
1112 !Do not touch !!!!
1113 integer, parameter :: N_GAUSS = 89
1114 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
1115 integer, parameter :: n_points = 2**6
1116
1117 !Better p_gauss for calculation
1118 !(the support of the exponential should be inside [-n_range/2,n_range/2])
1119 real(real64), parameter :: p0_ref = 1._real64
1120 real(real64), dimension(N_GAUSS) :: p_gauss,w_gauss
1121
1122 real(real64), allocatable :: kernel_scf(:), kern_1_scf(:)
1123 real(real64), allocatable :: x_scf(:), y_scf(:)
1124 real(real64), allocatable :: karrayhalf(:, :, :)
1125
1126 real(real64) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1127 real(real64) :: factor,factor2,dx,absci,p0gauss,p0_cell
1128 real(real64) :: a1,a2,a3
1129 integer :: nd1,nd2,nd3,n1k,n2k,n3k,n_scf
1130 integer :: i_gauss,n_range,n_cell
1131 integer :: i,n_iter,i1,i2,i3,i_kern
1132 integer :: i01,i02,i03,inkee,n1h,n2h,n3h,nd1h
1133
1134 push_sub(build_kernel)
1135
1136 !Number of integration points : 2*itype_scf*n_points
1137 n_scf=2*itype_scf*n_points
1138 !Dimensions of Kernel
1139 n1k=nfft1/2+1
1140 n2k=nfft2/2+1
1141 n3k=nfft3/2+1
1142 n1h=nfft1/2
1143 n2h=nfft2/2
1144 n3h=nfft3/2
1145 nd1 = nfft1 + modulo(nfft1+1,2)
1146 nd2 = nfft2 + modulo(nfft2+1,2)
1147 nd3 = nfft3 + modulo(nfft3+1,2)
1148
1149 !Half size for the half FFT
1150 nd1h=(nd1+1)/2
1151
1152 !Allocations
1153 safe_allocate(x_scf(0:n_scf))
1154 safe_allocate(y_scf(0:n_scf))
1155
1156 !Build the scaling function
1157 call scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf)
1158 !Step grid for the integration
1159 dx = real(n_range, real64)/real(n_scf, real64)
1160 !Extend the range (no more calculations because fill in by 0.0_8)
1161 n_cell = max(n01,n02,n03)
1162 n_range = max(n_cell,n_range)
1163
1164 !Allocations
1165 safe_allocate(kernel_scf(-n_range:n_range))
1166 safe_allocate(kern_1_scf(-n_range:n_range))
1167
1168 !Lengthes of the box (use FFT dimension)
1169 a1 = hgrid * real(n01, real64)
1170 a2 = hgrid * real(n02, real64)
1171 a3 = hgrid * real(n03, real64)
1172
1173 x_scf(:) = hgrid * x_scf(:)
1174 y_scf(:) = 1._real64/hgrid * y_scf(:)
1175 dx = hgrid * dx
1176 !To have a correct integration
1177 p0_cell = p0_ref/(hgrid*hgrid)
1178
1179 !Initialisation of the gaussian (Beylkin)
1180 call gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1181 !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
1182 !(biggest length in the cube)
1183 !We divide the p_gauss by a_range**2 and a_gauss by a_range
1184 a_range = sqrt(a1*a1+a2*a2+a3*a3)
1185 factor = 1._real64/a_range
1186 !factor2 = factor*factor
1187 factor2 = 1._real64/(a1*a1+a2*a2+a3*a3)
1188 do i_gauss=1,n_gauss
1189 p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1190 end do
1191 do i_gauss=1,n_gauss
1192 w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1193 end do
1194
1195 karrayout(:,:,:) = 0.0_8
1196
1197 !Use in this order (better for accuracy).
1198 loop_gauss: do i_gauss=n_gauss,1,-1
1199 !Gaussian
1200 pgauss = p_gauss(i_gauss)
1201
1202 !We calculate the number of iterations to go from pgauss to p0_ref
1203 n_iter = nint((log(pgauss) - log(p0_cell))/log(4._real64))
1204 if (n_iter <= 0) then
1205 n_iter = 0
1206 p0gauss = pgauss
1207 else
1208 p0gauss = pgauss/4._real64**n_iter
1209 end if
1210
1211 !Stupid integration
1212 !Do the integration with the exponential centered in i_kern
1213 kernel_scf(:) = 0.0_8
1214 do i_kern=0,n_range
1215 kern = 0.0_8
1216 do i=0,n_scf
1217 absci = x_scf(i) - real(i_kern, real64)*hgrid
1218 absci = absci*absci
1219 kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
1220 end do
1221 kernel_scf(i_kern) = kern
1222 kernel_scf(-i_kern) = kern
1223 if (abs(kern) < 1.d-18) then
1224 !Too small not useful to calculate
1225 exit
1226 end if
1227 end do
1228
1229 !Start the iteration to go from p0gauss to pgauss
1230 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1231
1232 !Add to the kernel.
1233 do i3 = 1,n03
1234 i03 = i3-1
1235 do i2 = 1,n02
1236 i02 = i2-1
1237 do i1 = 1,n01
1238 i01 = i1-1
1239 karrayout(i1,i2,i3) = karrayout(i1,i2,i3) + w_gauss(i_gauss)* &
1240 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
1241 end do
1242 end do
1243 end do
1244
1245 end do loop_gauss
1246
1247 safe_deallocate_a(kernel_scf)
1248 safe_deallocate_a(kern_1_scf)
1249 safe_deallocate_a(x_scf)
1250 safe_deallocate_a(y_scf)
1251
1252 !Set karray
1253 safe_allocate(karrayhalf(1:2, 1:nd1h*nd2*nd3, 1:2))
1254
1255 !Set karray : use mirror symmetries
1256 inkee=1
1257 call karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1258 karrayout,karrayhalf)
1259 call fft(n1h,nfft2,nfft3,nd1h,nd2,nd3,karrayhalf,1,inkee)
1260 !Reconstruct the real kernel
1261 call kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,&
1262 karrayhalf(1,1,inkee),karrayout)
1263
1264 safe_deallocate_a(karrayhalf)
1265 pop_sub(build_kernel)
1266 end subroutine build_kernel
1267 !!***
1268
1269
1270 !!****h* BigDFT/calculate_dimensions
1271 !! NAME
1272 !! calculate_dimensions
1273 !!
1274 !! FUNCTION
1275 !! Give the dimensions of the FFT
1276 !!
1277 !! SOURCE
1278 !!
1279 subroutine calculate_dimensions(n01,n02,n03,nfft1,nfft2,nfft3)
1280 integer, intent(in) :: n01,n02,n03
1281 integer, intent(out) :: nfft1,nfft2,nfft3
1282
1283 integer :: i1,i2,i3,l1
1284
1285 push_sub(calculate_dimensions)
1286
1287 !Test 2*n01, 2*n02, 2*n03
1288 !write(*,*) 'in dimensions_fft',n01,n02,n03
1289 i1=2*n01
1290 i2=2*n02
1291 i3=2*n03
1292 do
1293 call fourier_dim(i1,nfft1)
1294 call fourier_dim(nfft1/2,l1)
1295 if (modulo(nfft1,2) == 0 .and. modulo(l1,2) == 0 .and. 2*l1 == nfft1) then
1296 exit
1297 end if
1298 i1=i1+1
1299 end do
1300 do
1301 call fourier_dim(i2,nfft2)
1302 if (modulo(nfft2,2) == 0) then
1303 exit
1304 end if
1305 i2=i2+1
1306 end do
1307 do
1308 call fourier_dim(i3,nfft3)
1309 if (modulo(nfft3,2) == 0) then
1310 exit
1311 end if
1312 i3=i3+1
1313 end do
1314 !nd1 = nfft1 + modulo(nfft1+1,2)
1315 !nd2 = nfft2 + modulo(nfft2+1,2)
1316 !nd3 = nfft3 + modulo(nfft3+1,2)
1317 !write(*,*) 'out dimensions_fft',nfft1,nfft2,nfft3
1318
1319 pop_sub(calculate_dimensions)
1320 end subroutine calculate_dimensions
1321 !!***
1322
1323
1324 !!****h* BigDFT/karrayhalf_in
1325 !! NAME
1326 !! karrayhalf_in
1327 !!
1328 !! FUNCTION
1329 !! Put in the array for4446666444 FFT
1330 !!
1331 !! SOURCE
1332 !!
1333 subroutine karrayhalf_in(n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3, kernel,karrayhalf)
1334 integer, intent(in) :: n01,n02,n03,n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1335 real(real64), dimension(n1k,n2k,n3k), intent(in) :: kernel
1336 real(real64), dimension(2,(nd1+1)/2,nd2,nd3), intent(out) :: karrayhalf
1337
1338 real(real64), dimension(:), allocatable :: karray
1339 integer :: i1,i2,i3,nd1h,n1h,n2h,n3h
1340
1341 push_sub(karrayhalf_in)
1342
1343 !Body
1344 n1h=nfft1/2
1345 n2h=nfft2/2
1346 n3h=nfft3/2
1347
1348 safe_allocate(karray(1:nfft1))
1349
1350 nd1h=(nd1+1)/2
1351 karrayhalf(:,:,:,:) = 0.0_8
1352 do i3 = 1,n03
1353 do i2 = 1,n02
1354 karray(:) = 0.0_8
1355 do i1 = 1,n01
1356 karray(i1+n1h) = kernel(i1,i2,i3)
1357 end do
1358 do i1 = 2,n01
1359 karray(n1h-i1+1+nd1-nfft1) = kernel(i1,i2,i3)
1360 end do
1361 do i1 = 1,n1h
1362 karrayhalf(1,i1,i2+n2h,i3+n3h) = karray(2*i1-1)
1363 karrayhalf(2,i1,i2+n2h,i3+n3h) = karray(2*i1)
1364 end do
1365 end do
1366 do i2 = 2,n02
1367 do i1 = 1,nd1h
1368 karrayhalf(:,i1,n2h-i2+1+nd2-nfft2,i3+n3h) = &
1369 karrayhalf(:,i1,i2+n2h,i3+n3h)
1370 end do
1371 end do
1372 end do
1373 do i3 = 2,n03
1374 do i2 = 1,nd2
1375 do i1 = 1,nd1h
1376 karrayhalf(:,i1,i2,n3h-i3+1+nd3-nfft3) = karrayhalf(:,i1,i2,i3+n3h)
1377 end do
1378 end do
1379 end do
1380
1381 safe_deallocate_a(karray)
1382 pop_sub(karrayhalf_in)
1383 end subroutine karrayhalf_in
1384 !!***
1385
1386
1387 !!****h* BigDFT/kernel_recon
1388 !! NAME
1389 !! kernel_recon
1390 !!
1391 !! FUNCTION
1392 !! Reconstruction of the kernel from the FFT array zarray
1393 !! We keep only the half kernel in each direction (x,y,z).
1394 !!
1395 !! SOURCE
1396 !!
1397 subroutine kernel_recon(n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3,zarray,karray)
1398 integer, intent(in) :: n1k,n2k,n3k,nfft1,nfft2,nfft3,nd1,nd2,nd3
1399 real(real64), dimension(2,(nd1+1)/2*nd2*nd3), intent(in) :: zarray
1400 real(real64), dimension(n1k,n2k,n3k), intent(out) :: karray
1401
1402 real(real64), dimension(:), allocatable :: cos_array,sin_array
1403 integer :: i1,i2,i3,ind1,ind2,nd1h,n1h,n2h,n3h
1404 real(real64) :: rfe,ife,rfo,ifo,cp,sp,rk,ik,a,b,c,d,pi2
1405
1406 push_sub(kernel_recon)
1407
1408 !Body
1409 n1h=nfft1/2
1410 n2h=nfft2/2
1411 n3h=nfft3/2
1412 nd1h=(nd1+1)/2
1413 pi2=8._real64*datan(1._real64)
1414 pi2=pi2/real(nfft1, real64)
1415
1416 safe_allocate(cos_array(1:nd1h))
1417 safe_allocate(sin_array(1:nd1h))
1418
1419 do i1 = 1,nd1h
1420 cos_array(i1)= dcos(pi2*(i1-1))
1421 sin_array(i1)=-dsin(pi2*(i1-1))
1422 end do
1423 do i3 = 1,n3h+1
1424 do i2 = 1,n2h+1
1425 do i1 = 1,nd1h
1426 call norm_ind(nd1h,nd2,nd3,i1,i2,i3,ind1)
1427 call symm_ind(nd1h,nd2,nd3,i1,i2,i3,ind2)
1428 a=zarray(1,ind1)
1429 b=zarray(2,ind1)
1430 c=zarray(1,ind2)
1431 d=zarray(2,ind2)
1432 rfe=0.5_real64*(a+c)
1433 ife=0.5_real64*(b-d)
1434 rfo=0.5_real64*(a-c)
1435 ifo=0.5_real64*(b+d)
1436 cp=cos_array(i1)
1437 sp=sin_array(i1)
1438 rk=rfe+cp*ifo-sp*rfo
1439 ik=ife-cp*rfo-sp*ifo
1440 !For big dimension 1.d-9 otherwise 1.d-10
1441 !Remove the test
1442 !if (abs(ik) >= 1.d-10) then
1443 ! print *,"non real kernel FFT",i1,i2,i3,ik
1444 ! stop
1445 !end if
1446 !Build the intermediate FFT convolution (full)
1447 !call norm_ind(nd1,nd2,nd3,i1,i2,i3,indA)
1448 karray(i1,i2,i3)=rk
1449 end do
1450 end do
1451 end do
1452
1453 safe_deallocate_a(cos_array)
1454 safe_deallocate_a(sin_array)
1455
1456 pop_sub(kernel_recon)
1457 end subroutine kernel_recon
1458 !!***
1459
1460 !!****h* BigDFT/par_calculate_dimensions
1461 !! NAME
1462 !! par_calculate_dimensions
1463 !!
1464 !! FUNCTION
1465 !! Calculate four sets of dimension needed for the calculation of the
1466 !! zero-padded convolution
1467 !!
1468 !! SYNOPSIS
1469 !! n01,n02,n03 original real dimensions (input)
1470 !!
1471 !! m1,m2,m3 original real dimension with the dimension 2 and 3 exchanged
1472 !!
1473 !! n1,n2 the first FFT even dimensions greater that 2*m1, 2*m2
1474 !! n3 the double of the first FFT even dimension greater than m3
1475 !! (improved for the HalFFT procedure)
1476 !!
1477 !! md1,md2,md3 half of n1,n2,n3 dimension. They contain the real unpadded space,
1478 !! properly enlarged to be compatible with the FFT dimensions n_i.
1479 !! md2 is further enlarged to be a multiple of nproc
1480 !!
1481 !! nd1,nd2,nd3 fourier dimensions for which the kernel FFT is injective,
1482 !! formally 1/8 of the fourier grid. Here the dimension nd3 is
1483 !! enlarged to be a multiple of nproc
1484 !!
1485 !! WARNING
1486 !! The dimension m2 and m3 correspond to n03 and n02 respectively
1487 !! this is needed since the convolution routine manage arrays of dimension
1488 !! (md1,md3,md2/nproc)
1489 !!
1490 !! AUTHOR
1491 !! Luigi Genovese
1492 !! CREATION DATE
1493 !! February 2006
1494 !!
1495 !! SOURCE
1496 !!
1497 subroutine par_calculate_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3, md1,md2,md3,nd1,nd2,nd3,nproc)
1498 integer, intent(in) :: n01,n02,n03,nproc
1499 integer, intent(out) :: m1,m2,m3,n1,n2,n3,md1,md2,md3,nd1,nd2,nd3
1500
1501 integer :: l1,l2,l3
1502
1503 push_sub(par_calculate_dimensions)
1504
1505 !dimensions of the density in the real space, inverted for convenience
1506
1507 m1=n01
1508 m2=n03
1509 m3=n02
1510
1511 ! real space grid dimension (suitable for number of processors)
1512
1513 ! n1=2*m1
1514 ! n2=2*m2
1515 ! n3=2*m3
1516
1517 l1=2*m1
1518 l2=2*m2
1519 l3=m3 !beware of the half dimension
1520 do
1521 !this is for the FFT of the kernel
1522 !one can erase it when the kernel is parallelized
1523 call fourier_dim(l1,n1)
1524 !call fourier_dim(n1/2,l1A)
1525 if (modulo(n1,2) == 0&! .and. 2*l1A == n1
1526 ) then
1527 exit
1528 end if
1529 l1=l1+1
1530 end do
1531 do
1532 call fourier_dim(l2,n2)
1533 if (modulo(n2,2) == 0) then
1534 exit
1535 end if
1536 l2=l2+1
1537 end do
1538 do
1539 call fourier_dim(l3,n3)
1540 !call fourier_dim(n3/2,l3A)
1541 if (modulo(n3,2) == 0 &!.and. 2*l3A == n3 .and. modulo(l3A,2) == 0
1542 ) then
1543 exit
1544 end if
1545 l3=l3+1
1546 end do
1547 n3=2*n3
1548
1549 !dimensions that contain the unpadded real space,
1550 ! compatible with the number of processes
1551 md1=n1/2
1552 md2=n2/2
1553 md3=n3/2
1554151 if (nproc*(md2/nproc) < n2/2) then
1555 md2=md2+1
1556 goto 151
1557 end if
1558
1559
1560 !dimensions of the kernel, 1/8 of the total volume,
1561 !compatible with nproc
1562
1563 nd1=n1/2+1
1564 nd2=n2/2+1
1565 nd3=n3/2+1
1566250 if (modulo(nd3,nproc) /= 0) then
1567 nd3=nd3+1
1568 goto 250
1569 end if
1570
1572 end subroutine par_calculate_dimensions
1573 !!***
1574
1575
1576 !!****h* BigDFT/par_psolver_kernel
1577 !! NAME
1578 !! par_psolver_kernel
1579 !!
1580 !! FUNCTION
1581 !! Solver of Poisson equation applying a kernel, parallel computation
1582 !!
1583 !! SYNOPSIS
1584 !! Poisson solver applying a kernel and
1585 !! using Fourier transform for the convolution.
1586 !! rhopot : input -> the density
1587 !! output -> the Hartree potential + pot_ion
1588 !! All the processes manage the same global rhopot array
1589 !! The potential pot_ion is ADDED in the array rhopot.
1590 !! Calculate also the Hartree potential
1591 !!
1592 !! Replaces the charge density contained in rhopot
1593 !! by the Hartree stored as well in rhopot.
1594 !! If xc_on is true, it also adds the XC potential and
1595 !! ionic potential pot_ion
1596 !!
1597 !! kernelLOC: the kernel in fourier space, calculated from ParBuil_Kernel routine
1598 !! it is a local vector (each process have its own part)
1599 !!
1600 !! comm: MPI communicator to use
1601 !!
1602 !! We double the size of the mesh except in one dimension
1603 !! in order to use the property of the density to be real.
1604 !! WARNING
1605 !!
1606 !! AUTHOR
1607 !! Luigi Genovese
1608 !! CREATION DATE
1609 !! February 2006
1610 !!
1611 !! SOURCE
1612 !!
1613 subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm)
1614 integer, intent(in) :: n01,n02,n03,iproc,nproc
1615 integer, intent(inout) :: nd1,nd2,nd3
1616 real(real64), intent(in) :: hgrid
1617 real(real64), intent(in), dimension(nd1,nd2,nd3/nproc) :: kernelLOC
1618 real(real64), intent(inout), dimension(n01,n02,n03) :: rhopot
1619 type(mpi_comm), intent(in) :: comm
1620
1621 integer :: m1,m2,m3,n1,n2,n3,md1,md2,md3
1622
1623 push_sub(par_psolver_kernel)
1624
1625 call par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
1626 call pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
1627
1628 pop_sub(par_psolver_kernel)
1629 end subroutine par_psolver_kernel
1630 !!***
1631
1632
1633 !!****h* BigDFT/pconvxc_off
1634 !! NAME
1635 !! pconvxc_off
1636 !!
1637 !! FUNCTION
1638 !! Calculate the parallel convolution with the kernel
1639 !! without the exchange-correlation part
1640 !!
1641 !! SYNOPSIS
1642 !! Poisson solver applying a kernel and
1643 !! using Fourier transform for the convolution.
1644 !! rhopot : input -> the density
1645 !! output -> the Hartree potential + pot_ion
1646 !! All the processes manage the same global rhopot array
1647 !! The potential pot_ion is ADDED in the array rhopot.
1648 !! Calculate also the Hartree potential
1649 !!
1650 !! Replaces the charge density contained in rhopot
1651 !! by the Hartree stored as well in rhopot.
1652 !!
1653 !! kernelLOC: the kernel in fourier space, calculated from ParBuild_Kernel routine
1654 !! it is a local vector (each process have its own part)
1655 !!
1656 !! comm: MPI communicator to use
1657 !!
1658 !! We double the size of the mesh except in one dimension
1659 !! in order to use the property of the density to be real.
1660 !! WARNING
1661 !!
1662 !! AUTHOR
1663 !! Luigi Genovese
1664 !! CREATION DATE
1665 !! February 2006
1666 !!
1667 !! SOURCE
1668 !!
1669 subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
1670 integer, intent(in) :: m1,m2,m3,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,iproc,nproc
1671 real(real64), dimension(nd1,nd2,nd3/nproc), intent(in) :: kernelloc
1672 real(real64), dimension(m1,m3,m2), intent(inout) :: rhopot
1673 real(real64), intent(in) :: hgrid
1674 type(mpi_comm), intent(in) :: comm
1675
1676 !Local variables
1677 integer :: istart,iend,jend,jproc
1678 real(real64) :: scal
1679 real(real64), dimension(:,:,:), allocatable :: zf, lrhopot(:, :, :)
1680 integer, dimension(:), allocatable :: counts, displs
1681
1682 push_sub(pconvxc_off)
1683
1684 !factor to be used to keep unitarity
1685 scal=hgrid**3/(real(n1*n2, real64)*real(n3, real64))
1686
1687 safe_allocate(zf(1:md1, 1:md3, 1:md2/nproc))
1688 safe_allocate(counts(0:nproc-1))
1689 safe_allocate(displs(0:nproc-1))
1690
1691 !Here we insert the process-related values of the density, starting from the total density
1692 call enterdensity(rhopot(1,1,1), m1, m2, m3, md1, md2, md3, iproc, nproc, zf(1,1,1))
1693
1694 !this routine builds the values for each process of the potential (zf), multiplying by the factor
1695 call convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, kernelloc, zf, scal, comm)
1696
1697 !building the array of the data to be sent from each process
1698 !and the array of the displacement
1699 do jproc = 0, nproc - 1
1700 istart = min(jproc*(md2/nproc),m2-1)
1701 jend = max(min(md2/nproc, m2 - md2/nproc*jproc), 0)
1702 counts(jproc) = m1*m3*jend
1703 displs(jproc) = istart*m1*m3
1704 end do
1705
1706 !assign the distributed density to the rhopot array
1707 istart=min(iproc*(md2/nproc),m2-1)
1708 jend=max(min(md2/nproc,m2-md2/nproc*iproc),0)
1709 iend=istart+jend
1710
1711 if (jend == 0) jend = 1
1712
1713 safe_allocate(lrhopot(1:m1, 1:m3, 1:jend))
1714
1715 lrhopot(1:m1, 1:m3, 1:jend) = zf(1:m1, 1:m3, 1:jend)
1716
1717 call profiling_in("ISF_GATHER")
1718#if defined(HAVE_MPI)
1719 call mpi_allgatherv(lrhopot(1, 1, 1), counts(iproc), mpi_double_precision, rhopot(1, 1, 1), counts,&
1720 displs, mpi_double_precision, comm, mpi_err)
1721#endif
1722 call profiling_out("ISF_GATHER")
1723
1724 safe_deallocate_a(zf)
1725 safe_deallocate_a(lrhopot)
1726 safe_deallocate_a(counts)
1727 safe_deallocate_a(displs)
1728
1729 pop_sub(pconvxc_off)
1730 end subroutine pconvxc_off
1731!!***
1732
1733
1734 !!****h* BigDFT/enterdensity
1735 !! NAME
1736 !! enterdensity
1737 !!
1738 !! FUNCTION
1739 !!
1740 !! Define a real space process-dependent vector zf with the global dimensions that are half of the FFT grid
1741 !! in order to perform convolution. The dimension md2 is a multiple of nproc
1742 !! Can be used also to define the local part of pot_ion
1743 !!
1744 !! AUTHOR
1745 !! L. Genovese
1746 !! CREATION DATE
1747 !! February 2006
1748 !!
1749 !! SOURCE
1750 !!
1751 subroutine enterdensity(rhopot,m1,m2,m3,md1,md2,md3,iproc,nproc,zf)
1752 integer, intent(in) :: m1,m2,m3,md1,md2,md3,iproc,nproc
1753 real(real64), dimension(0:md1-1,0:md3-1,0:md2/nproc-1), intent(out) :: zf
1754 real(real64), dimension(0:m1-1,0:m3-1,0:m2-1), intent(in) :: rhopot
1755
1756 integer :: j1,j2,j3,jp2
1757
1758 push_sub(enterdensity)
1759
1760 !Body
1761 do jp2=0,md2/nproc-1
1762 j2=iproc*(md2/nproc)+jp2
1763 if (j2 <= m2-1) then
1764 do j3=0,m3-1
1765 do j1=0,m1-1
1766 zf(j1,j3,jp2)=rhopot(j1,j3,j2)
1767 end do
1768 do j1=m1,md1-1
1769 zf(j1,j3,jp2)=0.0_8
1770 end do
1771 end do
1772 do j3=m3,md3-1
1773 do j1=0,md1-1
1774 zf(j1,j3,jp2)=0.0_8
1775 end do
1776 end do
1777 else
1778 do j3=0,md3-1
1779 do j1=0,md1-1
1780 zf(j1,j3,jp2)=0.0_8
1781 end do
1782 end do
1783 end if
1784 end do
1785
1786 pop_sub(enterdensity)
1787 end subroutine enterdensity
1788 !!***
1789
1790
1791 !!****h* BigDFT/par_build_kernel
1792 !! NAME
1793 !! par_build_kernel
1794 !!
1795 !! FUNCTION
1796 !! Build the kernel of a gaussian function
1797 !! for interpolating scaling functions.
1798 !! Do the parallel HalFFT of the symmetrized function and stores into
1799 !! memory only 1/8 of the grid divided by the number of processes nproc
1800 !!
1801 !! SYNOPSIS
1802 !! Build the kernel (karray) of a gaussian function
1803 !! for interpolating scaling functions
1804 !! $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(y-x) \delta(y-j) dx dy $$
1805 !!
1806 !! n01,n02,n03 Mesh dimensions of the density
1807 !! nfft1,nfft2,nfft3 Dimensions of the FFT grid (HalFFT in the third direction)
1808 !! n1k,n2k,n3k Dimensions of the kernel FFT
1809 !! hgrid Mesh step
1810 !! itype_scf Order of the scaling function (8,14,16)
1811 !! comm MPI communicator to use
1812 !!
1813 !! AUTHORS
1814 !! T. Deutsch, L. Genovese
1815 !! CREATION DATE
1816 !! February 2006
1817 !!
1818 !! SOURCE
1819 !!
1820 subroutine par_build_kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,hgrid,itype_scf, &
1821 iproc,nproc,comm,karrayoutLOC)
1822 integer, intent(in) :: n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,itype_scf,iproc,nproc
1823 real(real64), intent(in) :: hgrid
1824 real(real64), intent(out) :: karrayoutLOC(1:n1k, 1:n2k, 1:n3k/nproc)
1825 type(mpi_comm), intent(in) :: comm
1826
1827 !Do not touch !!!!
1828 integer, parameter :: N_GAUSS = 89
1829 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
1830 integer, parameter :: N_POINTS = 2**6
1831
1832 !Better p_gauss for calculation
1833 !(the support of the exponential should be inside [-n_range/2,n_range/2])
1834 real(real64), parameter :: p0_ref = 1._real64
1835 real(real64) :: p_gauss(N_GAUSS), w_gauss(N_GAUSS)
1836
1837 real(real64), dimension(:), allocatable :: kernel_scf,kern_1_scf
1838 real(real64), dimension(:), allocatable :: x_scf ,y_scf
1839 real(real64), dimension(:,:,:,:), allocatable :: karrayfour
1840 real(real64), dimension(:,:,:), allocatable :: karray
1841
1842 real(real64) :: ur_gauss,dr_gauss,acc_gauss,pgauss,kern,a_range
1843 real(real64) :: factor,factor2,dx,absci,p0gauss,p0_cell
1844 real(real64) :: a1,a2,a3
1845 integer :: n_scf,nker1,nker2,nker3
1846 integer :: i_gauss,n_range,n_cell,istart,iend,istart1,istart2,iend1,iend2
1847 integer :: i,n_iter,i1,i2,i3,i_kern
1848 integer :: i01,i02,i03,n1h,n2h,n3h
1849
1850 push_sub(par_build_kernel)
1851
1852 !Number of integration points : 2*itype_scf*n_points
1853 n_scf=2*itype_scf*n_points
1854 !Set karray
1855 !dimension test
1856
1857 !here we must set the dimensions for the fft part, starting from the nfft
1858 !remember that actually nfft2 is associated with n03 and viceversa
1859
1860 !dimensions that define the center of symmetry
1861 n1h=nfft1/2
1862 n2h=nfft2/2
1863 n3h=nfft3/2
1864
1865 !Auxiliary dimensions only for building the FFT part
1866 nker1=nfft1
1867 nker2=nfft2
1868 nker3=nfft3/2+1
1869
1870 !adjusting the last two dimensions to be multiples of nproc
1871 do
1872 if (modulo(nker2,nproc) == 0) exit
1873 nker2=nker2+1
1874 end do
1875 do
1876 if (modulo(nker3,nproc) == 0) exit
1877 nker3=nker3+1
1878 end do
1879
1880 !this will be the array of the kernel in the real space
1881 safe_allocate(karray(1:nker1,1:nfft3,1:nker2/nproc))
1882
1883 !defining proper extremes for the calculation of the
1884 !local part of the kernel
1885
1886 istart=iproc*nker2/nproc+1
1887 iend=min((iproc+1)*nker2/nproc,n2h+n03)
1888
1889 istart1=istart
1890 if (iproc == 0) istart1=n2h-n03+2
1891
1892 iend2=iend
1893
1894 iend1=n2h
1895 istart2=n2h+1
1896 if (istart > n2h) then
1897 iend1=istart1-1
1898 istart2=istart
1899 end if
1900 if (iend <= n2h) then
1901 istart2=iend2+1
1902 iend1=iend
1903 end if
1904
1905!!!!!START KERNEL CONSTRUCTION
1906 ! if (iproc == 0) then
1907 ! write(unit=*,fmt="(1x,a,i0,a)") &
1908 ! "Build the kernel in parallel using a sum of ",N_GAUSS," gaussians"
1909 ! write(unit=*,fmt="(1x,a,i0,a)") &
1910 ! "Use interpolating scaling functions of ",itype_scf," order"
1911 ! end if
1912
1913 safe_allocate(x_scf(0:n_scf))
1914 safe_allocate(y_scf(0:n_scf))
1915
1916 !Build the scaling function
1917 call scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
1918 !Step grid for the integration
1919 dx = real(n_range, real64)/real(n_scf, real64)
1920 !Extend the range (no more calculations because fill in by 0.0_8)
1921 n_cell = max(n01,n02,n03)
1922 n_range = max(n_cell,n_range)
1923
1924 !Allocations
1925 safe_allocate(kernel_scf(-n_range:n_range))
1926 safe_allocate(kern_1_scf(-n_range:n_range))
1927
1928 !Lengthes of the box (use FFT dimension)
1929 a1 = hgrid * real(n01, real64)
1930 a2 = hgrid * real(n02, real64)
1931 a3 = hgrid * real(n03, real64)
1932
1933 x_scf(:) = hgrid * x_scf(:)
1934 y_scf(:) = 1._real64/hgrid * y_scf(:)
1935 dx = hgrid * dx
1936 !To have a correct integration
1937 p0_cell = p0_ref/(hgrid*hgrid)
1938
1939 !Initialization of the gaussian (Beylkin)
1940 call gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
1941 !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
1942 !(biggest length in the cube)
1943 !We divide the p_gauss by a_range**2 and a_gauss by a_range
1944 a_range = sqrt(a1*a1+a2*a2+a3*a3)
1945 factor = 1._real64/a_range
1946 !factor2 = factor*factor
1947 factor2 = 1._real64/(a1*a1+a2*a2+a3*a3)
1948 do i_gauss=1,n_gauss
1949 p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
1950 end do
1951 do i_gauss=1,n_gauss
1952 w_gauss(i_gauss) = factor*w_gauss(i_gauss)
1953 end do
1954
1955 karray(:,:,:) = 0.0_8
1956 !Use in this order (better for accuracy).
1957 loop_gauss: do i_gauss = n_gauss, 1, -1
1958 !Gaussian
1959 pgauss = p_gauss(i_gauss)
1960
1961 !We calculate the number of iterations to go from pgauss to p0_ref
1962 n_iter = nint((log(pgauss) - log(p0_cell))/log(4._real64))
1963 if (n_iter <= 0) then
1964 n_iter = 0
1965 p0gauss = pgauss
1966 else
1967 p0gauss = pgauss/4._real64**n_iter
1968 end if
1969
1970 !Stupid integration
1971 !Do the integration with the exponential centered in i_kern
1972 kernel_scf(:) = 0.0_8
1973 do i_kern=0,n_range
1974 kern = 0.0_8
1975 do i=0,n_scf
1976 absci = x_scf(i) - real(i_kern, real64)*hgrid
1977 absci = absci*absci
1978 kern = kern + y_scf(i)*exp(-p0gauss*absci)*dx
1979 end do
1980 kernel_scf(i_kern) = kern
1981 kernel_scf(-i_kern) = kern
1982 if (abs(kern) < 1.d-18) then
1983 !Too small not useful to calculate
1984 exit
1985 end if
1986 end do
1987
1988 !Start the iteration to go from p0gauss to pgauss
1989 call scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
1990
1991 !Add to the kernel (only the local part)
1992
1993 do i3=istart1,iend1
1994 i03 = n2h - i3 + 1
1995 do i2 = 1,n02
1996 i02 = i2-1
1997 do i1 = 1,n01
1998 i01 = i1-1
1999 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
2000 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
2001 end do
2002 end do
2003 end do
2004 do i3=istart2,iend2
2005 i03 = i3 - n2h -1
2006 do i2 = 1,n02
2007 i02 = i2-1
2008 do i1 = 1,n01
2009 i01 = i1-1
2010 karray(i1+n1h,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1) + w_gauss(i_gauss)* &
2011 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
2012 end do
2013 end do
2014 end do
2015
2016
2017 end do loop_gauss
2018
2019 !Build the kernel in the real space as an even function, thus having a real FFT
2020
2021 do i3=istart1,iend2
2022 do i2 = 1,n02
2023 do i1=2,n01
2024 karray(n1h+2-i1,i2+n3h,i3-istart+1) = karray(i1+n1h,i2+n3h,i3-istart+1)
2025 end do
2026 end do
2027 do i2=2,n02
2028 do i1 = 1,nker1
2029 karray(i1,n3h+2-i2,i3-istart+1) = karray(i1,i2+n3h,i3-istart+1)
2030 end do
2031 end do
2032 end do
2033
2034
2035 !De-allocations
2036 safe_deallocate_a(kernel_scf)
2037 safe_deallocate_a(kern_1_scf)
2038 safe_deallocate_a(x_scf)
2039 safe_deallocate_a(y_scf)
2040
2041!!!!END KERNEL CONSTRUCTION
2042
2043 safe_allocate(karrayfour(1:2, 1:nker1, 1:nker2, 1:nker3/nproc))
2044
2045 ! if (iproc == 0) print *,"Do a 3D PHalFFT for the kernel"
2046
2047 call kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,nproc,iproc,karray,karrayfour,comm)
2048
2049 !Reconstruct the real kernel FFT
2050 do i3 = 1,n3k/nproc
2051 do i2 = 1,n2k
2052 do i1 = 1,n1k
2053 karrayoutloc(i1,i2,i3)=karrayfour(1,i1,i2,i3)
2054 end do
2055 end do
2056 end do
2057
2058 !De-allocations
2059 safe_deallocate_a(karray)
2060 safe_deallocate_a(karrayfour)
2061 pop_sub(par_build_kernel)
2062 end subroutine par_build_kernel
2063 !!***
2064
2065 ! -------------------------------------------------------------------------
2066
2067 subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
2068 integer, intent(in) :: n_gauss
2069 real(real64), intent(out) :: p_gauss(:)
2070 real(real64), intent(out) :: w_gauss(:)
2071 real(real64), intent(out) :: ur_gauss
2072 real(real64), intent(out) :: dr_gauss
2073 real(real64), intent(out) :: acc_gauss
2074
2075 integer :: iunit, i, idx
2076
2077 push_sub(gequad)
2078
2079 ur_gauss = 1.0_8
2080 dr_gauss = 1.0e-08_8
2081 acc_gauss = 1.0e-08_8
2082
2083 iunit = io_open(trim(conf%share)//'/gequad.data', action = 'read', status = 'old', die = .true.)
2084
2085 do i = 1, n_gauss
2086 read(iunit, *) idx, p_gauss(i), w_gauss(i)
2087 end do
2088
2089 call io_close(iunit)
2090
2091 pop_sub(gequad)
2092 end subroutine gequad
2093
2094end module poisson_isf_oct_m
2095
2096!! Local Variables:
2097!! mode: f90
2098!! coding: utf-8
2099!! End:
double log(double __x) __attribute__((__nothrow__
double exp(double __x) __attribute__((__nothrow__
double sqrt(double __x) __attribute__((__nothrow__
Definition: io.F90:114
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118
subroutine symm_ind(nd1, nd2, nd3, i1, i2, i3, ind)
subroutine zarray_in(n01, n02, n03, nd1, nd2, nd3, density, zarray)
subroutine karrayhalf_in(n01, n02, n03, n1k, n2k, n3k, nfft1, nfft2, nfft3, nd1, nd2, nd3, kernel, karrayhalf)
subroutine pconvxc_off(m1, m2, m3, n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, iproc, nproc, rhopot, kernelloc, hgrid, comm)
subroutine, public poisson_isf_end(this)
subroutine build_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, itype_scf, karrayout)
integer, parameter serial
subroutine norm_ind(nd1, nd2, nd3, i1, i2, i3, ind)
subroutine enterdensity(rhopot, m1, m2, m3, md1, md2, md3, iproc, nproc, zf)
subroutine par_psolver_kernel(n01, n02, n03, nd1, nd2, nd3, hgrid, kernelLOC, rhopot, iproc, nproc, comm)
integer, parameter n_cnf
integer, parameter world
subroutine kernel_application(n1, n2, n3, nd1h, nd2, nd3, nfft1, nfft2, nfft3, zarray, karray, inzee)
subroutine, public poisson_isf_init(this, namespace, mesh, cube, all_nodes_comm, init_world)
subroutine, public poisson_isf_solve(this, mesh, cube, pot, rho, all_nodes, sm)
subroutine gequad(n_gauss, p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
subroutine calculate_dimensions(n01, n02, n03, nfft1, nfft2, nfft3)
subroutine par_build_kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, hgrid, itype_scf, iproc, nproc, comm, karrayoutLOC)
integer, parameter domain
subroutine zarray_out(n01, n02, n03, nd1, nd2, nd3, rhopot, zarray, factor)
subroutine par_calculate_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, md1, md2, md3, nd1, nd2, nd3, nproc)
subroutine psolver_kernel(n01, n02, n03, nfft1, nfft2, nfft3, hgrid, karray, rhopot)
subroutine kernel_recon(n1k, n2k, n3k, nfft1, nfft2, nfft3, nd1, nd2, nd3, zarray, karray)
These routines are part of the ISF poisson solver, eventually they will be integrated with the other ...
Definition: sgfft.F90:117
int true(void)