Octopus
fft.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!! Copyright (C) 2011 J. Alberdi-Rodriguez, P. Garcia RisueƱo, M. Oliveira
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
25module fft_oct_m
26 use, intrinsic :: iso_c_binding
27 use, intrinsic :: ieee_arithmetic
28
29 use accel_oct_m
30#ifdef HAVE_OPENCL
31 use cl
32#ifdef HAVE_CLFFT
33 use clfft
34#endif
35#endif
36 use fftw_oct_m
38 use debug_oct_m
39 use global_oct_m
40 use, intrinsic :: iso_fortran_env
45 use mpi_oct_m
47#ifdef HAVE_NFFT
48 use nfft_oct_m
49#endif
50#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
51 use omp_lib
52#endif
53 use parser_oct_m
54 use pfft_oct_m
56 use pnfft_oct_m
58 use types_oct_m
61
62 implicit none
63
64 private
65 public :: &
66 fft_t, &
69 fft_init, &
71 fft_end, &
72 fft_copy, &
74 pad_feq, &
82
83
85 integer, public, parameter :: &
86 FFT_NONE = 0, &
87 fft_real = 1, &
88 fft_complex = 2
89
90 integer, public, parameter :: &
91 FFTLIB_NONE = 0, &
92 fftlib_fftw = 1, &
93 fftlib_pfft = 2, &
94 fftlib_accel = 3, &
95 fftlib_nfft = 4, &
96 fftlib_pnfft = 5
97
98 integer, parameter :: &
99 FFT_MAX = 10, &
100 fft_null = -1
101
102 type fft_t
103 private
104 integer :: slot = 0
105
106 integer, public :: type
107 integer, public :: library
108
109 type(MPI_Comm) :: comm
110 integer :: rs_n_global(3)
111 integer :: fs_n_global(3)
112 integer :: rs_n(3)
113 integer :: fs_n(3)
114 integer :: rs_istart(1:3)
115 integer :: fs_istart(1:3)
116
117 integer, public :: stride_rs(1:3)
118 integer, public :: stride_fs(1:3)
119
120 type(c_ptr) :: planf
121 type(c_ptr) :: planb
122 !integer(ptrdiff_t_kind) :: pfft_planf !< PFFT plan for forward transform
123 !integer(ptrdiff_t_kind) :: pfft_planb !< PFFT plan for backward transform
124
127 real(real64), pointer, public :: drs_data(:,:,:)
128 complex(real64), pointer, public :: zrs_data(:,:,:)
129 complex(real64), pointer, public :: fs_data(:,:,:)
130#ifdef HAVE_CLFFT
131
132 type(clfftPlanHandle) :: cl_plan_fw
133 type(clfftPlanHandle) :: cl_plan_bw
134#endif
135 type(c_ptr) :: cuda_plan_fw
136 type(c_ptr) :: cuda_plan_bw
137#ifdef HAVE_NFFT
138 type(nfft_t), public :: nfft
139#endif
140 type(pnfft_t), public :: pnfft
141
142 logical, public :: aligned_memory
143 end type fft_t
144
145 interface dfft_forward
147 end interface dfft_forward
148
149 interface zfft_forward
151 end interface zfft_forward
152
153 interface dfft_backward
155 end interface dfft_backward
156
157 interface zfft_backward
159 end interface zfft_backward
160
161 logical, save, public :: fft_initialized = .false.
162 integer, save :: fft_refs(FFT_MAX)
163 type(fft_t), save :: fft_array(FFT_MAX)
164 logical :: fft_optimize
165 integer, save :: fft_prepare_plan
166 integer, public :: fft_default_lib = -1
167#ifdef HAVE_NFFT
168 type(nfft_t), save :: nfft_options
169#endif
170 type(pnfft_t), save :: pnfft_options
171
172 integer, parameter :: &
173 CUFFT_R2C = int(z'2a'), &
174 cufft_c2r = int(z'2c'), &
175 cufft_c2c = int(z'29'), &
176 cufft_d2z = int(z'6a'), &
177 cufft_z2d = int(z'6c'), &
178 cufft_z2z = int(z'69')
179
180contains
181
182 ! ---------------------------------------------------------
184 subroutine fft_all_init(namespace)
185 type(namespace_t), intent(in) :: namespace
186
187 integer :: ii, fft_default
188#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
189 integer :: iret
190#endif
192 push_sub(fft_all_init)
193
194 fft_initialized = .true.
196 !%Variable FFTOptimize
197 !%Type logical
198 !%Default yes
199 !%Section Mesh::FFTs
200 !%Description
201 !% Should <tt>octopus</tt> optimize the FFT dimensions?
202 !% This means that the mesh to which FFTs are applied is not taken to be as small
203 !% as possible: some points may be added to each direction in order to get a "good number"
204 !% for the performance of the FFT algorithm.
205 !% The best FFT grid dimensions are given by <math>2^a 3^b 5^c 7^d 11^e 13^f</math>
206 !% where <math>a,b,c,d</math> are arbitrary and <math>e,f</math> are 0 or 1.
207 !% (<a href=http://www.fftw.org/doc/Complex-DFTs.html>ref</a>).
208 !% In some cases, namely when using
209 !% the split-operator, or Suzuki-Trotter propagators, this option should be turned off.
210 !% For spatial FFTs in periodic directions, the grid is never optimized, but a warning will
211 !% be written if the number is not good, with a suggestion of a better one to use, so you
212 !% can try a different spacing if you want to get a good number.
213 !%End
214 call parse_variable(namespace, 'FFTOptimize', .true., fft_optimize)
215 do ii = 1, fft_max
216 fft_refs(ii) = fft_null
217 end do
218
219 !%Variable FFTPreparePlan
220 !%Type integer
221 !%Default fftw_measure
222 !%Section Mesh::FFTs
223 !%Description
224 !% The FFTs are performed in octopus with the help of <a href=http://www.fftw.org>FFTW</a> and similar packages.
225 !% Before doing the actual computations, this package prepares a "plan", which means that
226 !% the precise numerical strategy to be followed to compute the FFT is machine/compiler-dependent,
227 !% and therefore the software attempts to figure out which is this precise strategy (see the
228 !% FFTW documentation for details). This plan preparation, which has to be done for each particular
229 !% FFT shape, can be done exhaustively and carefully (slow), or merely estimated. Since this is
230 !% a rather critical numerical step, by default it is done carefully, which implies a longer initial
231 !% initialization, but faster subsequent computations. You can change this behaviour by changing
232 !% this <tt>FFTPreparePlan</tt> variable, and in this way you can force FFTW to do a fast guess or
233 !% estimation of which is the best way to perform the FFT.
234 !%Option fftw_measure 0
235 !% This plan implies a longer initialization, but involves a more careful analysis
236 !% of the strategy to follow, and therefore more efficient FFTs. A side effect of the runtime
237 !% choices is that this plan can introduce slight numerical fluctuations between runs.
238 !%Option fftw_estimate 64
239 !% This is the "fast initialization" scheme, in which the plan is merely guessed from "reasonable"
240 !% assumptions. This is the default option, as it guarantees stable results
241 !%Option fftw_patient 32
242 !% It is like fftw_measure, but considers a wider range of algorithms and often produces a
243 !% "more optimal" plan (especially for large transforms), but at the expense of several times
244 !% longer planning time (especially for large transforms).
245 !%Option fftw_exhaustive 8
246 !% It is like fftw_patient, but considers an even wider range of algorithms,
247 !% including many that we think are unlikely to be fast, to produce the most optimal
248 !% plan but with a substantially increased planning time.
249 !%End
250 call parse_variable(namespace, 'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
251 if (.not. varinfo_valid_option('FFTPreparePlan', fft_prepare_plan)) then
252 call messages_input_error(namespace, 'FFTPreparePlan')
253 end if
255 !%Variable FFTLibrary
256 !%Type integer
257 !%Section Mesh::FFTs
258 !%Default fftw
259 !%Description
260 !% (experimental) You can select the FFT library to use.
261 !%Option fftw 1
262 !% Uses FFTW3 library.
263 !%Option pfft 2
264 !% (experimental) Uses PFFT library, which has to be linked.
265 !%Option accel 3
266 !% (experimental) Uses a GPU accelerated library. This only
267 !% works if Octopus was compiled with Cuda or OpenCL support.
268 !%End
269 fft_default = fftlib_fftw
270 if(accel_is_enabled()) then
271 fft_default = fftlib_accel
272 end if
273 call parse_variable(namespace, 'FFTLibrary', fft_default, fft_default_lib)
274
275 if (fft_default_lib == fftlib_accel) then
276#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA))
277 call messages_write('You have selected the Accelerated FFT, but Octopus was compiled', new_line = .true.)
278 call messages_write('without clfft (OpenCL) or Cuda support.')
279 call messages_fatal()
280#endif
281 if (.not. accel_is_enabled()) then
282 call messages_write('You have selected the accelerated FFT, but acceleration is disabled.')
283 call messages_fatal()
284 end if
285 end if
286
287#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
288 if (omp_get_max_threads() > 1) then
289
290 call messages_write('Info: Initializing Multi-threaded FFTW')
291 call messages_info()
292
293 iret = fftw_init_threads()
294 if (iret == 0) then
295 call messages_write('Initialization of FFTW3 threads failed.')
296 call messages_fatal()
297 end if
298 call fftw_plan_with_nthreads(omp_get_max_threads())
299
300 end if
301#endif
302#ifdef HAVE_NFFT
303 call nfft_guru_options(nfft_options, namespace)
304#endif
305 call pnfft_guru_options(pnfft_options, namespace)
306
307 pop_sub(fft_all_init)
308 end subroutine fft_all_init
309
310
311 ! ---------------------------------------------------------
313 subroutine fft_all_end()
314 integer :: ii
315
316 push_sub(fft_all_end)
317
318 do ii = 1, fft_max
319 if (fft_refs(ii) /= fft_null) then
320 call fft_end(fft_array(ii))
321 end if
322 end do
323
324#ifdef HAVE_PFFT
325 call pfft_cleanup()
326#endif
327
328#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
329 call fftw_cleanup_threads()
330#else
331 call fftw_cleanup()
332#endif
333
334 fft_initialized = .false.
335
336 pop_sub(fft_all_end)
337 end subroutine fft_all_end
338
339 ! ---------------------------------------------------------
340 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
341 type(fft_t), intent(inout) :: this
342 integer, intent(inout) :: nn(3)
343 integer, intent(in) :: dim
344 integer, intent(in) :: type
345 integer, intent(in) :: library
346 logical, intent(in) :: optimize(3)
347 integer, intent(in) :: optimize_parity(3)
349 type(mpi_comm), optional, intent(out) :: comm
350 type(mpi_grp_t), optional, intent(in) :: mpi_grp
351 logical, optional :: use_aligned
352
353 integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
354 integer :: n_1, n_2, n_3, nn_temp(3)
355 integer :: library_
356 type(mpi_grp_t) :: mpi_grp_
357 integer(int64) :: number_points, alloc_size
358
359#ifdef HAVE_CLFFT
360 real(real64) :: scale
361 integer :: status
362#endif
363#ifdef HAVE_PFFT
364 integer :: ierror
365#endif
366
367 push_sub(fft_init)
368
369 assert(fft_initialized)
370
371 assert(type == fft_real .or. type == fft_complex)
372
373 mpi_grp_ = mpi_world
374 if (present(mpi_grp)) mpi_grp_ = mpi_grp
375
376 this%aligned_memory = optional_default(use_aligned, .false.)
377
378 ! First, figure out the dimensionality of the FFT.
379 fft_dim = 0
380 do ii = 1, dim
381 if (nn(ii) <= 1) exit
382 fft_dim = fft_dim + 1
383 end do
384
385 if (fft_dim == 0) then
386 message(1) = "Internal error in fft_init: apparently, a 1x1x1 FFT is required."
387 call messages_fatal(1)
388 end if
389
390 if (fft_dim > 3) call messages_not_implemented('FFT for dimension > 3')
391
392 library_ = library
393 nn_temp(1:fft_dim) = nn(1:fft_dim)
394
395 select case (library_)
396 case (fftlib_accel)
397 ! FFT optimization
398 if(any(optimize_parity(1:fft_dim) > 1)) then
399 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
400 call messages_fatal(1)
401 end if
402
403 do ii = 1, fft_dim
404 nn_temp(ii) = fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
405 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
406 end do
407
408 ! if we can't optimize, in some cases we can't use the library
409 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
410#ifdef HAVE_CLFFT
411 call messages_write('Invalid grid size for accel fft. FFTW will be used instead.')
412 call messages_warning()
413 library_ = fftlib_fftw
414#endif
415 end if
416
418
419 do ii = 1, fft_dim
420 !NFFT likes even grids
421 !The underlying FFT grids are optimized inside the nfft_init routine
422 if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and. optimize(ii)))&
423 nn(ii)=nn(ii)+1
424 end do
425
426 case (fftlib_pnfft)
427
428 do ii = 1, fft_dim
429 !also PNFFT likes even grids
430 if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
431 end do
432
433 if (fft_dim < 3) then
434 call messages_not_implemented('PNFFT support for dimension < 3')
435 end if
436
437 case default
438
439 if (fft_dim < 3 .and. library_ == fftlib_pfft) then
440 call messages_not_implemented('PFFT support for dimension < 3')
441 end if
442
443 ! FFT optimization
444 if (any(optimize_parity(1:fft_dim) > 1)) then
445 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
446 call messages_fatal(1)
447 end if
448
449 do ii = 1, fft_dim
450 call loct_fft_optimize(nn_temp(ii), optimize_parity(ii))
451 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
452 end do
453
454 end select
455
456 ! find out if fft has already been allocated
457 jj = 0
458 do ii = fft_max, 1, -1
459 if (fft_refs(ii) /= fft_null) then
460 if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and. type == fft_array(ii)%type &
461 .and. library_ == fft_array(ii)%library .and. library_ /= fftlib_nfft &
462 .and. library_ /= fftlib_pnfft &
463 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory) then
464
465 ! NFFT and PNFFT plans are always allocated from scratch since they
466 ! are very likely to be different
467 this = fft_array(ii) ! return a copy
468 fft_refs(ii) = fft_refs(ii) + 1 ! increment the ref count
469 if (present(comm)) comm = fft_array(ii)%comm ! also return the MPI communicator
470 pop_sub(fft_init)
471 return
472 end if
473 else
474 jj = ii
475 end if
476 end do
477
478 if (jj == 0) then
479 message(1) = "Not enough slots for FFTs."
480 message(2) = "Please increase FFT_MAX in fft.F90 and recompile."
481 call messages_fatal(2)
482 end if
483
484 ! jj now contains an empty slot
485 fft_refs(jj) = 1
486 fft_array(jj)%slot = jj
487 fft_array(jj)%type = type
488 fft_array(jj)%library = library_
489 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
490 fft_array(jj)%rs_n_global(dim+1:) = 1
491 nullify(fft_array(jj)%drs_data)
492 nullify(fft_array(jj)%zrs_data)
493 nullify(fft_array(jj)%fs_data)
494
495 fft_array(jj)%aligned_memory = this%aligned_memory
496
497 ! Initialize parallel communicator
498 select case (library_)
499 case (fftlib_pfft)
500#ifdef HAVE_PFFT
501 call pfft_init()
502
503 call pfft_decompose(mpi_grp_%size, column_size, row_size)
504
505 ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)
506
507 if (ierror /= 0) then
508 message(1) = "The number of rows and columns in PFFT processor grid is not equal to "
509 message(2) = "the number of processor in the MPI communicator."
510 message(3) = "Please check it."
511 call messages_fatal(3)
512 end if
513#endif
514
515 case (fftlib_pnfft)
516#ifdef HAVE_PNFFT
517 call pnfft_init_procmesh(fft_array(jj)%pnfft, mpi_grp_, fft_array(jj)%comm)
518#endif
519 case default
520 fft_array(jj)%comm = mpi_comm_undefined
521
522 end select
523
524 if (present(comm)) comm = fft_array(jj)%comm
525
526 ! Get dimentions of arrays
527 select case (library_)
528 case (fftlib_fftw)
529 call fftw_get_dims(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global)
530 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
531 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
532 fft_array(jj)%rs_istart = 1
533 fft_array(jj)%fs_istart = 1
534
535 if (this%aligned_memory) then
536 call fftw_alloc_memory(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global, &
537 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
538 end if
539
540 case (fftlib_pfft)
541#ifdef HAVE_PFFT
542 call pfft_get_dims(fft_array(jj)%rs_n_global, comm%MPI_VAL, type == fft_real, &
543 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
544 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
545 !write(*,"(6(A,3I4,/),A,I10,/)") "PFFT: rs_n_global = ",fft_array(jj)%rs_n_global,&
546 ! "fs_n_global = ",fft_array(jj)%fs_n_global,&
547 ! "rs_n = ",fft_array(jj)%rs_n,&
548 ! "fs_n = ",fft_array(jj)%fs_n,&
549 ! "rs_istart = ",fft_array(jj)%rs_istart,&
550 ! "fs_istart = ",fft_array(jj)%fs_istart,&
551 ! "alloc_size = ",alloc_size
552#endif
553
554 ! Allocate memory. Note that PFFT may need extra memory space
555 ! and that in fourier space the function will be transposed
556 if (type == fft_real) then
557 n_1 = max(1, fft_array(jj)%rs_n(1))
558 n_2 = max(1, fft_array(jj)%rs_n(2))
559 n_3 = max(1, fft_array(jj)%rs_n(3))
560
561 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2))
562 safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
563 else
564 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2)))
565 safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
566 end if
567
568 n_1 = max(1, fft_array(jj)%fs_n(1))
569 n_2 = max(1, fft_array(jj)%fs_n(2))
570 n_3 = max(1, fft_array(jj)%fs_n(3))
571
572 n3 = ceiling(real(alloc_size)/real(n_3*n_1))
573 safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))
574
575 case (fftlib_accel)
576 call fftw_get_dims(fft_array(jj)%rs_n_global, (type == fft_real), fft_array(jj)%fs_n_global)
577 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
578 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
579 fft_array(jj)%rs_istart = 1
580 fft_array(jj)%fs_istart = 1
581
582 case (fftlib_nfft)
583 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
584 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
585 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
586 fft_array(jj)%rs_istart = 1
587 fft_array(jj)%fs_istart = 1
588
589 case (fftlib_pnfft)
590 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
591 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
592 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
593 fft_array(jj)%rs_istart = 1
594 fft_array(jj)%fs_istart = 1
595 ! indices partition is performed together with the plan preparation
596
597
598 end select
599
600 ! Prepare plans
601 select case (library_)
602 case (fftlib_fftw)
603 if (.not. this%aligned_memory) then
604 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
605 type == fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
606 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
607 type == fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
608 else
609 if (type == fft_real) then
610 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
611 type == fft_real, fftw_forward, fft_prepare_plan, &
612 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
613 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
614 type == fft_real, fftw_backward, fft_prepare_plan, &
615 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
616 else
617 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
618 type == fft_real, fftw_forward, fft_prepare_plan, &
619 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
620 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
621 type == fft_real, fftw_backward, fft_prepare_plan, &
622 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
623 end if
624 end if
625
626 case (fftlib_nfft)
627#ifdef HAVE_NFFT
628 call nfft_copy_info(this%nfft,fft_array(jj)%nfft) !copy default parameters set in the calling routine
629 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
630 fft_dim, fft_array(jj)%rs_n_global, optimize = .true.)
631#endif
632 case (fftlib_pfft)
633#ifdef HAVE_PFFT
634 if (type == fft_real) then
635 call pfft_prepare_plan_r2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%drs_data, &
636 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
637 call pfft_prepare_plan_c2r(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
638 fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
639 else
640 call pfft_prepare_plan_c2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%zrs_data, &
641 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
642 call pfft_prepare_plan_c2c(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
643 fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
644 end if
645#endif
646 case (fftlib_pnfft)
647#ifdef HAVE_PNFFT
648 call pnfft_copy_params(this%pnfft, fft_array(jj)%pnfft) ! pass default parameters like in NFFT
649
650 ! NOTE:
651 ! PNFFT (likewise NFFT) breaks the symmetry between real space and Fourier space
652 ! by allowing the possibility to have an unstructured grid in rs and by
653 ! using different parallelizations (the rs is transposed w.r.t. fs).
654 ! Octopus, in fourier_space_m, uses the convention for which the mapping
655 ! between rs and fs is done with a forward transform (and fs->rs with backward).
656 ! This is exactly the opposite of the definitions used by all the libraries
657 ! performing FFTs (PNFFT and NFFT included) [see e.g. M. Frigo, and S. G. Johnson, Proc.
658 ! IEEE 93, 216-231 (2005)].
659 ! While this leads to no problem on ordinary ffts where fs and rs can be exchanged
660 ! it does makes a fundamental difference for PNFFT (for some reason I don`t know NFFT
661 ! is still symmetric).
662 ! Therefore, in order to perform rs->fs tranforms with PNFFT one should use the
663 ! backward transform.
664
665 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, comm, fft_array(jj)%fs_n_global, &
666 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
667#endif
668 case (fftlib_accel)
669
670 fft_array(jj)%stride_rs(1) = 1
671 fft_array(jj)%stride_fs(1) = 1
672 do ii = 2, fft_dim
673 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
674 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
675 end do
676
677#ifdef HAVE_CUDA
678 if (type == fft_real) then
679 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
680 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_d2z, &
681 accel%cuda_stream)
682 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
683 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2d, &
684 accel%cuda_stream)
685 else
686 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
687 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
688 accel%cuda_stream)
689 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
690 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
691 accel%cuda_stream)
692 end if
693#endif
694
695#ifdef HAVE_CLFFT
696
697 ! create the plans
698 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_fw, accel%context%cl_context, &
699 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
700 if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')
701
702 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_bw, accel%context%cl_context, &
703 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
704 if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')
705
706 ! set precision
707
708 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
709 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
710
711 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
712 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
713
714 ! set number of transforms to 1
715
716 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_fw, 1_real64, status)
717 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')
718
719 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_bw, 1_real64, status)
720 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')
721
722 ! set the type precision to double
723
724 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
725 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
726
727 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
728 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
729
730
731 ! set the layout
732
733 if (type == fft_real) then
734
735 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_real, clfft_hermitian_interleaved, status)
736 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
737
738 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_hermitian_interleaved, clfft_real, status)
739 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
740
741 else
742
743 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_complex_interleaved, clfft_complex_interleaved, status)
744 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
745
746 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_complex_interleaved, clfft_complex_interleaved, status)
747 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
748
749 end if
750
751 ! set the plans as at out of place
752
753 call clfftsetresultlocation(fft_array(jj)%cl_plan_fw, clfft_outofplace, status)
754 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')
755
756 call clfftsetresultlocation(fft_array(jj)%cl_plan_bw, clfft_outofplace, status)
757 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')
758
759 ! the strides
760
761 call clfftsetplaninstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
762 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')
763
764 call clfftsetplanoutstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
765 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')
766
767 call clfftsetplaninstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
768 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')
769
770 call clfftsetplanoutstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
771 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')
772
773 ! set the scaling factors
774
775 scale = 1.0_real64/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), real64)))
776
777 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_forward, 1.0_real64, status)
778 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
779
780 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_backward, scale, status)
781 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
782
783 if (type == fft_real) then
784
785 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, 1.0_real64, status)
786 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
787
788 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, scale, status)
789 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
790
791 else
792
793 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, scale, status)
794 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
795
796 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, 1.0_real64, status)
797 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
798
799 end if
800
801 ! now 'bake' the plans, this signals that the plans are ready to use
802
803 call clfftbakeplan(fft_array(jj)%cl_plan_fw, accel%command_queue, status)
804 if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')
805
806 call clfftbakeplan(fft_array(jj)%cl_plan_bw, accel%command_queue, status)
807 if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')
808
809#endif
810
811 case default
812 call messages_write('Invalid FFT library.')
813 call messages_fatal()
814 end select
815
816 this = fft_array(jj)
817
818 ! Write information
819 if (.not. (library_ == fftlib_nfft .or. library_ == fftlib_pnfft)) then
820 call messages_write('Info: FFT grid dimensions =')
821 number_points = 1
822 do idir = 1, dim
823 call messages_write(fft_array(jj)%rs_n_global(idir))
824 if (idir < dim) call messages_write(" x ")
825 ! do the multiplication in a integer(int64) to avoid overflow for large grids
826 number_points = number_points * fft_array(jj)%rs_n_global(idir)
827 end do
828 call messages_new_line()
829
830 call messages_write(' Total grid size =')
831 call messages_write(number_points)
832 call messages_write(' (')
833 call messages_write(number_points*8.0_real64, units = unit_megabytes, fmt = '(f9.1)')
834 call messages_write(' )')
835 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
836 call messages_new_line()
837 call messages_write(' Inefficient FFT grid. A better grid would be: ')
838 do idir = 1, fft_dim
839 call messages_write(nn_temp(idir))
840 end do
841 end if
842 call messages_info()
843 end if
844
845 select case (library_)
846 case (fftlib_pfft)
847 write(message(1),'(a)') "Info: FFT library = PFFT"
848 write(message(2),'(a)') "Info: PFFT processor grid"
849 write(message(3),'(a, i9)') " No. of processors = ", mpi_grp_%size
850 write(message(4),'(a, i9)') " No. of columns in the proc. grid = ", column_size
851 write(message(5),'(a, i9)') " No. of rows in the proc. grid = ", row_size
852 write(message(6),'(a, i9)') " The size of integer is = ", c_intptr_t
853 call messages_info(6)
854
855 case (fftlib_pnfft)
856#ifdef HAVE_PNFFT
857 call messages_write("Info: FFT library = PNFFT")
858 call messages_info()
859 call pnfft_write_info(fft_array(jj)%pnfft)
860#endif
861 case (fftlib_nfft)
862#ifdef HAVE_NFFT
863 call messages_write("Info: FFT library = NFFT")
864 call messages_info()
865 call nfft_write_info(fft_array(jj)%nfft)
866#endif
867 end select
868
869 pop_sub(fft_init)
870 end subroutine fft_init
871
872 ! ---------------------------------------------------------
876 subroutine fft_init_stage1(this, namespace, XX, nn)
877 type(fft_t), intent(inout) :: this
880 type(namespace_t), intent(in) :: namespace
881 real(real64), intent(in) :: xx(:,:)
882 integer, optional, intent(in) :: nn(:)
883
884 integer :: slot
885
886 push_sub(fft_init_stage1)
887
888 assert(size(xx,2) == 3)
889
890 slot = this%slot
891 select case (fft_array(slot)%library)
892 case (fftlib_fftw)
893 !Do nothing
894 case (fftlib_nfft)
895#ifdef HAVE_NFFT
896 assert(present(nn))
897 call nfft_precompute(fft_array(slot)%nfft, &
898 xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
899#endif
900 case (fftlib_pfft)
901 !Do nothing
902 case (fftlib_accel)
903 !Do nothing
904 case (fftlib_pnfft)
905#ifdef HAVE_PNFFT
906 call pnfft_set_sp_nodes(fft_array(slot)%pnfft, namespace, xx)
907#endif
908 case default
909 call messages_write('Invalid FFT library.')
910 call messages_fatal()
911 end select
912
913
914
915 pop_sub(fft_init_stage1)
916 end subroutine fft_init_stage1
917 ! ---------------------------------------------------------
918 subroutine fft_end(this)
919 type(fft_t), intent(inout) :: this
920
921 integer :: ii
922#ifdef HAVE_CLFFT
923 integer :: status
924#endif
925
926 push_sub(fft_end)
927
928 ii = this%slot
929 if (fft_refs(ii) == fft_null) then
930 message(1) = "Trying to deallocate FFT that has not been allocated."
931 call messages_warning(1)
932 else
933 if (fft_refs(ii) > 1) then
934 fft_refs(ii) = fft_refs(ii) - 1
935 else
936 select case (fft_array(ii)%library)
937 case (fftlib_fftw)
938 call fftw_destroy_plan(fft_array(ii)%planf)
939 call fftw_destroy_plan(fft_array(ii)%planb)
940
941 if (this%aligned_memory) then
942 call fftw_free_memory(this%type == fft_real, &
943 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
944 end if
945
946 case (fftlib_pfft)
947#ifdef HAVE_PFFT
948 call pfft_destroy_plan(fft_array(ii)%planf)
949 call pfft_destroy_plan(fft_array(ii)%planb)
950#endif
951 safe_deallocate_p(fft_array(ii)%drs_data)
952 safe_deallocate_p(fft_array(ii)%zrs_data)
953 safe_deallocate_p(fft_array(ii)%fs_data)
954
955 case (fftlib_accel)
956#ifdef HAVE_CUDA
957 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
958 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
959#endif
960#ifdef HAVE_CLFFT
961 call clfftdestroyplan(fft_array(ii)%cl_plan_fw, status)
962 call clfftdestroyplan(fft_array(ii)%cl_plan_bw, status)
963#endif
964
965 case (fftlib_nfft)
966#ifdef HAVE_NFFT
967 call nfft_end(fft_array(ii)%nfft)
968#endif
969 case (fftlib_pnfft)
970#ifdef HAVE_PNFFT
971 call pnfft_end(fft_array(ii)%pnfft)
972#endif
973 end select
974 fft_refs(ii) = fft_null
975 end if
976 end if
977 this%slot = 0
978
979 pop_sub(fft_end)
980 end subroutine fft_end
981
982 ! ---------------------------------------------------------
983 subroutine fft_copy(fft_i, fft_o)
984 type(fft_t), intent(in) :: fft_i
985 type(fft_t), intent(inout) :: fft_o
986
987 push_sub(fft_copy)
988
989 if (fft_o%slot > 0) then
990 call fft_end(fft_o)
991 end if
992 assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
993 assert(fft_refs(fft_i%slot) > 0)
994
995 fft_o = fft_i
996 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1
997
998 pop_sub(fft_copy)
999 end subroutine fft_copy
1000
1001 ! ---------------------------------------------------------
1002 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
1003 type(fft_t), intent(in) :: fft
1004 integer, intent(out) :: rs_n_global(1:3)
1005 integer, intent(out) :: fs_n_global(1:3)
1006 integer, intent(out) :: rs_n(1:3)
1007 integer, intent(out) :: fs_n(1:3)
1008 integer, intent(out) :: rs_istart(1:3)
1009 integer, intent(out) :: fs_istart(1:3)
1010
1011 integer :: slot
1012
1013 push_sub(fft_get_dims)
1014
1015 slot = fft%slot
1016 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3)
1017 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3)
1018 rs_n(1:3) = fft_array(slot)%rs_n(1:3)
1019 fs_n(1:3) = fft_array(slot)%fs_n(1:3)
1020 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3)
1021 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3)
1022
1023 pop_sub(fft_get_dims)
1024 end subroutine fft_get_dims
1025
1026 ! ---------------------------------------------------------
1028 pure function pad_feq(ii, nn, mode)
1029 integer, intent(in) :: ii,nn
1030 logical, intent(in) :: mode
1031 integer :: pad_feq
1032
1033 ! no push_sub: called too frequently
1034
1035 if (mode) then ! index to frequency number
1036 if (ii <= nn/2 + 1) then
1037 pad_feq = ii - 1
1038 else
1039 pad_feq = ii - nn - 1
1040 end if
1041 else
1042 if (ii >= 0) then
1043 pad_feq = ii + 1
1044 else
1045 pad_feq = ii + nn + 1
1046 end if
1047 end if
1048
1049 end function pad_feq
1050
1051 ! -------------------------------------------------------
1052
1053 integer function fft_size(size, factors, parity)
1054 integer, intent(in) :: size
1055 integer, intent(in) :: factors(:)
1056 integer, intent(in) :: parity
1057
1058 integer :: nfactors
1059 integer :: nondiv
1060 integer, allocatable :: exponents(:)
1061
1062 push_sub(fft_size)
1063
1064 nfactors = ubound(factors, dim = 1)
1065
1066 safe_allocate(exponents(1:nfactors))
1067
1068 fft_size = size
1069 do
1070 call get_exponents(fft_size, nfactors, factors, exponents, nondiv)
1071 if (nondiv == 1 .and. mod(fft_size, 2) == parity) exit
1072 fft_size = fft_size + 1
1073 end do
1074
1075 safe_deallocate_a(exponents)
1076
1077 pop_sub(fft_size)
1078 end function fft_size
1080 ! -------------------------------------------------------
1081
1082 subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
1083 integer, intent(in) :: num
1084 integer, intent(in) :: nfactors
1085 integer, intent(in) :: factors(:)
1086 integer, intent(out) :: exponents(:)
1087 integer, intent(out) :: nondiv
1088
1089 integer :: ifactor
1090
1091 push_sub(get_exponents)
1092
1093 nondiv = num
1094 do ifactor = 1, nfactors
1095 exponents(ifactor) = 0
1096 do
1097 if (mod(nondiv, factors(ifactor)) /= 0) exit
1098 nondiv = nondiv/factors(ifactor)
1099 exponents(ifactor) = exponents(ifactor) + 1
1100 end do
1101 end do
1102
1103 pop_sub(get_exponents)
1104 end subroutine get_exponents
1106
1107 ! ----------------------------------------------------------
1108
1109 subroutine fft_operation_count(fft)
1110 type(fft_t), intent(in) :: fft
1111
1112 real(real64) :: fullsize
1113
1114 push_sub(fft_operation_count)
1115
1116 fullsize = product(real(fft%fs_n(1:3), real64))
1117 call profiling_count_operations(5.0_real64*fullsize*log(fullsize)/log(m_two))
1118
1119 pop_sub(fft_operation_count)
1120 end subroutine fft_operation_count
1121
1122 !-----------------------------------------------------------------
1123 subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
1124 integer, intent(in) :: gg_in(:)
1125 real(real64), intent(in) :: temp(:)
1126 integer, intent(in) :: periodic_dim
1127 type(lattice_vectors_t), intent(in) :: latt
1128 real(real64), intent(in) :: qq(:)
1129 real(real64), intent(inout) :: gg(:)
1130 real(real64), intent(out) :: modg2
1131
1132 ! no PUSH_SUB, called too frequently
1133
1134 gg(1:3) = gg_in(1:3)
1135 gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)
1136 gg(1:3) = gg(1:3) * temp(1:3)
1137 gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3),gg(1:3))
1138 modg2 = sum(gg(1:3)**2)
1139
1140 end subroutine fft_gg_transform
1141
1142 ! ----------------------------------------------------------
1143
1146 real(real64) pure function fft_scaling_factor(fft) result(scaling_factor)
1147 type(fft_t), intent(in) :: fft
1148
1149 ! for the moment this factor is handled by the backwards transform for most libraries
1150 scaling_factor = m_one
1151
1152 select case (fft_array(fft%slot)%library)
1153 case (fftlib_accel)
1154#ifdef HAVE_CUDA
1155 scaling_factor = m_one/real(fft_array(fft%slot)%rs_n_global(1), real64)
1156 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(2), real64)
1157 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(3), real64)
1158#endif
1159 end select
1160
1161 end function fft_scaling_factor
1162
1163 ! ----------------------------------------------------------
1166 !
1167 ! Inspired by the routine bounds from Abinit
1168 real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
1169 integer, intent(in) :: box_dim(:)
1170 integer, intent(in) :: fs_istart(:)
1171 type(lattice_vectors_t), intent(in) :: latt
1172 real(real64), intent(in) :: gspacing(:)
1173 integer, intent(in) :: periodic_dim
1174 real(real64), intent(in) :: qq(:)
1175
1176 integer :: lx, ix, iy, iz, idir, idir2, idir3
1177 real(real64) :: dminsq, gg(3), modg2
1178 integer :: box_dim_(3), ixx(3)
1179 integer :: ming(3), maxg(3)
1180
1181 ! no PUSH_SUB, called too frequently
1182
1183 assert(periodic_dim > 0)
1184
1185 box_dim_(1:periodic_dim) = box_dim(1:periodic_dim)
1186 if (periodic_dim < 3) box_dim_(periodic_dim+1:3) = 1
1187
1188 ! We first need to remove asymetric planes for the case of even FFT grids
1189 ming = 1
1190 maxg = 1
1191 do idir = 1, periodic_dim
1192 do lx = 1, box_dim(idir)
1193 ix = fs_istart(idir) + lx - 1
1194 ixx(idir) = pad_feq(ix, box_dim(idir), .true.)
1195 ming(idir) = min(ming(idir), ixx(idir))
1196 maxg(idir) = max(maxg(idir), ixx(idir))
1197 end do
1198 maxg(idir) = min(abs(ming(idir)), maxg(idir))
1199 end do
1201 ! Given the boundaries, we can search the min distance, which gives us the the cutoff energy
1202 dminsq = m_huge
1203 do idir = 1, periodic_dim
1204 idir2 = mod(idir, 3)+1
1205 idir3 = mod(idir+1, 3)+1
1206
1207 ! Negative plane
1208 ixx(idir) = -maxg(idir)
1209 do iy = -maxg(idir2), maxg(idir2)
1210 ixx(idir2) = iy
1211 do iz = -maxg(idir3), maxg(idir3)
1212 ixx(idir3) = iz
1213 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1214 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1215 end do
1216 end do
1217 ! Positive plane
1218 ixx(idir) = maxg(idir)
1219 do iy = -maxg(idir2), maxg(idir2)
1220 ixx(idir2) = iy
1221 do iz = -maxg(idir3), maxg(idir3)
1222 ixx(idir3) = iz
1223 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1224 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1225 end do
1226 end do
1227 end do
1228
1229 ecut = m_half * dminsq
1230
1231 end function fft_get_ecut_from_box
1232
1233#include "undef.F90"
1234#include "real.F90"
1235#include "fft_inc.F90"
1236
1237#include "undef.F90"
1238#include "complex.F90"
1239#include "fft_inc.F90"
1240
1241end module fft_oct_m
1242
1243!! Local Variables:
1244!! mode: f90
1245!! coding: utf-8
1246!! End:
subroutine optimize()
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
double log(double __x) __attribute__((__nothrow__
subroutine, public clfft_print_error(ierr, name)
Definition: accel.F90:1787
pure logical function, public accel_is_enabled()
Definition: accel.F90:400
type(accel_t), public accel
Definition: accel.F90:270
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
Definition: fft.F90:118
subroutine zfft_forward_accel(fft, in, out)
Definition: fft.F90:1945
subroutine dfft_backward_1d(fft, in, out)
Definition: fft.F90:1749
integer, parameter cufft_z2d
Definition: fft.F90:265
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
Definition: fft.F90:1160
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
Definition: fft.F90:418
subroutine, public fft_all_init(namespace)
initialize the table
Definition: fft.F90:278
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
Definition: fft.F90:1246
subroutine dfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1381
subroutine dfft_forward_accel(fft, in, out)
Definition: fft.F90:1489
subroutine, public fft_end(this)
Definition: fft.F90:996
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
Definition: fft.F90:1201
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
Definition: fft.F90:1224
integer, parameter cufft_z2z
Definition: fft.F90:265
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
Definition: fft.F90:1106
subroutine zfft_backward_1d(fft, in, out)
Definition: fft.F90:2205
integer, parameter, public fftlib_accel
Definition: fft.F90:183
subroutine, public fft_all_end()
delete all plans
Definition: fft.F90:391
integer function fft_size(size, factors, parity)
Definition: fft.F90:1131
subroutine zfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:2028
subroutine fft_operation_count(fft)
Definition: fft.F90:1187
subroutine zfft_backward_accel(fft, in, out)
Definition: fft.F90:2142
integer, parameter cufft_c2r
Definition: fft.F90:265
integer, parameter cufft_c2c
Definition: fft.F90:265
integer, parameter, public fft_real
Definition: fft.F90:178
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: fft.F90:1080
integer, parameter, public fft_complex
Definition: fft.F90:178
integer, parameter, public fftlib_nfft
Definition: fft.F90:183
subroutine dfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:1572
subroutine, public fft_copy(fft_i, fft_o)
Definition: fft.F90:1061
subroutine dfft_forward_1d(fft, in, out)
Definition: fft.F90:1554
integer, parameter cufft_d2z
Definition: fft.F90:265
integer, parameter fft_null
Definition: fft.F90:191
integer, parameter, public fftlib_pnfft
Definition: fft.F90:183
subroutine zfft_forward_1d(fft, in, out)
Definition: fft.F90:2010
subroutine zfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1846
integer, parameter, public fftlib_pfft
Definition: fft.F90:183
subroutine dfft_backward_accel(fft, in, out)
Definition: fft.F90:1686
integer, parameter, public fftlib_fftw
Definition: fft.F90:183
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
Definition: fft.F90:954
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
Definition: fftw.F90:187
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
Definition: fftw.F90:344
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
Definition: fftw.F90:305
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
Definition: fftw.F90:319
real(real64), parameter, public m_two
Definition: global.F90:190
real(real64), parameter, public m_huge
Definition: global.F90:206
real(real64), parameter, public m_half
Definition: global.F90:194
real(real64), parameter, public m_one
Definition: global.F90:189
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1113
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:537
subroutine, public messages_new_line()
Definition: messages.F90:1134
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:414
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:713
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:616
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:136
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:266
subroutine, public nfft_write_info(nfft)
Definition: nfft.F90:323
subroutine, public nfft_end(nfft)
Definition: nfft.F90:386
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
Definition: nfft.F90:257
subroutine, public nfft_copy_info(in, out)
Definition: nfft.F90:400
subroutine, public nfft_precompute(nfft, X1, X2, X3)
Definition: nfft.F90:429
subroutine, public nfft_guru_options(nfft, namespace)
Definition: nfft.F90:190
The low level module to work with the PFFT library. http:
Definition: pfft.F90:164
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:298
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:263
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
Definition: pfft.F90:188
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:229
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: pfft.F90:335
The includes for the PFFT.
Definition: pfft.F90:115
The low level module to work with the PNFFT library. http:
Definition: pnfft.F90:128
subroutine, public pnfft_copy_params(in, out)
Definition: pnfft.F90:304
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
Definition: pnfft.F90:495
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
Definition: pnfft.F90:362
subroutine, public pnfft_write_info(pnfft)
Definition: pnfft.F90:319
subroutine, public pnfft_guru_options(pnfft, namespace)
Definition: pnfft.F90:202
subroutine, public pnfft_end(pnfft)
Definition: pnfft.F90:471
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
Definition: pnfft.F90:268
This module defines the unit system, used for input and output.
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
This is defined even when running serial.
Definition: mpi.F90:142
int true(void)