Octopus
fft.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!! Copyright (C) 2011 J. Alberdi-Rodriguez, P. Garcia RisueƱo, M. Oliveira
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
25module fft_oct_m
26 use, intrinsic :: iso_c_binding
27
28 use accel_oct_m
29#ifdef HAVE_OPENCL
30 use cl
31#ifdef HAVE_CLFFT
32 use clfft
33#endif
34#endif
35 use fftw_oct_m
37 use debug_oct_m
38 use global_oct_m
39 use, intrinsic :: iso_fortran_env
44 use mpi_oct_m
46#ifdef HAVE_NFFT
47 use nfft_oct_m
48#endif
49#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
50 use omp_lib
51#endif
52 use parser_oct_m
53 use pfft_oct_m
55 use pnfft_oct_m
57 use types_oct_m
60
61 implicit none
62
63 private
64 public :: &
65 fft_t, &
68 fft_init, &
70 fft_end, &
71 fft_copy, &
73 pad_feq, &
81
82
84 integer, public, parameter :: &
85 FFT_NONE = 0, &
86 fft_real = 1, &
87 fft_complex = 2
88
89 integer, public, parameter :: &
90 FFTLIB_NONE = 0, &
91 fftlib_fftw = 1, &
92 fftlib_pfft = 2, &
93 fftlib_accel = 3, &
94 fftlib_nfft = 4, &
95 fftlib_pnfft = 5
96
97 integer, parameter :: &
98 FFT_MAX = 10, &
99 fft_null = -1
100
101 type fft_t
102 private
103 integer :: slot = 0
104
105 integer, public :: type
106 integer, public :: library
107
108 type(MPI_Comm) :: comm
109 integer :: rs_n_global(3)
110 integer :: fs_n_global(3)
111 integer :: rs_n(3)
112 integer :: fs_n(3)
113 integer :: rs_istart(1:3)
114 integer :: fs_istart(1:3)
115
116 integer, public :: stride_rs(1:3)
117 integer, public :: stride_fs(1:3)
119 type(c_ptr) :: planf
120 type(c_ptr) :: planb
121 !integer(ptrdiff_t_kind) :: pfft_planf !< PFFT plan for forward transform
122 !integer(ptrdiff_t_kind) :: pfft_planb !< PFFT plan for backward transform
123
126 real(real64), pointer, public :: drs_data(:,:,:)
127 complex(real64), pointer, public :: zrs_data(:,:,:)
128 complex(real64), pointer, public :: fs_data(:,:,:)
129#ifdef HAVE_CLFFT
130
131 type(clfftPlanHandle) :: cl_plan_fw
132 type(clfftPlanHandle) :: cl_plan_bw
133#endif
134 type(c_ptr) :: cuda_plan_fw
135 type(c_ptr) :: cuda_plan_bw
136#ifdef HAVE_NFFT
137 type(nfft_t), public :: nfft
138#endif
139 type(pnfft_t), public :: pnfft
140
141 logical, public :: aligned_memory
142 end type fft_t
143
144 interface dfft_forward
146 end interface dfft_forward
147
148 interface zfft_forward
150 end interface zfft_forward
151
152 interface dfft_backward
154 end interface dfft_backward
155
156 interface zfft_backward
158 end interface zfft_backward
159
160 logical, save, public :: fft_initialized = .false.
161 integer, save :: fft_refs(FFT_MAX)
162 type(fft_t), save :: fft_array(FFT_MAX)
163 logical :: fft_optimize
164 integer, save :: fft_prepare_plan
165 integer, public :: fft_default_lib = -1
166#ifdef HAVE_NFFT
167 type(nfft_t), save :: nfft_options
168#endif
169 type(pnfft_t), save :: pnfft_options
170
171 integer, parameter :: &
172 CUFFT_R2C = int(z'2a'), &
173 cufft_c2r = int(z'2c'), &
174 cufft_c2c = int(z'29'), &
175 cufft_d2z = int(z'6a'), &
176 cufft_z2d = int(z'6c'), &
177 cufft_z2z = int(z'69')
178
179contains
180
181 ! ---------------------------------------------------------
183 subroutine fft_all_init(namespace)
184 type(namespace_t), intent(in) :: namespace
185
186 integer :: ii, fft_default
187#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
188 integer :: iret
189#endif
191 push_sub(fft_all_init)
192
193 fft_initialized = .true.
195 !%Variable FFTOptimize
196 !%Type logical
197 !%Default yes
198 !%Section Mesh::FFTs
199 !%Description
200 !% Should <tt>octopus</tt> optimize the FFT dimensions?
201 !% This means that the mesh to which FFTs are applied is not taken to be as small
202 !% as possible: some points may be added to each direction in order to get a "good number"
203 !% for the performance of the FFT algorithm.
204 !% The best FFT grid dimensions are given by <math>2^a 3^b 5^c 7^d 11^e 13^f</math>
205 !% where <math>a,b,c,d</math> are arbitrary and <math>e,f</math> are 0 or 1.
206 !% (<a href=http://www.fftw.org/doc/Complex-DFTs.html>ref</a>).
207 !% In some cases, namely when using
208 !% the split-operator, or Suzuki-Trotter propagators, this option should be turned off.
209 !% For spatial FFTs in periodic directions, the grid is never optimized, but a warning will
210 !% be written if the number is not good, with a suggestion of a better one to use, so you
211 !% can try a different spacing if you want to get a good number.
212 !%End
213 call parse_variable(namespace, 'FFTOptimize', .true., fft_optimize)
214 do ii = 1, fft_max
215 fft_refs(ii) = fft_null
216 end do
217
218 !%Variable FFTPreparePlan
219 !%Type integer
220 !%Default fftw_measure
221 !%Section Mesh::FFTs
222 !%Description
223 !% The FFTs are performed in octopus with the help of <a href=http://www.fftw.org>FFTW</a> and similar packages.
224 !% Before doing the actual computations, this package prepares a "plan", which means that
225 !% the precise numerical strategy to be followed to compute the FFT is machine/compiler-dependent,
226 !% and therefore the software attempts to figure out which is this precise strategy (see the
227 !% FFTW documentation for details). This plan preparation, which has to be done for each particular
228 !% FFT shape, can be done exhaustively and carefully (slow), or merely estimated. Since this is
229 !% a rather critical numerical step, by default it is done carefully, which implies a longer initial
230 !% initialization, but faster subsequent computations. You can change this behaviour by changing
231 !% this <tt>FFTPreparePlan</tt> variable, and in this way you can force FFTW to do a fast guess or
232 !% estimation of which is the best way to perform the FFT.
233 !%Option fftw_measure 0
234 !% This plan implies a longer initialization, but involves a more careful analysis
235 !% of the strategy to follow, and therefore more efficient FFTs. A side effect of the runtime
236 !% choices is that this plan can introduce slight numerical fluctuations between runs.
237 !%Option fftw_estimate 64
238 !% This is the "fast initialization" scheme, in which the plan is merely guessed from "reasonable"
239 !% assumptions. This is the default option, as it guarantees stable results
240 !%Option fftw_patient 32
241 !% It is like fftw_measure, but considers a wider range of algorithms and often produces a
242 !% "more optimal" plan (especially for large transforms), but at the expense of several times
243 !% longer planning time (especially for large transforms).
244 !%Option fftw_exhaustive 8
245 !% It is like fftw_patient, but considers an even wider range of algorithms,
246 !% including many that we think are unlikely to be fast, to produce the most optimal
247 !% plan but with a substantially increased planning time.
248 !%End
249 call parse_variable(namespace, 'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
250 if (.not. varinfo_valid_option('FFTPreparePlan', fft_prepare_plan)) then
251 call messages_input_error(namespace, 'FFTPreparePlan')
252 end if
254 !%Variable FFTLibrary
255 !%Type integer
256 !%Section Mesh::FFTs
257 !%Default fftw
258 !%Description
259 !% (experimental) You can select the FFT library to use.
260 !%Option fftw 1
261 !% Uses FFTW3 library.
262 !%Option pfft 2
263 !% (experimental) Uses PFFT library, which has to be linked.
264 !%Option accel 3
265 !% (experimental) Uses a GPU accelerated library. This only
266 !% works if Octopus was compiled with Cuda or OpenCL support.
267 !%End
268 fft_default = fftlib_fftw
269 if(accel_is_enabled()) then
270 fft_default = fftlib_accel
271 end if
272 call parse_variable(namespace, 'FFTLibrary', fft_default, fft_default_lib)
273
274 if (fft_default_lib == fftlib_accel) then
275#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA))
276 call messages_write('You have selected the Accelerated FFT, but Octopus was compiled', new_line = .true.)
277 call messages_write('without clfft (OpenCL) or Cuda support.')
278 call messages_fatal()
279#endif
280 if (.not. accel_is_enabled()) then
281 call messages_write('You have selected the accelerated FFT, but acceleration is disabled.')
282 call messages_fatal()
283 end if
284 end if
285
286#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
287 if (omp_get_max_threads() > 1) then
288
289 call messages_write('Info: Initializing Multi-threaded FFTW')
290 call messages_info()
291
292 iret = fftw_init_threads()
293 if (iret == 0) then
294 call messages_write('Initialization of FFTW3 threads failed.')
295 call messages_fatal()
296 end if
297 call fftw_plan_with_nthreads(omp_get_max_threads())
298
299 end if
300#endif
301#ifdef HAVE_NFFT
302 call nfft_guru_options(nfft_options, namespace)
303#endif
304 call pnfft_guru_options(pnfft_options, namespace)
305
306 pop_sub(fft_all_init)
307 end subroutine fft_all_init
308
309
310 ! ---------------------------------------------------------
312 subroutine fft_all_end()
313 integer :: ii
314
315 push_sub(fft_all_end)
316
317 do ii = 1, fft_max
318 if (fft_refs(ii) /= fft_null) then
319 call fft_end(fft_array(ii))
320 end if
321 end do
322
323#ifdef HAVE_PFFT
324 call pfft_cleanup()
325#endif
326
327#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
328 call fftw_cleanup_threads()
329#else
330 call fftw_cleanup()
331#endif
332
333 fft_initialized = .false.
334
335 pop_sub(fft_all_end)
336 end subroutine fft_all_end
337
338 ! ---------------------------------------------------------
339 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
340 type(fft_t), intent(inout) :: this
341 integer, intent(inout) :: nn(3)
342 integer, intent(in) :: dim
343 integer, intent(in) :: type
344 integer, intent(in) :: library
345 logical, intent(in) :: optimize(3)
346 integer, intent(in) :: optimize_parity(3)
348 type(mpi_comm), optional, intent(out) :: comm
349 type(mpi_grp_t), optional, intent(in) :: mpi_grp
350 logical, optional :: use_aligned
351
352 integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
353 integer :: n_1, n_2, n_3, nn_temp(3)
354 integer :: library_
355 type(mpi_grp_t) :: mpi_grp_
356 integer(int64) :: number_points, alloc_size
357
358#ifdef HAVE_CLFFT
359 real(real64) :: scale
360 integer :: status
361#endif
362#ifdef HAVE_PFFT
363 integer :: ierror
364#endif
365
366 push_sub(fft_init)
367
368 assert(fft_initialized)
369
370 assert(type == fft_real .or. type == fft_complex)
371
372 mpi_grp_ = mpi_world
373 if (present(mpi_grp)) mpi_grp_ = mpi_grp
374
375 this%aligned_memory = optional_default(use_aligned, .false.)
376
377 ! First, figure out the dimensionality of the FFT.
378 fft_dim = 0
379 do ii = 1, dim
380 if (nn(ii) <= 1) exit
381 fft_dim = fft_dim + 1
382 end do
383
384 if (fft_dim == 0) then
385 message(1) = "Internal error in fft_init: apparently, a 1x1x1 FFT is required."
386 call messages_fatal(1)
387 end if
388
389 if (fft_dim > 3) call messages_not_implemented('FFT for dimension > 3')
390
391 library_ = library
392 nn_temp(1:fft_dim) = nn(1:fft_dim)
393
394 select case (library_)
395 case (fftlib_accel)
396 ! FFT optimization
397 if(any(optimize_parity(1:fft_dim) > 1)) then
398 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
399 call messages_fatal(1)
400 end if
401
402 do ii = 1, fft_dim
403 nn_temp(ii) = fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
404 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
405 end do
406
407 ! if we can't optimize, in some cases we can't use the library
408 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
409#ifdef HAVE_CLFFT
410 call messages_write('Invalid grid size for accel fft. FFTW will be used instead.')
411 call messages_warning()
412 library_ = fftlib_fftw
413#endif
414 end if
415
417
418 do ii = 1, fft_dim
419 !NFFT likes even grids
420 !The underlying FFT grids are optimized inside the nfft_init routine
421 if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and. optimize(ii)))&
422 nn(ii)=nn(ii)+1
423 end do
424
425 case (fftlib_pnfft)
426
427 do ii = 1, fft_dim
428 !also PNFFT likes even grids
429 if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
430 end do
431
432 if (fft_dim < 3) then
433 call messages_not_implemented('PNFFT support for dimension < 3')
434 end if
435
436 case default
437
438 if (fft_dim < 3 .and. library_ == fftlib_pfft) then
439 call messages_not_implemented('PFFT support for dimension < 3')
440 end if
441
442 ! FFT optimization
443 if (any(optimize_parity(1:fft_dim) > 1)) then
444 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
445 call messages_fatal(1)
446 end if
447
448 do ii = 1, fft_dim
449 call loct_fft_optimize(nn_temp(ii), optimize_parity(ii))
450 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
451 end do
452
453 end select
454
455 ! find out if fft has already been allocated
456 jj = 0
457 do ii = fft_max, 1, -1
458 if (fft_refs(ii) /= fft_null) then
459 if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and. type == fft_array(ii)%type &
460 .and. library_ == fft_array(ii)%library .and. library_ /= fftlib_nfft &
461 .and. library_ /= fftlib_pnfft &
462 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory) then
463
464 ! NFFT and PNFFT plans are always allocated from scratch since they
465 ! are very likely to be different
466 this = fft_array(ii) ! return a copy
467 fft_refs(ii) = fft_refs(ii) + 1 ! increment the ref count
468 if (present(comm)) comm = fft_array(ii)%comm ! also return the MPI communicator
469 pop_sub(fft_init)
470 return
471 end if
472 else
473 jj = ii
474 end if
475 end do
476
477 if (jj == 0) then
478 message(1) = "Not enough slots for FFTs."
479 message(2) = "Please increase FFT_MAX in fft.F90 and recompile."
480 call messages_fatal(2)
481 end if
482
483 ! jj now contains an empty slot
484 fft_refs(jj) = 1
485 fft_array(jj)%slot = jj
486 fft_array(jj)%type = type
487 fft_array(jj)%library = library_
488 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
489 fft_array(jj)%rs_n_global(dim+1:) = 1
490 nullify(fft_array(jj)%drs_data)
491 nullify(fft_array(jj)%zrs_data)
492 nullify(fft_array(jj)%fs_data)
493
494 fft_array(jj)%aligned_memory = this%aligned_memory
495
496 ! Initialize parallel communicator
497 select case (library_)
498 case (fftlib_pfft)
499#ifdef HAVE_PFFT
500 call pfft_init()
501
502 call pfft_decompose(mpi_grp_%size, column_size, row_size)
503
504 ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)
505
506 if (ierror /= 0) then
507 message(1) = "The number of rows and columns in PFFT processor grid is not equal to "
508 message(2) = "the number of processor in the MPI communicator."
509 message(3) = "Please check it."
510 call messages_fatal(3)
511 end if
512#endif
513
514 case (fftlib_pnfft)
515#ifdef HAVE_PNFFT
516 call pnfft_init_procmesh(fft_array(jj)%pnfft, mpi_grp_, fft_array(jj)%comm)
517#endif
518 case default
519 fft_array(jj)%comm = mpi_comm_undefined
520
521 end select
522
523 if (present(comm)) comm = fft_array(jj)%comm
524
525 ! Get dimentions of arrays
526 select case (library_)
527 case (fftlib_fftw)
528 call fftw_get_dims(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global)
529 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
530 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
531 fft_array(jj)%rs_istart = 1
532 fft_array(jj)%fs_istart = 1
533
534 if (this%aligned_memory) then
535 call fftw_alloc_memory(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global, &
536 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
537 end if
538
539 case (fftlib_pfft)
540#ifdef HAVE_PFFT
541 call pfft_get_dims(fft_array(jj)%rs_n_global, comm%MPI_VAL, type == fft_real, &
542 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
543 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
544 !write(*,"(6(A,3I4,/),A,I10,/)") "PFFT: rs_n_global = ",fft_array(jj)%rs_n_global,&
545 ! "fs_n_global = ",fft_array(jj)%fs_n_global,&
546 ! "rs_n = ",fft_array(jj)%rs_n,&
547 ! "fs_n = ",fft_array(jj)%fs_n,&
548 ! "rs_istart = ",fft_array(jj)%rs_istart,&
549 ! "fs_istart = ",fft_array(jj)%fs_istart,&
550 ! "alloc_size = ",alloc_size
551#endif
552
553 ! Allocate memory. Note that PFFT may need extra memory space
554 ! and that in fourier space the function will be transposed
555 if (type == fft_real) then
556 n_1 = max(1, fft_array(jj)%rs_n(1))
557 n_2 = max(1, fft_array(jj)%rs_n(2))
558 n_3 = max(1, fft_array(jj)%rs_n(3))
559
560 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2))
561 safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
562 else
563 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2)))
564 safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
565 end if
566
567 n_1 = max(1, fft_array(jj)%fs_n(1))
568 n_2 = max(1, fft_array(jj)%fs_n(2))
569 n_3 = max(1, fft_array(jj)%fs_n(3))
570
571 n3 = ceiling(real(alloc_size)/real(n_3*n_1))
572 safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))
573
574 case (fftlib_accel)
575 call fftw_get_dims(fft_array(jj)%rs_n_global, (type == fft_real), fft_array(jj)%fs_n_global)
576 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
577 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
578 fft_array(jj)%rs_istart = 1
579 fft_array(jj)%fs_istart = 1
580
581 case (fftlib_nfft)
582 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
583 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
584 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
585 fft_array(jj)%rs_istart = 1
586 fft_array(jj)%fs_istart = 1
587
588 case (fftlib_pnfft)
589 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
590 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
591 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
592 fft_array(jj)%rs_istart = 1
593 fft_array(jj)%fs_istart = 1
594 ! indices partition is performed together with the plan preparation
595
596
597 end select
598
599 ! Prepare plans
600 select case (library_)
601 case (fftlib_fftw)
602 if (.not. this%aligned_memory) then
603 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
604 type == fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
605 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
606 type == fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
607 else
608 if (type == fft_real) then
609 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
610 type == fft_real, fftw_forward, fft_prepare_plan, &
611 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
612 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
613 type == fft_real, fftw_backward, fft_prepare_plan, &
614 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
615 else
616 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
617 type == fft_real, fftw_forward, fft_prepare_plan, &
618 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
619 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
620 type == fft_real, fftw_backward, fft_prepare_plan, &
621 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
622 end if
623 end if
624
625 case (fftlib_nfft)
626#ifdef HAVE_NFFT
627 call nfft_copy_info(this%nfft,fft_array(jj)%nfft) !copy default parameters set in the calling routine
628 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
629 fft_dim, fft_array(jj)%rs_n_global, optimize = .true.)
630#endif
631 case (fftlib_pfft)
632#ifdef HAVE_PFFT
633 if (type == fft_real) then
634 call pfft_prepare_plan_r2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%drs_data, &
635 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
636 call pfft_prepare_plan_c2r(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
637 fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
638 else
639 call pfft_prepare_plan_c2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%zrs_data, &
640 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
641 call pfft_prepare_plan_c2c(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
642 fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
643 end if
644#endif
645 case (fftlib_pnfft)
646#ifdef HAVE_PNFFT
647 call pnfft_copy_params(this%pnfft, fft_array(jj)%pnfft) ! pass default parameters like in NFFT
648
649 ! NOTE:
650 ! PNFFT (likewise NFFT) breaks the symmetry between real space and Fourier space
651 ! by allowing the possibility to have an unstructured grid in rs and by
652 ! using different parallelizations (the rs is transposed w.r.t. fs).
653 ! Octopus, in fourier_space_m, uses the convention for which the mapping
654 ! between rs and fs is done with a forward transform (and fs->rs with backward).
655 ! This is exactly the opposite of the definitions used by all the libraries
656 ! performing FFTs (PNFFT and NFFT included) [see e.g. M. Frigo, and S. G. Johnson, Proc.
657 ! IEEE 93, 216-231 (2005)].
658 ! While this leads to no problem on ordinary ffts where fs and rs can be exchanged
659 ! it does makes a fundamental difference for PNFFT (for some reason I don`t know NFFT
660 ! is still symmetric).
661 ! Therefore, in order to perform rs->fs tranforms with PNFFT one should use the
662 ! backward transform.
663
664 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, comm, fft_array(jj)%fs_n_global, &
665 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
666#endif
667 case (fftlib_accel)
668
669 fft_array(jj)%stride_rs(1) = 1
670 fft_array(jj)%stride_fs(1) = 1
671 do ii = 2, fft_dim
672 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
673 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
674 end do
675
676#ifdef HAVE_CUDA
677 if (type == fft_real) then
678 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
679 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_d2z, &
680 accel%cuda_stream)
681 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
682 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2d, &
683 accel%cuda_stream)
684 else
685 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
686 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
687 accel%cuda_stream)
688 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
689 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
690 accel%cuda_stream)
691 end if
692#endif
693
694#ifdef HAVE_CLFFT
695
696 ! create the plans
697 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_fw, accel%context%cl_context, &
698 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
699 if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')
700
701 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_bw, accel%context%cl_context, &
702 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
703 if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')
704
705 ! set precision
706
707 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
708 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
709
710 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
711 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
712
713 ! set number of transforms to 1
714
715 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_fw, 1_real64, status)
716 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')
717
718 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_bw, 1_real64, status)
719 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')
720
721 ! set the type precision to double
722
723 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
724 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
725
726 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
727 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')
728
729
730 ! set the layout
731
732 if (type == fft_real) then
733
734 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_real, clfft_hermitian_interleaved, status)
735 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
736
737 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_hermitian_interleaved, clfft_real, status)
738 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
739
740 else
741
742 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_complex_interleaved, clfft_complex_interleaved, status)
743 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
744
745 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_complex_interleaved, clfft_complex_interleaved, status)
746 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')
747
748 end if
749
750 ! set the plans as at out of place
751
752 call clfftsetresultlocation(fft_array(jj)%cl_plan_fw, clfft_outofplace, status)
753 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')
754
755 call clfftsetresultlocation(fft_array(jj)%cl_plan_bw, clfft_outofplace, status)
756 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')
757
758 ! the strides
759
760 call clfftsetplaninstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
761 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')
762
763 call clfftsetplanoutstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
764 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')
765
766 call clfftsetplaninstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
767 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')
768
769 call clfftsetplanoutstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
770 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')
771
772 ! set the scaling factors
773
774 scale = 1.0_real64/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), real64)))
775
776 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_forward, 1.0_real64, status)
777 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
778
779 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_backward, scale, status)
780 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
781
782 if (type == fft_real) then
783
784 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, 1.0_real64, status)
785 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
786
787 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, scale, status)
788 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
789
790 else
791
792 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, scale, status)
793 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
794
795 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, 1.0_real64, status)
796 if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')
797
798 end if
799
800 ! now 'bake' the plans, this signals that the plans are ready to use
801
802 call clfftbakeplan(fft_array(jj)%cl_plan_fw, accel%command_queue, status)
803 if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')
804
805 call clfftbakeplan(fft_array(jj)%cl_plan_bw, accel%command_queue, status)
806 if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')
807
808#endif
809
810 case default
811 call messages_write('Invalid FFT library.')
812 call messages_fatal()
813 end select
814
815 this = fft_array(jj)
816
817 ! Write information
818 if (.not. (library_ == fftlib_nfft .or. library_ == fftlib_pnfft)) then
819 call messages_write('Info: FFT grid dimensions =')
820 number_points = 1
821 do idir = 1, dim
822 call messages_write(fft_array(jj)%rs_n_global(idir))
823 if (idir < dim) call messages_write(" x ")
824 ! do the multiplication in a integer(int64) to avoid overflow for large grids
825 number_points = number_points * fft_array(jj)%rs_n_global(idir)
826 end do
827 call messages_new_line()
828
829 call messages_write(' Total grid size =')
830 call messages_write(number_points)
831 call messages_write(' (')
832 call messages_write(number_points*8.0_real64, units = unit_megabytes, fmt = '(f9.1)')
833 call messages_write(' )')
834 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
835 call messages_new_line()
836 call messages_write(' Inefficient FFT grid. A better grid would be: ')
837 do idir = 1, fft_dim
838 call messages_write(nn_temp(idir))
839 end do
840 end if
841 call messages_info()
842 end if
843
844 select case (library_)
845 case (fftlib_pfft)
846 write(message(1),'(a)') "Info: FFT library = PFFT"
847 write(message(2),'(a)') "Info: PFFT processor grid"
848 write(message(3),'(a, i9)') " No. of processors = ", mpi_grp_%size
849 write(message(4),'(a, i9)') " No. of columns in the proc. grid = ", column_size
850 write(message(5),'(a, i9)') " No. of rows in the proc. grid = ", row_size
851 write(message(6),'(a, i9)') " The size of integer is = ", c_intptr_t
852 call messages_info(6)
853
854 case (fftlib_pnfft)
855#ifdef HAVE_PNFFT
856 call messages_write("Info: FFT library = PNFFT")
857 call messages_info()
858 call pnfft_write_info(fft_array(jj)%pnfft)
859#endif
860 case (fftlib_nfft)
861#ifdef HAVE_NFFT
862 call messages_write("Info: FFT library = NFFT")
863 call messages_info()
864 call nfft_write_info(fft_array(jj)%nfft)
865#endif
866 end select
867
868 pop_sub(fft_init)
869 end subroutine fft_init
870
871 ! ---------------------------------------------------------
875 subroutine fft_init_stage1(this, namespace, XX, nn)
876 type(fft_t), intent(inout) :: this
879 type(namespace_t), intent(in) :: namespace
880 real(real64), intent(in) :: xx(:,:)
881 integer, optional, intent(in) :: nn(:)
882
883 integer :: slot
884
885 push_sub(fft_init_stage1)
886
887 assert(size(xx,2) == 3)
888
889 slot = this%slot
890 select case (fft_array(slot)%library)
891 case (fftlib_fftw)
892 !Do nothing
893 case (fftlib_nfft)
894#ifdef HAVE_NFFT
895 assert(present(nn))
896 call nfft_precompute(fft_array(slot)%nfft, &
897 xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
898#endif
899 case (fftlib_pfft)
900 !Do nothing
901 case (fftlib_accel)
902 !Do nothing
903 case (fftlib_pnfft)
904#ifdef HAVE_PNFFT
905 call pnfft_set_sp_nodes(fft_array(slot)%pnfft, namespace, xx)
906#endif
907 case default
908 call messages_write('Invalid FFT library.')
909 call messages_fatal()
910 end select
911
912
913
914 pop_sub(fft_init_stage1)
915 end subroutine fft_init_stage1
916 ! ---------------------------------------------------------
917 subroutine fft_end(this)
918 type(fft_t), intent(inout) :: this
919
920 integer :: ii
921#ifdef HAVE_CLFFT
922 integer :: status
923#endif
924
925 push_sub(fft_end)
926
927 ii = this%slot
928 if (fft_refs(ii) == fft_null) then
929 message(1) = "Trying to deallocate FFT that has not been allocated."
930 call messages_warning(1)
931 else
932 if (fft_refs(ii) > 1) then
933 fft_refs(ii) = fft_refs(ii) - 1
934 else
935 select case (fft_array(ii)%library)
936 case (fftlib_fftw)
937 call fftw_destroy_plan(fft_array(ii)%planf)
938 call fftw_destroy_plan(fft_array(ii)%planb)
939
940 if (this%aligned_memory) then
941 call fftw_free_memory(this%type == fft_real, &
942 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
943 end if
944
945 case (fftlib_pfft)
946#ifdef HAVE_PFFT
947 call pfft_destroy_plan(fft_array(ii)%planf)
948 call pfft_destroy_plan(fft_array(ii)%planb)
949#endif
950 safe_deallocate_p(fft_array(ii)%drs_data)
951 safe_deallocate_p(fft_array(ii)%zrs_data)
952 safe_deallocate_p(fft_array(ii)%fs_data)
953
954 case (fftlib_accel)
955#ifdef HAVE_CUDA
956 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
957 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
958#endif
959#ifdef HAVE_CLFFT
960 call clfftdestroyplan(fft_array(ii)%cl_plan_fw, status)
961 call clfftdestroyplan(fft_array(ii)%cl_plan_bw, status)
962#endif
963
964 case (fftlib_nfft)
965#ifdef HAVE_NFFT
966 call nfft_end(fft_array(ii)%nfft)
967#endif
968 case (fftlib_pnfft)
969#ifdef HAVE_PNFFT
970 call pnfft_end(fft_array(ii)%pnfft)
971#endif
972 end select
973 fft_refs(ii) = fft_null
974 end if
975 end if
976 this%slot = 0
977
978 pop_sub(fft_end)
979 end subroutine fft_end
980
981 ! ---------------------------------------------------------
982 subroutine fft_copy(fft_i, fft_o)
983 type(fft_t), intent(in) :: fft_i
984 type(fft_t), intent(inout) :: fft_o
985
986 push_sub(fft_copy)
987
988 if (fft_o%slot > 0) then
989 call fft_end(fft_o)
990 end if
991 assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
992 assert(fft_refs(fft_i%slot) > 0)
993
994 fft_o = fft_i
995 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1
996
997 pop_sub(fft_copy)
998 end subroutine fft_copy
999
1000 ! ---------------------------------------------------------
1001 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
1002 type(fft_t), intent(in) :: fft
1003 integer, intent(out) :: rs_n_global(1:3)
1004 integer, intent(out) :: fs_n_global(1:3)
1005 integer, intent(out) :: rs_n(1:3)
1006 integer, intent(out) :: fs_n(1:3)
1007 integer, intent(out) :: rs_istart(1:3)
1008 integer, intent(out) :: fs_istart(1:3)
1009
1010 integer :: slot
1011
1012 push_sub(fft_get_dims)
1013
1014 slot = fft%slot
1015 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3)
1016 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3)
1017 rs_n(1:3) = fft_array(slot)%rs_n(1:3)
1018 fs_n(1:3) = fft_array(slot)%fs_n(1:3)
1019 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3)
1020 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3)
1021
1022 pop_sub(fft_get_dims)
1023 end subroutine fft_get_dims
1024
1025 ! ---------------------------------------------------------
1027 pure function pad_feq(ii, nn, mode)
1028 integer, intent(in) :: ii,nn
1029 logical, intent(in) :: mode
1030 integer :: pad_feq
1031
1032 ! no push_sub: called too frequently
1033
1034 if (mode) then ! index to frequency number
1035 if (ii <= nn/2 + 1) then
1036 pad_feq = ii - 1
1037 else
1038 pad_feq = ii - nn - 1
1039 end if
1040 else
1041 if (ii >= 0) then
1042 pad_feq = ii + 1
1043 else
1044 pad_feq = ii + nn + 1
1045 end if
1046 end if
1047
1048 end function pad_feq
1049
1050 ! -------------------------------------------------------
1051
1052 integer function fft_size(size, factors, parity)
1053 integer, intent(in) :: size
1054 integer, intent(in) :: factors(:)
1055 integer, intent(in) :: parity
1056
1057 integer :: nfactors
1058 integer :: nondiv
1059 integer, allocatable :: exponents(:)
1060
1061 push_sub(fft_size)
1062
1063 nfactors = ubound(factors, dim = 1)
1064
1065 safe_allocate(exponents(1:nfactors))
1066
1067 fft_size = size
1068 do
1069 call get_exponents(fft_size, nfactors, factors, exponents, nondiv)
1070 if (nondiv == 1 .and. mod(fft_size, 2) == parity) exit
1071 fft_size = fft_size + 1
1072 end do
1073
1074 safe_deallocate_a(exponents)
1075
1076 pop_sub(fft_size)
1077 end function fft_size
1079 ! -------------------------------------------------------
1080
1081 subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
1082 integer, intent(in) :: num
1083 integer, intent(in) :: nfactors
1084 integer, intent(in) :: factors(:)
1085 integer, intent(out) :: exponents(:)
1086 integer, intent(out) :: nondiv
1087
1088 integer :: ifactor
1089
1090 push_sub(get_exponents)
1091
1092 nondiv = num
1093 do ifactor = 1, nfactors
1094 exponents(ifactor) = 0
1095 do
1096 if (mod(nondiv, factors(ifactor)) /= 0) exit
1097 nondiv = nondiv/factors(ifactor)
1098 exponents(ifactor) = exponents(ifactor) + 1
1099 end do
1100 end do
1101
1102 pop_sub(get_exponents)
1103 end subroutine get_exponents
1105
1106 ! ----------------------------------------------------------
1107
1108 subroutine fft_operation_count(fft)
1109 type(fft_t), intent(in) :: fft
1110
1111 real(real64) :: fullsize
1112
1113 push_sub(fft_operation_count)
1114
1115 fullsize = product(real(fft%fs_n(1:3), real64))
1116 call profiling_count_operations(5.0_real64*fullsize*log(fullsize)/log(m_two))
1117
1118 pop_sub(fft_operation_count)
1119 end subroutine fft_operation_count
1120
1121 !-----------------------------------------------------------------
1122 subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
1123 integer, intent(in) :: gg_in(:)
1124 real(real64), intent(in) :: temp(:)
1125 integer, intent(in) :: periodic_dim
1126 type(lattice_vectors_t), intent(in) :: latt
1127 real(real64), intent(in) :: qq(:)
1128 real(real64), intent(inout) :: gg(:)
1129 real(real64), intent(out) :: modg2
1130
1131 ! no PUSH_SUB, called too frequently
1132
1133 gg(1:3) = gg_in(1:3)
1134 gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)
1135 gg(1:3) = gg(1:3) * temp(1:3)
1136 gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3),gg(1:3))
1137 modg2 = sum(gg(1:3)**2)
1138
1139 end subroutine fft_gg_transform
1140
1141 ! ----------------------------------------------------------
1142
1145 real(real64) pure function fft_scaling_factor(fft) result(scaling_factor)
1146 type(fft_t), intent(in) :: fft
1147
1148 ! for the moment this factor is handled by the backwards transform for most libraries
1149 scaling_factor = m_one
1150
1151 select case (fft_array(fft%slot)%library)
1152 case (fftlib_accel)
1153#ifdef HAVE_CUDA
1154 scaling_factor = m_one/real(fft_array(fft%slot)%rs_n_global(1), real64)
1155 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(2), real64)
1156 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(3), real64)
1157#endif
1158 end select
1159
1160 end function fft_scaling_factor
1161
1162 ! ----------------------------------------------------------
1165 !
1166 ! Inspired by the routine bounds from Abinit
1167 real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
1168 integer, intent(in) :: box_dim(:)
1169 integer, intent(in) :: fs_istart(:)
1170 type(lattice_vectors_t), intent(in) :: latt
1171 real(real64), intent(in) :: gspacing(:)
1172 integer, intent(in) :: periodic_dim
1173 real(real64), intent(in) :: qq(:)
1174
1175 integer :: lx, ix, iy, iz, idir, idir2, idir3
1176 real(real64) :: dminsq, gg(3), modg2
1177 integer :: box_dim_(3), ixx(3)
1178 integer :: ming(3), maxg(3)
1179
1180 ! no PUSH_SUB, called too frequently
1181
1182 assert(periodic_dim > 0)
1183
1184 box_dim_(1:periodic_dim) = box_dim(1:periodic_dim)
1185 if (periodic_dim < 3) box_dim_(periodic_dim+1:3) = 1
1186
1187 ! We first need to remove asymetric planes for the case of even FFT grids
1188 ming = 1
1189 maxg = 1
1190 do idir = 1, periodic_dim
1191 do lx = 1, box_dim(idir)
1192 ix = fs_istart(idir) + lx - 1
1193 ixx(idir) = pad_feq(ix, box_dim(idir), .true.)
1194 ming(idir) = min(ming(idir), ixx(idir))
1195 maxg(idir) = max(maxg(idir), ixx(idir))
1196 end do
1197 maxg(idir) = min(abs(ming(idir)), maxg(idir))
1198 end do
1200 ! Given the boundaries, we can search the min distance, which gives us the the cutoff energy
1201 dminsq = m_huge
1202 do idir = 1, periodic_dim
1203 idir2 = mod(idir, 3)+1
1204 idir3 = mod(idir+1, 3)+1
1205
1206 ! Negative plane
1207 ixx(idir) = -maxg(idir)
1208 do iy = -maxg(idir2), maxg(idir2)
1209 ixx(idir2) = iy
1210 do iz = -maxg(idir3), maxg(idir3)
1211 ixx(idir3) = iz
1212 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1213 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1214 end do
1215 end do
1216 ! Positive plane
1217 ixx(idir) = maxg(idir)
1218 do iy = -maxg(idir2), maxg(idir2)
1219 ixx(idir2) = iy
1220 do iz = -maxg(idir3), maxg(idir3)
1221 ixx(idir3) = iz
1222 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1223 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1224 end do
1225 end do
1226 end do
1227
1228 ecut = m_half * dminsq
1229
1230 end function fft_get_ecut_from_box
1231
1232#include "undef.F90"
1233#include "real.F90"
1234#include "fft_inc.F90"
1235
1236#include "undef.F90"
1237#include "complex.F90"
1238#include "fft_inc.F90"
1239
1240end module fft_oct_m
1241
1242!! Local Variables:
1243!! mode: f90
1244!! coding: utf-8
1245!! End:
subroutine optimize()
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
double log(double __x) __attribute__((__nothrow__
subroutine, public clfft_print_error(ierr, name)
Definition: accel.F90:1787
pure logical function, public accel_is_enabled()
Definition: accel.F90:400
type(accel_t), public accel
Definition: accel.F90:270
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
Definition: fft.F90:118
subroutine zfft_forward_accel(fft, in, out)
Definition: fft.F90:1944
subroutine dfft_backward_1d(fft, in, out)
Definition: fft.F90:1748
integer, parameter cufft_z2d
Definition: fft.F90:264
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
Definition: fft.F90:1159
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
Definition: fft.F90:417
subroutine, public fft_all_init(namespace)
initialize the table
Definition: fft.F90:277
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
Definition: fft.F90:1245
subroutine dfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1380
subroutine dfft_forward_accel(fft, in, out)
Definition: fft.F90:1488
subroutine, public fft_end(this)
Definition: fft.F90:995
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
Definition: fft.F90:1200
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
Definition: fft.F90:1223
integer, parameter cufft_z2z
Definition: fft.F90:264
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
Definition: fft.F90:1105
subroutine zfft_backward_1d(fft, in, out)
Definition: fft.F90:2204
integer, parameter, public fftlib_accel
Definition: fft.F90:182
subroutine, public fft_all_end()
delete all plans
Definition: fft.F90:390
integer function fft_size(size, factors, parity)
Definition: fft.F90:1130
subroutine zfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:2027
subroutine fft_operation_count(fft)
Definition: fft.F90:1186
subroutine zfft_backward_accel(fft, in, out)
Definition: fft.F90:2141
integer, parameter cufft_c2r
Definition: fft.F90:264
integer, parameter cufft_c2c
Definition: fft.F90:264
integer, parameter, public fft_real
Definition: fft.F90:177
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: fft.F90:1079
integer, parameter, public fft_complex
Definition: fft.F90:177
integer, parameter, public fftlib_nfft
Definition: fft.F90:182
subroutine dfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:1571
subroutine, public fft_copy(fft_i, fft_o)
Definition: fft.F90:1060
subroutine dfft_forward_1d(fft, in, out)
Definition: fft.F90:1553
integer, parameter cufft_d2z
Definition: fft.F90:264
integer, parameter fft_null
Definition: fft.F90:190
integer, parameter, public fftlib_pnfft
Definition: fft.F90:182
subroutine zfft_forward_1d(fft, in, out)
Definition: fft.F90:2009
subroutine zfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1845
integer, parameter, public fftlib_pfft
Definition: fft.F90:182
subroutine dfft_backward_accel(fft, in, out)
Definition: fft.F90:1685
integer, parameter, public fftlib_fftw
Definition: fft.F90:182
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
Definition: fft.F90:953
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
Definition: fftw.F90:187
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
Definition: fftw.F90:344
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
Definition: fftw.F90:305
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
Definition: fftw.F90:319
real(real64), parameter, public m_two
Definition: global.F90:189
real(real64), parameter, public m_huge
Definition: global.F90:205
real(real64), parameter, public m_half
Definition: global.F90:193
real(real64), parameter, public m_one
Definition: global.F90:188
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1125
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:543
subroutine, public messages_new_line()
Definition: messages.F90:1146
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:420
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:723
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:624
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:136
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:266
subroutine, public nfft_write_info(nfft)
Definition: nfft.F90:323
subroutine, public nfft_end(nfft)
Definition: nfft.F90:386
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
Definition: nfft.F90:257
subroutine, public nfft_copy_info(in, out)
Definition: nfft.F90:400
subroutine, public nfft_precompute(nfft, X1, X2, X3)
Definition: nfft.F90:429
subroutine, public nfft_guru_options(nfft, namespace)
Definition: nfft.F90:190
The low level module to work with the PFFT library. http:
Definition: pfft.F90:164
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:298
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:263
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
Definition: pfft.F90:188
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:229
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: pfft.F90:335
The includes for the PFFT.
Definition: pfft.F90:115
The low level module to work with the PNFFT library. http:
Definition: pnfft.F90:128
subroutine, public pnfft_copy_params(in, out)
Definition: pnfft.F90:304
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
Definition: pnfft.F90:495
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
Definition: pnfft.F90:362
subroutine, public pnfft_write_info(pnfft)
Definition: pnfft.F90:319
subroutine, public pnfft_guru_options(pnfft, namespace)
Definition: pnfft.F90:202
subroutine, public pnfft_end(pnfft)
Definition: pnfft.F90:471
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
Definition: pnfft.F90:268
This module defines the unit system, used for input and output.
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
This is defined even when running serial.
Definition: mpi.F90:142
int true(void)