main/doxygen_doc/fft_8F90_source.html

!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch

!! Copyright (C) 2011 J. Alberdi-Rodriguez, P. Garcia Risueño, M. Oliveira

!!

!! This program is free software; you can redistribute it and/or modify

!! it under the terms of the GNU General Public License as published by

!! the Free Software Foundation; either version 2, or (at your option)

!! any later version.

!!

!! This program is distributed in the hope that it will be useful,

!! but WITHOUT ANY WARRANTY; without even the implied warranty of

!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

!! GNU General Public License for more details.

!!

!! You should have received a copy of the GNU General Public License

!! along with this program; if not, write to the Free Software

!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

!! 02110-1301, USA.

!!


#include "global.h"


module fft_oct_m

  use, intrinsic :: iso_c_binding

  use, intrinsic :: ieee_arithmetic


  use accel_oct_m

#ifdef HAVE_OPENCL

  use cl

#ifdef HAVE_CLFFT

  use clfft

#endif

#endif

  use fftw_oct_m

  use fftw_params_oct_m

  use debug_oct_m

  use global_oct_m

  use, intrinsic :: iso_fortran_env

  use lattice_vectors_oct_m

  use lalg_basic_oct_m

  use loct_math_oct_m

  use messages_oct_m

  use mpi_oct_m

  use namespace_oct_m

#ifdef HAVE_NFFT

  use nfft_oct_m

#endif

#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)

  use omp_lib

#endif

  use parser_oct_m

  use pfft_oct_m

  use pfft_params_oct_m

  use pnfft_oct_m

  use profiling_oct_m

  use types_oct_m

  use unit_system_oct_m

  use varinfo_oct_m


  implicit none

  ! Nothing is exported unless it is named in one of the public statements below.
  private

  ! Plan descriptor type
  public :: fft_t
  ! Module-wide and per-plan set-up / tear-down
  public :: fft_all_init, fft_all_end
  public :: fft_init, fft_init_stage1, fft_end, fft_copy
  ! Generic transform interfaces
  public :: dfft_forward, zfft_forward
  public :: dfft_backward, zfft_backward
  ! Remaining exported helpers
  public :: fft_get_dims
  public :: pad_feq
  public :: fft_scaling_factor
  public :: fft_gg_transform
  public :: fft_get_ecut_from_box

  ! Values accepted for the "type" of a plan; fft_init only accepts
  ! fft_real or fft_complex.
  integer, parameter, public :: FFT_NONE    = 0
  integer, parameter, public :: fft_real    = 1
  integer, parameter, public :: fft_complex = 2

  ! Identifiers of the FFT back ends. The values 1-3 are the ones listed as
  ! options of the FFTLibrary input variable.
  integer, parameter, public :: FFTLIB_NONE  = 0
  integer, parameter, public :: fftlib_fftw  = 1
  integer, parameter, public :: fftlib_pfft  = 2
  integer, parameter, public :: fftlib_accel = 3
  integer, parameter, public :: fftlib_nfft  = 4
  integer, parameter, public :: fftlib_pnfft = 5

  ! FFT_MAX is the number of slots in the module-level plan tables;
  ! fft_null is the reference-count value that marks a slot as unused.
  integer, parameter :: FFT_MAX  = 10
  integer, parameter :: fft_null = -1


  !> Descriptor of one FFT plan. fft_init fills an entry of the module-level
  !! table fft_array and hands a copy of it back to the caller.
  !! The data pointers are default-initialized to null so that a descriptor
  !! that never went through fft_init has a defined association status.
  type fft_t
    private
    integer         :: slot = 0         !< index of this plan in fft_array / fft_refs (set by fft_init)

    integer, public :: type             !< fft_real or fft_complex
    integer, public :: library          !< fftlib_* back end actually used (fft_init may fall back to FFTW)

    type(MPI_Comm)  :: comm             !< communicator set up for PFFT/PNFFT, mpi_comm_undefined otherwise
    integer         :: rs_n_global(3)   !< real-space grid size; entries beyond the FFT dimension are 1
    integer         :: fs_n_global(3)   !< Fourier-space grid size
    integer         :: rs_n(3)          !< real-space size; equals rs_n_global unless PFFT/PNFFT report otherwise
    integer         :: fs_n(3)          !< Fourier-space size; equals fs_n_global unless PFFT/PNFFT report otherwise
    integer         :: rs_istart(1:3)   !< first real-space index; 1 unless PFFT/PNFFT report otherwise
    integer         :: fs_istart(1:3)   !< first Fourier-space index; 1 unless PFFT/PNFFT report otherwise

    integer, public :: stride_rs(1:3)   !< real-space strides, computed only for the accel library
    integer, public :: stride_fs(1:3)   !< Fourier-space strides, computed only for the accel library

    type(c_ptr) :: planf                !< forward plan (FFTW or PFFT)
    type(c_ptr) :: planb                !< backward plan (FFTW or PFFT)
    !integer(ptrdiff_t_kind) :: pfft_planf !< PFFT plan for forward transform
    !integer(ptrdiff_t_kind) :: pfft_planb !< PFFT plan for backward transform

    ! Work buffers. fft_init allocates them for PFFT, and for FFTW when
    ! aligned_memory is set; otherwise they stay disassociated.
    real(real64), pointer, public :: drs_data(:,:,:) => null()     !< real-space buffer, real transforms
    complex(real64), pointer, public :: zrs_data(:,:,:) => null()  !< real-space buffer, complex transforms
    complex(real64), pointer, public ::  fs_data(:,:,:) => null()  !< Fourier-space buffer
#ifdef HAVE_CLFFT

    type(clfftPlanHandle) :: cl_plan_fw   !< clFFT plan, forward direction
    type(clfftPlanHandle) :: cl_plan_bw   !< clFFT plan, backward direction
#endif
    type(c_ptr)           :: cuda_plan_fw !< plan created by cuda_fft_plan3d, forward direction
    type(c_ptr)           :: cuda_plan_bw !< plan created by cuda_fft_plan3d, backward direction
#ifdef HAVE_NFFT
    type(nfft_t),  public :: nfft         !< NFFT state of this plan
#endif
    type(pnfft_t), public :: pnfft        !< PNFFT state of this plan

    logical, public :: aligned_memory     !< value of use_aligned given to fft_init (default .false.)
  end type fft_t


  ! Generic names for the transforms. Each one resolves to the 1d, 3d or
  ! accel specific procedure of the same family, defined further down in
  ! this module.

  interface dfft_forward
    module procedure dfft_forward_1d
    module procedure dfft_forward_3d
    module procedure dfft_forward_accel
  end interface dfft_forward

  interface zfft_forward
    module procedure zfft_forward_1d
    module procedure zfft_forward_3d
    module procedure zfft_forward_accel
  end interface zfft_forward

  interface dfft_backward
    module procedure dfft_backward_1d
    module procedure dfft_backward_3d
    module procedure dfft_backward_accel
  end interface dfft_backward

  interface zfft_backward
    module procedure zfft_backward_1d
    module procedure zfft_backward_3d
    module procedure zfft_backward_accel
  end interface zfft_backward


  ! --- module state --------------------------------------------------------

  ! Set to .true. by fft_all_init and back to .false. by fft_all_end.
  logical, public, save :: fft_initialized = .false.

  ! Plan table: fft_refs(i) is the reference count of fft_array(i), or
  ! fft_null while slot i is unused.
  integer,     dimension(FFT_MAX), save :: fft_refs
  type(fft_t), dimension(FFT_MAX), save :: fft_array

  ! Values of the FFTOptimize and FFTPreparePlan input variables.
  logical       :: fft_optimize
  integer, save :: fft_prepare_plan

  ! Value of the FFTLibrary input variable; -1 until fft_all_init has run.
  integer, public :: fft_default_lib = -1

  ! Option sets filled by nfft_guru_options / pnfft_guru_options in fft_all_init.
#ifdef HAVE_NFFT
  type(nfft_t), save  :: nfft_options
#endif
  type(pnfft_t), save :: pnfft_options

  ! Transform-type codes handed to cuda_fft_plan3d.
  integer, parameter :: CUFFT_R2C = int(z'2a')
  integer, parameter :: cufft_c2r = int(z'2c')
  integer, parameter :: cufft_c2c = int(z'29')
  integer, parameter :: cufft_d2z = int(z'6a')
  integer, parameter :: cufft_z2d = int(z'6c')
  integer, parameter :: cufft_z2z = int(z'69')


contains


  ! ---------------------------------------------------------

  !> Module-wide initialization of the FFT layer.
  !!
  !! Reads the input variables FFTOptimize, FFTPreparePlan and FFTLibrary,
  !! marks every slot of the plan table as unused and, when compiled with
  !! OpenMP and threaded FFTW3 and more than one thread is available,
  !! initializes the FFTW thread support. Finally the NFFT/PNFFT option sets
  !! are read. Stops with a fatal message on an invalid FFTPreparePlan value,
  !! or when the accelerated library is selected but cannot be used.
  !!
  !! @param[in] namespace  namespace passed on to the parser and message routines
  subroutine fft_all_init(namespace)
    type(namespace_t),      intent(in)   :: namespace

    integer :: fft_default
#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
    integer :: iret
#endif

    push_sub(fft_all_init)

    fft_initialized = .true.

    !%Variable FFTOptimize
    !%Type logical
    !%Default yes
    !%Section Mesh::FFTs
    !%Description
    !% Should <tt>octopus</tt> optimize the FFT dimensions?
    !% This means that the mesh to which FFTs are applied is not taken to be as small
    !% as possible: some points may be added to each direction in order to get a "good number"
    !% for the performance of the FFT algorithm.
    !% The best FFT grid dimensions are given by <math>2^a 3^b 5^c 7^d 11^e 13^f</math>
    !% where <math>a,b,c,d</math> are arbitrary and <math>e,f</math> are 0 or 1.
    !% (<a href=http://www.fftw.org/doc/Complex-DFTs.html>ref</a>).
    !% In some cases, namely when using
    !% the split-operator, or Suzuki-Trotter propagators, this option should be turned off.
    !% For spatial FFTs in periodic directions, the grid is never optimized, but a warning will
    !% be written if the number is not good, with a suggestion of a better one to use, so you
    !% can try a different spacing if you want to get a good number.
    !%End
    call parse_variable(namespace, 'FFTOptimize', .true., fft_optimize)

    ! No plan exists yet: flag every slot of the table as free.
    fft_refs(:) = fft_null

    !%Variable FFTPreparePlan
    !%Type integer
    !%Default fftw_estimate
    !%Section Mesh::FFTs
    !%Description
    !% The FFTs are performed in octopus with the help of <a href=http://www.fftw.org>FFTW</a> and similar packages.
    !% Before doing the actual computations, this package prepares a "plan", which means that
    !% the precise numerical strategy to be followed to compute the FFT is machine/compiler-dependent,
    !% and therefore the software attempts to figure out which is this precise strategy (see the
    !% FFTW documentation for details). This plan preparation, which has to be done for each particular
    !% FFT shape, can be done exhaustively and carefully (slow), or merely estimated. By default
    !% the plan is merely estimated (<tt>fftw_estimate</tt>), which gives a fast initialization and
    !% results that are stable between runs. You can change this behaviour by changing
    !% this <tt>FFTPreparePlan</tt> variable, and in this way you can force FFTW to do a more careful
    !% (and slower) search for the best way to perform the FFT.
    !%Option fftw_measure 0
    !% This plan implies a longer initialization, but involves a more careful analysis
    !% of the strategy to follow, and therefore more efficient FFTs. A side effect of the runtime
    !% choices is that this plan can introduce slight numerical fluctuations between runs.
    !%Option fftw_estimate 64
    !% This is the "fast initialization" scheme, in which the plan is merely guessed from "reasonable"
    !% assumptions. This is the default option, as it guarantees stable results.
    !%Option fftw_patient 32
    !% It is like fftw_measure, but considers a wider range of algorithms and often produces a
    !% "more optimal" plan (especially for large transforms), but at the expense of several times
    !% longer planning time (especially for large transforms).
    !%Option fftw_exhaustive 8
    !% It is like fftw_patient, but considers an even wider range of algorithms,
    !% including many that we think are unlikely to be fast, to produce the most optimal
    !%  plan but with a substantially increased planning time.
    !%End
    call parse_variable(namespace, 'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
    if (.not. varinfo_valid_option('FFTPreparePlan', fft_prepare_plan)) then
      call messages_input_error(namespace, 'FFTPreparePlan')
    end if

    !%Variable FFTLibrary
    !%Type integer
    !%Section Mesh::FFTs
    !%Default fftw
    !%Description
    !% (experimental) You can select the FFT library to use.
    !% The default is <tt>accel</tt> when acceleration is enabled, and <tt>fftw</tt> otherwise.
    !%Option fftw 1
    !% Uses FFTW3 library.
    !%Option pfft 2
    !% (experimental) Uses PFFT library, which has to be linked.
    !%Option accel 3
    !% Uses a GPU accelerated library. This only
    !% works if Octopus was compiled with HIP, CUDA, or OpenCL support.
    !%End

    ! The default depends on whether an accelerator is in use.
    fft_default = fftlib_fftw
    if(accel_is_enabled()) then
      fft_default = fftlib_accel
    end if
    call parse_variable(namespace, 'FFTLibrary', fft_default, fft_default_lib)

    ! The accelerated library needs both compile-time support and an enabled accelerator.
    if (fft_default_lib == fftlib_accel) then
#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA))
      call messages_write('You have selected the Accelerated FFT, but Octopus was compiled', new_line = .true.)
      call messages_write('without clfft (OpenCL) or Cuda support.')
      call messages_fatal()
#endif
      if (.not. accel_is_enabled()) then
        call messages_write('You have selected the accelerated FFT, but acceleration is disabled.')
        call messages_fatal()
      end if
    end if

#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
    if (omp_get_max_threads() > 1) then

      call messages_write('Info: Initializing Multi-threaded FFTW')
      call messages_info()

      ! fftw_init_threads signals failure with a zero return value.
      iret = fftw_init_threads()
      if (iret == 0) then
        call messages_write('Initialization of FFTW3 threads failed.')
        call messages_fatal()
      end if
      call fftw_plan_with_nthreads(omp_get_max_threads())

    end if
#endif
#ifdef HAVE_NFFT
    call nfft_guru_options(nfft_options, namespace)
#endif
    call pnfft_guru_options(pnfft_options, namespace)

    pop_sub(fft_all_init)
  end subroutine fft_all_init


  ! ---------------------------------------------------------

  !> Module-wide finalization: calls fft_end once for every slot of the plan
  !! table that is still in use, runs the library clean-up routines and
  !! clears fft_initialized.
  subroutine fft_all_end()
    integer :: islot

    push_sub(fft_all_end)

    do islot = 1, fft_max
      ! Unused slots have nothing to release.
      if (fft_refs(islot) == fft_null) cycle
      call fft_end(fft_array(islot))
    end do

#ifdef HAVE_PFFT
    call pfft_cleanup()
#endif

    ! Use the threads-aware clean-up when FFTW thread support is compiled in.
#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
    call fftw_cleanup_threads()
#else
    call fftw_cleanup()
#endif

    fft_initialized = .false.

    pop_sub(fft_all_end)
  end subroutine fft_all_end


  ! ---------------------------------------------------------

  !> Create the FFT plan for a grid, or reuse a matching plan that already
  !! exists in the module-level table, and return its descriptor in this.
  !!
  !! Steps: (1) determine the FFT dimensionality from nn; (2) compute an
  !! efficient grid size for the requested library and, in the directions
  !! where both FFTOptimize and optimize(ii) are true, enlarge nn to it;
  !! (3) look for an existing plan with the same grid, type, library and
  !! memory alignment (never for NFFT/PNFFT) and, if found, increase its
  !! reference count and return a copy; (4) otherwise take a free slot, set
  !! up the communicator, array sizes and library plans, and print a summary.
  !!
  !! @param[in,out] this             descriptor to fill; for NFFT/PNFFT its nfft/pnfft
  !!                                 components are read as default parameters
  !! @param[in,out] nn               requested grid size; may be enlarged on return
  !! @param[in]     dim              number of entries of nn to consider
  !! @param[in]     type             fft_real or fft_complex
  !! @param[in]     library          requested fftlib_* back end
  !! @param[in]     optimize         per direction: may the grid size be changed?
  !! @param[in]     optimize_parity  per direction parity request, must not exceed 1
  !! @param[out]    comm             (optional) communicator stored in the plan
  !! @param[in]     mpi_grp          (optional) MPI group to use, default mpi_world
  !! @param         use_aligned      (optional) let FFTW allocate the buffers and plan
  !!                                 on them, default .false.
  subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
    type(fft_t),       intent(inout) :: this
    integer,           intent(inout) :: nn(3)
    integer,           intent(in)    :: dim
    integer,           intent(in)    :: type
    integer,           intent(in)    :: library
    logical,           intent(in)    :: optimize(3)
    integer,           intent(in)    :: optimize_parity(3)
    type(mpi_comm), optional, intent(out) :: comm
    type(mpi_grp_t), optional, intent(in) :: mpi_grp
    logical, optional                :: use_aligned

    integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
    integer :: n_1, n_2, n_3, nn_temp(3)
    integer :: library_
    type(mpi_grp_t) :: mpi_grp_
    integer(int64) :: number_points, alloc_size

#ifdef HAVE_CLFFT
    real(real64) :: scale
    integer :: status
#endif
#ifdef HAVE_PFFT
    integer :: ierror
#endif

    push_sub(fft_init)

    assert(fft_initialized)

    assert(type == fft_real .or. type == fft_complex)

    mpi_grp_ = mpi_world
    if (present(mpi_grp)) mpi_grp_ = mpi_grp

    this%aligned_memory = optional_default(use_aligned, .false.)

    ! First, figure out the dimensionality of the FFT: the number of leading
    ! directions with more than one point.
    fft_dim = 0
    do ii = 1, dim
      if (nn(ii) <= 1) exit
      fft_dim = fft_dim + 1
    end do

    if (fft_dim == 0) then
      message(1) = "Internal error in fft_init: apparently, a 1x1x1 FFT is required."
      call messages_fatal(1)
    end if

    if (fft_dim > 3) call messages_not_implemented('FFT for dimension > 3')

    ! library_ is the library actually used; it may differ from the request below.
    library_ = library
    ! nn_temp receives the efficient grid size, whether or not nn is changed to it.
    nn_temp(1:fft_dim) = nn(1:fft_dim)

    select case (library_)
    case (fftlib_accel)
      ! FFT optimization
      if(any(optimize_parity(1:fft_dim) > 1)) then
        message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
        call messages_fatal(1)
      end if

      do ii = 1, fft_dim
        nn_temp(ii) = fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
        if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
      end do

      ! if we can't optimize, in some cases we can't use the library
      if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
#ifdef HAVE_CLFFT
        call messages_write('Invalid grid size for accel fft. FFTW will be used instead.')
        call messages_warning()
        library_ = fftlib_fftw
#endif
      end if

    case (fftlib_nfft)

      do ii = 1, fft_dim
        !NFFT likes even grids
        !The underlying FFT grids are optimized inside the nfft_init routine
        if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and. optimize(ii)))&
          nn(ii)=nn(ii)+1
      end do

    case (fftlib_pnfft)

      do ii = 1, fft_dim
        !also PNFFT likes even grids
        if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
      end do

      if (fft_dim < 3) then
        call messages_not_implemented('PNFFT support for dimension < 3')
      end if

    case default

      if (fft_dim < 3 .and. library_ == fftlib_pfft) then
        call messages_not_implemented('PFFT support for dimension < 3')
      end if

      ! FFT optimization
      if (any(optimize_parity(1:fft_dim) > 1)) then
        message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
        call messages_fatal(1)
      end if

      do ii = 1, fft_dim
        call loct_fft_optimize(nn_temp(ii), optimize_parity(ii))
        if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
      end do

    end select

    ! find out if fft has already been allocated
    ! The table is scanned from the top, so that jj ends up as the lowest free slot.
    jj = 0
    do ii = fft_max, 1, -1
      if (fft_refs(ii) /= fft_null) then
        ! NFFT and PNFFT plans are always allocated from scratch since they
        ! are very likely to be different.
        ! The parentheses around the .eqv. term are essential: .eqv. has lower
        ! precedence than .and., so without them the whole conjunction would be
        ! compared with fft_array(ii)%aligned_memory and non-matching plans
        ! could be reused.
        if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and. type == fft_array(ii)%type &
          .and. library_ == fft_array(ii)%library .and. library_ /= fftlib_nfft &
          .and. library_ /= fftlib_pnfft &
          .and. (this%aligned_memory .eqv. fft_array(ii)%aligned_memory)) then

          this = fft_array(ii)              ! return a copy
          fft_refs(ii) = fft_refs(ii) + 1  ! increment the ref count
          if (present(comm)) comm = fft_array(ii)%comm ! also return the MPI communicator
          pop_sub(fft_init)
          return
        end if
      else
        jj = ii
      end if
    end do

    if (jj == 0) then
      message(1) = "Not enough slots for FFTs."
      message(2) = "Please increase FFT_MAX in fft.F90 and recompile."
      call messages_fatal(2)
    end if

    ! jj now contains an empty slot
    fft_refs(jj) = 1
    fft_array(jj)%slot     = jj
    fft_array(jj)%type     = type
    fft_array(jj)%library  = library_
    fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
    fft_array(jj)%rs_n_global(dim+1:) = 1
    nullify(fft_array(jj)%drs_data)
    nullify(fft_array(jj)%zrs_data)
    nullify(fft_array(jj)%fs_data)

    fft_array(jj)%aligned_memory = this%aligned_memory

    ! Initialize parallel communicator
    select case (library_)
    case (fftlib_pfft)
#ifdef HAVE_PFFT
      call pfft_init()

      call pfft_decompose(mpi_grp_%size, column_size, row_size)

      ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)

      if (ierror /= 0) then
        message(1) = "The number of rows and columns in PFFT processor grid is not equal to "
        message(2) = "the number of processor in the MPI communicator."
        message(3) = "Please check it."
        call messages_fatal(3)
      end if
#endif

    case (fftlib_pnfft)
#ifdef HAVE_PNFFT
      call pnfft_init_procmesh(fft_array(jj)%pnfft, mpi_grp_, fft_array(jj)%comm)
#endif
    case default
      fft_array(jj)%comm = mpi_comm_undefined

    end select

    if (present(comm)) comm = fft_array(jj)%comm

    ! From here on the communicator is always read from fft_array(jj)%comm:
    ! comm is optional and must not be referenced when it is absent.

    ! Get dimensions of arrays
    select case (library_)
    case (fftlib_fftw)
      call fftw_get_dims(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global)
      fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
      fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
      fft_array(jj)%rs_istart = 1
      fft_array(jj)%fs_istart = 1

      if (this%aligned_memory) then
        call fftw_alloc_memory(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global, &
          fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
      end if

    case (fftlib_pfft)
#ifdef HAVE_PFFT
      call pfft_get_dims(fft_array(jj)%rs_n_global, fft_array(jj)%comm%MPI_VAL, type == fft_real, &
        alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
        fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
      !write(*,"(6(A,3I4,/),A,I10,/)") "PFFT: rs_n_global = ",fft_array(jj)%rs_n_global,&
      !  "fs_n_global = ",fft_array(jj)%fs_n_global,&
      !  "rs_n        = ",fft_array(jj)%rs_n,&
      !  "fs_n        = ",fft_array(jj)%fs_n,&
      !  "rs_istart   = ",fft_array(jj)%rs_istart,&
      !  "fs_istart   = ",fft_array(jj)%fs_istart,&
      !  "alloc_size  = ",alloc_size
#endif

      ! Allocate memory. Note that PFFT may need extra memory space
      ! and that in fourier space the function will be transposed.
      ! The extents are computed in real64: alloc_size is a 64-bit integer and
      ! a default-kind real cannot represent large values of it exactly, which
      ! could round the quotient down and make the buffer too small.
      if (type == fft_real) then
        n_1 = max(1, fft_array(jj)%rs_n(1))
        n_2 = max(1, fft_array(jj)%rs_n(2))

        n3 = ceiling(real(2*alloc_size, real64)/real(n_1*n_2, real64))
        safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
      else
        n3 = ceiling(real(alloc_size, real64) &
          /real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2), real64))
        safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
      end if

      n_1 = max(1, fft_array(jj)%fs_n(1))
      n_2 = max(1, fft_array(jj)%fs_n(2))
      n_3 = max(1, fft_array(jj)%fs_n(3))

      n3 = ceiling(real(alloc_size, real64)/real(n_3*n_1, real64))
      safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))

    case (fftlib_accel)
      call fftw_get_dims(fft_array(jj)%rs_n_global, (type == fft_real), fft_array(jj)%fs_n_global)
      fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
      fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
      fft_array(jj)%rs_istart = 1
      fft_array(jj)%fs_istart = 1

    case (fftlib_nfft)
      fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
      fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
      fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
      fft_array(jj)%rs_istart = 1
      fft_array(jj)%fs_istart = 1

    case (fftlib_pnfft)
      fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
      fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
      fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
      fft_array(jj)%rs_istart = 1
      fft_array(jj)%fs_istart = 1
      ! indices partition is performed together with the plan preparation

    end select

    ! Prepare plans
    select case (library_)
    case (fftlib_fftw)
      if (.not. this%aligned_memory) then
        ! No buffers are attached to the plan, so FFTW must not assume alignment.
        call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
          type == fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
        call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
          type == fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
      else
        ! Plan directly on the buffers obtained from fftw_alloc_memory.
        if (type == fft_real) then
          call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
            type == fft_real, fftw_forward, fft_prepare_plan, &
            din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
          call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
            type == fft_real, fftw_backward, fft_prepare_plan, &
            din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
        else
          call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
            type == fft_real, fftw_forward, fft_prepare_plan, &
            cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
          call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
            type == fft_real, fftw_backward, fft_prepare_plan, &
            cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
        end if
      end if

    case (fftlib_nfft)
#ifdef HAVE_NFFT
      call nfft_copy_info(this%nfft,fft_array(jj)%nfft) !copy default parameters set in the calling routine
      call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
        fft_dim, fft_array(jj)%rs_n_global, optimize = .true.)
#endif
    case (fftlib_pfft)
#ifdef HAVE_PFFT
      if (type == fft_real) then
        call pfft_prepare_plan_r2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%drs_data, &
          fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, fft_array(jj)%comm%MPI_VAL)
        call pfft_prepare_plan_c2r(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
          fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, fft_array(jj)%comm%MPI_VAL)
      else
        call pfft_prepare_plan_c2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%zrs_data, &
          fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, fft_array(jj)%comm%MPI_VAL)
        call pfft_prepare_plan_c2c(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
          fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, fft_array(jj)%comm%MPI_VAL)
      end if
#endif
    case (fftlib_pnfft)
#ifdef HAVE_PNFFT
      call pnfft_copy_params(this%pnfft, fft_array(jj)%pnfft) ! pass default parameters like in NFFT

      ! NOTE:
      ! PNFFT (likewise NFFT) breaks the symmetry between real space and Fourier space
      ! by allowing the possibility to have an unstructured grid in rs and by
      ! using different parallelizations (the rs is transposed w.r.t. fs).
      ! Octopus, in fourier_space_m, uses the convention for which the mapping
      ! between rs and fs is done with a forward transform (and fs->rs with backward).
      ! This is exactly the opposite of the definitions used by all the libraries
      ! performing FFTs (PNFFT and NFFT included) [see e.g. M. Frigo, and S. G. Johnson, Proc.
      ! IEEE 93, 216-231 (2005)].
      ! While this leads to no problem on ordinary ffts where fs and rs can be exchanged
      ! it does make a fundamental difference for PNFFT (for some reason I don't know NFFT
      ! is still symmetric).
      ! Therefore, in order to perform rs->fs transforms with PNFFT one should use the
      ! backward transform.

      call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, fft_array(jj)%comm, fft_array(jj)%fs_n_global, &
        fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
#endif
    case (fftlib_accel)

      ! Strides of consecutive elements along each direction (first index fastest).
      fft_array(jj)%stride_rs(1) = 1
      fft_array(jj)%stride_fs(1) = 1
      do ii = 2, fft_dim
        fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
        fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
      end do

#ifdef HAVE_CUDA
      ! The grid sizes are passed in reversed order (3, 2, 1).
      if (type == fft_real) then
        call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
          fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_d2z, &
          accel%cuda_stream)
        call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
          fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2d, &
          accel%cuda_stream)
      else
        call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
          fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
          accel%cuda_stream)
        call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
          fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
          accel%cuda_stream)
      end if
#endif

#ifdef HAVE_CLFFT

      ! create the plans

      call clfftcreatedefaultplan(fft_array(jj)%cl_plan_fw, accel%context%cl_context, &
        fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')

      call clfftcreatedefaultplan(fft_array(jj)%cl_plan_bw, accel%context%cl_context, &
        fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftCreateDefaultPlan')

      ! set the precision to double

      call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')

      call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanPrecision')

      ! set number of transforms to 1 (the batch size is a 64-bit integer)

      call clfftsetplanbatchsize(fft_array(jj)%cl_plan_fw, 1_int64, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')

      call clfftsetplanbatchsize(fft_array(jj)%cl_plan_bw, 1_int64, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanBatchSize')

      ! set the layout

      if (type == fft_real) then

        call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_real, clfft_hermitian_interleaved, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')

        call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_hermitian_interleaved, clfft_real, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')

      else

        call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_complex_interleaved, clfft_complex_interleaved, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')

        call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_complex_interleaved, clfft_complex_interleaved, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetLayout')

      end if

      ! set the plans as out of place

      call clfftsetresultlocation(fft_array(jj)%cl_plan_fw, clfft_outofplace, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')

      call clfftsetresultlocation(fft_array(jj)%cl_plan_bw, clfft_outofplace, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetResultLocation')

      ! the strides

      call clfftsetplaninstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')

      call clfftsetplanoutstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')

      call clfftsetplaninstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanInStride')

      call clfftsetplanoutstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanOutStride')

      ! set the scaling factors

      scale = 1.0_real64/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), real64)))

      call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_forward, 1.0_real64, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

      call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_backward, scale, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

      if (type == fft_real) then

        call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, 1.0_real64, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

        call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, scale, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

      else

        call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, scale, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

        call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, 1.0_real64, status)
        if (status /= clfft_success) call clfft_print_error(status, 'clfftSetPlanScale')

      end if

      ! now 'bake' the plans, this signals that the plans are ready to use

      call clfftbakeplan(fft_array(jj)%cl_plan_fw, accel%command_queue, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')

      call clfftbakeplan(fft_array(jj)%cl_plan_bw, accel%command_queue, status)
      if (status /= clfft_success) call clfft_print_error(status, 'clfftBakePlan')

#endif

    case default
      call messages_write('Invalid FFT library.')
      call messages_fatal()
    end select

    ! Hand a copy of the finished table entry back to the caller.
    this = fft_array(jj)

    ! Write information
    if (.not. (library_ == fftlib_nfft .or. library_ == fftlib_pnfft)) then
      call messages_write('Info: FFT grid dimensions       =')
      number_points = 1
      do idir = 1, dim
        call messages_write(fft_array(jj)%rs_n_global(idir))
        if (idir < dim) call messages_write(" x ")
        ! do the multiplication in a integer(int64) to avoid overflow for large grids
        number_points = number_points * fft_array(jj)%rs_n_global(idir)
      end do
      call messages_new_line()

      call messages_write('      Total grid size           =')
      call messages_write(number_points)
      call messages_write(' (')
      call messages_write(number_points*8.0_real64, units = unit_megabytes, fmt = '(f9.1)')
      call messages_write(' )')
      ! nn differs from nn_temp only where the grid was not allowed to be optimized.
      if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
        call messages_new_line()
        call messages_write('      Inefficient FFT grid. A better grid would be: ')
        do idir = 1, fft_dim
          call messages_write(nn_temp(idir))
        end do
      end if
      call messages_info()
    end if

    select case (library_)
    case (fftlib_pfft)
      write(message(1),'(a)') "Info: FFT library = PFFT"
      write(message(2),'(a)') "Info: PFFT processor grid"
      write(message(3),'(a, i9)') " No. of processors                = ", mpi_grp_%size
      write(message(4),'(a, i9)') " No. of columns in the proc. grid = ", column_size
      write(message(5),'(a, i9)') " No. of rows    in the proc. grid = ", row_size
      write(message(6),'(a, i9)') " The size of integer is = ", c_intptr_t
      call messages_info(6)

    case (fftlib_pnfft)
#ifdef HAVE_PNFFT
      call messages_write("Info: FFT library = PNFFT")
      call messages_info()
      call pnfft_write_info(fft_array(jj)%pnfft)
#endif
    case (fftlib_nfft)
#ifdef HAVE_NFFT
      call messages_write("Info: FFT library = NFFT")
      call messages_info()
      call nfft_write_info(fft_array(jj)%nfft)
#endif
    end select

    pop_sub(fft_init)
  end subroutine fft_init


  ! ---------------------------------------------------------

  !> Extra initialisation step that hands sampling nodes to the back-ends
  !! that want them. Only the NFFT and PNFFT branches do any work (and only
  !! when compiled in); FFTW, PFFT and the accelerator back-end are no-ops.
  !! Any other library value stored in the slot is a fatal error.
  !!
  !! \param this       handle; its slot selects the entry of fft_array
  !! \param namespace  forwarded to pnfft_set_sp_nodes (PNFFT branch only)
  !! \param XX         array with exactly three columns (asserted)
  !! \param nn         per-column extents of XX used by NFFT; must be present
  !!                   in the NFFT branch (asserted there)
  subroutine fft_init_stage1(this, namespace, XX, nn)
    type(fft_t),       intent(inout) :: this
    type(namespace_t), intent(in)    :: namespace
    real(real64),      intent(in)    :: xx(:,:)
    integer, optional, intent(in)    :: nn(:)

    integer :: islot

    push_sub(fft_init_stage1)

    assert(size(xx,2) == 3)

    islot = this%slot

    select case (fft_array(islot)%library)
    case (fftlib_fftw, fftlib_pfft, fftlib_accel)
      ! nothing to precompute for these back-ends

    case (fftlib_nfft)
#ifdef HAVE_NFFT
      assert(present(nn))
      ! column idir of xx is truncated to its first nn(idir) entries
      call nfft_precompute(fft_array(islot)%nfft, xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
#endif

    case (fftlib_pnfft)
#ifdef HAVE_PNFFT
      call pnfft_set_sp_nodes(fft_array(islot)%pnfft, namespace, xx)
#endif

    case default
      call messages_write('Invalid FFT library.')
      call messages_fatal()

    end select

    pop_sub(fft_init_stage1)
  end subroutine fft_init_stage1

  ! ---------------------------------------------------------

  !> Release the reference that `this` holds on its slot of the plan table.
  !!
  !! If other handles still reference the slot, only the reference counter
  !! is decremented. When the last reference goes away, the library-specific
  !! plans (and, where this module owns them, the data buffers) are destroyed
  !! and the slot is marked free (fft_null). In every case the handle is
  !! detached afterwards (slot = 0).
  !!
  !! A handle whose slot is outside 1..fft_max (never initialised, or already
  !! ended, since this routine resets slot to 0) only triggers a warning;
  !! the reference table is not touched for it.
  !!
  !! \param this  handle to release; on return this%slot == 0
  subroutine fft_end(this)
    type(fft_t), intent(inout) :: this

    integer :: ii
#ifdef HAVE_CLFFT
    integer :: status
#endif

    push_sub(fft_end)

    ii = this%slot

    ! The range test is a separate branch on purpose: Fortran's .or. does not
    ! short-circuit, so fft_refs(ii) must not appear in the same condition.
    if (ii < 1 .or. ii > fft_max) then
      message(1) = "Trying to deallocate FFT that has not been allocated."
      call messages_warning(1)

    else if (fft_refs(ii) == fft_null) then
      message(1) = "Trying to deallocate FFT that has not been allocated."
      call messages_warning(1)

    else if (fft_refs(ii) > 1) then
      ! still shared with other handles: just drop our reference
      fft_refs(ii) = fft_refs(ii) - 1

    else
      ! last reference: tear down the back-end resources
      select case (fft_array(ii)%library)
      case (fftlib_fftw)
        call fftw_destroy_plan(fft_array(ii)%planf)
        call fftw_destroy_plan(fft_array(ii)%planb)

        ! buffers are only released here when the handle is flagged as
        ! using aligned memory
        if (this%aligned_memory) then
          call fftw_free_memory(this%type == fft_real, &
            fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
        end if

      case (fftlib_pfft)
#ifdef HAVE_PFFT
        call pfft_destroy_plan(fft_array(ii)%planf)
        call pfft_destroy_plan(fft_array(ii)%planb)
#endif
        safe_deallocate_p(fft_array(ii)%drs_data)
        safe_deallocate_p(fft_array(ii)%zrs_data)
        safe_deallocate_p(fft_array(ii)%fs_data)

      case (fftlib_accel)
#ifdef HAVE_CUDA
        call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
        call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
#endif
#ifdef HAVE_CLFFT
        ! NOTE(review): the returned status is not inspected (best effort)
        call clfftdestroyplan(fft_array(ii)%cl_plan_fw, status)
        call clfftdestroyplan(fft_array(ii)%cl_plan_bw, status)
#endif

      case (fftlib_nfft)
#ifdef HAVE_NFFT
        call nfft_end(fft_array(ii)%nfft)
#endif

      case (fftlib_pnfft)
#ifdef HAVE_PNFFT
        call pnfft_end(fft_array(ii)%pnfft)
#endif

      end select

      fft_refs(ii) = fft_null
    end if

    ! detach the handle regardless of which branch was taken
    this%slot = 0

    pop_sub(fft_end)
  end subroutine fft_end


  ! ---------------------------------------------------------

  !> Make fft_o a second handle on the plan referenced by fft_i.
  !! Whatever fft_o referenced before (slot > 0) is released first; the
  !! reference counter of the source slot is then incremented.
  !!
  !! \param fft_i  source handle; its slot must lie in 1..fft_max and have a
  !!               positive reference count (both asserted)
  !! \param fft_o  destination handle; overwritten with a copy of fft_i
  subroutine fft_copy(fft_i, fft_o)
    type(fft_t), intent(in)    :: fft_i
    type(fft_t), intent(inout) :: fft_o

    push_sub(fft_copy)

    ! drop the plan currently held by the destination, if any
    if (fft_o%slot > 0) call fft_end(fft_o)

    assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
    assert(fft_refs(fft_i%slot) > 0)

    ! register the new reference, then duplicate the handle
    associate (nrefs => fft_refs(fft_i%slot))
      nrefs = nrefs + 1
    end associate
    fft_o = fft_i

    pop_sub(fft_copy)
  end subroutine fft_copy


  ! ---------------------------------------------------------

  !> Copy the six size/offset triplets stored for the plan of `fft` out of
  !! the plan table.
  !!
  !! \param fft          handle; fft%slot selects the fft_array entry
  !! \param rs_n_global  copy of the entry's rs_n_global(1:3)
  !! \param fs_n_global  copy of the entry's fs_n_global(1:3)
  !! \param rs_n         copy of the entry's rs_n(1:3)
  !! \param fs_n         copy of the entry's fs_n(1:3)
  !! \param rs_istart    copy of the entry's rs_istart(1:3)
  !! \param fs_istart    copy of the entry's fs_istart(1:3)
  subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
    type(fft_t), intent(in)  :: fft
    integer,     intent(out) :: rs_n_global(1:3)
    integer,     intent(out) :: fs_n_global(1:3)
    integer,     intent(out) :: rs_n(1:3)
    integer,     intent(out) :: fs_n(1:3)
    integer,     intent(out) :: rs_istart(1:3)
    integer,     intent(out) :: fs_istart(1:3)

    push_sub(fft_get_dims)

    associate (entry => fft_array(fft%slot))
      ! global extents
      rs_n_global = entry%rs_n_global(1:3)
      fs_n_global = entry%fs_n_global(1:3)
      ! extents and start indices as stored for this plan
      rs_n        = entry%rs_n(1:3)
      fs_n        = entry%fs_n(1:3)
      rs_istart   = entry%rs_istart(1:3)
      fs_istart   = entry%fs_istart(1:3)
    end associate

    pop_sub(fft_get_dims)
  end subroutine fft_get_dims


  ! ---------------------------------------------------------

  !> Convert between a 1-based array index and a signed frequency number
  !! on a grid of nn points.
  !!
  !! \param ii    array index (mode true) or frequency number (mode false)
  !! \param nn    number of grid points
  !! \param mode  .true.:  index -> frequency; indices up to nn/2 + 1 map to
  !!                       ii - 1, larger ones wrap to ii - nn - 1
  !!              .false.: frequency -> index; non-negative values map to
  !!                       ii + 1, negative ones wrap to ii + nn + 1
  pure function pad_feq(ii, nn, mode) result(mapped)
    integer, intent(in) :: ii,nn
    logical, intent(in) :: mode
    integer :: mapped

    ! no push_sub: called too frequently

    if (mode) then
      ! index to frequency number
      mapped = ii - 1
      if (ii > nn/2 + 1) mapped = mapped - nn
    else
      ! frequency number to index
      mapped = ii + 1
      if (ii < 0) mapped = mapped + nn
    end if

  end function pad_feq


  ! -------------------------------------------------------


  !> Smallest integer >= max(size, 1) that is a product of the given
  !! factors only and has the requested parity.
  !!
  !! \param size     lower bound of the search; values below 1 are treated
  !!                 as 1 (candidate 0 can never be fully factorised)
  !! \param factors  allowed factors
  !! \param parity   0 for an even result, 1 for an odd one. Any other value
  !!                 can never equal mod(n, 2) for a positive n, so it is
  !!                 treated as "no parity constraint" instead of searching
  !!                 forever.
  !!
  !! NOTE(review): the search is still unbounded when the request cannot be
  !! met by the factor set itself (e.g. parity = 0 without an even factor).
  integer function fft_size(size, factors, parity)
    integer, intent(in) :: size
    integer, intent(in) :: factors(:)
    integer, intent(in) :: parity

    integer :: nfactors
    integer :: nondiv
    logical :: any_parity
    integer, allocatable :: exponents(:)

    push_sub(fft_size)

    ! the dummy argument `size` hides the intrinsic of the same name,
    ! hence ubound() to get the number of factors
    nfactors = ubound(factors, dim = 1)

    safe_allocate(exponents(1:nfactors))

    any_parity = (parity /= 0 .and. parity /= 1)

    fft_size = max(size, 1)
    do
      ! cheap parity test first; factorise only the candidates that pass it
      if (any_parity .or. mod(fft_size, 2) == parity) then
        call get_exponents(fft_size, nfactors, factors, exponents, nondiv)
        ! nondiv == 1 means nothing but the allowed factors was left over
        if (nondiv == 1) exit
      end if
      fft_size = fft_size + 1
    end do

    safe_deallocate_a(exponents)

    pop_sub(fft_size)
  end function fft_size


  ! -------------------------------------------------------


  !> Divide num by each of the given factors as often as possible.
  !!
  !! \param num        number to factorise
  !! \param nfactors   number of entries of factors/exponents to process
  !! \param factors    trial divisors, processed in order
  !! \param exponents  exponents(i) = how many times factors(i) was divided out
  !! \param nondiv     what is left of num after all divisions (1 if num is a
  !!                   product of the given factors only)
  !!
  !! num == 0 and factors of magnitude <= 1 are left untouched (exponent 0):
  !! zero is divisible by everything and +-1 never reduces the number, so
  !! either would loop forever, and a zero factor would be a division by zero.
  subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
    integer, intent(in)  :: num
    integer, intent(in)  :: nfactors
    integer, intent(in)  :: factors(:)
    integer, intent(out) :: exponents(:)
    integer, intent(out) :: nondiv

    integer :: ifactor

    push_sub(get_exponents)

    nondiv = num
    do ifactor = 1, nfactors
      exponents(ifactor) = 0

      ! degenerate cases that could never terminate (see header)
      if (nondiv == 0 .or. abs(factors(ifactor)) <= 1) cycle

      do while (mod(nondiv, factors(ifactor)) == 0)
        nondiv = nondiv/factors(ifactor)
        exponents(ifactor) = exponents(ifactor) + 1
      end do
    end do

    pop_sub(get_exponents)
  end subroutine get_exponents


  ! ----------------------------------------------------------


  !> Report an operation-count estimate of 5 N log2(N) to the profiler,
  !! with N the product of fft%fs_n(1:3).
  !!
  !! Nothing is reported when N is zero: 0*log(0) evaluates to NaN, which
  !! must not reach the profiling counter.
  !!
  !! \param fft  handle whose fs_n(1:3) gives the transform size
  subroutine fft_operation_count(fft)
    type(fft_t), intent(in)  :: fft

    real(real64) :: fullsize

    push_sub(fft_operation_count)

    ! multiply as reals so the product cannot overflow a default integer
    fullsize = product(real(fft%fs_n(1:3), real64))

    if (fullsize > 0.0_real64) then
      ! log(fullsize)/log(2) is log2(fullsize)
      call profiling_count_operations(5.0_real64*fullsize*log(fullsize)/log(m_two))
    end if

    pop_sub(fft_operation_count)
  end subroutine fft_operation_count


  !-----------------------------------------------------------------

  !> Build a vector from the integer triplet gg_in and return it together
  !! with its squared norm.
  !!
  !! Steps, in this order: convert gg_in to real, add qq along the first
  !! periodic_dim components, scale component-wise by temp, multiply by
  !! latt%klattice_primitive.
  !!
  !! \param gg_in         integer triplet
  !! \param temp          per-component scale factors (first three used)
  !! \param periodic_dim  number of leading components that receive qq
  !! \param latt          supplies klattice_primitive(1:3,1:3)
  !! \param qq            shift added to the first periodic_dim components
  !! \param gg            resulting vector (components 1:3 are overwritten)
  !! \param modg2         sum of the squares of gg(1:3)
  subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
    integer,                 intent(in)    :: gg_in(:)
    real(real64),            intent(in)    :: temp(:)
    integer,                 intent(in)    :: periodic_dim
    type(lattice_vectors_t), intent(in)    :: latt
    real(real64),            intent(in)    :: qq(:)
    real(real64),            intent(inout) :: gg(:)
    real(real64),            intent(out)   :: modg2

    ! no PUSH_SUB, called too frequently

    ! integer triplet -> real, then shift the periodic components by qq
    gg(1:3) = real(gg_in(1:3), real64)
    gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)

    ! scale component-wise, then apply the klattice_primitive matrix
    gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3), gg(1:3)*temp(1:3))

    modg2 = sum(gg(1:3)*gg(1:3))

  end subroutine fft_gg_transform


  ! ----------------------------------------------------------


  !> Scaling factor associated with the plan of `fft`.
  !! The result is one, except for the accelerator back-end in a CUDA build,
  !! where it is 1/(n1*n2*n3) with n = rs_n_global(1:3) of the plan.
  !!
  !! \param fft  handle; fft%slot selects the fft_array entry
  real(real64) pure function fft_scaling_factor(fft) result(scaling_factor)
    type(fft_t), intent(in)  :: fft

#ifdef HAVE_CUDA
    integer :: idir
#endif

    ! for the moment this factor is handled by the backwards transform for most libraries
    scaling_factor = m_one

    if (fft_array(fft%slot)%library == fftlib_accel) then
#ifdef HAVE_CUDA
      ! divide by one extent at a time, in direction order
      do idir = 1, 3
        scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(idir), real64)
      end do
#endif
    end if

  end function fft_scaling_factor


  ! ----------------------------------------------------------

  !

  ! Inspired by the routine bounds from Abinit

  !> Given an FFT box, return the cutoff energy of the sphere that fits
  !! inside it: half the smallest squared length (restricted to the periodic
  !! components) found on the bounding planes of the box in frequency space.
  !!
  !! \param box_dim       number of points per direction
  !! \param fs_istart     first index per direction
  !! \param latt          lattice, forwarded to fft_gg_transform
  !! \param gspacing      per-direction scale factors, forwarded to fft_gg_transform
  !! \param periodic_dim  number of periodic directions; must be positive
  !! \param qq            shift, forwarded to fft_gg_transform
  !! \return ecut = 0.5 * (minimum squared length found)
  real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
    integer,                 intent(in) :: box_dim(:)
    integer,                 intent(in) :: fs_istart(:)
    type(lattice_vectors_t), intent(in) :: latt
    real(real64),            intent(in) :: gspacing(:)
    integer,                 intent(in) :: periodic_dim
    real(real64),            intent(in) :: qq(:)

    integer :: lx, ix, ig, iy, iz, idir, idir2, idir3, isign
    real(real64) :: dminsq, gg(3), modg2
    integer :: ixx(3)
    integer :: ming(3), maxg(3)

    ! no PUSH_SUB, called too frequently

    assert(periodic_dim > 0)

    ! We first need to remove asymmetric planes for the case of even FFT grids:
    ! keep, per periodic direction, the largest frequency that is present with
    ! both signs. Non-periodic directions keep the initial bound of 1.
    ming = 1
    maxg = 1
    do idir = 1, periodic_dim
      do lx = 1, box_dim(idir)
        ix = fs_istart(idir) + lx - 1
        ig = pad_feq(ix, box_dim(idir), .true.)
        ming(idir) = min(ming(idir), ig)
        maxg(idir) = max(maxg(idir), ig)
      end do
      maxg(idir) = min(abs(ming(idir)), maxg(idir))
    end do

    ! Given the boundaries, we can search the min distance, which gives us the cutoff energy
    dminsq = m_huge
    do idir = 1, periodic_dim
      ! the two remaining directions, in cyclic order
      idir2 = mod(idir, 3)+1
      idir3 = mod(idir+1, 3)+1

      ! isign = -1: negative plane, isign = +1: positive plane
      do isign = -1, 1, 2
        ixx(idir) = isign*maxg(idir)
        do iy = -maxg(idir2), maxg(idir2)
          ixx(idir2) = iy
          do iz = -maxg(idir3), maxg(idir3)
            ixx(idir3) = iz
            call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
            ! only the periodic components enter the distance, so the
            ! returned modg2 (all three components) is not used
            dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
          end do
        end do
      end do
    end do

    ecut = m_half * dminsq

  end function fft_get_ecut_from_box


#include "undef.F90"

#include "real.F90"

#include "fft_inc.F90"


#include "undef.F90"

#include "complex.F90"

#include "fft_inc.F90"


end module fft_oct_m


!! Local Variables:

!! mode: f90

!! coding: utf-8

!! End:

optimize
subroutine optimize()
Definition: curv_modine.F90:296

mode
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that

fft_oct_m::dfft_backward
Definition: fft.F90:246

fft_oct_m::dfft_forward
Definition: fft.F90:238

fft_oct_m::zfft_backward
Definition: fft.F90:250

fft_oct_m::zfft_forward
Definition: fft.F90:242

global_oct_m::optional_default
Definition: global.F90:270

loct_math_oct_m::loct_fft_optimize
Definition: loct_math.F90:366

messages_oct_m::messages_write
Definition: messages.F90:188

parser_oct_m::parse_variable
Definition: parser.F90:262

profiling_oct_m::profiling_count_operations
Definition: profiling.F90:200

varinfo_oct_m::varinfo_valid_option
Definition: varinfo.F90:132

log
double log(double __x) __attribute__((__nothrow__

accel_oct_m
Definition: accel.F90:114

accel_oct_m::clfft_print_error
subroutine, public clfft_print_error(ierr, name)
Definition: accel.F90:1898

accel_oct_m::accel_is_enabled
pure logical function, public accel_is_enabled()
Definition: accel.F90:427

accel_oct_m::accel
type(accel_t), public accel
Definition: accel.F90:274

debug_oct_m
Definition: debug.F90:114

fft_oct_m
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
Definition: fft.F90:118

fft_oct_m::zfft_forward_accel
subroutine zfft_forward_accel(fft, in, out)
Definition: fft.F90:1945

fft_oct_m::dfft_backward_1d
subroutine dfft_backward_1d(fft, in, out)
Definition: fft.F90:1749

fft_oct_m::cufft_z2d
integer, parameter cufft_z2d
Definition: fft.F90:265

fft_oct_m::get_exponents
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
Definition: fft.F90:1160

fft_oct_m::fft_init
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
Definition: fft.F90:418

fft_oct_m::fft_all_init
subroutine, public fft_all_init(namespace)
initialize the table
Definition: fft.F90:278

fft_oct_m::fft_get_ecut_from_box
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
Definition: fft.F90:1246

fft_oct_m::dfft_forward_3d
subroutine dfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1381

fft_oct_m::dfft_forward_accel
subroutine dfft_forward_accel(fft, in, out)
Definition: fft.F90:1489

fft_oct_m::fft_end
subroutine, public fft_end(this)
Definition: fft.F90:996

fft_oct_m::fft_gg_transform
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
Definition: fft.F90:1201

fft_oct_m::fft_scaling_factor
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
Definition: fft.F90:1224

fft_oct_m::cufft_z2z
integer, parameter cufft_z2z
Definition: fft.F90:265

fft_oct_m::pad_feq
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
Definition: fft.F90:1106

fft_oct_m::zfft_backward_1d
subroutine zfft_backward_1d(fft, in, out)
Definition: fft.F90:2205

fft_oct_m::fftlib_accel
integer, parameter, public fftlib_accel
Definition: fft.F90:183

fft_oct_m::fft_all_end
subroutine, public fft_all_end()
delete all plans
Definition: fft.F90:391

fft_oct_m::fft_size
integer function fft_size(size, factors, parity)
Definition: fft.F90:1131

fft_oct_m::zfft_backward_3d
subroutine zfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:2028

fft_oct_m::fft_operation_count
subroutine fft_operation_count(fft)
Definition: fft.F90:1187

fft_oct_m::zfft_backward_accel
subroutine zfft_backward_accel(fft, in, out)
Definition: fft.F90:2142

fft_oct_m::cufft_c2r
integer, parameter cufft_c2r
Definition: fft.F90:265

fft_oct_m::cufft_c2c
integer, parameter cufft_c2c
Definition: fft.F90:265

fft_oct_m::fft_real
integer, parameter, public fft_real
Definition: fft.F90:178

fft_oct_m::fft_get_dims
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: fft.F90:1080

fft_oct_m::fft_complex
integer, parameter, public fft_complex
Definition: fft.F90:178

fft_oct_m::fftlib_nfft
integer, parameter, public fftlib_nfft
Definition: fft.F90:183

fft_oct_m::dfft_backward_3d
subroutine dfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:1572

fft_oct_m::fft_copy
subroutine, public fft_copy(fft_i, fft_o)
Definition: fft.F90:1061

fft_oct_m::dfft_forward_1d
subroutine dfft_forward_1d(fft, in, out)
Definition: fft.F90:1554

fft_oct_m::cufft_d2z
integer, parameter cufft_d2z
Definition: fft.F90:265

fft_oct_m::fft_null
integer, parameter fft_null
Definition: fft.F90:191

fft_oct_m::fftlib_pnfft
integer, parameter, public fftlib_pnfft
Definition: fft.F90:183

fft_oct_m::zfft_forward_1d
subroutine zfft_forward_1d(fft, in, out)
Definition: fft.F90:2010

fft_oct_m::zfft_forward_3d
subroutine zfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1846

fft_oct_m::fftlib_pfft
integer, parameter, public fftlib_pfft
Definition: fft.F90:183

fft_oct_m::dfft_backward_accel
subroutine dfft_backward_accel(fft, in, out)
Definition: fft.F90:1686

fft_oct_m::fftlib_fftw
integer, parameter, public fftlib_fftw
Definition: fft.F90:183

fft_oct_m::fft_init_stage1
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
Definition: fft.F90:954

fftw_oct_m
Definition: fftw.F90:165

fftw_oct_m::fftw_prepare_plan
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
Definition: fftw.F90:187

fftw_oct_m::fftw_free_memory
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
Definition: fftw.F90:344

fftw_oct_m::fftw_get_dims
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
Definition: fftw.F90:305

fftw_oct_m::fftw_alloc_memory
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
Definition: fftw.F90:319

fftw_params_oct_m
Definition: fftw.F90:115

global_oct_m
Definition: global.F90:114

global_oct_m::m_two
real(real64), parameter, public m_two
Definition: global.F90:190

global_oct_m::m_huge
real(real64), parameter, public m_huge
Definition: global.F90:206

global_oct_m::m_half
real(real64), parameter, public m_half
Definition: global.F90:194

global_oct_m::m_one
real(real64), parameter, public m_one
Definition: global.F90:189

lalg_basic_oct_m
Definition: lalg_basic.F90:114

lattice_vectors_oct_m
Definition: lattice_vectors.F90:114

loct_math_oct_m
Definition: loct_math.F90:114

messages_oct_m
Definition: messages.F90:115

messages_oct_m::messages_not_implemented
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1113

messages_oct_m::messages_warning
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:537

messages_oct_m::messages_new_line
subroutine, public messages_new_line()
Definition: messages.F90:1134

messages_oct_m::message
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160

messages_oct_m::messages_fatal
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:414

messages_oct_m::messages_input_error
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:713

messages_oct_m::messages_info
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:616

mpi_oct_m
Definition: mpi.F90:114

mpi_oct_m::mpi_comm_undefined
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:136

mpi_oct_m::mpi_world
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:270

namespace_oct_m
Definition: namespace.F90:103

nfft_oct_m
Definition: nfft.F90:115

nfft_oct_m::nfft_write_info
subroutine, public nfft_write_info(nfft)
Definition: nfft.F90:323

nfft_oct_m::nfft_end
subroutine, public nfft_end(nfft)
Definition: nfft.F90:386

nfft_oct_m::nfft_init
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
Definition: nfft.F90:257

nfft_oct_m::nfft_copy_info
subroutine, public nfft_copy_info(in, out)
Definition: nfft.F90:400

nfft_oct_m::nfft_precompute
subroutine, public nfft_precompute(nfft, X1, X2, X3)
Definition: nfft.F90:429

nfft_oct_m::nfft_guru_options
subroutine, public nfft_guru_options(nfft, namespace)
Definition: nfft.F90:190

parser_oct_m
Definition: parser.F90:114

pfft_oct_m
The low level module to work with the PFFT library. http:
Definition: pfft.F90:164

pfft_oct_m::pfft_prepare_plan_c2c
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan complex to complex.
Definition: pfft.F90:298

pfft_oct_m::pfft_prepare_plan_c2r
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan complex to real.
Definition: pfft.F90:263

pfft_oct_m::pfft_decompose
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
Definition: pfft.F90:188

pfft_oct_m::pfft_prepare_plan_r2c
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:229

pfft_oct_m::pfft_get_dims
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: pfft.F90:335

pfft_params_oct_m
The includes for the PFFT.
Definition: pfft.F90:115

pnfft_oct_m
The low level module to work with the PNFFT library. http:
Definition: pnfft.F90:128

pnfft_oct_m::pnfft_copy_params
subroutine, public pnfft_copy_params(in, out)
Definition: pnfft.F90:304

pnfft_oct_m::pnfft_set_sp_nodes
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
Definition: pnfft.F90:495

pnfft_oct_m::pnfft_init_plan
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
Definition: pnfft.F90:362

pnfft_oct_m::pnfft_write_info
subroutine, public pnfft_write_info(pnfft)
Definition: pnfft.F90:319

pnfft_oct_m::pnfft_guru_options
subroutine, public pnfft_guru_options(pnfft, namespace)
Definition: pnfft.F90:202

pnfft_oct_m::pnfft_end
subroutine, public pnfft_end(pnfft)
Definition: pnfft.F90:471

pnfft_oct_m::pnfft_init_procmesh
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
Definition: pnfft.F90:268

profiling_oct_m
Definition: profiling.F90:116

types_oct_m
Definition: types.F90:114

unit_system_oct_m
This module defines the unit system, used for input and output.
Definition: unit_system.F90:126

unit_system_oct_m::unit_megabytes
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
Definition: unit_system.F90:174

varinfo_oct_m
Definition: varinfo.F90:114

fft_oct_m::fft_t
Definition: fft.F90:195

lattice_vectors_oct_m::lattice_vectors_t
Definition: lattice_vectors.F90:135

mpi_oct_m::mpi_grp_t
This is defined even when running serial.
Definition: mpi.F90:142

namespace_oct_m::namespace_t
Definition: namespace.F90:115

true
int true(void)
Definition: symmetries_finite.c:3153