26 use,
intrinsic :: iso_c_binding
39 use,
intrinsic :: iso_fortran_env
49#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
84 integer,
public,
parameter :: &
89 integer,
public,
parameter :: &
97 integer,
parameter :: &
105 integer,
public ::
type
106 integer,
public :: library
108 type(MPI_Comm) :: comm
109 integer :: rs_n_global(3)
110 integer :: fs_n_global(3)
113 integer :: rs_istart(1:3)
114 integer :: fs_istart(1:3)
116 integer,
public :: stride_rs(1:3)
117 integer,
public :: stride_fs(1:3)
126 real(real64),
pointer,
public :: drs_data(:,:,:)
127 complex(real64),
pointer,
public :: zrs_data(:,:,:)
128 complex(real64),
pointer,
public :: fs_data(:,:,:)
131 type(clfftPlanHandle) :: cl_plan_fw
132 type(clfftPlanHandle) :: cl_plan_bw
134 type(c_ptr) :: cuda_plan_fw
135 type(c_ptr) :: cuda_plan_bw
137 type(nfft_t),
public :: nfft
139 type(pnfft_t),
public :: pnfft
141 logical,
public :: aligned_memory
160 logical,
save,
public :: fft_initialized = .false.
161 integer,
save :: fft_refs(FFT_MAX)
162 type(fft_t),
save :: fft_array(FFT_MAX)
163 logical :: fft_optimize
164 integer,
save :: fft_prepare_plan
165 integer,
public :: fft_default_lib = -1
167 type(nfft_t),
save :: nfft_options
169 type(pnfft_t),
save :: pnfft_options
171 integer,
parameter :: &
172 CUFFT_R2C = int(z
'2a'), &
186 integer :: ii, fft_default
187#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
193 fft_initialized = .
true.
249 call parse_variable(namespace,
'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
272 call parse_variable(namespace,
'FFTLibrary', fft_default, fft_default_lib)
275#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA))
276 call messages_write(
'You have selected the Accelerated FFT, but Octopus was compiled', new_line = .
true.)
281 call messages_write(
'You have selected the accelerated FFT, but acceleration is disabled.')
286#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
287 if (omp_get_max_threads() > 1)
then
292 iret = fftw_init_threads()
297 call fftw_plan_with_nthreads(omp_get_max_threads())
327#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
328 call fftw_cleanup_threads()
333 fft_initialized = .false.
339 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
340 type(
fft_t),
intent(inout) :: this
341 integer,
intent(inout) :: nn(3)
342 integer,
intent(in) :: dim
343 integer,
intent(in) ::
type
344 integer,
intent(in) :: library
346 integer,
intent(in) :: optimize_parity(3)
348 type(mpi_comm),
optional,
intent(out) :: comm
349 type(
mpi_grp_t),
optional,
intent(in) :: mpi_grp
350 logical,
optional :: use_aligned
352 integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
353 integer :: n_1, n_2, n_3, nn_temp(3)
356 integer(int64) :: number_points, alloc_size
359 real(real64) :: scale
368 assert(fft_initialized)
373 if (
present(mpi_grp)) mpi_grp_ = mpi_grp
380 if (nn(ii) <= 1)
exit
381 fft_dim = fft_dim + 1
384 if (fft_dim == 0)
then
385 message(1) =
"Internal error in fft_init: apparently, a 1x1x1 FFT is required."
392 nn_temp(1:fft_dim) = nn(1:fft_dim)
394 select case (library_)
397 if(any(optimize_parity(1:fft_dim) > 1))
then
398 message(1) =
"Internal error in fft_init: optimize_parity must be negative, 0, or 1."
403 nn_temp(ii) =
fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
404 if (fft_optimize .and.
optimize(ii)) nn(ii) = nn_temp(ii)
408 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim)))
then
410 call messages_write(
'Invalid grid size for accel fft. FFTW will be used instead.')
421 if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and.
optimize(ii)))&
429 if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
432 if (fft_dim < 3)
then
438 if (fft_dim < 3 .and. library_ ==
fftlib_pfft)
then
443 if (any(optimize_parity(1:fft_dim) > 1))
then
444 message(1) =
"Internal error in fft_init: optimize_parity must be negative, 0, or 1."
450 if (fft_optimize .and.
optimize(ii)) nn(ii) = nn_temp(ii)
457 do ii = fft_max, 1, -1
459 if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and.
type == fft_array(ii)%type &
460 .and. library_ == fft_array(ii)%library .and. library_ /=
fftlib_nfft &
462 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory)
then
467 fft_refs(ii) = fft_refs(ii) + 1
468 if (
present(comm)) comm = fft_array(ii)%comm
478 message(1) =
"Not enough slots for FFTs."
479 message(2) =
"Please increase FFT_MAX in fft.F90 and recompile."
485 fft_array(jj)%slot = jj
486 fft_array(jj)%type =
type
487 fft_array(jj)%library = library_
488 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
489 fft_array(jj)%rs_n_global(dim+1:) = 1
490 nullify(fft_array(jj)%drs_data)
491 nullify(fft_array(jj)%zrs_data)
492 nullify(fft_array(jj)%fs_data)
494 fft_array(jj)%aligned_memory = this%aligned_memory
497 select case (library_)
504 ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)
506 if (ierror /= 0)
then
507 message(1) =
"The number of rows and columns in PFFT processor grid is not equal to "
508 message(2) =
"the number of processor in the MPI communicator."
509 message(3) =
"Please check it."
523 if (
present(comm)) comm = fft_array(jj)%comm
526 select case (library_)
529 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
530 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
531 fft_array(jj)%rs_istart = 1
532 fft_array(jj)%fs_istart = 1
534 if (this%aligned_memory)
then
536 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
542 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
543 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
556 n_1 = max(1, fft_array(jj)%rs_n(1))
557 n_2 = max(1, fft_array(jj)%rs_n(2))
558 n_3 = max(1, fft_array(jj)%rs_n(3))
560 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2))
561 safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
563 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2)))
564 safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
567 n_1 = max(1, fft_array(jj)%fs_n(1))
568 n_2 = max(1, fft_array(jj)%fs_n(2))
569 n_3 = max(1, fft_array(jj)%fs_n(3))
571 n3 = ceiling(real(alloc_size)/real(n_3*n_1))
572 safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))
576 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
577 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
578 fft_array(jj)%rs_istart = 1
579 fft_array(jj)%fs_istart = 1
582 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
583 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
584 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
585 fft_array(jj)%rs_istart = 1
586 fft_array(jj)%fs_istart = 1
589 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
590 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
591 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
592 fft_array(jj)%rs_istart = 1
593 fft_array(jj)%fs_istart = 1
600 select case (library_)
602 if (.not. this%aligned_memory)
then
603 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
604 type ==
fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
605 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
606 type ==
fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
609 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
610 type ==
fft_real, fftw_forward, fft_prepare_plan, &
611 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
612 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
613 type ==
fft_real, fftw_backward, fft_prepare_plan, &
614 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
616 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
617 type ==
fft_real, fftw_forward, fft_prepare_plan, &
618 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
619 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
620 type ==
fft_real, fftw_backward, fft_prepare_plan, &
621 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
628 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
635 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
637 fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
640 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
642 fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
664 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, comm, fft_array(jj)%fs_n_global, &
665 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
669 fft_array(jj)%stride_rs(1) = 1
670 fft_array(jj)%stride_fs(1) = 1
672 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
673 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
678 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
679 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_d2z, &
681 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
682 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2d, &
685 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
686 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2z, &
688 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
689 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2z, &
697 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_fw,
accel%context%cl_context, &
698 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
699 if (status /= clfft_success)
call clfft_print_error(status,
'clfftCreateDefaultPlan')
701 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_bw,
accel%context%cl_context, &
702 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
703 if (status /= clfft_success)
call clfft_print_error(status,
'clfftCreateDefaultPlan')
707 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
708 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
710 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
711 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
715 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_fw, 1_real64, status)
716 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanBatchSize')
718 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_bw, 1_real64, status)
719 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanBatchSize')
723 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
724 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
726 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
727 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
734 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_real, clfft_hermitian_interleaved, status)
737 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_hermitian_interleaved, clfft_real, status)
742 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_complex_interleaved, clfft_complex_interleaved, status)
745 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_complex_interleaved, clfft_complex_interleaved, status)
752 call clfftsetresultlocation(fft_array(jj)%cl_plan_fw, clfft_outofplace, status)
753 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetResultLocation')
755 call clfftsetresultlocation(fft_array(jj)%cl_plan_bw, clfft_outofplace, status)
756 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetResultLocation')
760 call clfftsetplaninstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
761 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanInStride')
763 call clfftsetplanoutstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
764 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanOutStride')
766 call clfftsetplaninstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
767 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanInStride')
769 call clfftsetplanoutstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
770 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanOutStride')
774 scale = 1.0_real64/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), real64)))
776 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_forward, 1.0_real64, status)
779 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_backward, scale, status)
784 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, 1.0_real64, status)
787 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, scale, status)
792 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, scale, status)
795 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, 1.0_real64, status)
802 call clfftbakeplan(fft_array(jj)%cl_plan_fw,
accel%command_queue, status)
805 call clfftbakeplan(fft_array(jj)%cl_plan_bw,
accel%command_queue, status)
825 number_points = number_points * fft_array(jj)%rs_n_global(idir)
834 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim)))
then
836 call messages_write(
' Inefficient FFT grid. A better grid would be: ')
844 select case (library_)
846 write(
message(1),
'(a)')
"Info: FFT library = PFFT"
847 write(
message(2),
'(a)')
"Info: PFFT processor grid"
848 write(
message(3),
'(a, i9)')
" No. of processors = ", mpi_grp_%size
849 write(
message(4),
'(a, i9)')
" No. of columns in the proc. grid = ", column_size
850 write(
message(5),
'(a, i9)')
" No. of rows in the proc. grid = ", row_size
851 write(
message(6),
'(a, i9)')
" The size of integer is = ", c_intptr_t
876 type(
fft_t),
intent(inout) :: this
880 real(real64),
intent(in) :: xx(:,:)
881 integer,
optional,
intent(in) :: nn(:)
887 assert(
size(xx,2) == 3)
890 select case (fft_array(slot)%library)
897 xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
918 type(
fft_t),
intent(inout) :: this
929 message(1) =
"Trying to deallocate FFT that has not been allocated."
932 if (fft_refs(ii) > 1)
then
933 fft_refs(ii) = fft_refs(ii) - 1
935 select case (fft_array(ii)%library)
937 call fftw_destroy_plan(fft_array(ii)%planf)
938 call fftw_destroy_plan(fft_array(ii)%planb)
940 if (this%aligned_memory)
then
942 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
947 call pfft_destroy_plan(fft_array(ii)%planf)
948 call pfft_destroy_plan(fft_array(ii)%planb)
950 safe_deallocate_p(fft_array(ii)%drs_data)
951 safe_deallocate_p(fft_array(ii)%zrs_data)
952 safe_deallocate_p(fft_array(ii)%fs_data)
956 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
957 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
960 call clfftdestroyplan(fft_array(ii)%cl_plan_fw, status)
961 call clfftdestroyplan(fft_array(ii)%cl_plan_bw, status)
983 type(
fft_t),
intent(in) :: fft_i
984 type(
fft_t),
intent(inout) :: fft_o
988 if (fft_o%slot > 0)
then
991 assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
992 assert(fft_refs(fft_i%slot) > 0)
995 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1
1001 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
1002 type(
fft_t),
intent(in) :: fft
1003 integer,
intent(out) :: rs_n_global(1:3)
1004 integer,
intent(out) :: fs_n_global(1:3)
1005 integer,
intent(out) :: rs_n(1:3)
1006 integer,
intent(out) :: fs_n(1:3)
1007 integer,
intent(out) :: rs_istart(1:3)
1008 integer,
intent(out) :: fs_istart(1:3)
1015 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3)
1016 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3)
1017 rs_n(1:3) = fft_array(slot)%rs_n(1:3)
1018 fs_n(1:3) = fft_array(slot)%fs_n(1:3)
1019 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3)
1020 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3)
1027 pure function pad_feq(ii, nn, mode)
1028 integer,
intent(in) :: ii,nn
1029 logical,
intent(in) ::
mode
1035 if (ii <= nn/2 + 1)
then
1052 integer function fft_size(size, factors, parity)
1053 integer,
intent(in) :: size
1054 integer,
intent(in) :: factors(:)
1055 integer,
intent(in) :: parity
1059 integer,
allocatable :: exponents(:)
1063 nfactors = ubound(factors, dim = 1)
1065 safe_allocate(exponents(1:nfactors))
1070 if (nondiv == 1 .and. mod(
fft_size, 2) == parity)
exit
1074 safe_deallocate_a(exponents)
1081 subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
1082 integer,
intent(in) :: num
1083 integer,
intent(in) :: nfactors
1084 integer,
intent(in) :: factors(:)
1085 integer,
intent(out) :: exponents(:)
1086 integer,
intent(out) :: nondiv
1093 do ifactor = 1, nfactors
1094 exponents(ifactor) = 0
1096 if (mod(nondiv, factors(ifactor)) /= 0)
exit
1097 nondiv = nondiv/factors(ifactor)
1098 exponents(ifactor) = exponents(ifactor) + 1
1109 type(
fft_t),
intent(in) :: fft
1111 real(real64) :: fullsize
1115 fullsize = product(real(fft%fs_n(1:3), real64))
1122 subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
1123 integer,
intent(in) :: gg_in(:)
1124 real(real64),
intent(in) :: temp(:)
1125 integer,
intent(in) :: periodic_dim
1127 real(real64),
intent(in) :: qq(:)
1128 real(real64),
intent(inout) :: gg(:)
1129 real(real64),
intent(out) :: modg2
1133 gg(1:3) = gg_in(1:3)
1134 gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)
1135 gg(1:3) = gg(1:3) * temp(1:3)
1136 gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3),gg(1:3))
1137 modg2 = sum(gg(1:3)**2)
1146 type(
fft_t),
intent(in) :: fft
1149 scaling_factor =
m_one
1151 select case (fft_array(fft%slot)%library)
1154 scaling_factor =
m_one/real(fft_array(fft%slot)%rs_n_global(1), real64)
1155 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(2), real64)
1156 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(3), real64)
1167 real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
1168 integer,
intent(in) :: box_dim(:)
1169 integer,
intent(in) :: fs_istart(:)
1171 real(real64),
intent(in) :: gspacing(:)
1172 integer,
intent(in) :: periodic_dim
1173 real(real64),
intent(in) :: qq(:)
1175 integer :: lx, ix, iy, iz, idir, idir2, idir3
1176 real(real64) :: dminsq, gg(3), modg2
1177 integer :: box_dim_(3), ixx(3)
1178 integer :: ming(3), maxg(3)
1182 assert(periodic_dim > 0)
1184 box_dim_(1:periodic_dim) = box_dim(1:periodic_dim)
1185 if (periodic_dim < 3) box_dim_(periodic_dim+1:3) = 1
1190 do idir = 1, periodic_dim
1191 do lx = 1, box_dim(idir)
1192 ix = fs_istart(idir) + lx - 1
1194 ming(idir) = min(ming(idir), ixx(idir))
1195 maxg(idir) = max(maxg(idir), ixx(idir))
1197 maxg(idir) = min(abs(ming(idir)), maxg(idir))
1202 do idir = 1, periodic_dim
1203 idir2 = mod(idir, 3)+1
1204 idir3 = mod(idir+1, 3)+1
1207 ixx(idir) = -maxg(idir)
1208 do iy = -maxg(idir2), maxg(idir2)
1210 do iz = -maxg(idir3), maxg(idir3)
1213 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1217 ixx(idir) = maxg(idir)
1218 do iy = -maxg(idir2), maxg(idir2)
1220 do iz = -maxg(idir3), maxg(idir3)
1223 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1234#include "fft_inc.F90"
1237#include "complex.F90"
1238#include "fft_inc.F90"
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
double log(double __x) __attribute__((__nothrow__
subroutine, public clfft_print_error(ierr, name)
pure logical function, public accel_is_enabled()
type(accel_t), public accel
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
subroutine zfft_forward_accel(fft, in, out)
subroutine dfft_backward_1d(fft, in, out)
integer, parameter cufft_z2d
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
subroutine, public fft_all_init(namespace)
initialize the table
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
subroutine dfft_forward_3d(fft, in, out, norm)
subroutine dfft_forward_accel(fft, in, out)
subroutine, public fft_end(this)
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
integer, parameter cufft_z2z
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
subroutine zfft_backward_1d(fft, in, out)
integer, parameter, public fftlib_accel
subroutine, public fft_all_end()
delete all plans
integer function fft_size(size, factors, parity)
subroutine zfft_backward_3d(fft, in, out, norm)
subroutine fft_operation_count(fft)
subroutine zfft_backward_accel(fft, in, out)
integer, parameter cufft_c2r
integer, parameter cufft_c2c
integer, parameter, public fft_real
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
integer, parameter, public fft_complex
integer, parameter, public fftlib_nfft
subroutine dfft_backward_3d(fft, in, out, norm)
subroutine, public fft_copy(fft_i, fft_o)
subroutine dfft_forward_1d(fft, in, out)
integer, parameter cufft_d2z
integer, parameter fft_null
integer, parameter, public fftlib_pnfft
subroutine zfft_forward_1d(fft, in, out)
subroutine zfft_forward_3d(fft, in, out, norm)
integer, parameter, public fftlib_pfft
subroutine dfft_backward_accel(fft, in, out)
integer, parameter, public fftlib_fftw
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
real(real64), parameter, public m_two
real(real64), parameter, public m_huge
real(real64), parameter, public m_half
real(real64), parameter, public m_one
subroutine, public messages_not_implemented(feature, namespace)
subroutine, public messages_warning(no_lines, all_nodes, namespace)
subroutine, public messages_new_line()
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
type(mpi_grp_t), public mpi_world
subroutine, public nfft_write_info(nfft)
subroutine, public nfft_end(nfft)
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
subroutine, public nfft_copy_info(in, out)
subroutine, public nfft_precompute(nfft, X1, X2, X3)
subroutine, public nfft_guru_options(nfft, namespace)
The low level module to work with the PFFT library. http:
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
The includes for the PFFT.
The low level module to work with the PNFFT library. http:
subroutine, public pnfft_copy_params(in, out)
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
subroutine, public pnfft_write_info(pnfft)
subroutine, public pnfft_guru_options(pnfft, namespace)
subroutine, public pnfft_end(pnfft)
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
This module defines the unit system, used for input and output.
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
This is defined even when running serial.