26 use,
intrinsic :: iso_c_binding
27 use,
intrinsic :: ieee_arithmetic
40 use,
intrinsic :: iso_fortran_env
50#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
85 integer,
public,
parameter :: &
90 integer,
public,
parameter :: &
98 integer,
parameter :: &
106 integer,
public ::
type
107 integer,
public :: library
109 type(MPI_Comm) :: comm
110 integer :: rs_n_global(3)
111 integer :: fs_n_global(3)
114 integer :: rs_istart(1:3)
115 integer :: fs_istart(1:3)
117 integer,
public :: stride_rs(1:3)
118 integer,
public :: stride_fs(1:3)
127 real(real64),
pointer,
public :: drs_data(:,:,:)
128 complex(real64),
pointer,
public :: zrs_data(:,:,:)
129 complex(real64),
pointer,
public :: fs_data(:,:,:)
132 type(clfftPlanHandle) :: cl_plan_fw
133 type(clfftPlanHandle) :: cl_plan_bw
135 type(c_ptr) :: cuda_plan_fw
136 type(c_ptr) :: cuda_plan_bw
138 type(nfft_t),
public :: nfft
140 type(pnfft_t),
public :: pnfft
142 logical,
public :: aligned_memory
161 logical,
save,
public :: fft_initialized = .false.
162 integer,
save :: fft_refs(FFT_MAX)
163 type(fft_t),
save :: fft_array(FFT_MAX)
164 logical :: fft_optimize
165 integer,
save :: fft_prepare_plan
166 integer,
public :: fft_default_lib = -1
168 type(nfft_t),
save :: nfft_options
170 type(pnfft_t),
save :: pnfft_options
172 integer,
parameter :: &
173 CUFFT_R2C = int(z
'2a'), &
187 integer :: ii, fft_default
188#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
194 fft_initialized = .
true.
250 call parse_variable(namespace,
'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
273 call parse_variable(namespace,
'FFTLibrary', fft_default, fft_default_lib)
276#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA))
277 call messages_write(
'You have selected the Accelerated FFT, but Octopus was compiled', new_line = .
true.)
282 call messages_write(
'You have selected the accelerated FFT, but acceleration is disabled.')
287#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
288 if (omp_get_max_threads() > 1)
then
293 iret = fftw_init_threads()
298 call fftw_plan_with_nthreads(omp_get_max_threads())
328#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
329 call fftw_cleanup_threads()
334 fft_initialized = .false.
340 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
341 type(
fft_t),
intent(inout) :: this
342 integer,
intent(inout) :: nn(3)
343 integer,
intent(in) :: dim
344 integer,
intent(in) ::
type
345 integer,
intent(in) :: library
347 integer,
intent(in) :: optimize_parity(3)
349 type(mpi_comm),
optional,
intent(out) :: comm
350 type(
mpi_grp_t),
optional,
intent(in) :: mpi_grp
351 logical,
optional :: use_aligned
353 integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
354 integer :: n_1, n_2, n_3, nn_temp(3)
357 integer(int64) :: number_points, alloc_size
360 real(real64) :: scale
369 assert(fft_initialized)
374 if (
present(mpi_grp)) mpi_grp_ = mpi_grp
381 if (nn(ii) <= 1)
exit
382 fft_dim = fft_dim + 1
385 if (fft_dim == 0)
then
386 message(1) =
"Internal error in fft_init: apparently, a 1x1x1 FFT is required."
393 nn_temp(1:fft_dim) = nn(1:fft_dim)
395 select case (library_)
398 if(any(optimize_parity(1:fft_dim) > 1))
then
399 message(1) =
"Internal error in fft_init: optimize_parity must be negative, 0, or 1."
404 nn_temp(ii) =
fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
405 if (fft_optimize .and.
optimize(ii)) nn(ii) = nn_temp(ii)
409 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim)))
then
411 call messages_write(
'Invalid grid size for accel fft. FFTW will be used instead.')
422 if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and.
optimize(ii)))&
430 if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
433 if (fft_dim < 3)
then
439 if (fft_dim < 3 .and. library_ ==
fftlib_pfft)
then
444 if (any(optimize_parity(1:fft_dim) > 1))
then
445 message(1) =
"Internal error in fft_init: optimize_parity must be negative, 0, or 1."
451 if (fft_optimize .and.
optimize(ii)) nn(ii) = nn_temp(ii)
458 do ii = fft_max, 1, -1
460 if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and.
type == fft_array(ii)%type &
461 .and. library_ == fft_array(ii)%library .and. library_ /=
fftlib_nfft &
463 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory)
then
468 fft_refs(ii) = fft_refs(ii) + 1
469 if (
present(comm)) comm = fft_array(ii)%comm
479 message(1) =
"Not enough slots for FFTs."
480 message(2) =
"Please increase FFT_MAX in fft.F90 and recompile."
486 fft_array(jj)%slot = jj
487 fft_array(jj)%type =
type
488 fft_array(jj)%library = library_
489 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
490 fft_array(jj)%rs_n_global(dim+1:) = 1
491 nullify(fft_array(jj)%drs_data)
492 nullify(fft_array(jj)%zrs_data)
493 nullify(fft_array(jj)%fs_data)
495 fft_array(jj)%aligned_memory = this%aligned_memory
498 select case (library_)
505 ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)
507 if (ierror /= 0)
then
508 message(1) =
"The number of rows and columns in PFFT processor grid is not equal to "
509 message(2) =
"the number of processor in the MPI communicator."
510 message(3) =
"Please check it."
524 if (
present(comm)) comm = fft_array(jj)%comm
527 select case (library_)
530 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
531 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
532 fft_array(jj)%rs_istart = 1
533 fft_array(jj)%fs_istart = 1
535 if (this%aligned_memory)
then
537 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
543 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
544 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
557 n_1 = max(1, fft_array(jj)%rs_n(1))
558 n_2 = max(1, fft_array(jj)%rs_n(2))
559 n_3 = max(1, fft_array(jj)%rs_n(3))
561 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2))
562 safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
564 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2)))
565 safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
568 n_1 = max(1, fft_array(jj)%fs_n(1))
569 n_2 = max(1, fft_array(jj)%fs_n(2))
570 n_3 = max(1, fft_array(jj)%fs_n(3))
572 n3 = ceiling(real(alloc_size)/real(n_3*n_1))
573 safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))
577 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
578 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
579 fft_array(jj)%rs_istart = 1
580 fft_array(jj)%fs_istart = 1
583 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
584 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
585 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
586 fft_array(jj)%rs_istart = 1
587 fft_array(jj)%fs_istart = 1
590 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
591 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
592 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
593 fft_array(jj)%rs_istart = 1
594 fft_array(jj)%fs_istart = 1
601 select case (library_)
603 if (.not. this%aligned_memory)
then
604 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
605 type ==
fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
606 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
607 type ==
fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
610 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
611 type ==
fft_real, fftw_forward, fft_prepare_plan, &
612 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
613 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
614 type ==
fft_real, fftw_backward, fft_prepare_plan, &
615 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
617 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
618 type ==
fft_real, fftw_forward, fft_prepare_plan, &
619 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
620 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
621 type ==
fft_real, fftw_backward, fft_prepare_plan, &
622 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
629 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
636 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
638 fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
641 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
643 fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
665 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, comm, fft_array(jj)%fs_n_global, &
666 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
670 fft_array(jj)%stride_rs(1) = 1
671 fft_array(jj)%stride_fs(1) = 1
673 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
674 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
679 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
680 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_d2z, &
682 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
683 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2d, &
686 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
687 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2z, &
689 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
690 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1),
cufft_z2z, &
698 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_fw,
accel%context%cl_context, &
699 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
700 if (status /= clfft_success)
call clfft_print_error(status,
'clfftCreateDefaultPlan')
702 call clfftcreatedefaultplan(fft_array(jj)%cl_plan_bw,
accel%context%cl_context, &
703 fft_dim, int(fft_array(jj)%rs_n_global, int64), status)
704 if (status /= clfft_success)
call clfft_print_error(status,
'clfftCreateDefaultPlan')
708 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
709 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
711 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
712 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
716 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_fw, 1_real64, status)
717 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanBatchSize')
719 call clfftsetplanbatchsize(fft_array(jj)%cl_plan_bw, 1_real64, status)
720 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanBatchSize')
724 call clfftsetplanprecision(fft_array(jj)%cl_plan_fw, clfft_double, status)
725 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
727 call clfftsetplanprecision(fft_array(jj)%cl_plan_bw, clfft_double, status)
728 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanPrecision')
735 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_real, clfft_hermitian_interleaved, status)
738 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_hermitian_interleaved, clfft_real, status)
743 call clfftsetlayout(fft_array(jj)%cl_plan_fw, clfft_complex_interleaved, clfft_complex_interleaved, status)
746 call clfftsetlayout(fft_array(jj)%cl_plan_bw, clfft_complex_interleaved, clfft_complex_interleaved, status)
753 call clfftsetresultlocation(fft_array(jj)%cl_plan_fw, clfft_outofplace, status)
754 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetResultLocation')
756 call clfftsetresultlocation(fft_array(jj)%cl_plan_bw, clfft_outofplace, status)
757 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetResultLocation')
761 call clfftsetplaninstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
762 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanInStride')
764 call clfftsetplanoutstride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
765 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanOutStride')
767 call clfftsetplaninstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, int64), status)
768 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanInStride')
770 call clfftsetplanoutstride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, int64), status)
771 if (status /= clfft_success)
call clfft_print_error(status,
'clfftSetPlanOutStride')
775 scale = 1.0_real64/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), real64)))
777 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_forward, 1.0_real64, status)
780 call clfftsetplanscale(fft_array(jj)%cl_plan_fw, clfft_backward, scale, status)
785 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, 1.0_real64, status)
788 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, scale, status)
793 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_forward, scale, status)
796 call clfftsetplanscale(fft_array(jj)%cl_plan_bw, clfft_backward, 1.0_real64, status)
803 call clfftbakeplan(fft_array(jj)%cl_plan_fw,
accel%command_queue, status)
806 call clfftbakeplan(fft_array(jj)%cl_plan_bw,
accel%command_queue, status)
826 number_points = number_points * fft_array(jj)%rs_n_global(idir)
835 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim)))
then
837 call messages_write(
' Inefficient FFT grid. A better grid would be: ')
845 select case (library_)
847 write(
message(1),
'(a)')
"Info: FFT library = PFFT"
848 write(
message(2),
'(a)')
"Info: PFFT processor grid"
849 write(
message(3),
'(a, i9)')
" No. of processors = ", mpi_grp_%size
850 write(
message(4),
'(a, i9)')
" No. of columns in the proc. grid = ", column_size
851 write(
message(5),
'(a, i9)')
" No. of rows in the proc. grid = ", row_size
852 write(
message(6),
'(a, i9)')
" The size of integer is = ", c_intptr_t
877 type(
fft_t),
intent(inout) :: this
881 real(real64),
intent(in) :: xx(:,:)
882 integer,
optional,
intent(in) :: nn(:)
888 assert(
size(xx,2) == 3)
891 select case (fft_array(slot)%library)
898 xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
919 type(
fft_t),
intent(inout) :: this
930 message(1) =
"Trying to deallocate FFT that has not been allocated."
933 if (fft_refs(ii) > 1)
then
934 fft_refs(ii) = fft_refs(ii) - 1
936 select case (fft_array(ii)%library)
938 call fftw_destroy_plan(fft_array(ii)%planf)
939 call fftw_destroy_plan(fft_array(ii)%planb)
941 if (this%aligned_memory)
then
943 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
948 call pfft_destroy_plan(fft_array(ii)%planf)
949 call pfft_destroy_plan(fft_array(ii)%planb)
951 safe_deallocate_p(fft_array(ii)%drs_data)
952 safe_deallocate_p(fft_array(ii)%zrs_data)
953 safe_deallocate_p(fft_array(ii)%fs_data)
957 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
958 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
961 call clfftdestroyplan(fft_array(ii)%cl_plan_fw, status)
962 call clfftdestroyplan(fft_array(ii)%cl_plan_bw, status)
984 type(
fft_t),
intent(in) :: fft_i
985 type(
fft_t),
intent(inout) :: fft_o
989 if (fft_o%slot > 0)
then
992 assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
993 assert(fft_refs(fft_i%slot) > 0)
996 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1
1002 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
1003 type(
fft_t),
intent(in) :: fft
1004 integer,
intent(out) :: rs_n_global(1:3)
1005 integer,
intent(out) :: fs_n_global(1:3)
1006 integer,
intent(out) :: rs_n(1:3)
1007 integer,
intent(out) :: fs_n(1:3)
1008 integer,
intent(out) :: rs_istart(1:3)
1009 integer,
intent(out) :: fs_istart(1:3)
1016 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3)
1017 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3)
1018 rs_n(1:3) = fft_array(slot)%rs_n(1:3)
1019 fs_n(1:3) = fft_array(slot)%fs_n(1:3)
1020 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3)
1021 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3)
1028 pure function pad_feq(ii, nn, mode)
1029 integer,
intent(in) :: ii,nn
1030 logical,
intent(in) ::
mode
1036 if (ii <= nn/2 + 1)
then
1053 integer function fft_size(size, factors, parity)
1054 integer,
intent(in) :: size
1055 integer,
intent(in) :: factors(:)
1056 integer,
intent(in) :: parity
1060 integer,
allocatable :: exponents(:)
1064 nfactors = ubound(factors, dim = 1)
1066 safe_allocate(exponents(1:nfactors))
1071 if (nondiv == 1 .and. mod(
fft_size, 2) == parity)
exit
1075 safe_deallocate_a(exponents)
1082 subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
1083 integer,
intent(in) :: num
1084 integer,
intent(in) :: nfactors
1085 integer,
intent(in) :: factors(:)
1086 integer,
intent(out) :: exponents(:)
1087 integer,
intent(out) :: nondiv
1094 do ifactor = 1, nfactors
1095 exponents(ifactor) = 0
1097 if (mod(nondiv, factors(ifactor)) /= 0)
exit
1098 nondiv = nondiv/factors(ifactor)
1099 exponents(ifactor) = exponents(ifactor) + 1
1110 type(
fft_t),
intent(in) :: fft
1112 real(real64) :: fullsize
1116 fullsize = product(real(fft%fs_n(1:3), real64))
1123 subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
1124 integer,
intent(in) :: gg_in(:)
1125 real(real64),
intent(in) :: temp(:)
1126 integer,
intent(in) :: periodic_dim
1128 real(real64),
intent(in) :: qq(:)
1129 real(real64),
intent(inout) :: gg(:)
1130 real(real64),
intent(out) :: modg2
1134 gg(1:3) = gg_in(1:3)
1135 gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)
1136 gg(1:3) = gg(1:3) * temp(1:3)
1137 gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3),gg(1:3))
1138 modg2 = sum(gg(1:3)**2)
1147 type(
fft_t),
intent(in) :: fft
1150 scaling_factor =
m_one
1152 select case (fft_array(fft%slot)%library)
1155 scaling_factor =
m_one/real(fft_array(fft%slot)%rs_n_global(1), real64)
1156 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(2), real64)
1157 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(3), real64)
1168 real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
1169 integer,
intent(in) :: box_dim(:)
1170 integer,
intent(in) :: fs_istart(:)
1172 real(real64),
intent(in) :: gspacing(:)
1173 integer,
intent(in) :: periodic_dim
1174 real(real64),
intent(in) :: qq(:)
1176 integer :: lx, ix, iy, iz, idir, idir2, idir3
1177 real(real64) :: dminsq, gg(3), modg2
1178 integer :: box_dim_(3), ixx(3)
1179 integer :: ming(3), maxg(3)
1183 assert(periodic_dim > 0)
1185 box_dim_(1:periodic_dim) = box_dim(1:periodic_dim)
1186 if (periodic_dim < 3) box_dim_(periodic_dim+1:3) = 1
1191 do idir = 1, periodic_dim
1192 do lx = 1, box_dim(idir)
1193 ix = fs_istart(idir) + lx - 1
1195 ming(idir) = min(ming(idir), ixx(idir))
1196 maxg(idir) = max(maxg(idir), ixx(idir))
1198 maxg(idir) = min(abs(ming(idir)), maxg(idir))
1203 do idir = 1, periodic_dim
1204 idir2 = mod(idir, 3)+1
1205 idir3 = mod(idir+1, 3)+1
1208 ixx(idir) = -maxg(idir)
1209 do iy = -maxg(idir2), maxg(idir2)
1211 do iz = -maxg(idir3), maxg(idir3)
1214 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1218 ixx(idir) = maxg(idir)
1219 do iy = -maxg(idir2), maxg(idir2)
1221 do iz = -maxg(idir3), maxg(idir3)
1224 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1235#include "fft_inc.F90"
1238#include "complex.F90"
1239#include "fft_inc.F90"
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
double log(double __x) __attribute__((__nothrow__
subroutine, public clfft_print_error(ierr, name)
pure logical function, public accel_is_enabled()
type(accel_t), public accel
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
subroutine zfft_forward_accel(fft, in, out)
subroutine dfft_backward_1d(fft, in, out)
integer, parameter cufft_z2d
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
subroutine, public fft_all_init(namespace)
initialize the table
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
subroutine dfft_forward_3d(fft, in, out, norm)
subroutine dfft_forward_accel(fft, in, out)
subroutine, public fft_end(this)
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
integer, parameter cufft_z2z
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
subroutine zfft_backward_1d(fft, in, out)
integer, parameter, public fftlib_accel
subroutine, public fft_all_end()
delete all plans
integer function fft_size(size, factors, parity)
subroutine zfft_backward_3d(fft, in, out, norm)
subroutine fft_operation_count(fft)
subroutine zfft_backward_accel(fft, in, out)
integer, parameter cufft_c2r
integer, parameter cufft_c2c
integer, parameter, public fft_real
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
integer, parameter, public fft_complex
integer, parameter, public fftlib_nfft
subroutine dfft_backward_3d(fft, in, out, norm)
subroutine, public fft_copy(fft_i, fft_o)
subroutine dfft_forward_1d(fft, in, out)
integer, parameter cufft_d2z
integer, parameter fft_null
integer, parameter, public fftlib_pnfft
subroutine zfft_forward_1d(fft, in, out)
subroutine zfft_forward_3d(fft, in, out, norm)
integer, parameter, public fftlib_pfft
subroutine dfft_backward_accel(fft, in, out)
integer, parameter, public fftlib_fftw
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
real(real64), parameter, public m_two
real(real64), parameter, public m_huge
real(real64), parameter, public m_half
real(real64), parameter, public m_one
subroutine, public messages_not_implemented(feature, namespace)
subroutine, public messages_warning(no_lines, all_nodes, namespace)
subroutine, public messages_new_line()
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
type(mpi_grp_t), public mpi_world
subroutine, public nfft_write_info(nfft)
subroutine, public nfft_end(nfft)
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
subroutine, public nfft_copy_info(in, out)
subroutine, public nfft_precompute(nfft, X1, X2, X3)
subroutine, public nfft_guru_options(nfft, namespace)
The low level module to work with the PFFT library. http:
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
The includes for the PFFT.
The low level module to work with the PNFFT library. http:
subroutine, public pnfft_copy_params(in, out)
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
subroutine, public pnfft_write_info(pnfft)
subroutine, public pnfft_guru_options(pnfft, namespace)
subroutine, public pnfft_end(pnfft)
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
This module defines the unit system, used for input and output.
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
This is defined even when running serial.