Octopus
sgfft.F90
Go to the documentation of this file.
1!! Copyright (C) 2011 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include <global.h>
20
24module sgfft_oct_m
25 use, intrinsic :: iso_fortran_env
26 use global_oct_m
28 use mpi_oct_m
29#ifdef HAVE_OPENMP
30 use omp_lib
31#endif
33
34 implicit none
35
36 private
37
38 public :: &
39 fft, &
41 kernelfft, &
43
44contains
45
46 !!****h* BigDFT/fourier_dim
47 !! NAME
48 !! fourier_dim
49 !!
50 !! FUNCTION
51 !! Give a number n_next > n compatible for the FFT
52 !!
53 !! SOURCE
54 !!
55 subroutine fourier_dim(n,n_next)
56 integer, intent(in) :: n
57 integer, intent(out) :: n_next
58
59 !Local variables
60 integer, parameter :: ndata1024 = 149, ndata = 149
61 !Multiple of 2,3,5
62 integer, dimension(ndata), parameter :: idata = (/ &
63 3, 4, 5, 6, 8, 9, 12, 15, 16, 18, &
64 20, 24, 25, 27, 30, 32, 36, 40, 45, 48, &
65 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, &
66 108, 120, 125, 128, 135, 144, 150, 160, 162, 180, &
67 192, 200, 216, 225, 240, 243, 256, 270, 288, 300, &
68 320, 324, 360, 375, 384, 400, 405, 432, 450, 480, &
69 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, &
70 720, 729, 750, 768, 800, 810, 864, 900, 960, 972, &
71 1000, 1024, 1080, 1125, 1152, 1200, 1215, 1280, 1296, 1350,&
72 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875, 1920,&
73 1944, 2000, 2025, 2048, 2160, 2250, 2304, 2400, 2430, 2500,&
74 2560, 2592, 2700, 2880, 3000, 3072, 3125, 3200, 3240, 3375,&
75 3456, 3600, 3750, 3840, 3888, 4000, 4050, 4096, 4320, 4500,&
76 4608, 4800, 5000, 5120, 5184, 5400, 5625, 5760, 6000, 6144,&
77 6400, 6480, 6750, 6912, 7200, 7500, 7680, 8000, 8192 /)
78 integer :: i
79
80 loop_data: do i = 1,ndata1024
81 if (n <= idata(i)) then
82 n_next = idata(i)
83 return
84 end if
85 end do loop_data
86 write(unit=*,fmt=*) "fourier_dim: ",n," is bigger than ",idata(ndata1024)
87 stop
88 end subroutine fourier_dim
89 !!***
90
91
92 ! Copyright (C) Stefan Goedecker, CEA Grenoble, 2002
93 ! This file is distributed under the terms of the
94 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
95
96
97 ! --------------------------------------------------------------
98 ! 3-dimensional complex-complex FFT routine:
99 ! When compared to the best vendor implementations on RISC architectures
100 ! it gives close to optimal performance (perhaps loosing 20 percent in speed)
101 ! and it is significanly faster than many not so good vendor implementations
102 ! as well as other portable FFT`s.
103 ! On all vector machines tested so far (Cray, NEC, Fujitsu) is
104 ! was significantly faster than the vendor routines
105 ! The theoretical background is described in :
106 ! 1) S. Goedecker: Rotating a three-dimensional array in optimal
107 ! positions for vector processing: Case study for a three-dimensional Fast
108 ! Fourier Transform, Comp. Phys. Commun. \underline{76}, 294 (1993)
109 ! Citing of this reference is greatly appreciated if the routines are used
110 ! for scientific work.
111
112
113 ! Presumably good compiler flags:
114 ! IBM, serial power 2: xlf -qarch=pwr2 -O2 -qmaxmem=-1
115 ! with OpenMP: IBM: xlf_r -qfree -O4 -qarch=pwr3 -qtune=pwr3 -qsmp=omp -qmaxmem=-1 ;
116 ! a.out
117 ! DEC: f90 -O3 -arch ev67 -pipeline
118 ! with OpenMP: DEC: f90 -O3 -arch ev67 -pipeline -omp -lelan ;
119 ! prun -N1 -c4 a.out
120
121
122 !-----------------------------------------------------------
123
124 ! FFT PART -----------------------------------------------------------------
125 ! CALCULATES THE DISCRETE FOURIER TRANSFORM F(I1,I2,I3)=
126 ! S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) R(j1,j2,j3)
127 ! with optimal performance on vector computer, workstations and
128 ! multiprocessor shared memory computers using OpenMP compiler directives
129 ! INPUT:
130 ! n1,n2,n3:physical dimension of the transform. It must be a
131 ! product of the prime factors 2,3,5, but greater than 3.
132 ! If two ni`s are equal it is recommended to place them
133 ! behind each other.
134 ! nd1,nd2,nd3:memory dimension of Z. ndi must always be greater or
135 ! equal than ni. On a vector machine, it is recomended
136 ! to chose ndi=ni if ni is odd and ndi=ni+1 if ni is
137 ! even to obtain optimal execution speed. On RISC
138 ! machines ndi=ni is usually fine for odd ni, for even
139 ! ni one should try ndi=ni+1, ni+2, ni+4 to find the
140 ! optimal performance.
141 ! inzee=1: first part of Z is data (input) array,
142 ! second part work array
143 ! inzee=2: first part of Z is work array, second part data array
144 ! Z(1,i1,i2,i3,inzee)=real(R(i1,i2,i3))
145 ! Z(2,i1,i2,i3,inzee)=imag(R(i1,i2,i3))
146 ! OUTPUT:
147 ! inzee=1: first part of Z is data (output) array,
148 ! second part work array
149 ! inzee=2: first part of Z is work array, second part data array
150 ! real(F(i1,i2,i3))=Z(1,i1,i2,i3,inzee)
151 ! imag(F(i1,i2,i3))=Z(2,i1,i2,i3,inzee)
152 ! inzee on output is in general different from inzee on input
153 ! The input data are always overwritten independently of the
154 ! value of inzee.
155 ! PERFORMANCE AND THE NCACHE
156 ! The most important feature for performance is the right choice of
157 ! the parameter ncache. On a vector machine ncache has to be put to 0.
158 ! On a RISC machine with cache, it is very important to find the optimal
159 ! value of NCACHE. NCACHE determines the size of the work array zw, that
160 ! has to fit into cache. It has therefore to be chosen to equal roughly
161 ! half the size of the physical cache in units of real*8 numbers.
162 ! If the machine has 2 cache levels it can not be predicted which
163 ! cache level will be the most relevant one for choosing ncache.
164 ! The optimal value of ncache can easily be determined by numerical
165 ! experimentation. A too large value of ncache leads to a dramatic
166 ! and sudden decrease of performance, a too small value to a to a
167 ! slow and less dramatic decrease of performance. If NCACHE is set
168 ! to a value so small, that not even a single one dimensional transform
169 ! can be done in the workarray zw, the program stops with an error
170 ! message.
171 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
172 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
173 ! Copyright (C) Stefan Goedecker, CEA Grenoble, 2002
174 ! This file is distributed under the terms of the
175 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
176
177 subroutine fft(n1, n2, n3, nd1, nd2, nd3, z, isign, inzee)
178 integer, intent(in) :: n1, n2, n3, nd1, nd2, nd3
179 real(real64), intent(inout) :: z(1:2, 1:nd1*nd2*nd3, 1:2)
180 integer, intent(in) :: isign
181 integer, intent(inout) :: inzee
182
183 real(real64), allocatable :: zw(:, :, :)
184 real(real64), allocatable :: trig(:, :)
185 integer, allocatable :: after(:), now(:), before(:)
186
187 integer :: ncache, nfft, mm, i, ic
188 integer :: npr, iam, inzet, m, lot, nn, lotomp, j, jompa, jompb
189 integer :: n, ma, mb, jj, inzeep
190
191 call profiling_in("SGFFT")
192
193 if (max(n1,n2,n3) > 8192) stop 'fft:8192 limit reached'
194
195 ! some reasonable values of ncache:
196 ! IBM/RS6000/590: 16*1024 ; IBM/RS6000/390: 3*1024 ;
197 ! IBM/PwPC: 1*1024 ; SGI/MIPS/R8000: 16*1024 ; DEC/Alpha/EV5 and EV6 6*1024
198 ! But if you care about performance find the optimal value of ncache yourself!
199 ! On all vector machines: ncache=0
200
201 ncache = 8192
202 if (ncache /= 0 .and. ncache <= max(n1,n2,n3)*4) ncache=max(n1,n2,n3/2)*4
203
204 ! check whether input values are reasonable
205 if (inzee <= 0 .or. inzee >= 3) stop 'fft:wrong inzee'
206 if (isign /= 1 .and. isign /= -1) stop 'fft:wrong isign'
207 if (n1 > nd1) stop 'fft:n1>nd1'
208 if (n2 > nd2) stop 'fft:n2>nd2'
209 if (n3 > nd3) stop 'fft:n3>nd3'
210
211
212 ! vector computer with memory banks:
213 if (ncache == 0) then
214 safe_allocate(trig(1:2,1:8192))
215 safe_allocate(after(1:20))
216 safe_allocate(now(1:20))
217 safe_allocate(before(1:20))
218
219 call ctrig(n3,trig,after,before,now,isign,ic)
220 nfft = nd1*n2
221 mm = nd1*nd2
222
223 do i = 1,ic-1
224 call fftstp(mm,nfft,nd3,mm,nd3,z(1,1,inzee),z(1,1,3-inzee), &
225 trig,after(i),now(i),before(i),isign)
226 inzee = 3-inzee
227 end do
228
229 i = ic
230
231 call fftrot(mm,nfft,nd3,mm,nd3,z(1,1,inzee),z(1,1,3-inzee), &
232 trig,after(i),now(i),before(i),isign)
233
234 inzee = 3-inzee
235
236 if (n2 /= n3) call ctrig(n2,trig,after,before,now,isign,ic)
237 nfft = nd3*n1
238 mm = nd3*nd1
239
240 do i = 1,ic-1
241 call fftstp(mm,nfft,nd2,mm,nd2,z(1,1,inzee),z(1,1,3-inzee), &
242 trig,after(i),now(i),before(i),isign)
243 inzee = 3-inzee
244 end do
245
246 i = ic
247 call fftrot(mm,nfft,nd2,mm,nd2,z(1,1,inzee),z(1,1,3-inzee), &
248 trig,after(i),now(i),before(i),isign)
249 inzee = 3-inzee
250
251 if (n1 /= n2) call ctrig(n1,trig,after,before,now,isign,ic)
252 nfft = nd2*n3
253 mm = nd2*nd3
254
255 do i = 1,ic-1
256 call fftstp(mm,nfft,nd1,mm,nd1,z(1,1,inzee),z(1,1,3-inzee), &
257 trig,after(i),now(i),before(i),isign)
258 inzee = 3-inzee
259 end do
260
261 i = ic
262
263 call fftrot(mm,nfft,nd1,mm,nd1,z(1,1,inzee),z(1,1,3-inzee), &
264 trig,after(i),now(i),before(i),isign)
265
266 inzee = 3-inzee
267
268 ! RISC machine with cache:
269 else
270 ! INtel IFC does not understand default(private)
271
272 !$omp parallel &
273 !$omp private(zw,trig,before,after,now,i,j,iam,npr,jj,ma,mb,mm,ic,n,m,jompa,jompb,lot,lotomp,inzeep,inzet,nn,nfft) &
274 !$omp shared(n1,n2,n3,nd1,nd2,nd3,z,isign,inzee,ncache)
275 npr = 1
276!$ npr = omp_get_num_threads()
277 iam = 0
278!$ iam = omp_get_thread_num()
279
280 safe_allocate(zw(1:2,1:ncache/4,1:2))
281 safe_allocate(trig(1:2,1:1024))
282 safe_allocate(after(1:20))
283 safe_allocate(now(1:20))
284 safe_allocate(before(1:20))
285
286 inzet = inzee
287 ! TRANSFORM ALONG Z AXIS
288
289 mm = nd1*nd2
290 m = nd3
291 lot = max(1,ncache/(4*n3))
292 nn = lot
293 n = n3
294 if (2*n*lot*2 > ncache) stop 'fft:ncache1'
295
296 call ctrig(n3,trig,after,before,now,isign,ic)
297
298 if (ic == 1) then
299 i = ic
300 lotomp=(nd1*n2)/npr+1
301 ma = iam*lotomp+1
302 mb = min((iam+1)*lotomp,nd1*n2)
303 nfft = mb-ma+1
304 j = ma
305 jj = j*nd3-nd3+1
306 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
307 trig,after(i),now(i),before(i),isign)
308
309 else
310
311 lotomp = (nd1*n2)/npr+1
312 jompa = iam*lotomp+1
313 jompb = min((iam+1)*lotomp,nd1*n2)
314 do j = jompa,jompb,lot
315 ma = j
316 mb = min(j+(lot-1),jompb)
317 nfft = mb-ma+1
318 jj = j*nd3-nd3+1
319
320 i = 1
321 inzeep = 2
322 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
323 trig,after(i),now(i),before(i),isign)
324 inzeep = 1
325
326 do i = 2,ic-1
327 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
328 trig,after(i),now(i),before(i),isign)
329 inzeep = 3-inzeep
330 end do
331 i = ic
332 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
333 trig,after(i),now(i),before(i),isign)
334 end do
335 end if
336
337 inzet = 3-inzet
338
339 !$omp barrier
340
341 ! TRANSFORM ALONG Y AXIS
342 mm = nd3*nd1
343 m = nd2
344 lot = max(1,ncache/(4*n2))
345 nn = lot
346 n = n2
347 if (2*n*lot*2 > ncache) stop 'fft:ncache2'
348
349 if (n2 /= n3) call ctrig(n2,trig,after,before,now,isign,ic)
350
351 if (ic == 1) then
352 i = ic
353 lotomp = (nd3*n1)/npr+1
354 ma = iam*lotomp+1
355 mb = min((iam+1)*lotomp,nd3*n1)
356 nfft = mb-ma+1
357 j = ma
358 jj = j*nd2-nd2+1
359 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
360 trig,after(i),now(i),before(i),isign)
361
362 else
363
364 lotomp = (nd3*n1)/npr+1
365 jompa = iam*lotomp+1
366 jompb = min((iam+1)*lotomp,nd3*n1)
367 do j = jompa,jompb,lot
368 ma = j
369 mb = min(j+(lot-1),jompb)
370 nfft = mb-ma+1
371 jj = j*nd2-nd2+1
372
373 i = 1
374 inzeep = 2
375 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
376 trig,after(i),now(i),before(i),isign)
377 inzeep = 1
378
379 do i = 2,ic-1
380 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
381 trig,after(i),now(i),before(i),isign)
382 inzeep = 3 - inzeep
383 end do
384
385 i = ic
386 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
387 trig,after(i),now(i),before(i),isign)
388 end do
389 end if
390 inzet = 3 - inzet
391
392 !$omp barrier
393
394 ! TRANSFORM ALONG X AXIS
395 mm = nd2*nd3
396 m = nd1
397 lot = max(1,ncache/(4*n1))
398 nn = lot
399 n = n1
400 if (2*n*lot*2 > ncache) stop 'fft:ncache3'
401
402 if (n1 /= n2) call ctrig(n1,trig,after,before,now,isign,ic)
403
404 if (ic == 1) then
405 i = ic
406 lotomp = (nd2*n3)/npr+1
407 ma = iam*lotomp+1
408 mb = min((iam+1)*lotomp,nd2*n3)
409 nfft = mb-ma+1
410 j = ma
411 jj = j*nd1-nd1+1
412 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
413 trig,after(i),now(i),before(i),isign)
414
415 else
416
417 lotomp=(nd2*n3)/npr+1
418 jompa=iam*lotomp+1
419 jompb=min((iam+1)*lotomp,nd2*n3)
420 do j=jompa,jompb,lot
421 ma=j
422 mb=min(j+(lot-1),jompb)
423 nfft=mb-ma+1
424 jj=j*nd1-nd1+1
425
426 i=1
427 inzeep=2
428 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
429 trig,after(i),now(i),before(i),isign)
430 inzeep=1
431
432 do i=2,ic-1
433 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
434 trig,after(i),now(i),before(i),isign)
435 inzeep=3-inzeep
436 end do
437 i=ic
438 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
439 trig,after(i),now(i),before(i),isign)
440 end do
441 end if
442 inzet=3-inzet
443
444 safe_deallocate_a(zw)
445 safe_deallocate_a(trig)
446 safe_deallocate_a(after)
447 safe_deallocate_a(now)
448 safe_deallocate_a(before)
449 if (iam == 0) inzee=inzet
450 !$omp end parallel
451
452 end if
453
454 call profiling_out("SGFFT")
455
456 end subroutine fft
457
458 ! ---------------------------------------------------------------------------------
459
460 ! Copyright (C) Stefan Goedecker, Lausanne, Switzerland, August 1, 1991
461 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
462 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
463 ! This file is distributed under the terms of the
464 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
465
466 ! Different factorizations affect the performance
467 ! Factoring 64 as 4*4*4 might for example be faster on some machines than 8*8.
468 subroutine ctrig(n,trig,after,before,now,isign,ic)
469 integer :: n, isign, ic
470 integer :: now(7), after(7), before(7)
471 real(real64) :: trig(:,:)
472
473 integer :: i, j, itt, nh
474 real(real64) :: angle, trigc, trigs, twopi
475 INTEGER, DIMENSION(7,149) :: idata
476
477 ! The factor 6 is only allowed in the first place!
478 data ((idata(i,j),i=1,7),j=1,74) / &
479 3, 3, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, &
480 5, 5, 1, 1, 1, 1, 1, 6, 6, 1, 1, 1, 1, 1, &
481 8, 8, 1, 1, 1, 1, 1, 9, 3, 3, 1, 1, 1, 1, &
482 12, 4, 3, 1, 1, 1, 1, 15, 5, 3, 1, 1, 1, 1, &
483 16, 4, 4, 1, 1, 1, 1, 18, 6, 3, 1, 1, 1, 1, &
484 20, 5, 4, 1, 1, 1, 1, 24, 8, 3, 1, 1, 1, 1, &
485 25, 5, 5, 1, 1, 1, 1, 27, 3, 3, 3, 1, 1, 1, &
486 30, 6, 5, 1, 1, 1, 1, 32, 8, 4, 1, 1, 1, 1, &
487 36, 4, 3, 3, 1, 1, 1, 40, 8, 5, 1, 1, 1, 1, &
488 45, 5, 3, 3, 1, 1, 1, 48, 4, 4, 3, 1, 1, 1, &
489 54, 6, 3, 3, 1, 1, 1, 60, 5, 4, 3, 1, 1, 1, &
490 64, 8, 8, 1, 1, 1, 1, 72, 8, 3, 3, 1, 1, 1, &
491 75, 5, 5, 3, 1, 1, 1, 80, 5, 4, 4, 1, 1, 1, &
492 81, 3, 3, 3, 3, 1, 1, 90, 6, 5, 3, 1, 1, 1, &
493 96, 8, 4, 3, 1, 1, 1, 100, 5, 5, 4, 1, 1, 1, &
494 108, 4, 3, 3, 3, 1, 1, 120, 8, 5, 3, 1, 1, 1, &
495 125, 5, 5, 5, 1, 1, 1, 128, 8, 4, 4, 1, 1, 1, &
496 135, 5, 3, 3, 3, 1, 1, 144, 6, 8, 3, 1, 1, 1, &
497 150, 6, 5, 5, 1, 1, 1, 160, 8, 5, 4, 1, 1, 1, &
498 162, 6, 3, 3, 3, 1, 1, 180, 5, 4, 3, 3, 1, 1, &
499 192, 6, 8, 4, 1, 1, 1, 200, 8, 5, 5, 1, 1, 1, &
500 216, 8, 3, 3, 3, 1, 1, 225, 5, 5, 3, 3, 1, 1, &
501 240, 6, 8, 5, 1, 1, 1, 243, 3, 3, 3, 3, 3, 1, &
502 256, 8, 8, 4, 1, 1, 1, 270, 6, 5, 3, 3, 1, 1, &
503 288, 8, 4, 3, 3, 1, 1, 300, 5, 5, 4, 3, 1, 1, &
504 320, 5, 4, 4, 4, 1, 1, 324, 4, 3, 3, 3, 3, 1, &
505 360, 8, 5, 3, 3, 1, 1, 375, 5, 5, 5, 3, 1, 1, &
506 384, 8, 4, 4, 3, 1, 1, 400, 5, 5, 4, 4, 1, 1, &
507 405, 5, 3, 3, 3, 3, 1, 432, 4, 4, 3, 3, 3, 1, &
508 450, 6, 5, 5, 3, 1, 1, 480, 8, 5, 4, 3, 1, 1, &
509 486, 6, 3, 3, 3, 3, 1, 500, 5, 5, 5, 4, 1, 1, &
510 512, 8, 8, 8, 1, 1, 1, 540, 5, 4, 3, 3, 3, 1, &
511 576, 4, 4, 4, 3, 3, 1, 600, 8, 5, 5, 3, 1, 1, &
512 625, 5, 5, 5, 5, 1, 1, 640, 8, 5, 4, 4, 1, 1, &
513 648, 8, 3, 3, 3, 3, 1, 675, 5, 5, 3, 3, 3, 1, &
514 720, 5, 4, 4, 3, 3, 1, 729, 3, 3, 3, 3, 3, 3, &
515 750, 6, 5, 5, 5, 1, 1, 768, 4, 4, 4, 4, 3, 1/
516
517 data ((idata(i,j),i=1,7),j=75,149) / &
518 800, 8, 5, 5, 4, 1, 1, 810, 6, 5, 3, 3, 3, 1, &
519 864, 8, 4, 3, 3, 3, 1, 900, 5, 5, 4, 3, 3, 1, &
520 960, 5, 4, 4, 4, 3, 1, 972, 4, 3, 3, 3, 3, 3, &
521 1000, 8, 5, 5, 5, 1, 1, 1024, 4, 4, 4, 4, 4, 1, &
522 1080, 6, 5, 4, 3, 3, 1, 1125, 5, 5, 5, 3, 3, 1, &
523 1152, 6, 4, 4, 4, 3, 1, 1200, 6, 8, 5, 5, 1, 1, &
524 1215, 5, 3, 3, 3, 3, 3, 1280, 8, 8, 5, 4, 1, 1, &
525 1296, 6, 8, 3, 3, 3, 1, 1350, 6, 5, 5, 3, 3, 1, &
526 1440, 6, 5, 4, 4, 3, 1, 1458, 6, 3, 3, 3, 3, 3, &
527 1500, 5, 5, 5, 4, 3, 1, 1536, 6, 8, 8, 4, 1, 1, &
528 1600, 8, 8, 5, 5, 1, 1, 1620, 5, 4, 3, 3, 3, 3, &
529 1728, 6, 8, 4, 3, 3, 1, 1800, 6, 5, 5, 4, 3, 1, &
530 1875, 5, 5, 5, 5, 3, 1, 1920, 6, 5, 4, 4, 4, 1, &
531 1944, 6, 4, 3, 3, 3, 3, 2000, 5, 5, 5, 4, 4, 1, &
532 2025, 5, 5, 3, 3, 3, 3, 2048, 8, 4, 4, 4, 4, 1, &
533 2160, 6, 8, 5, 3, 3, 1, 2250, 6, 5, 5, 5, 3, 1, &
534 2304, 6, 8, 4, 4, 3, 1, 2400, 6, 5, 5, 4, 4, 1, &
535 2430, 6, 5, 3, 3, 3, 3, 2500, 5, 5, 5, 5, 4, 1, &
536 2560, 8, 5, 4, 4, 4, 1, 2592, 6, 4, 4, 3, 3, 3, &
537 2700, 5, 5, 4, 3, 3, 3, 2880, 6, 8, 5, 4, 3, 1, &
538 3000, 6, 5, 5, 5, 4, 1, 3072, 6, 8, 4, 4, 4, 1, &
539 3125, 5, 5, 5, 5, 5, 1, 3200, 8, 5, 5, 4, 4, 1, &
540 3240, 6, 5, 4, 3, 3, 3, 3375, 5, 5, 5, 3, 3, 3, &
541 3456, 6, 4, 4, 4, 3, 3, 3600, 6, 8, 5, 5, 3, 1, &
542 3750, 6, 5, 5, 5, 5, 1, 3840, 6, 8, 5, 4, 4, 1, &
543 3888, 6, 8, 3, 3, 3, 3, 4000, 8, 5, 5, 5, 4, 1, &
544 4050, 6, 5, 5, 3, 3, 3, 4096, 8, 8, 4, 4, 4, 1, &
545 4320, 6, 5, 4, 4, 3, 3, 4500, 5, 5, 5, 4, 3, 3, &
546 4608, 6, 8, 8, 4, 3, 1, 4800, 6, 8, 5, 5, 4, 1, &
547 5000, 8, 5, 5, 5, 5, 1, 5120, 8, 8, 5, 4, 4, 1, &
548 5184, 6, 8, 4, 3, 3, 3, 5400, 6, 5, 5, 4, 3, 3, &
549 5625, 5, 5, 5, 5, 3, 3, 5760, 6, 8, 8, 5, 3, 1, &
550 6000, 6, 8, 5, 5, 5, 1, 6144, 6, 8, 8, 4, 4, 1, &
551 6400, 8, 8, 5, 5, 4, 1, 6480, 6, 8, 5, 3, 3, 3, &
552 6750, 6, 5, 5, 5, 3, 3, 6912, 6, 8, 4, 4, 3, 3, &
553 7200, 6, 5, 5, 4, 4, 3, 7500, 5, 5, 5, 5, 4, 3, &
554 7680, 6, 8, 8, 5, 4, 1, 8000, 8, 8, 5, 5, 5, 1, &
555 8192, 8, 8, 8, 4, 4, 1 /
556
557 do i = 1, 149
558 if (n == idata(1,i)) then
559 ic=0
560 do j = 1,6
561 itt=idata(1+j,i)
562 if (itt > 1) then
563 ic=ic+1
564 now(j)=idata(1+j,i)
565 else
566 goto 1000
567 end if
568 end do
569 goto 1000
570 end if
571 end do
572
573 print*,'VALUE OF',n,'NOT ALLOWED FOR FFT, ALLOWED VALUES ARE:'
57437 format(15(i5))
575 write(6,37) (idata(1,i),i=1,149)
576 stop
5771000 continue
578
579 after(1)=1
580 before(ic)=1
581 do i = 2, ic
582 after(i)=after(i-1)*now(i-1)
583 before(ic-i+1)=before(ic-i+2)*now(ic-i+2)
584 end do
585
586 twopi=6.283185307179586_real64
587 angle=isign*twopi/n
588
589 if (mod(n,2) == 0) then
590 nh=n/2
591 trig(1,1)=1.0_8
592 trig(2,1)=0.0_8
593 trig(1,nh+1)=-1.0_8
594 trig(2,nh+1)=0.0_8
595 do i = 1, nh - 1
596 trigc=cos(i*angle)
597 trigs=sin(i*angle)
598 trig(1,i+1)=trigc
599 trig(2,i+1)=trigs
600 trig(1,n-i+1)=trigc
601 trig(2,n-i+1)=-trigs
602 end do
603 else
604 nh=(n-1)/2
605 trig(1,1)=1.0_8
606 trig(2,1)=0.0_8
607 do i = 1, nh
608 trigc=cos(i*angle)
609 trigs=sin(i*angle)
610 trig(1,i+1)=trigc
611 trig(2,i+1)=trigs
612 trig(1,n-i+1)=trigc
613 trig(2,n-i+1)=-trigs
614 end do
615 end if
616
617 end subroutine ctrig
618
619
620 ! ------------------------------------------------------------------------
621 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
622 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
623 ! This file is distributed under the terms of the
624 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
625
626 subroutine fftstp(mm,nfft,m,nn,n,zin,zout,trig,after,now,before,isign)
627 integer :: mm, nfft, m, nn, n, isign
628 integer :: after, before, atn, atb, now
629 real(real64) :: trig(:,:)
630 real(real64) :: zin(2, mm, m), zout(2, nn, n)
631
632 real(real64) :: rt2i, dp, cp, cm, ci5, cr5, ci6, cr6, am, ap, ci8, cr8
633 real(real64) :: r, r1, r2, r3, r4, r5, r6, r7, r8_, r25, r34
634 real(real64) :: s, s1, s2, s3, s4, s5, s6, s7, s8, s25, s34
635 real(real64) :: bb, bm, dm, bp
636 real(real64) :: cr2, ci2, cr3, ci3, cr4, ci4, cr7, ci7
637 real(real64) :: cos2, sin2, cos4, sin4
638 real(real64) :: ur1, ur2, ur3, ui1, ui2, ui3
639 real(real64) :: vr1, vr2, vr3, vi1, vi2, vi3
640 integer :: ia, ib, j, ias, itrig, itt
641 integer :: nin1, nin2, nin3, nin4, nin5, nin6, nin7, nin8
642 integer :: nout1, nout2, nout3, nout4, nout5, nout6, nout7, nout8
643
644 atn=after*now
645 atb=after*before
646
647 ! sqrt(.5_real64)
648 rt2i=0.7071067811865475_real64
649 if (now == 2) then
650 ia=1
651 nin1=ia-after
652 nout1=ia-atn
653 do ib = 1, before
654 nin1=nin1+after
655 nin2=nin1+atb
656 nout1=nout1+atn
657 nout2=nout1+after
658 do j = 1, nfft
659 r1=zin(1,j,nin1)
660 s1=zin(2,j,nin1)
661 r2=zin(1,j,nin2)
662 s2=zin(2,j,nin2)
663 zout(1,j,nout1)= r2 + r1
664 zout(2,j,nout1)= s2 + s1
665 zout(1,j,nout2)= r1 - r2
666 zout(2,j,nout2)= s1 - s2
667 end do
668 end do
669 do ia = 2, after
670 ias=ia-1
671 if (2*ias == after) then
672 if (isign == 1) then
673 nin1=ia-after
674 nout1=ia-atn
675 do ib=1,before
676 nin1=nin1+after
677 nin2=nin1+atb
678 nout1=nout1+atn
679 nout2=nout1+after
680 do j = 1,nfft
681 r1=zin(1,j,nin1)
682 s1=zin(2,j,nin1)
683 r2=zin(2,j,nin2)
684 s2=zin(1,j,nin2)
685 zout(1,j,nout1)= r1 - r2
686 zout(2,j,nout1)= s2 + s1
687 zout(1,j,nout2)= r2 + r1
688 zout(2,j,nout2)= s1 - s2
689 end do
690 end do
691 else
692 nin1=ia-after
693 nout1=ia-atn
694 do ib=1,before
695 nin1=nin1+after
696 nin2=nin1+atb
697 nout1=nout1+atn
698 nout2=nout1+after
699 do j = 1,nfft
700 r1=zin(1,j,nin1)
701 s1=zin(2,j,nin1)
702 r2=zin(2,j,nin2)
703 s2=zin(1,j,nin2)
704 zout(1,j,nout1)= r2 + r1
705 zout(2,j,nout1)= s1 - s2
706 zout(1,j,nout2)= r1 - r2
707 zout(2,j,nout2)= s2 + s1
708 end do
709 end do
710 end if
711 else if (4*ias == after) then
712 if (isign == 1) then
713 nin1=ia-after
714 nout1=ia-atn
715 do ib = 1, before
716 nin1=nin1+after
717 nin2=nin1+atb
718 nout1=nout1+atn
719 nout2=nout1+after
720 do j = 1, nfft
721 r1=zin(1,j,nin1)
722 s1=zin(2,j,nin1)
723 r=zin(1,j,nin2)
724 s=zin(2,j,nin2)
725 r2=(r - s)*rt2i
726 s2=(r + s)*rt2i
727 zout(1,j,nout1)= r2 + r1
728 zout(2,j,nout1)= s2 + s1
729 zout(1,j,nout2)= r1 - r2
730 zout(2,j,nout2)= s1 - s2
731 end do
732 end do
733 else
734 nin1=ia-after
735 nout1=ia-atn
736 do ib=1,before
737 nin1=nin1+after
738 nin2=nin1+atb
739 nout1=nout1+atn
740 nout2=nout1+after
741 do j = 1,nfft
742 r1=zin(1,j,nin1)
743 s1=zin(2,j,nin1)
744 r=zin(1,j,nin2)
745 s=zin(2,j,nin2)
746 r2=(r + s)*rt2i
747 s2=(s - r)*rt2i
748 zout(1,j,nout1)= r2 + r1
749 zout(2,j,nout1)= s2 + s1
750 zout(1,j,nout2)= r1 - r2
751 zout(2,j,nout2)= s1 - s2
752 end do
753 end do
754 end if
755 else if (4*ias == 3*after) then
756 if (isign == 1) then
757 nin1=ia-after
758 nout1=ia-atn
759 do ib = 1, before
760 nin1=nin1+after
761 nin2=nin1+atb
762 nout1=nout1+atn
763 nout2=nout1+after
764 do j = 1, nfft
765 r1=zin(1,j,nin1)
766 s1=zin(2,j,nin1)
767 r=zin(1,j,nin2)
768 s=zin(2,j,nin2)
769 r2=(r + s)*rt2i
770 s2=(r - s)*rt2i
771 zout(1,j,nout1)= r1 - r2
772 zout(2,j,nout1)= s2 + s1
773 zout(1,j,nout2)= r2 + r1
774 zout(2,j,nout2)= s1 - s2
775 end do
776 end do
777 else
778 nin1=ia-after
779 nout1=ia-atn
780 do ib=1,before
781 nin1=nin1+after
782 nin2=nin1+atb
783 nout1=nout1+atn
784 nout2=nout1+after
785 do j = 1,nfft
786 r1=zin(1,j,nin1)
787 s1=zin(2,j,nin1)
788 r=zin(1,j,nin2)
789 s=zin(2,j,nin2)
790 r2=(s - r)*rt2i
791 s2=(r + s)*rt2i
792 zout(1,j,nout1)= r2 + r1
793 zout(2,j,nout1)= s1 - s2
794 zout(1,j,nout2)= r1 - r2
795 zout(2,j,nout2)= s2 + s1
796 end do
797 end do
798 end if
799 else
800 itrig=ias*before+1
801 cr2=trig(1,itrig)
802 ci2=trig(2,itrig)
803 nin1=ia-after
804 nout1=ia-atn
805 do ib=1,before
806 nin1=nin1+after
807 nin2=nin1+atb
808 nout1=nout1+atn
809 nout2=nout1+after
810 do j = 1,nfft
811 r1=zin(1,j,nin1)
812 s1=zin(2,j,nin1)
813 r=zin(1,j,nin2)
814 s=zin(2,j,nin2)
815 r2=r*cr2 - s*ci2
816 s2=r*ci2 + s*cr2
817 zout(1,j,nout1)= r2 + r1
818 zout(2,j,nout1)= s2 + s1
819 zout(1,j,nout2)= r1 - r2
820 zout(2,j,nout2)= s1 - s2
821 end do
822 end do
823 end if
824 end do
825 else if (now == 4) then
826 if (isign == 1) then
827 ia=1
828 nin1=ia-after
829 nout1=ia-atn
830 do ib = 1, before
831 nin1=nin1+after
832 nin2=nin1+atb
833 nin3=nin2+atb
834 nin4=nin3+atb
835 nout1=nout1+atn
836 nout2=nout1+after
837 nout3=nout2+after
838 nout4=nout3+after
839 do j = 1, nfft
840 r1=zin(1,j,nin1)
841 s1=zin(2,j,nin1)
842 r2=zin(1,j,nin2)
843 s2=zin(2,j,nin2)
844 r3=zin(1,j,nin3)
845 s3=zin(2,j,nin3)
846 r4=zin(1,j,nin4)
847 s4=zin(2,j,nin4)
848 r=r1 + r3
849 s=r2 + r4
850 zout(1,j,nout1) = r + s
851 zout(1,j,nout3) = r - s
852 r=r1 - r3
853 s=s2 - s4
854 zout(1,j,nout2) = r - s
855 zout(1,j,nout4) = r + s
856 r=s1 + s3
857 s=s2 + s4
858 zout(2,j,nout1) = r + s
859 zout(2,j,nout3) = r - s
860 r=s1 - s3
861 s=r2 - r4
862 zout(2,j,nout2) = r + s
863 zout(2,j,nout4) = r - s
864 end do
865 end do
866 do ia = 2, after
867 ias=ia-1
868 if (2*ias == after) then
869 nin1=ia-after
870 nout1=ia-atn
871 do ib = 1, before
872 nin1=nin1+after
873 nin2=nin1+atb
874 nin3=nin2+atb
875 nin4=nin3+atb
876 nout1=nout1+atn
877 nout2=nout1+after
878 nout3=nout2+after
879 nout4=nout3+after
880 do j = 1, nfft
881 r1=zin(1,j,nin1)
882 s1=zin(2,j,nin1)
883 r=zin(1,j,nin2)
884 s=zin(2,j,nin2)
885 r2=(r-s)*rt2i
886 s2=(r+s)*rt2i
887 r3=zin(2,j,nin3)
888 s3=zin(1,j,nin3)
889 r=zin(1,j,nin4)
890 s=zin(2,j,nin4)
891 r4=(r + s)*rt2i
892 s4=(r - s)*rt2i
893 r=r1 - r3
894 s=r2 - r4
895 zout(1,j,nout1) = r + s
896 zout(1,j,nout3) = r - s
897 r=r1 + r3
898 s=s2 - s4
899 zout(1,j,nout2) = r - s
900 zout(1,j,nout4) = r + s
901 r=s1 + s3
902 s=s2 + s4
903 zout(2,j,nout1) = r + s
904 zout(2,j,nout3) = r - s
905 r=s1 - s3
906 s=r2 + r4
907 zout(2,j,nout2) = r + s
908 zout(2,j,nout4) = r - s
909 end do
910 end do
911 else
912 itt=ias*before
913 itrig=itt+1
914 cr2=trig(1,itrig)
915 ci2=trig(2,itrig)
916 itrig=itrig+itt
917 cr3=trig(1,itrig)
918 ci3=trig(2,itrig)
919 itrig=itrig+itt
920 cr4=trig(1,itrig)
921 ci4=trig(2,itrig)
922 nin1=ia-after
923 nout1=ia-atn
924 do ib = 1, before
925 nin1=nin1+after
926 nin2=nin1+atb
927 nin3=nin2+atb
928 nin4=nin3+atb
929 nout1=nout1+atn
930 nout2=nout1+after
931 nout3=nout2+after
932 nout4=nout3+after
933 do j = 1, nfft
934 r1=zin(1,j,nin1)
935 s1=zin(2,j,nin1)
936 r=zin(1,j,nin2)
937 s=zin(2,j,nin2)
938 r2=r*cr2 - s*ci2
939 s2=r*ci2 + s*cr2
940 r=zin(1,j,nin3)
941 s=zin(2,j,nin3)
942 r3=r*cr3 - s*ci3
943 s3=r*ci3 + s*cr3
944 r=zin(1,j,nin4)
945 s=zin(2,j,nin4)
946 r4=r*cr4 - s*ci4
947 s4=r*ci4 + s*cr4
948 r=r1 + r3
949 s=r2 + r4
950 zout(1,j,nout1) = r + s
951 zout(1,j,nout3) = r - s
952 r=r1 - r3
953 s=s2 - s4
954 zout(1,j,nout2) = r - s
955 zout(1,j,nout4) = r + s
956 r=s1 + s3
957 s=s2 + s4
958 zout(2,j,nout1) = r + s
959 zout(2,j,nout3) = r - s
960 r=s1 - s3
961 s=r2 - r4
962 zout(2,j,nout2) = r + s
963 zout(2,j,nout4) = r - s
964 end do
965 end do
966 end if
967 end do
968 else
969 ia=1
970 nin1=ia-after
971 nout1=ia-atn
972 do ib = 1, before
973 nin1=nin1+after
974 nin2=nin1+atb
975 nin3=nin2+atb
976 nin4=nin3+atb
977 nout1=nout1+atn
978 nout2=nout1+after
979 nout3=nout2+after
980 nout4=nout3+after
981 do j = 1, nfft
982 r1=zin(1,j,nin1)
983 s1=zin(2,j,nin1)
984 r2=zin(1,j,nin2)
985 s2=zin(2,j,nin2)
986 r3=zin(1,j,nin3)
987 s3=zin(2,j,nin3)
988 r4=zin(1,j,nin4)
989 s4=zin(2,j,nin4)
990 r=r1 + r3
991 s=r2 + r4
992 zout(1,j,nout1) = r + s
993 zout(1,j,nout3) = r - s
994 r=r1 - r3
995 s=s2 - s4
996 zout(1,j,nout2) = r + s
997 zout(1,j,nout4) = r - s
998 r=s1 + s3
999 s=s2 + s4
1000 zout(2,j,nout1) = r + s
1001 zout(2,j,nout3) = r - s
1002 r=s1 - s3
1003 s=r2 - r4
1004 zout(2,j,nout2) = r - s
1005 zout(2,j,nout4) = r + s
1006 end do
1007 end do
1008 do ia = 2, after
1009 ias=ia-1
1010 if (2*ias == after) then
1011 nin1=ia-after
1012 nout1=ia-atn
1013 do ib = 1, before
1014 nin1=nin1+after
1015 nin2=nin1+atb
1016 nin3=nin2+atb
1017 nin4=nin3+atb
1018 nout1=nout1+atn
1019 nout2=nout1+after
1020 nout3=nout2+after
1021 nout4=nout3+after
1022 do j = 1,nfft
1023 r1=zin(1,j,nin1)
1024 s1=zin(2,j,nin1)
1025 r=zin(1,j,nin2)
1026 s=zin(2,j,nin2)
1027 r2=(r + s)*rt2i
1028 s2=(s - r)*rt2i
1029 r3=zin(2,j,nin3)
1030 s3=zin(1,j,nin3)
1031 r=zin(1,j,nin4)
1032 s=zin(2,j,nin4)
1033 r4=(s - r)*rt2i
1034 s4=(r + s)*rt2i
1035 r=r1 + r3
1036 s=r2 + r4
1037 zout(1,j,nout1) = r + s
1038 zout(1,j,nout3) = r - s
1039 r=r1 - r3
1040 s=s2 + s4
1041 zout(1,j,nout2) = r + s
1042 zout(1,j,nout4) = r - s
1043 r=s1 - s3
1044 s=s2 - s4
1045 zout(2,j,nout1) = r + s
1046 zout(2,j,nout3) = r - s
1047 r=s1 + s3
1048 s=r2 - r4
1049 zout(2,j,nout2) = r - s
1050 zout(2,j,nout4) = r + s
1051 end do
1052 end do
1053 else
1054 itt=ias*before
1055 itrig=itt+1
1056 cr2=trig(1,itrig)
1057 ci2=trig(2,itrig)
1058 itrig=itrig+itt
1059 cr3=trig(1,itrig)
1060 ci3=trig(2,itrig)
1061 itrig=itrig+itt
1062 cr4=trig(1,itrig)
1063 ci4=trig(2,itrig)
1064 nin1=ia-after
1065 nout1=ia-atn
1066 do ib = 1, before
1067 nin1=nin1+after
1068 nin2=nin1+atb
1069 nin3=nin2+atb
1070 nin4=nin3+atb
1071 nout1=nout1+atn
1072 nout2=nout1+after
1073 nout3=nout2+after
1074 nout4=nout3+after
1075 do j = 1, nfft
1076 r1=zin(1,j,nin1)
1077 s1=zin(2,j,nin1)
1078 r=zin(1,j,nin2)
1079 s=zin(2,j,nin2)
1080 r2=r*cr2 - s*ci2
1081 s2=r*ci2 + s*cr2
1082 r=zin(1,j,nin3)
1083 s=zin(2,j,nin3)
1084 r3=r*cr3 - s*ci3
1085 s3=r*ci3 + s*cr3
1086 r=zin(1,j,nin4)
1087 s=zin(2,j,nin4)
1088 r4=r*cr4 - s*ci4
1089 s4=r*ci4 + s*cr4
1090 r=r1 + r3
1091 s=r2 + r4
1092 zout(1,j,nout1) = r + s
1093 zout(1,j,nout3) = r - s
1094 r=r1 - r3
1095 s=s2 - s4
1096 zout(1,j,nout2) = r + s
1097 zout(1,j,nout4) = r - s
1098 r=s1 + s3
1099 s=s2 + s4
1100 zout(2,j,nout1) = r + s
1101 zout(2,j,nout3) = r - s
1102 r=s1 - s3
1103 s=r2 - r4
1104 zout(2,j,nout2) = r - s
1105 zout(2,j,nout4) = r + s
1106 end do
1107 end do
1108 end if
1109 end do
1110 end if
1111 else if (now == 8) then
1112 if (isign == -1) then
1113 ia=1
1114 nin1=ia-after
1115 nout1=ia-atn
1116 do ib = 1, before
1117 nin1=nin1+after
1118 nin2=nin1+atb
1119 nin3=nin2+atb
1120 nin4=nin3+atb
1121 nin5=nin4+atb
1122 nin6=nin5+atb
1123 nin7=nin6+atb
1124 nin8=nin7+atb
1125 nout1=nout1+atn
1126 nout2=nout1+after
1127 nout3=nout2+after
1128 nout4=nout3+after
1129 nout5=nout4+after
1130 nout6=nout5+after
1131 nout7=nout6+after
1132 nout8=nout7+after
1133 do j = 1, nfft
1134 r1=zin(1,j,nin1)
1135 s1=zin(2,j,nin1)
1136 r2=zin(1,j,nin2)
1137 s2=zin(2,j,nin2)
1138 r3=zin(1,j,nin3)
1139 s3=zin(2,j,nin3)
1140 r4=zin(1,j,nin4)
1141 s4=zin(2,j,nin4)
1142 r5=zin(1,j,nin5)
1143 s5=zin(2,j,nin5)
1144 r6=zin(1,j,nin6)
1145 s6=zin(2,j,nin6)
1146 r7=zin(1,j,nin7)
1147 s7=zin(2,j,nin7)
1148 r8_=zin(1,j,nin8)
1149 s8=zin(2,j,nin8)
1150 r=r1 + r5
1151 s=r3 + r7
1152 ap=r + s
1153 am=r - s
1154 r=r2 + r6
1155 s=r4 + r8_
1156 bp=r + s
1157 bm=r - s
1158 r=s1 + s5
1159 s=s3 + s7
1160 cp=r + s
1161 cm=r - s
1162 r=s2 + s6
1163 s=s4 + s8
1164 dp=r + s
1165 dm=r - s
1166 zout(1,j,nout1) = ap + bp
1167 zout(2,j,nout1) = cp + dp
1168 zout(1,j,nout5) = ap - bp
1169 zout(2,j,nout5) = cp - dp
1170 zout(1,j,nout3) = am + dm
1171 zout(2,j,nout3) = cm - bm
1172 zout(1,j,nout7) = am - dm
1173 zout(2,j,nout7) = cm + bm
1174 r=r1 - r5
1175 s=s3 - s7
1176 ap=r + s
1177 am=r - s
1178 r=s1 - s5
1179 s=r3 - r7
1180 bp=r + s
1181 bm=r - s
1182 r=s4 - s8
1183 s=r2 - r6
1184 cp=r + s
1185 cm=r - s
1186 r=s2 - s6
1187 s=r4 - r8_
1188 dp=r + s
1189 dm=r - s
1190 r = ( cp + dm)*rt2i
1191 s = ( dm - cp)*rt2i
1192 cp= ( cm + dp)*rt2i
1193 dp = ( cm - dp)*rt2i
1194 zout(1,j,nout2) = ap + r
1195 zout(2,j,nout2) = bm + s
1196 zout(1,j,nout6) = ap - r
1197 zout(2,j,nout6) = bm - s
1198 zout(1,j,nout4) = am + cp
1199 zout(2,j,nout4) = bp + dp
1200 zout(1,j,nout8) = am - cp
1201 zout(2,j,nout8) = bp - dp
1202 end do
1203 end do
1204 do ia = 2, after
1205 ias=ia-1
1206 itt=ias*before
1207 itrig=itt+1
1208 cr2=trig(1,itrig)
1209 ci2=trig(2,itrig)
1210 itrig=itrig+itt
1211 cr3=trig(1,itrig)
1212 ci3=trig(2,itrig)
1213 itrig=itrig+itt
1214 cr4=trig(1,itrig)
1215 ci4=trig(2,itrig)
1216 itrig=itrig+itt
1217 cr5=trig(1,itrig)
1218 ci5=trig(2,itrig)
1219 itrig=itrig+itt
1220 cr6=trig(1,itrig)
1221 ci6=trig(2,itrig)
1222 itrig=itrig+itt
1223 cr7=trig(1,itrig)
1224 ci7=trig(2,itrig)
1225 itrig=itrig+itt
1226 cr8=trig(1,itrig)
1227 ci8=trig(2,itrig)
1228 nin1=ia-after
1229 nout1=ia-atn
1230 do ib = 1, before
1231 nin1=nin1+after
1232 nin2=nin1+atb
1233 nin3=nin2+atb
1234 nin4=nin3+atb
1235 nin5=nin4+atb
1236 nin6=nin5+atb
1237 nin7=nin6+atb
1238 nin8=nin7+atb
1239 nout1=nout1+atn
1240 nout2=nout1+after
1241 nout3=nout2+after
1242 nout4=nout3+after
1243 nout5=nout4+after
1244 nout6=nout5+after
1245 nout7=nout6+after
1246 nout8=nout7+after
1247 do j = 1, nfft
1248 r1=zin(1,j,nin1)
1249 s1=zin(2,j,nin1)
1250 r=zin(1,j,nin2)
1251 s=zin(2,j,nin2)
1252 r2=r*cr2 - s*ci2
1253 s2=r*ci2 + s*cr2
1254 r=zin(1,j,nin3)
1255 s=zin(2,j,nin3)
1256 r3=r*cr3 - s*ci3
1257 s3=r*ci3 + s*cr3
1258 r=zin(1,j,nin4)
1259 s=zin(2,j,nin4)
1260 r4=r*cr4 - s*ci4
1261 s4=r*ci4 + s*cr4
1262 r=zin(1,j,nin5)
1263 s=zin(2,j,nin5)
1264 r5=r*cr5 - s*ci5
1265 s5=r*ci5 + s*cr5
1266 r=zin(1,j,nin6)
1267 s=zin(2,j,nin6)
1268 r6=r*cr6 - s*ci6
1269 s6=r*ci6 + s*cr6
1270 r=zin(1,j,nin7)
1271 s=zin(2,j,nin7)
1272 r7=r*cr7 - s*ci7
1273 s7=r*ci7 + s*cr7
1274 r=zin(1,j,nin8)
1275 s=zin(2,j,nin8)
1276 r8_=r*cr8 - s*ci8
1277 s8=r*ci8 + s*cr8
1278 r=r1 + r5
1279 s=r3 + r7
1280 ap=r + s
1281 am=r - s
1282 r=r2 + r6
1283 s=r4 + r8_
1284 bp=r + s
1285 bm=r - s
1286 r=s1 + s5
1287 s=s3 + s7
1288 cp=r + s
1289 cm=r - s
1290 r=s2 + s6
1291 s=s4 + s8
1292 dp=r + s
1293 dm=r - s
1294 zout(1,j,nout1) = ap + bp
1295 zout(2,j,nout1) = cp + dp
1296 zout(1,j,nout5) = ap - bp
1297 zout(2,j,nout5) = cp - dp
1298 zout(1,j,nout3) = am + dm
1299 zout(2,j,nout3) = cm - bm
1300 zout(1,j,nout7) = am - dm
1301 zout(2,j,nout7) = cm + bm
1302 r=r1 - r5
1303 s=s3 - s7
1304 ap=r + s
1305 am=r - s
1306 r=s1 - s5
1307 s=r3 - r7
1308 bp=r + s
1309 bm=r - s
1310 r=s4 - s8
1311 s=r2 - r6
1312 cp=r + s
1313 cm=r - s
1314 r=s2 - s6
1315 s=r4 - r8_
1316 dp=r + s
1317 dm=r - s
1318 r = ( cp + dm)*rt2i
1319 s = ( dm - cp)*rt2i
1320 cp= ( cm + dp)*rt2i
1321 dp = ( cm - dp)*rt2i
1322 zout(1,j,nout2) = ap + r
1323 zout(2,j,nout2) = bm + s
1324 zout(1,j,nout6) = ap - r
1325 zout(2,j,nout6) = bm - s
1326 zout(1,j,nout4) = am + cp
1327 zout(2,j,nout4) = bp + dp
1328 zout(1,j,nout8) = am - cp
1329 zout(2,j,nout8) = bp - dp
1330 end do
1331 end do
1332 end do
1333 else
1334 ia=1
1335 nin1=ia-after
1336 nout1=ia-atn
1337 do ib = 1, before
1338 nin1=nin1+after
1339 nin2=nin1+atb
1340 nin3=nin2+atb
1341 nin4=nin3+atb
1342 nin5=nin4+atb
1343 nin6=nin5+atb
1344 nin7=nin6+atb
1345 nin8=nin7+atb
1346 nout1=nout1+atn
1347 nout2=nout1+after
1348 nout3=nout2+after
1349 nout4=nout3+after
1350 nout5=nout4+after
1351 nout6=nout5+after
1352 nout7=nout6+after
1353 nout8=nout7+after
1354 do j = 1, nfft
1355 r1=zin(1,j,nin1)
1356 s1=zin(2,j,nin1)
1357 r2=zin(1,j,nin2)
1358 s2=zin(2,j,nin2)
1359 r3=zin(1,j,nin3)
1360 s3=zin(2,j,nin3)
1361 r4=zin(1,j,nin4)
1362 s4=zin(2,j,nin4)
1363 r5=zin(1,j,nin5)
1364 s5=zin(2,j,nin5)
1365 r6=zin(1,j,nin6)
1366 s6=zin(2,j,nin6)
1367 r7=zin(1,j,nin7)
1368 s7=zin(2,j,nin7)
1369 r8_=zin(1,j,nin8)
1370 s8=zin(2,j,nin8)
1371 r=r1 + r5
1372 s=r3 + r7
1373 ap=r + s
1374 am=r - s
1375 r=r2 + r6
1376 s=r4 + r8_
1377 bp=r + s
1378 bm=r - s
1379 r=s1 + s5
1380 s=s3 + s7
1381 cp=r + s
1382 cm=r - s
1383 r=s2 + s6
1384 s=s4 + s8
1385 dp=r + s
1386 dm=r - s
1387 zout(1,j,nout1) = ap + bp
1388 zout(2,j,nout1) = cp + dp
1389 zout(1,j,nout5) = ap - bp
1390 zout(2,j,nout5) = cp - dp
1391 zout(1,j,nout3) = am - dm
1392 zout(2,j,nout3) = cm + bm
1393 zout(1,j,nout7) = am + dm
1394 zout(2,j,nout7) = cm - bm
1395 r= r1 - r5
1396 s=-s3 + s7
1397 ap=r + s
1398 am=r - s
1399 r=s1 - s5
1400 s=r7 - r3
1401 bp=r + s
1402 bm=r - s
1403 r=-s4 + s8
1404 s= r2 - r6
1405 cp=r + s
1406 cm=r - s
1407 r=-s2 + s6
1408 s= r4 - r8_
1409 dp=r + s
1410 dm=r - s
1411 r = ( cp + dm)*rt2i
1412 s = ( cp - dm)*rt2i
1413 cp= ( cm + dp)*rt2i
1414 dp= ( dp - cm)*rt2i
1415 zout(1,j,nout2) = ap + r
1416 zout(2,j,nout2) = bm + s
1417 zout(1,j,nout6) = ap - r
1418 zout(2,j,nout6) = bm - s
1419 zout(1,j,nout4) = am + cp
1420 zout(2,j,nout4) = bp + dp
1421 zout(1,j,nout8) = am - cp
1422 zout(2,j,nout8) = bp - dp
1423 end do
1424 end do
1425
1426 do ia = 2, after
1427 ias=ia-1
1428 itt=ias*before
1429 itrig=itt+1
1430 cr2=trig(1,itrig)
1431 ci2=trig(2,itrig)
1432 itrig=itrig+itt
1433 cr3=trig(1,itrig)
1434 ci3=trig(2,itrig)
1435 itrig=itrig+itt
1436 cr4=trig(1,itrig)
1437 ci4=trig(2,itrig)
1438 itrig=itrig+itt
1439 cr5=trig(1,itrig)
1440 ci5=trig(2,itrig)
1441 itrig=itrig+itt
1442 cr6=trig(1,itrig)
1443 ci6=trig(2,itrig)
1444 itrig=itrig+itt
1445 cr7=trig(1,itrig)
1446 ci7=trig(2,itrig)
1447 itrig=itrig+itt
1448 cr8=trig(1,itrig)
1449 ci8=trig(2,itrig)
1450 nin1=ia-after
1451 nout1=ia-atn
1452 do ib = 1, before
1453 nin1=nin1+after
1454 nin2=nin1+atb
1455 nin3=nin2+atb
1456 nin4=nin3+atb
1457 nin5=nin4+atb
1458 nin6=nin5+atb
1459 nin7=nin6+atb
1460 nin8=nin7+atb
1461 nout1=nout1+atn
1462 nout2=nout1+after
1463 nout3=nout2+after
1464 nout4=nout3+after
1465 nout5=nout4+after
1466 nout6=nout5+after
1467 nout7=nout6+after
1468 nout8=nout7+after
1469 do j = 1, nfft
1470 r1=zin(1,j,nin1)
1471 s1=zin(2,j,nin1)
1472 r=zin(1,j,nin2)
1473 s=zin(2,j,nin2)
1474 r2=r*cr2 - s*ci2
1475 s2=r*ci2 + s*cr2
1476 r=zin(1,j,nin3)
1477 s=zin(2,j,nin3)
1478 r3=r*cr3 - s*ci3
1479 s3=r*ci3 + s*cr3
1480 r=zin(1,j,nin4)
1481 s=zin(2,j,nin4)
1482 r4=r*cr4 - s*ci4
1483 s4=r*ci4 + s*cr4
1484 r=zin(1,j,nin5)
1485 s=zin(2,j,nin5)
1486 r5=r*cr5 - s*ci5
1487 s5=r*ci5 + s*cr5
1488 r=zin(1,j,nin6)
1489 s=zin(2,j,nin6)
1490 r6=r*cr6 - s*ci6
1491 s6=r*ci6 + s*cr6
1492 r=zin(1,j,nin7)
1493 s=zin(2,j,nin7)
1494 r7=r*cr7 - s*ci7
1495 s7=r*ci7 + s*cr7
1496 r=zin(1,j,nin8)
1497 s=zin(2,j,nin8)
1498 r8_=r*cr8 - s*ci8
1499 s8=r*ci8 + s*cr8
1500 r=r1 + r5
1501 s=r3 + r7
1502 ap=r + s
1503 am=r - s
1504 r=r2 + r6
1505 s=r4 + r8_
1506 bp=r + s
1507 bm=r - s
1508 r=s1 + s5
1509 s=s3 + s7
1510 cp=r + s
1511 cm=r - s
1512 r=s2 + s6
1513 s=s4 + s8
1514 dp=r + s
1515 dm=r - s
1516 zout(1,j,nout1) = ap + bp
1517 zout(2,j,nout1) = cp + dp
1518 zout(1,j,nout5) = ap - bp
1519 zout(2,j,nout5) = cp - dp
1520 zout(1,j,nout3) = am - dm
1521 zout(2,j,nout3) = cm + bm
1522 zout(1,j,nout7) = am + dm
1523 zout(2,j,nout7) = cm - bm
1524 r= r1 - r5
1525 s=-s3 + s7
1526 ap=r + s
1527 am=r - s
1528 r=s1 - s5
1529 s=r7 - r3
1530 bp=r + s
1531 bm=r - s
1532 r=-s4 + s8
1533 s= r2 - r6
1534 cp=r + s
1535 cm=r - s
1536 r=-s2 + s6
1537 s= r4 - r8_
1538 dp=r + s
1539 dm=r - s
1540 r = ( cp + dm)*rt2i
1541 s = ( cp - dm)*rt2i
1542 cp= ( cm + dp)*rt2i
1543 dp= ( dp - cm)*rt2i
1544 zout(1,j,nout2) = ap + r
1545 zout(2,j,nout2) = bm + s
1546 zout(1,j,nout6) = ap - r
1547 zout(2,j,nout6) = bm - s
1548 zout(1,j,nout4) = am + cp
1549 zout(2,j,nout4) = bp + dp
1550 zout(1,j,nout8) = am - cp
1551 zout(2,j,nout8) = bp - dp
1552 end do
1553 end do
1554 end do
1555
1556 end if
1557 else if (now == 3) then
1558 ! .5_real64*sqrt(3._real64)
1559 bb=isign*0.8660254037844387_real64
1560 ia=1
1561 nin1=ia-after
1562 nout1=ia-atn
1563 do ib = 1, before
1564 nin1=nin1+after
1565 nin2=nin1+atb
1566 nin3=nin2+atb
1567 nout1=nout1+atn
1568 nout2=nout1+after
1569 nout3=nout2+after
1570 do j = 1, nfft
1571 r1=zin(1,j,nin1)
1572 s1=zin(2,j,nin1)
1573 r2=zin(1,j,nin2)
1574 s2=zin(2,j,nin2)
1575 r3=zin(1,j,nin3)
1576 s3=zin(2,j,nin3)
1577 r=r2 + r3
1578 s=s2 + s3
1579 zout(1,j,nout1) = r + r1
1580 zout(2,j,nout1) = s + s1
1581 r1=r1 - .5_real64*r
1582 s1=s1 - .5_real64*s
1583 r2=bb*(r2-r3)
1584 s2=bb*(s2-s3)
1585 zout(1,j,nout2) = r1 - s2
1586 zout(2,j,nout2) = s1 + r2
1587 zout(1,j,nout3) = r1 + s2
1588 zout(2,j,nout3) = s1 - r2
1589 end do
1590 end do
1591 do ia = 2, after
1592 ias=ia-1
1593 if (4*ias == 3*after) then
1594 if (isign == 1) then
1595 nin1=ia-after
1596 nout1=ia-atn
1597 do ib = 1, before
1598 nin1=nin1+after
1599 nin2=nin1+atb
1600 nin3=nin2+atb
1601 nout1=nout1+atn
1602 nout2=nout1+after
1603 nout3=nout2+after
1604 do j = 1, nfft
1605 r1=zin(1,j,nin1)
1606 s1=zin(2,j,nin1)
1607 r2=zin(2,j,nin2)
1608 s2=zin(1,j,nin2)
1609 r3=zin(1,j,nin3)
1610 s3=zin(2,j,nin3)
1611 r=r3 + r2
1612 s=s2 - s3
1613 zout(1,j,nout1) = r1 - r
1614 zout(2,j,nout1) = s + s1
1615 r1=r1 + .5_real64*r
1616 s1=s1 - .5_real64*s
1617 r2=bb*(r2-r3)
1618 s2=bb*(s2+s3)
1619 zout(1,j,nout2) = r1 - s2
1620 zout(2,j,nout2) = s1 - r2
1621 zout(1,j,nout3) = r1 + s2
1622 zout(2,j,nout3) = s1 + r2
1623 end do
1624 end do
1625 else
1626 nin1=ia-after
1627 nout1=ia-atn
1628 do ib = 1, before
1629 nin1=nin1+after
1630 nin2=nin1+atb
1631 nin3=nin2+atb
1632 nout1=nout1+atn
1633 nout2=nout1+after
1634 nout3=nout2+after
1635 do j = 1, nfft
1636 r1=zin(1,j,nin1)
1637 s1=zin(2,j,nin1)
1638 r2=zin(2,j,nin2)
1639 s2=zin(1,j,nin2)
1640 r3=zin(1,j,nin3)
1641 s3=zin(2,j,nin3)
1642 r=r2 - r3
1643 s=s2 + s3
1644 zout(1,j,nout1) = r + r1
1645 zout(2,j,nout1) = s1 - s
1646 r1=r1 - .5_real64*r
1647 s1=s1 + .5_real64*s
1648 r2=bb*(r2+r3)
1649 s2=bb*(s2-s3)
1650 zout(1,j,nout2) = r1 + s2
1651 zout(2,j,nout2) = s1 + r2
1652 zout(1,j,nout3) = r1 - s2
1653 zout(2,j,nout3) = s1 - r2
1654 end do
1655 end do
1656 end if
1657 else if (8*ias == 3*after) then
1658 if (isign == 1) then
1659 nin1=ia-after
1660 nout1=ia-atn
1661 do ib = 1, before
1662 nin1=nin1+after
1663 nin2=nin1+atb
1664 nin3=nin2+atb
1665 nout1=nout1+atn
1666 nout2=nout1+after
1667 nout3=nout2+after
1668 do j = 1, nfft
1669 r1=zin(1,j,nin1)
1670 s1=zin(2,j,nin1)
1671 r=zin(1,j,nin2)
1672 s=zin(2,j,nin2)
1673 r2=(r - s)*rt2i
1674 s2=(r + s)*rt2i
1675 r3=zin(2,j,nin3)
1676 s3=zin(1,j,nin3)
1677 r=r2 - r3
1678 s=s2 + s3
1679 zout(1,j,nout1) = r + r1
1680 zout(2,j,nout1) = s + s1
1681 r1=r1 - .5_real64*r
1682 s1=s1 - .5_real64*s
1683 r2=bb*(r2+r3)
1684 s2=bb*(s2-s3)
1685 zout(1,j,nout2) = r1 - s2
1686 zout(2,j,nout2) = s1 + r2
1687 zout(1,j,nout3) = r1 + s2
1688 zout(2,j,nout3) = s1 - r2
1689 end do
1690 end do
1691 else
1692 nin1=ia-after
1693 nout1=ia-atn
1694 do ib = 1, before
1695 nin1=nin1+after
1696 nin2=nin1+atb
1697 nin3=nin2+atb
1698 nout1=nout1+atn
1699 nout2=nout1+after
1700 nout3=nout2+after
1701 do j = 1, nfft
1702 r1=zin(1,j,nin1)
1703 s1=zin(2,j,nin1)
1704 r=zin(1,j,nin2)
1705 s=zin(2,j,nin2)
1706 r2=(r + s)*rt2i
1707 s2=(s - r)*rt2i
1708 r3=zin(2,j,nin3)
1709 s3=zin(1,j,nin3)
1710 r=r2 + r3
1711 s=s2 - s3
1712 zout(1,j,nout1) = r + r1
1713 zout(2,j,nout1) = s + s1
1714 r1=r1 - .5_real64*r
1715 s1=s1 - .5_real64*s
1716 r2=bb*(r2-r3)
1717 s2=bb*(s2+s3)
1718 zout(1,j,nout2) = r1 - s2
1719 zout(2,j,nout2) = s1 + r2
1720 zout(1,j,nout3) = r1 + s2
1721 zout(2,j,nout3) = s1 - r2
1722 end do
1723 end do
1724 end if
1725 else
1726 itt=ias*before
1727 itrig=itt+1
1728 cr2=trig(1,itrig)
1729 ci2=trig(2,itrig)
1730 itrig=itrig+itt
1731 cr3=trig(1,itrig)
1732 ci3=trig(2,itrig)
1733 nin1=ia-after
1734 nout1=ia-atn
1735 do ib = 1, before
1736 nin1=nin1+after
1737 nin2=nin1+atb
1738 nin3=nin2+atb
1739 nout1=nout1+atn
1740 nout2=nout1+after
1741 nout3=nout2+after
1742 do j = 1, nfft
1743 r1=zin(1,j,nin1)
1744 s1=zin(2,j,nin1)
1745 r=zin(1,j,nin2)
1746 s=zin(2,j,nin2)
1747 r2=r*cr2 - s*ci2
1748 s2=r*ci2 + s*cr2
1749 r=zin(1,j,nin3)
1750 s=zin(2,j,nin3)
1751 r3=r*cr3 - s*ci3
1752 s3=r*ci3 + s*cr3
1753 r=r2 + r3
1754 s=s2 + s3
1755 zout(1,j,nout1) = r + r1
1756 zout(2,j,nout1) = s + s1
1757 r1=r1 - .5_real64*r
1758 s1=s1 - .5_real64*s
1759 r2=bb*(r2-r3)
1760 s2=bb*(s2-s3)
1761 zout(1,j,nout2) = r1 - s2
1762 zout(2,j,nout2) = s1 + r2
1763 zout(1,j,nout3) = r1 + s2
1764 zout(2,j,nout3) = s1 - r2
1765 end do
1766 end do
1767 end if
1768 end do
1769 else if (now == 5) then
1770 ! cos(2._real64*pi/5._real64)
1771 cos2=0.3090169943749474_real64
1772 ! cos(4._real64*pi/5._real64)
1773 cos4=-0.8090169943749474_real64
1774 ! sin(2._real64*pi/5._real64)
1775 sin2=isign*0.9510565162951536_real64
1776 ! sin(4._real64*pi/5._real64)
1777 sin4=isign*0.5877852522924731_real64
1778 ia=1
1779 nin1=ia-after
1780 nout1=ia-atn
1781 do ib = 1, before
1782 nin1=nin1+after
1783 nin2=nin1+atb
1784 nin3=nin2+atb
1785 nin4=nin3+atb
1786 nin5=nin4+atb
1787 nout1=nout1+atn
1788 nout2=nout1+after
1789 nout3=nout2+after
1790 nout4=nout3+after
1791 nout5=nout4+after
1792 do j = 1, nfft
1793 r1=zin(1,j,nin1)
1794 s1=zin(2,j,nin1)
1795 r2=zin(1,j,nin2)
1796 s2=zin(2,j,nin2)
1797 r3=zin(1,j,nin3)
1798 s3=zin(2,j,nin3)
1799 r4=zin(1,j,nin4)
1800 s4=zin(2,j,nin4)
1801 r5=zin(1,j,nin5)
1802 s5=zin(2,j,nin5)
1803 r25 = r2 + r5
1804 r34 = r3 + r4
1805 s25 = s2 - s5
1806 s34 = s3 - s4
1807 zout(1,j,nout1) = r1 + r25 + r34
1808 r = r1 + cos2*r25 + cos4*r34
1809 s = sin2*s25 + sin4*s34
1810 zout(1,j,nout2) = r - s
1811 zout(1,j,nout5) = r + s
1812 r = r1 + cos4*r25 + cos2*r34
1813 s = sin4*s25 - sin2*s34
1814 zout(1,j,nout3) = r - s
1815 zout(1,j,nout4) = r + s
1816 r25 = r2 - r5
1817 r34 = r3 - r4
1818 s25 = s2 + s5
1819 s34 = s3 + s4
1820 zout(2,j,nout1) = s1 + s25 + s34
1821 r = s1 + cos2*s25 + cos4*s34
1822 s = sin2*r25 + sin4*r34
1823 zout(2,j,nout2) = r + s
1824 zout(2,j,nout5) = r - s
1825 r = s1 + cos4*s25 + cos2*s34
1826 s = sin4*r25 - sin2*r34
1827 zout(2,j,nout3) = r + s
1828 zout(2,j,nout4) = r - s
1829 end do
1830 end do
1831 do ia = 2, after
1832 ias=ia-1
1833 if (8*ias == 5*after) then
1834 if (isign == 1) then
1835 nin1=ia-after
1836 nout1=ia-atn
1837 do ib = 1, before
1838 nin1=nin1+after
1839 nin2=nin1+atb
1840 nin3=nin2+atb
1841 nin4=nin3+atb
1842 nin5=nin4+atb
1843 nout1=nout1+atn
1844 nout2=nout1+after
1845 nout3=nout2+after
1846 nout4=nout3+after
1847 nout5=nout4+after
1848 do j = 1, nfft
1849 r1=zin(1,j,nin1)
1850 s1=zin(2,j,nin1)
1851 r=zin(1,j,nin2)
1852 s=zin(2,j,nin2)
1853 r2=(r - s)*rt2i
1854 s2=(r + s)*rt2i
1855 r3=zin(2,j,nin3)
1856 s3=zin(1,j,nin3)
1857 r=zin(1,j,nin4)
1858 s=zin(2,j,nin4)
1859 r4=(r + s)*rt2i
1860 s4=(r - s)*rt2i
1861 r5=zin(1,j,nin5)
1862 s5=zin(2,j,nin5)
1863 r25 = r2 - r5
1864 r34 = r3 + r4
1865 s25 = s2 + s5
1866 s34 = s3 - s4
1867 zout(1,j,nout1) = r1 + r25 - r34
1868 r = r1 + cos2*r25 - cos4*r34
1869 s = sin2*s25 + sin4*s34
1870 zout(1,j,nout2) = r - s
1871 zout(1,j,nout5) = r + s
1872 r = r1 + cos4*r25 - cos2*r34
1873 s = sin4*s25 - sin2*s34
1874 zout(1,j,nout3) = r - s
1875 zout(1,j,nout4) = r + s
1876 r25 = r2 + r5
1877 r34 = r4 - r3
1878 s25 = s2 - s5
1879 s34 = s3 + s4
1880 zout(2,j,nout1) = s1 + s25 + s34
1881 r = s1 + cos2*s25 + cos4*s34
1882 s = sin2*r25 + sin4*r34
1883 zout(2,j,nout2) = r + s
1884 zout(2,j,nout5) = r - s
1885 r = s1 + cos4*s25 + cos2*s34
1886 s = sin4*r25 - sin2*r34
1887 zout(2,j,nout3) = r + s
1888 zout(2,j,nout4) = r - s
1889 end do
1890 end do
1891 else
1892 nin1=ia-after
1893 nout1=ia-atn
1894 do ib = 1, before
1895 nin1=nin1+after
1896 nin2=nin1+atb
1897 nin3=nin2+atb
1898 nin4=nin3+atb
1899 nin5=nin4+atb
1900 nout1=nout1+atn
1901 nout2=nout1+after
1902 nout3=nout2+after
1903 nout4=nout3+after
1904 nout5=nout4+after
1905 do j = 1, nfft
1906 r1=zin(1,j,nin1)
1907 s1=zin(2,j,nin1)
1908 r=zin(1,j,nin2)
1909 s=zin(2,j,nin2)
1910 r2=(r + s)*rt2i
1911 s2=(s - r)*rt2i
1912 r3=zin(2,j,nin3)
1913 s3=zin(1,j,nin3)
1914 r=zin(1,j,nin4)
1915 s=zin(2,j,nin4)
1916 r4=(s - r)*rt2i
1917 s4=(r + s)*rt2i
1918 r5=zin(1,j,nin5)
1919 s5=zin(2,j,nin5)
1920 r25 = r2 - r5
1921 r34 = r3 + r4
1922 s25 = s2 + s5
1923 s34 = s4 - s3
1924 zout(1,j,nout1) = r1 + r25 + r34
1925 r = r1 + cos2*r25 + cos4*r34
1926 s = sin2*s25 + sin4*s34
1927 zout(1,j,nout2) = r - s
1928 zout(1,j,nout5) = r + s
1929 r = r1 + cos4*r25 + cos2*r34
1930 s = sin4*s25 - sin2*s34
1931 zout(1,j,nout3) = r - s
1932 zout(1,j,nout4) = r + s
1933 r25 = r2 + r5
1934 r34 = r3 - r4
1935 s25 = s2 - s5
1936 s34 = s3 + s4
1937 zout(2,j,nout1) = s1 + s25 - s34
1938 r = s1 + cos2*s25 - cos4*s34
1939 s = sin2*r25 + sin4*r34
1940 zout(2,j,nout2) = r + s
1941 zout(2,j,nout5) = r - s
1942 r = s1 + cos4*s25 - cos2*s34
1943 s = sin4*r25 - sin2*r34
1944 zout(2,j,nout3) = r + s
1945 zout(2,j,nout4) = r - s
1946 end do
1947 end do
1948 end if
1949 else
1950 ias=ia-1
1951 itt=ias*before
1952 itrig=itt+1
1953 cr2=trig(1,itrig)
1954 ci2=trig(2,itrig)
1955 itrig=itrig+itt
1956 cr3=trig(1,itrig)
1957 ci3=trig(2,itrig)
1958 itrig=itrig+itt
1959 cr4=trig(1,itrig)
1960 ci4=trig(2,itrig)
1961 itrig=itrig+itt
1962 cr5=trig(1,itrig)
1963 ci5=trig(2,itrig)
1964 nin1=ia-after
1965 nout1=ia-atn
1966 do ib = 1, before
1967 nin1=nin1+after
1968 nin2=nin1+atb
1969 nin3=nin2+atb
1970 nin4=nin3+atb
1971 nin5=nin4+atb
1972 nout1=nout1+atn
1973 nout2=nout1+after
1974 nout3=nout2+after
1975 nout4=nout3+after
1976 nout5=nout4+after
1977 do j = 1, nfft
1978 r1=zin(1,j,nin1)
1979 s1=zin(2,j,nin1)
1980 r=zin(1,j,nin2)
1981 s=zin(2,j,nin2)
1982 r2=r*cr2 - s*ci2
1983 s2=r*ci2 + s*cr2
1984 r=zin(1,j,nin3)
1985 s=zin(2,j,nin3)
1986 r3=r*cr3 - s*ci3
1987 s3=r*ci3 + s*cr3
1988 r=zin(1,j,nin4)
1989 s=zin(2,j,nin4)
1990 r4=r*cr4 - s*ci4
1991 s4=r*ci4 + s*cr4
1992 r=zin(1,j,nin5)
1993 s=zin(2,j,nin5)
1994 r5=r*cr5 - s*ci5
1995 s5=r*ci5 + s*cr5
1996 r25 = r2 + r5
1997 r34 = r3 + r4
1998 s25 = s2 - s5
1999 s34 = s3 - s4
2000 zout(1,j,nout1) = r1 + r25 + r34
2001 r = r1 + cos2*r25 + cos4*r34
2002 s = sin2*s25 + sin4*s34
2003 zout(1,j,nout2) = r - s
2004 zout(1,j,nout5) = r + s
2005 r = r1 + cos4*r25 + cos2*r34
2006 s = sin4*s25 - sin2*s34
2007 zout(1,j,nout3) = r - s
2008 zout(1,j,nout4) = r + s
2009 r25 = r2 - r5
2010 r34 = r3 - r4
2011 s25 = s2 + s5
2012 s34 = s3 + s4
2013 zout(2,j,nout1) = s1 + s25 + s34
2014 r = s1 + cos2*s25 + cos4*s34
2015 s = sin2*r25 + sin4*r34
2016 zout(2,j,nout2) = r + s
2017 zout(2,j,nout5) = r - s
2018 r = s1 + cos4*s25 + cos2*s34
2019 s = sin4*r25 - sin2*r34
2020 zout(2,j,nout3) = r + s
2021 zout(2,j,nout4) = r - s
2022 end do
2023 end do
2024 end if
2025 end do
2026 else if (now == 6) then
2027 ! .5_real64*sqrt(3._real64)
2028 bb=isign*0.8660254037844387_real64
2029
2030 ia=1
2031 nin1=ia-after
2032 nout1=ia-atn
2033 do ib = 1, before
2034 nin1=nin1+after
2035 nin2=nin1+atb
2036 nin3=nin2+atb
2037 nin4=nin3+atb
2038 nin5=nin4+atb
2039 nin6=nin5+atb
2040 nout1=nout1+atn
2041 nout2=nout1+after
2042 nout3=nout2+after
2043 nout4=nout3+after
2044 nout5=nout4+after
2045 nout6=nout5+after
2046 do j = 1, nfft
2047 r2=zin(1,j,nin3)
2048 s2=zin(2,j,nin3)
2049 r3=zin(1,j,nin5)
2050 s3=zin(2,j,nin5)
2051 r=r2 + r3
2052 s=s2 + s3
2053 r1=zin(1,j,nin1)
2054 s1=zin(2,j,nin1)
2055 ur1 = r + r1
2056 ui1 = s + s1
2057 r1=r1 - .5_real64*r
2058 s1=s1 - .5_real64*s
2059 r=r2-r3
2060 s=s2-s3
2061 ur2 = r1 - s*bb
2062 ui2 = s1 + r*bb
2063 ur3 = r1 + s*bb
2064 ui3 = s1 - r*bb
2065
2066 r2=zin(1,j,nin6)
2067 s2=zin(2,j,nin6)
2068 r3=zin(1,j,nin2)
2069 s3=zin(2,j,nin2)
2070 r=r2 + r3
2071 s=s2 + s3
2072 r1=zin(1,j,nin4)
2073 s1=zin(2,j,nin4)
2074 vr1 = r + r1
2075 vi1 = s + s1
2076 r1=r1 - .5_real64*r
2077 s1=s1 - .5_real64*s
2078 r=r2-r3
2079 s=s2-s3
2080 vr2 = r1 - s*bb
2081 vi2 = s1 + r*bb
2082 vr3 = r1 + s*bb
2083 vi3 = s1 - r*bb
2084
2085 zout(1,j,nout1)=ur1+vr1
2086 zout(2,j,nout1)=ui1+vi1
2087 zout(1,j,nout5)=ur2+vr2
2088 zout(2,j,nout5)=ui2+vi2
2089 zout(1,j,nout3)=ur3+vr3
2090 zout(2,j,nout3)=ui3+vi3
2091 zout(1,j,nout4)=ur1-vr1
2092 zout(2,j,nout4)=ui1-vi1
2093 zout(1,j,nout2)=ur2-vr2
2094 zout(2,j,nout2)=ui2-vi2
2095 zout(1,j,nout6)=ur3-vr3
2096 zout(2,j,nout6)=ui3-vi3
2097 end do
2098 end do
2099 else
2100 stop 'error fftstp'
2101 end if
2102
2103 end subroutine fftstp
2104
2105
2106
2107 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
2108 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
2109 ! This file is distributed under the terms of the
2110 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
2111 subroutine fftrot(mm,nfft,m,nn,n,zin,zout,trig,after,now,before,isign)
2112 integer :: mm, nfft, m, nn, n, isign
2113 integer :: after, before, atn, atb, now
2114 real(real64) :: trig(:,:)
2115 real(real64) :: zin(2, mm, m), zout(2, n, nn)
2116
2117 real(real64) :: rt2i, dp, cp, cm, ci5, cr5, ci6, cr6, am, ap, ci8, cr8
2118 real(real64) :: r, r1, r2, r3, r4, r5, r6, r7, r8_, r25, r34
2119 real(real64) :: s, s1, s2, s3, s4, s5, s6, s7, s8, s25, s34
2120 real(real64) :: bb, bm, dm, bp
2121 real(real64) :: cr2, ci2, cr3, ci3, cr4, ci4, cr7, ci7
2122 real(real64) :: cos2, sin2, cos4, sin4
2123 real(real64) :: ur1, ur2, ur3, ui1, ui2, ui3
2124 real(real64) :: vr1, vr2, vr3, vi1, vi2, vi3
2125 integer :: ia, ib, ib1, j, ias, itrig, itt
2126 integer :: nin1, nin2, nin3, nin4, nin5, nin6, nin7, nin8
2127 integer :: nout1, nout2, nout3, nout4, nout5, nout6, nout7, nout8
2128
2129 atn=after*now
2130 atb=after*before
2131
2132 ! sqrt(.5_real64)
2133 rt2i=0.7071067811865475_real64
2134 if (now == 2) then
2135 ia=1
2136 nin1=ia-after
2137 nout1=ia-atn
2138 do ib=1,before
2139 nin1=nin1+after
2140 nin2=nin1+atb
2141 nout1=nout1+atn
2142 nout2=nout1+after
2143 do j=1,nfft
2144 r1=zin(1,j,nin1)
2145 s1=zin(2,j,nin1)
2146 r2=zin(1,j,nin2)
2147 s2=zin(2,j,nin2)
2148 zout(1,nout1,j)= r2 + r1
2149 zout(2,nout1,j)= s2 + s1
2150 zout(1,nout2,j)= r1 - r2
2151 zout(2,nout2,j)= s1 - s2
2152 end do
2153 end do
2154
2155 do ia=2,after
2156 ias=ia-1
2157 if (2*ias == after) then
2158 if (isign == 1) then
2159 nin1=ia-after
2160 nout1=ia-atn
2161 do ib=1,before
2162 nin1=nin1+after
2163 nin2=nin1+atb
2164 nout1=nout1+atn
2165 nout2=nout1+after
2166 do j=1,nfft
2167 r1=zin(1,j,nin1)
2168 s1=zin(2,j,nin1)
2169 r2=zin(2,j,nin2)
2170 s2=zin(1,j,nin2)
2171 zout(1,nout1,j)= r1 - r2
2172 zout(2,nout1,j)= s2 + s1
2173 zout(1,nout2,j)= r2 + r1
2174 zout(2,nout2,j)= s1 - s2
2175 end do
2176 end do
2177 else
2178 nin1=ia-after
2179 nout1=ia-atn
2180 do ib=1,before
2181 nin1=nin1+after
2182 nin2=nin1+atb
2183 nout1=nout1+atn
2184 nout2=nout1+after
2185 do j=1,nfft
2186 r1=zin(1,j,nin1)
2187 s1=zin(2,j,nin1)
2188 r2=zin(2,j,nin2)
2189 s2=zin(1,j,nin2)
2190 zout(1,nout1,j)= r2 + r1
2191 zout(2,nout1,j)= s1 - s2
2192 zout(1,nout2,j)= r1 - r2
2193 zout(2,nout2,j)= s2 + s1
2194 end do
2195 end do
2196 end if
2197 else if (4*ias == after) then
2198 if (isign == 1) then
2199 nin1=ia-after
2200 nout1=ia-atn
2201 do ib=1,before
2202 nin1=nin1+after
2203 nin2=nin1+atb
2204 nout1=nout1+atn
2205 nout2=nout1+after
2206 do j=1,nfft
2207 r1=zin(1,j,nin1)
2208 s1=zin(2,j,nin1)
2209 r=zin(1,j,nin2)
2210 s=zin(2,j,nin2)
2211 r2=(r - s)*rt2i
2212 s2=(r + s)*rt2i
2213 zout(1,nout1,j)= r2 + r1
2214 zout(2,nout1,j)= s2 + s1
2215 zout(1,nout2,j)= r1 - r2
2216 zout(2,nout2,j)= s1 - s2
2217 end do
2218 end do
2219 else
2220 nin1=ia-after
2221 nout1=ia-atn
2222 do ib=1,before
2223 nin1=nin1+after
2224 nin2=nin1+atb
2225 nout1=nout1+atn
2226 nout2=nout1+after
2227 do j=1,nfft
2228 r1=zin(1,j,nin1)
2229 s1=zin(2,j,nin1)
2230 r=zin(1,j,nin2)
2231 s=zin(2,j,nin2)
2232 r2=(r + s)*rt2i
2233 s2=(s - r)*rt2i
2234 zout(1,nout1,j)= r2 + r1
2235 zout(2,nout1,j)= s2 + s1
2236 zout(1,nout2,j)= r1 - r2
2237 zout(2,nout2,j)= s1 - s2
2238 end do
2239 end do
2240 end if
2241 else if (4*ias == 3*after) then
2242 if (isign == 1) then
2243 nin1=ia-after
2244 nout1=ia-atn
2245 do ib=1,before
2246 nin1=nin1+after
2247 nin2=nin1+atb
2248 nout1=nout1+atn
2249 nout2=nout1+after
2250 do j=1,nfft
2251 r1=zin(1,j,nin1)
2252 s1=zin(2,j,nin1)
2253 r=zin(1,j,nin2)
2254 s=zin(2,j,nin2)
2255 r2=(r + s)*rt2i
2256 s2=(r - s)*rt2i
2257 zout(1,nout1,j)= r1 - r2
2258 zout(2,nout1,j)= s2 + s1
2259 zout(1,nout2,j)= r2 + r1
2260 zout(2,nout2,j)= s1 - s2
2261 end do
2262 end do
2263 else
2264 nin1=ia-after
2265 nout1=ia-atn
2266 do ib=1,before
2267 nin1=nin1+after
2268 nin2=nin1+atb
2269 nout1=nout1+atn
2270 nout2=nout1+after
2271 do j=1,nfft
2272 r1=zin(1,j,nin1)
2273 s1=zin(2,j,nin1)
2274 r=zin(1,j,nin2)
2275 s=zin(2,j,nin2)
2276 r2=(s - r)*rt2i
2277 s2=(r + s)*rt2i
2278 zout(1,nout1,j)= r2 + r1
2279 zout(2,nout1,j)= s1 - s2
2280 zout(1,nout2,j)= r1 - r2
2281 zout(2,nout2,j)= s2 + s1
2282 end do
2283 end do
2284 end if
2285 else
2286 itrig=ias*before+1
2287 cr2=trig(1,itrig)
2288 ci2=trig(2,itrig)
2289 nin1=ia-after
2290 nout1=ia-atn
2291 do ib=1,before
2292 nin1=nin1+after
2293 nin2=nin1+atb
2294 nout1=nout1+atn
2295 nout2=nout1+after
2296 do j=1,nfft
2297 r1=zin(1,j,nin1)
2298 s1=zin(2,j,nin1)
2299 r=zin(1,j,nin2)
2300 s=zin(2,j,nin2)
2301 r2=r*cr2 - s*ci2
2302 s2=r*ci2 + s*cr2
2303 zout(1,nout1,j)= r2 + r1
2304 zout(2,nout1,j)= s2 + s1
2305 zout(1,nout2,j)= r1 - r2
2306 zout(2,nout2,j)= s1 - s2
2307 end do
2308 end do
2309 end if
2310 end do
2311 else if (now == 4) then
2312 if (isign == 1) then
2313 ia=1
2314 nin1=ia-after
2315 nout1=ia-atn
2316 do ib=1,before
2317 nin1=nin1+after
2318 nin2=nin1+atb
2319 nin3=nin2+atb
2320 nin4=nin3+atb
2321 nout1=nout1+atn
2322 nout2=nout1+after
2323 nout3=nout2+after
2324 nout4=nout3+after
2325 do j=1,nfft
2326 r1=zin(1,j,nin1)
2327 s1=zin(2,j,nin1)
2328 r2=zin(1,j,nin2)
2329 s2=zin(2,j,nin2)
2330 r3=zin(1,j,nin3)
2331 s3=zin(2,j,nin3)
2332 r4=zin(1,j,nin4)
2333 s4=zin(2,j,nin4)
2334 r=r1 + r3
2335 s=r2 + r4
2336 zout(1,nout1,j) = r + s
2337 zout(1,nout3,j) = r - s
2338 r=r1 - r3
2339 s=s2 - s4
2340 zout(1,nout2,j) = r - s
2341 zout(1,nout4,j) = r + s
2342 r=s1 + s3
2343 s=s2 + s4
2344 zout(2,nout1,j) = r + s
2345 zout(2,nout3,j) = r - s
2346 r=s1 - s3
2347 s=r2 - r4
2348 zout(2,nout2,j) = r + s
2349 zout(2,nout4,j) = r - s
2350 end do
2351 end do
2352 do ia=2,after
2353 ias=ia-1
2354 if (2*ias == after) then
2355 nin1=ia-after
2356 nout1=ia-atn
2357 do ib=1,before
2358 nin1=nin1+after
2359 nin2=nin1+atb
2360 nin3=nin2+atb
2361 nin4=nin3+atb
2362 nout1=nout1+atn
2363 nout2=nout1+after
2364 nout3=nout2+after
2365 nout4=nout3+after
2366 do j=1,nfft
2367 r1=zin(1,j,nin1)
2368 s1=zin(2,j,nin1)
2369 r=zin(1,j,nin2)
2370 s=zin(2,j,nin2)
2371 r2=(r-s)*rt2i
2372 s2=(r+s)*rt2i
2373 r3=zin(2,j,nin3)
2374 s3=zin(1,j,nin3)
2375 r=zin(1,j,nin4)
2376 s=zin(2,j,nin4)
2377 r4=(r + s)*rt2i
2378 s4=(r - s)*rt2i
2379 r=r1 - r3
2380 s=r2 - r4
2381 zout(1,nout1,j) = r + s
2382 zout(1,nout3,j) = r - s
2383 r=r1 + r3
2384 s=s2 - s4
2385 zout(1,nout2,j) = r - s
2386 zout(1,nout4,j) = r + s
2387 r=s1 + s3
2388 s=s2 + s4
2389 zout(2,nout1,j) = r + s
2390 zout(2,nout3,j) = r - s
2391 r=s1 - s3
2392 s=r2 + r4
2393 zout(2,nout2,j) = r + s
2394 zout(2,nout4,j) = r - s
2395 end do
2396 end do
2397 else
2398 itt=ias*before
2399 itrig=itt+1
2400 cr2=trig(1,itrig)
2401 ci2=trig(2,itrig)
2402 itrig=itrig+itt
2403 cr3=trig(1,itrig)
2404 ci3=trig(2,itrig)
2405 itrig=itrig+itt
2406 cr4=trig(1,itrig)
2407 ci4=trig(2,itrig)
2408 nin1=ia-after
2409 nout1=ia-atn
2410 do ib=1,before
2411 nin1=nin1+after
2412 nin2=nin1+atb
2413 nin3=nin2+atb
2414 nin4=nin3+atb
2415 nout1=nout1+atn
2416 nout2=nout1+after
2417 nout3=nout2+after
2418 nout4=nout3+after
2419 do j=1,nfft
2420 r1=zin(1,j,nin1)
2421 s1=zin(2,j,nin1)
2422 r=zin(1,j,nin2)
2423 s=zin(2,j,nin2)
2424 r2=r*cr2 - s*ci2
2425 s2=r*ci2 + s*cr2
2426 r=zin(1,j,nin3)
2427 s=zin(2,j,nin3)
2428 r3=r*cr3 - s*ci3
2429 s3=r*ci3 + s*cr3
2430 r=zin(1,j,nin4)
2431 s=zin(2,j,nin4)
2432 r4=r*cr4 - s*ci4
2433 s4=r*ci4 + s*cr4
2434 r=r1 + r3
2435 s=r2 + r4
2436 zout(1,nout1,j) = r + s
2437 zout(1,nout3,j) = r - s
2438 r=r1 - r3
2439 s=s2 - s4
2440 zout(1,nout2,j) = r - s
2441 zout(1,nout4,j) = r + s
2442 r=s1 + s3
2443 s=s2 + s4
2444 zout(2,nout1,j) = r + s
2445 zout(2,nout3,j) = r - s
2446 r=s1 - s3
2447 s=r2 - r4
2448 zout(2,nout2,j) = r + s
2449 zout(2,nout4,j) = r - s
2450 end do
2451 end do
2452 end if
2453 end do
2454 else
2455 ia=1
2456 nin1=ia-after
2457 nout1=ia-atn
2458 do ib=1,before
2459 nin1=nin1+after
2460 nin2=nin1+atb
2461 nin3=nin2+atb
2462 nin4=nin3+atb
2463 nout1=nout1+atn
2464 nout2=nout1+after
2465 nout3=nout2+after
2466 nout4=nout3+after
2467 do j=1,nfft
2468 r1=zin(1,j,nin1)
2469 s1=zin(2,j,nin1)
2470 r2=zin(1,j,nin2)
2471 s2=zin(2,j,nin2)
2472 r3=zin(1,j,nin3)
2473 s3=zin(2,j,nin3)
2474 r4=zin(1,j,nin4)
2475 s4=zin(2,j,nin4)
2476 r=r1 + r3
2477 s=r2 + r4
2478 zout(1,nout1,j) = r + s
2479 zout(1,nout3,j) = r - s
2480 r=r1 - r3
2481 s=s2 - s4
2482 zout(1,nout2,j) = r + s
2483 zout(1,nout4,j) = r - s
2484 r=s1 + s3
2485 s=s2 + s4
2486 zout(2,nout1,j) = r + s
2487 zout(2,nout3,j) = r - s
2488 r=s1 - s3
2489 s=r2 - r4
2490 zout(2,nout2,j) = r - s
2491 zout(2,nout4,j) = r + s
2492 end do
2493 end do
2494 do ia=2,after
2495 ias=ia-1
2496 if (2*ias == after) then
2497 nin1=ia-after
2498 nout1=ia-atn
2499 do ib=1,before
2500 nin1=nin1+after
2501 nin2=nin1+atb
2502 nin3=nin2+atb
2503 nin4=nin3+atb
2504 nout1=nout1+atn
2505 nout2=nout1+after
2506 nout3=nout2+after
2507 nout4=nout3+after
2508 do j=1,nfft
2509 r1=zin(1,j,nin1)
2510 s1=zin(2,j,nin1)
2511 r=zin(1,j,nin2)
2512 s=zin(2,j,nin2)
2513 r2=(r + s)*rt2i
2514 s2=(s - r)*rt2i
2515 r3=zin(2,j,nin3)
2516 s3=zin(1,j,nin3)
2517 r=zin(1,j,nin4)
2518 s=zin(2,j,nin4)
2519 r4=(s - r)*rt2i
2520 s4=(r + s)*rt2i
2521 r=r1 + r3
2522 s=r2 + r4
2523 zout(1,nout1,j) = r + s
2524 zout(1,nout3,j) = r - s
2525 r=r1 - r3
2526 s=s2 + s4
2527 zout(1,nout2,j) = r + s
2528 zout(1,nout4,j) = r - s
2529 r=s1 - s3
2530 s=s2 - s4
2531 zout(2,nout1,j) = r + s
2532 zout(2,nout3,j) = r - s
2533 r=s1 + s3
2534 s=r2 - r4
2535 zout(2,nout2,j) = r - s
2536 zout(2,nout4,j) = r + s
2537 end do
2538 end do
2539 else
2540 itt=ias*before
2541 itrig=itt+1
2542 cr2=trig(1,itrig)
2543 ci2=trig(2,itrig)
2544 itrig=itrig+itt
2545 cr3=trig(1,itrig)
2546 ci3=trig(2,itrig)
2547 itrig=itrig+itt
2548 cr4=trig(1,itrig)
2549 ci4=trig(2,itrig)
2550 nin1=ia-after
2551 nout1=ia-atn
2552 do ib=1,before
2553 nin1=nin1+after
2554 nin2=nin1+atb
2555 nin3=nin2+atb
2556 nin4=nin3+atb
2557 nout1=nout1+atn
2558 nout2=nout1+after
2559 nout3=nout2+after
2560 nout4=nout3+after
2561 do j=1,nfft
2562 r1=zin(1,j,nin1)
2563 s1=zin(2,j,nin1)
2564 r=zin(1,j,nin2)
2565 s=zin(2,j,nin2)
2566 r2=r*cr2 - s*ci2
2567 s2=r*ci2 + s*cr2
2568 r=zin(1,j,nin3)
2569 s=zin(2,j,nin3)
2570 r3=r*cr3 - s*ci3
2571 s3=r*ci3 + s*cr3
2572 r=zin(1,j,nin4)
2573 s=zin(2,j,nin4)
2574 r4=r*cr4 - s*ci4
2575 s4=r*ci4 + s*cr4
2576 r=r1 + r3
2577 s=r2 + r4
2578 zout(1,nout1,j) = r + s
2579 zout(1,nout3,j) = r - s
2580 r=r1 - r3
2581 s=s2 - s4
2582 zout(1,nout2,j) = r + s
2583 zout(1,nout4,j) = r - s
2584 r=s1 + s3
2585 s=s2 + s4
2586 zout(2,nout1,j) = r + s
2587 zout(2,nout3,j) = r - s
2588 r=s1 - s3
2589 s=r2 - r4
2590 zout(2,nout2,j) = r - s
2591 zout(2,nout4,j) = r + s
2592 end do
2593 end do
2594 end if
2595 end do
2596 end if
2597 else if (now == 8) then
2598 if (isign == -1) then
2599 ia=1
2600 nin1=ia-after
2601 nout1=ia-atn
2602 do ib1=1,before
2603 nin1=nin1+after
2604 nin2=nin1+atb
2605 nin3=nin2+atb
2606 nin4=nin3+atb
2607 nin5=nin4+atb
2608 nin6=nin5+atb
2609 nin7=nin6+atb
2610 nin8=nin7+atb
2611 nout1=nout1+atn
2612 nout2=nout1+after
2613 nout3=nout2+after
2614 nout4=nout3+after
2615 nout5=nout4+after
2616 nout6=nout5+after
2617 nout7=nout6+after
2618 nout8=nout7+after
2619 do j=1,nfft
2620 r1=zin(1,j,nin1)
2621 s1=zin(2,j,nin1)
2622 r2=zin(1,j,nin2)
2623 s2=zin(2,j,nin2)
2624 r3=zin(1,j,nin3)
2625 s3=zin(2,j,nin3)
2626 r4=zin(1,j,nin4)
2627 s4=zin(2,j,nin4)
2628 r5=zin(1,j,nin5)
2629 s5=zin(2,j,nin5)
2630 r6=zin(1,j,nin6)
2631 s6=zin(2,j,nin6)
2632 r7=zin(1,j,nin7)
2633 s7=zin(2,j,nin7)
2634 r8_=zin(1,j,nin8)
2635 s8=zin(2,j,nin8)
2636 r=r1 + r5
2637 s=r3 + r7
2638 ap=r + s
2639 am=r - s
2640 r=r2 + r6
2641 s=r4 + r8_
2642 bp=r + s
2643 bm=r - s
2644 r=s1 + s5
2645 s=s3 + s7
2646 cp=r + s
2647 cm=r - s
2648 r=s2 + s6
2649 s=s4 + s8
2650 dp=r + s
2651 dm=r - s
2652 zout(1,nout1,j) = ap + bp
2653 zout(2,nout1,j) = cp + dp
2654 zout(1,nout5,j) = ap - bp
2655 zout(2,nout5,j) = cp - dp
2656 zout(1,nout3,j) = am + dm
2657 zout(2,nout3,j) = cm - bm
2658 zout(1,nout7,j) = am - dm
2659 zout(2,nout7,j) = cm + bm
2660 r=r1 - r5
2661 s=s3 - s7
2662 ap=r + s
2663 am=r - s
2664 r=s1 - s5
2665 s=r3 - r7
2666 bp=r + s
2667 bm=r - s
2668 r=s4 - s8
2669 s=r2 - r6
2670 cp=r + s
2671 cm=r - s
2672 r=s2 - s6
2673 s=r4 - r8_
2674 dp=r + s
2675 dm=r - s
2676 r = ( cp + dm)*rt2i
2677 s = ( dm - cp)*rt2i
2678 cp= ( cm + dp)*rt2i
2679 dp= ( cm - dp)*rt2i
2680 zout(1,nout2,j) = ap + r
2681 zout(2,nout2,j) = bm + s
2682 zout(1,nout6,j) = ap - r
2683 zout(2,nout6,j) = bm - s
2684 zout(1,nout4,j) = am + cp
2685 zout(2,nout4,j) = bp + dp
2686 zout(1,nout8,j) = am - cp
2687 zout(2,nout8,j) = bp - dp
2688 end do
2689 end do
2690 do ia=2,after
2691 ias=ia-1
2692 itt=ias*before
2693 itrig=itt+1
2694 cr2=trig(1,itrig)
2695 ci2=trig(2,itrig)
2696 itrig=itrig+itt
2697 cr3=trig(1,itrig)
2698 ci3=trig(2,itrig)
2699 itrig=itrig+itt
2700 cr4=trig(1,itrig)
2701 ci4=trig(2,itrig)
2702 itrig=itrig+itt
2703 cr5=trig(1,itrig)
2704 ci5=trig(2,itrig)
2705 itrig=itrig+itt
2706 cr6=trig(1,itrig)
2707 ci6=trig(2,itrig)
2708 itrig=itrig+itt
2709 cr7=trig(1,itrig)
2710 ci7=trig(2,itrig)
2711 itrig=itrig+itt
2712 cr8=trig(1,itrig)
2713 ci8=trig(2,itrig)
2714 nin1=ia-after
2715 nout1=ia-atn
2716 do ib=1,before
2717 nin1=nin1+after
2718 nin2=nin1+atb
2719 nin3=nin2+atb
2720 nin4=nin3+atb
2721 nin5=nin4+atb
2722 nin6=nin5+atb
2723 nin7=nin6+atb
2724 nin8=nin7+atb
2725 nout1=nout1+atn
2726 nout2=nout1+after
2727 nout3=nout2+after
2728 nout4=nout3+after
2729 nout5=nout4+after
2730 nout6=nout5+after
2731 nout7=nout6+after
2732 nout8=nout7+after
2733 do j=1,nfft
2734 r1=zin(1,j,nin1)
2735 s1=zin(2,j,nin1)
2736 r=zin(1,j,nin2)
2737 s=zin(2,j,nin2)
2738 r2=r*cr2 - s*ci2
2739 s2=r*ci2 + s*cr2
2740 r=zin(1,j,nin3)
2741 s=zin(2,j,nin3)
2742 r3=r*cr3 - s*ci3
2743 s3=r*ci3 + s*cr3
2744 r=zin(1,j,nin4)
2745 s=zin(2,j,nin4)
2746 r4=r*cr4 - s*ci4
2747 s4=r*ci4 + s*cr4
2748 r=zin(1,j,nin5)
2749 s=zin(2,j,nin5)
2750 r5=r*cr5 - s*ci5
2751 s5=r*ci5 + s*cr5
2752 r=zin(1,j,nin6)
2753 s=zin(2,j,nin6)
2754 r6=r*cr6 - s*ci6
2755 s6=r*ci6 + s*cr6
2756 r=zin(1,j,nin7)
2757 s=zin(2,j,nin7)
2758 r7=r*cr7 - s*ci7
2759 s7=r*ci7 + s*cr7
2760 r=zin(1,j,nin8)
2761 s=zin(2,j,nin8)
2762 r8_=r*cr8 - s*ci8
2763 s8=r*ci8 + s*cr8
2764 r=r1 + r5
2765 s=r3 + r7
2766 ap=r + s
2767 am=r - s
2768 r=r2 + r6
2769 s=r4 + r8_
2770 bp=r + s
2771 bm=r - s
2772 r=s1 + s5
2773 s=s3 + s7
2774 cp=r + s
2775 cm=r - s
2776 r=s2 + s6
2777 s=s4 + s8
2778 dp=r + s
2779 dm=r - s
2780 zout(1,nout1,j) = ap + bp
2781 zout(2,nout1,j) = cp + dp
2782 zout(1,nout5,j) = ap - bp
2783 zout(2,nout5,j) = cp - dp
2784 zout(1,nout3,j) = am + dm
2785 zout(2,nout3,j) = cm - bm
2786 zout(1,nout7,j) = am - dm
2787 zout(2,nout7,j) = cm + bm
2788 r=r1 - r5
2789 s=s3 - s7
2790 ap=r + s
2791 am=r - s
2792 r=s1 - s5
2793 s=r3 - r7
2794 bp=r + s
2795 bm=r - s
2796 r=s4 - s8
2797 s=r2 - r6
2798 cp=r + s
2799 cm=r - s
2800 r=s2 - s6
2801 s=r4 - r8_
2802 dp=r + s
2803 dm=r - s
2804 r = ( cp + dm)*rt2i
2805 s = ( dm - cp)*rt2i
2806 cp= ( cm + dp)*rt2i
2807 dp= ( cm - dp)*rt2i
2808 zout(1,nout2,j) = ap + r
2809 zout(2,nout2,j) = bm + s
2810 zout(1,nout6,j) = ap - r
2811 zout(2,nout6,j) = bm - s
2812 zout(1,nout4,j) = am + cp
2813 zout(2,nout4,j) = bp + dp
2814 zout(1,nout8,j) = am - cp
2815 zout(2,nout8,j) = bp - dp
2816 end do
2817 end do
2818 end do
2819
2820 else
2821 ia=1
2822 nin1=ia-after
2823 nout1=ia-atn
2824 do ib=1,before
2825 nin1=nin1+after
2826 nin2=nin1+atb
2827 nin3=nin2+atb
2828 nin4=nin3+atb
2829 nin5=nin4+atb
2830 nin6=nin5+atb
2831 nin7=nin6+atb
2832 nin8=nin7+atb
2833 nout1=nout1+atn
2834 nout2=nout1+after
2835 nout3=nout2+after
2836 nout4=nout3+after
2837 nout5=nout4+after
2838 nout6=nout5+after
2839 nout7=nout6+after
2840 nout8=nout7+after
2841 do j=1,nfft
2842 r1=zin(1,j,nin1)
2843 s1=zin(2,j,nin1)
2844 r2=zin(1,j,nin2)
2845 s2=zin(2,j,nin2)
2846 r3=zin(1,j,nin3)
2847 s3=zin(2,j,nin3)
2848 r4=zin(1,j,nin4)
2849 s4=zin(2,j,nin4)
2850 r5=zin(1,j,nin5)
2851 s5=zin(2,j,nin5)
2852 r6=zin(1,j,nin6)
2853 s6=zin(2,j,nin6)
2854 r7=zin(1,j,nin7)
2855 s7=zin(2,j,nin7)
2856 r8_=zin(1,j,nin8)
2857 s8=zin(2,j,nin8)
2858 r=r1 + r5
2859 s=r3 + r7
2860 ap=r + s
2861 am=r - s
2862 r=r2 + r6
2863 s=r4 + r8_
2864 bp=r + s
2865 bm=r - s
2866 r=s1 + s5
2867 s=s3 + s7
2868 cp=r + s
2869 cm=r - s
2870 r=s2 + s6
2871 s=s4 + s8
2872 dp=r + s
2873 dm=r - s
2874 zout(1,nout1,j) = ap + bp
2875 zout(2,nout1,j) = cp + dp
2876 zout(1,nout5,j) = ap - bp
2877 zout(2,nout5,j) = cp - dp
2878 zout(1,nout3,j) = am - dm
2879 zout(2,nout3,j) = cm + bm
2880 zout(1,nout7,j) = am + dm
2881 zout(2,nout7,j) = cm - bm
2882 r= r1 - r5
2883 s=-s3 + s7
2884 ap=r + s
2885 am=r - s
2886 r=s1 - s5
2887 s=r7 - r3
2888 bp=r + s
2889 bm=r - s
2890 r=-s4 + s8
2891 s= r2 - r6
2892 cp=r + s
2893 cm=r - s
2894 r=-s2 + s6
2895 s= r4 - r8_
2896 dp=r + s
2897 dm=r - s
2898 r = ( cp + dm)*rt2i
2899 s = ( cp - dm)*rt2i
2900 cp= ( cm + dp)*rt2i
2901 dp= ( dp - cm)*rt2i
2902 zout(1,nout2,j) = ap + r
2903 zout(2,nout2,j) = bm + s
2904 zout(1,nout6,j) = ap - r
2905 zout(2,nout6,j) = bm - s
2906 zout(1,nout4,j) = am + cp
2907 zout(2,nout4,j) = bp + dp
2908 zout(1,nout8,j) = am - cp
2909 zout(2,nout8,j) = bp - dp
2910 end do
2911 end do
2912
2913 do ia=2,after
2914 ias=ia-1
2915 itt=ias*before
2916 itrig=itt+1
2917 cr2=trig(1,itrig)
2918 ci2=trig(2,itrig)
2919 itrig=itrig+itt
2920 cr3=trig(1,itrig)
2921 ci3=trig(2,itrig)
2922 itrig=itrig+itt
2923 cr4=trig(1,itrig)
2924 ci4=trig(2,itrig)
2925 itrig=itrig+itt
2926 cr5=trig(1,itrig)
2927 ci5=trig(2,itrig)
2928 itrig=itrig+itt
2929 cr6=trig(1,itrig)
2930 ci6=trig(2,itrig)
2931 itrig=itrig+itt
2932 cr7=trig(1,itrig)
2933 ci7=trig(2,itrig)
2934 itrig=itrig+itt
2935 cr8=trig(1,itrig)
2936 ci8=trig(2,itrig)
2937 nin1=ia-after
2938 nout1=ia-atn
2939 do ib=1,before
2940 nin1=nin1+after
2941 nin2=nin1+atb
2942 nin3=nin2+atb
2943 nin4=nin3+atb
2944 nin5=nin4+atb
2945 nin6=nin5+atb
2946 nin7=nin6+atb
2947 nin8=nin7+atb
2948 nout1=nout1+atn
2949 nout2=nout1+after
2950 nout3=nout2+after
2951 nout4=nout3+after
2952 nout5=nout4+after
2953 nout6=nout5+after
2954 nout7=nout6+after
2955 nout8=nout7+after
2956 do j=1,nfft
2957 r1=zin(1,j,nin1)
2958 s1=zin(2,j,nin1)
2959 r=zin(1,j,nin2)
2960 s=zin(2,j,nin2)
2961 r2=r*cr2 - s*ci2
2962 s2=r*ci2 + s*cr2
2963 r=zin(1,j,nin3)
2964 s=zin(2,j,nin3)
2965 r3=r*cr3 - s*ci3
2966 s3=r*ci3 + s*cr3
2967 r=zin(1,j,nin4)
2968 s=zin(2,j,nin4)
2969 r4=r*cr4 - s*ci4
2970 s4=r*ci4 + s*cr4
2971 r=zin(1,j,nin5)
2972 s=zin(2,j,nin5)
2973 r5=r*cr5 - s*ci5
2974 s5=r*ci5 + s*cr5
2975 r=zin(1,j,nin6)
2976 s=zin(2,j,nin6)
2977 r6=r*cr6 - s*ci6
2978 s6=r*ci6 + s*cr6
2979 r=zin(1,j,nin7)
2980 s=zin(2,j,nin7)
2981 r7=r*cr7 - s*ci7
2982 s7=r*ci7 + s*cr7
2983 r=zin(1,j,nin8)
2984 s=zin(2,j,nin8)
2985 r8_=r*cr8 - s*ci8
2986 s8=r*ci8 + s*cr8
2987 r=r1 + r5
2988 s=r3 + r7
2989 ap=r + s
2990 am=r - s
2991 r=r2 + r6
2992 s=r4 + r8_
2993 bp=r + s
2994 bm=r - s
2995 r=s1 + s5
2996 s=s3 + s7
2997 cp=r + s
2998 cm=r - s
2999 r=s2 + s6
3000 s=s4 + s8
3001 dp=r + s
3002 dm=r - s
3003 zout(1,nout1,j) = ap + bp
3004 zout(2,nout1,j) = cp + dp
3005 zout(1,nout5,j) = ap - bp
3006 zout(2,nout5,j) = cp - dp
3007 zout(1,nout3,j) = am - dm
3008 zout(2,nout3,j) = cm + bm
3009 zout(1,nout7,j) = am + dm
3010 zout(2,nout7,j) = cm - bm
3011 r= r1 - r5
3012 s=-s3 + s7
3013 ap=r + s
3014 am=r - s
3015 r=s1 - s5
3016 s=r7 - r3
3017 bp=r + s
3018 bm=r - s
3019 r=-s4 + s8
3020 s= r2 - r6
3021 cp=r + s
3022 cm=r - s
3023 r=-s2 + s6
3024 s= r4 - r8_
3025 dp=r + s
3026 dm=r - s
3027 r = ( cp + dm)*rt2i
3028 s = ( cp - dm)*rt2i
3029 cp= ( cm + dp)*rt2i
3030 dp= ( dp - cm)*rt2i
3031 zout(1,nout2,j) = ap + r
3032 zout(2,nout2,j) = bm + s
3033 zout(1,nout6,j) = ap - r
3034 zout(2,nout6,j) = bm - s
3035 zout(1,nout4,j) = am + cp
3036 zout(2,nout4,j) = bp + dp
3037 zout(1,nout8,j) = am - cp
3038 zout(2,nout8,j) = bp - dp
3039 end do
3040 end do
3041 end do
3042
3043 end if
3044 else if (now == 3) then
3045! .5_real64*sqrt(3._real64)
3046 bb=isign*0.8660254037844387_real64
3047 ia=1
3048 nin1=ia-after
3049 nout1=ia-atn
3050 do ib=1,before
3051 nin1=nin1+after
3052 nin2=nin1+atb
3053 nin3=nin2+atb
3054 nout1=nout1+atn
3055 nout2=nout1+after
3056 nout3=nout2+after
3057 do j=1,nfft
3058 r1=zin(1,j,nin1)
3059 s1=zin(2,j,nin1)
3060 r2=zin(1,j,nin2)
3061 s2=zin(2,j,nin2)
3062 r3=zin(1,j,nin3)
3063 s3=zin(2,j,nin3)
3064 r=r2 + r3
3065 s=s2 + s3
3066 zout(1,nout1,j) = r + r1
3067 zout(2,nout1,j) = s + s1
3068 r1=r1 - .5_real64*r
3069 s1=s1 - .5_real64*s
3070 r2=bb*(r2-r3)
3071 s2=bb*(s2-s3)
3072 zout(1,nout2,j) = r1 - s2
3073 zout(2,nout2,j) = s1 + r2
3074 zout(1,nout3,j) = r1 + s2
3075 zout(2,nout3,j) = s1 - r2
3076 end do
3077 end do
3078 do ia=2,after
3079 ias=ia-1
3080 if (4*ias == 3*after) then
3081 if (isign == 1) then
3082 nin1=ia-after
3083 nout1=ia-atn
3084 do ib=1,before
3085 nin1=nin1+after
3086 nin2=nin1+atb
3087 nin3=nin2+atb
3088 nout1=nout1+atn
3089 nout2=nout1+after
3090 nout3=nout2+after
3091 do j=1,nfft
3092 r1=zin(1,j,nin1)
3093 s1=zin(2,j,nin1)
3094 r2=zin(2,j,nin2)
3095 s2=zin(1,j,nin2)
3096 r3=zin(1,j,nin3)
3097 s3=zin(2,j,nin3)
3098 r=r2 + r3
3099 s=s2 - s3
3100 zout(1,nout1,j) = r1 - r
3101 zout(2,nout1,j) = s + s1
3102 r1=r1 + .5_real64*r
3103 s1=s1 - .5_real64*s
3104 r2=bb*(r2-r3)
3105 s2=bb*(s2+s3)
3106 zout(1,nout2,j) = r1 - s2
3107 zout(2,nout2,j) = s1 - r2
3108 zout(1,nout3,j) = r1 + s2
3109 zout(2,nout3,j) = s1 + r2
3110 end do
3111 end do
3112 else
3113 nin1=ia-after
3114 nout1=ia-atn
3115 do ib=1,before
3116 nin1=nin1+after
3117 nin2=nin1+atb
3118 nin3=nin2+atb
3119 nout1=nout1+atn
3120 nout2=nout1+after
3121 nout3=nout2+after
3122 do j=1,nfft
3123 r1=zin(1,j,nin1)
3124 s1=zin(2,j,nin1)
3125 r2=zin(2,j,nin2)
3126 s2=zin(1,j,nin2)
3127 r3=zin(1,j,nin3)
3128 s3=zin(2,j,nin3)
3129 r=r2 - r3
3130 s=s2 + s3
3131 zout(1,nout1,j) = r + r1
3132 zout(2,nout1,j) = s1 - s
3133 r1=r1 - .5_real64*r
3134 s1=s1 + .5_real64*s
3135 r2=bb*(r2+r3)
3136 s2=bb*(s2-s3)
3137 zout(1,nout2,j) = r1 + s2
3138 zout(2,nout2,j) = s1 + r2
3139 zout(1,nout3,j) = r1 - s2
3140 zout(2,nout3,j) = s1 - r2
3141 end do
3142 end do
3143 end if
3144 else if (8*ias == 3*after) then
3145 if (isign == 1) then
3146 nin1=ia-after
3147 nout1=ia-atn
3148 do ib=1,before
3149 nin1=nin1+after
3150 nin2=nin1+atb
3151 nin3=nin2+atb
3152 nout1=nout1+atn
3153 nout2=nout1+after
3154 nout3=nout2+after
3155 do j=1,nfft
3156 r1=zin(1,j,nin1)
3157 s1=zin(2,j,nin1)
3158 r=zin(1,j,nin2)
3159 s=zin(2,j,nin2)
3160 r2=(r - s)*rt2i
3161 s2=(r + s)*rt2i
3162 r3=zin(2,j,nin3)
3163 s3=zin(1,j,nin3)
3164 r=r2 - r3
3165 s=s2 + s3
3166 zout(1,nout1,j) = r + r1
3167 zout(2,nout1,j) = s + s1
3168 r1=r1 - .5_real64*r
3169 s1=s1 - .5_real64*s
3170 r2=bb*(r2+r3)
3171 s2=bb*(s2-s3)
3172 zout(1,nout2,j) = r1 - s2
3173 zout(2,nout2,j) = s1 + r2
3174 zout(1,nout3,j) = r1 + s2
3175 zout(2,nout3,j) = s1 - r2
3176 end do
3177 end do
3178 else
3179 nin1=ia-after
3180 nout1=ia-atn
3181 do ib=1,before
3182 nin1=nin1+after
3183 nin2=nin1+atb
3184 nin3=nin2+atb
3185 nout1=nout1+atn
3186 nout2=nout1+after
3187 nout3=nout2+after
3188 do j=1,nfft
3189 r1=zin(1,j,nin1)
3190 s1=zin(2,j,nin1)
3191 r=zin(1,j,nin2)
3192 s=zin(2,j,nin2)
3193 r2=(r + s)*rt2i
3194 s2=(s - r)*rt2i
3195 r3=zin(2,j,nin3)
3196 s3=zin(1,j,nin3)
3197 r=r2 + r3
3198 s=s2 - s3
3199 zout(1,nout1,j) = r + r1
3200 zout(2,nout1,j) = s + s1
3201 r1=r1 - .5_real64*r
3202 s1=s1 - .5_real64*s
3203 r2=bb*(r2-r3)
3204 s2=bb*(s2+s3)
3205 zout(1,nout2,j) = r1 - s2
3206 zout(2,nout2,j) = s1 + r2
3207 zout(1,nout3,j) = r1 + s2
3208 zout(2,nout3,j) = s1 - r2
3209 end do
3210 end do
3211 end if
3212 else
3213 itt=ias*before
3214 itrig=itt+1
3215 cr2=trig(1,itrig)
3216 ci2=trig(2,itrig)
3217 itrig=itrig+itt
3218 cr3=trig(1,itrig)
3219 ci3=trig(2,itrig)
3220 nin1=ia-after
3221 nout1=ia-atn
3222 do ib=1,before
3223 nin1=nin1+after
3224 nin2=nin1+atb
3225 nin3=nin2+atb
3226 nout1=nout1+atn
3227 nout2=nout1+after
3228 nout3=nout2+after
3229 do j=1,nfft
3230 r1=zin(1,j,nin1)
3231 s1=zin(2,j,nin1)
3232 r=zin(1,j,nin2)
3233 s=zin(2,j,nin2)
3234 r2=r*cr2 - s*ci2
3235 s2=r*ci2 + s*cr2
3236 r=zin(1,j,nin3)
3237 s=zin(2,j,nin3)
3238 r3=r*cr3 - s*ci3
3239 s3=r*ci3 + s*cr3
3240 r=r2 + r3
3241 s=s2 + s3
3242 zout(1,nout1,j) = r + r1
3243 zout(2,nout1,j) = s + s1
3244 r1=r1 - .5_real64*r
3245 s1=s1 - .5_real64*s
3246 r2=bb*(r2-r3)
3247 s2=bb*(s2-s3)
3248 zout(1,nout2,j) = r1 - s2
3249 zout(2,nout2,j) = s1 + r2
3250 zout(1,nout3,j) = r1 + s2
3251 zout(2,nout3,j) = s1 - r2
3252 end do
3253 end do
3254 end if
3255 end do
3256 else if (now == 5) then
3257! cos(2._real64*pi/5._real64)
3258 cos2=0.3090169943749474_real64
3259! cos(4._real64*pi/5._real64)
3260 cos4=-0.8090169943749474_real64
3261! sin(2._real64*pi/5._real64)
3262 sin2=isign*0.9510565162951536_real64
3263! sin(4._real64*pi/5._real64)
3264 sin4=isign*0.5877852522924731_real64
3265 ia=1
3266 nin1=ia-after
3267 nout1=ia-atn
3268 do ib=1,before
3269 nin1=nin1+after
3270 nin2=nin1+atb
3271 nin3=nin2+atb
3272 nin4=nin3+atb
3273 nin5=nin4+atb
3274 nout1=nout1+atn
3275 nout2=nout1+after
3276 nout3=nout2+after
3277 nout4=nout3+after
3278 nout5=nout4+after
3279 do j=1,nfft
3280 r1=zin(1,j,nin1)
3281 s1=zin(2,j,nin1)
3282 r2=zin(1,j,nin2)
3283 s2=zin(2,j,nin2)
3284 r3=zin(1,j,nin3)
3285 s3=zin(2,j,nin3)
3286 r4=zin(1,j,nin4)
3287 s4=zin(2,j,nin4)
3288 r5=zin(1,j,nin5)
3289 s5=zin(2,j,nin5)
3290 r25 = r2 + r5
3291 r34 = r3 + r4
3292 s25 = s2 - s5
3293 s34 = s3 - s4
3294 zout(1,nout1,j) = r1 + r25 + r34
3295 r = r1 + cos2*r25 + cos4*r34
3296 s = sin2*s25 + sin4*s34
3297 zout(1,nout2,j) = r - s
3298 zout(1,nout5,j) = r + s
3299 r = r1 + cos4*r25 + cos2*r34
3300 s = sin4*s25 - sin2*s34
3301 zout(1,nout3,j) = r - s
3302 zout(1,nout4,j) = r + s
3303 r25 = r2 - r5
3304 r34 = r3 - r4
3305 s25 = s2 + s5
3306 s34 = s3 + s4
3307 zout(2,nout1,j) = s1 + s25 + s34
3308 r = s1 + cos2*s25 + cos4*s34
3309 s = sin2*r25 + sin4*r34
3310 zout(2,nout2,j) = r + s
3311 zout(2,nout5,j) = r - s
3312 r = s1 + cos4*s25 + cos2*s34
3313 s = sin4*r25 - sin2*r34
3314 zout(2,nout3,j) = r + s
3315 zout(2,nout4,j) = r - s
3316 end do
3317 end do
3318 do ia=2,after
3319 ias=ia-1
3320 if (8*ias == 5*after) then
3321 if (isign == 1) then
3322 nin1=ia-after
3323 nout1=ia-atn
3324 do ib=1,before
3325 nin1=nin1+after
3326 nin2=nin1+atb
3327 nin3=nin2+atb
3328 nin4=nin3+atb
3329 nin5=nin4+atb
3330 nout1=nout1+atn
3331 nout2=nout1+after
3332 nout3=nout2+after
3333 nout4=nout3+after
3334 nout5=nout4+after
3335 do j=1,nfft
3336 r1=zin(1,j,nin1)
3337 s1=zin(2,j,nin1)
3338 r=zin(1,j,nin2)
3339 s=zin(2,j,nin2)
3340 r2=(r - s)*rt2i
3341 s2=(r + s)*rt2i
3342 r3=zin(2,j,nin3)
3343 s3=zin(1,j,nin3)
3344 r=zin(1,j,nin4)
3345 s=zin(2,j,nin4)
3346 r4=(r + s)*rt2i
3347 s4=(r - s)*rt2i
3348 r5=zin(1,j,nin5)
3349 s5=zin(2,j,nin5)
3350 r25 = r2 - r5
3351 r34 = r3 + r4
3352 s25 = s2 + s5
3353 s34 = s3 - s4
3354 zout(1,nout1,j) = r1 + r25 - r34
3355 r = r1 + cos2*r25 - cos4*r34
3356 s = sin2*s25 + sin4*s34
3357 zout(1,nout2,j) = r - s
3358 zout(1,nout5,j) = r + s
3359 r = r1 + cos4*r25 - cos2*r34
3360 s = sin4*s25 - sin2*s34
3361 zout(1,nout3,j) = r - s
3362 zout(1,nout4,j) = r + s
3363 r25 = r2 + r5
3364 r34 = r4 - r3
3365 s25 = s2 - s5
3366 s34 = s3 + s4
3367 zout(2,nout1,j) = s1 + s25 + s34
3368 r = s1 + cos2*s25 + cos4*s34
3369 s = sin2*r25 + sin4*r34
3370 zout(2,nout2,j) = r + s
3371 zout(2,nout5,j) = r - s
3372 r = s1 + cos4*s25 + cos2*s34
3373 s = sin4*r25 - sin2*r34
3374 zout(2,nout3,j) = r + s
3375 zout(2,nout4,j) = r - s
3376 end do
3377 end do
3378 else
3379 nin1=ia-after
3380 nout1=ia-atn
3381 do ib=1,before
3382 nin1=nin1+after
3383 nin2=nin1+atb
3384 nin3=nin2+atb
3385 nin4=nin3+atb
3386 nin5=nin4+atb
3387 nout1=nout1+atn
3388 nout2=nout1+after
3389 nout3=nout2+after
3390 nout4=nout3+after
3391 nout5=nout4+after
3392 do j=1,nfft
3393 r1=zin(1,j,nin1)
3394 s1=zin(2,j,nin1)
3395 r=zin(1,j,nin2)
3396 s=zin(2,j,nin2)
3397 r2=(r + s)*rt2i
3398 s2=(s - r)*rt2i
3399 r3=zin(2,j,nin3)
3400 s3=zin(1,j,nin3)
3401 r=zin(1,j,nin4)
3402 s=zin(2,j,nin4)
3403 r4=(s - r)*rt2i
3404 s4=(r + s)*rt2i
3405 r5=zin(1,j,nin5)
3406 s5=zin(2,j,nin5)
3407 r25 = r2 - r5
3408 r34 = r3 + r4
3409 s25 = s2 + s5
3410 s34 = s4 - s3
3411 zout(1,nout1,j) = r1 + r25 + r34
3412 r = r1 + cos2*r25 + cos4*r34
3413 s = sin2*s25 + sin4*s34
3414 zout(1,nout2,j) = r - s
3415 zout(1,nout5,j) = r + s
3416 r = r1 + cos4*r25 + cos2*r34
3417 s = sin4*s25 - sin2*s34
3418 zout(1,nout3,j) = r - s
3419 zout(1,nout4,j) = r + s
3420 r25 = r2 + r5
3421 r34 = r3 - r4
3422 s25 = s2 - s5
3423 s34 = s3 + s4
3424 zout(2,nout1,j) = s1 + s25 - s34
3425 r = s1 + cos2*s25 - cos4*s34
3426 s = sin2*r25 + sin4*r34
3427 zout(2,nout2,j) = r + s
3428 zout(2,nout5,j) = r - s
3429 r = s1 + cos4*s25 - cos2*s34
3430 s = sin4*r25 - sin2*r34
3431 zout(2,nout3,j) = r + s
3432 zout(2,nout4,j) = r - s
3433 end do
3434 end do
3435 end if
3436 else
3437 ias=ia-1
3438 itt=ias*before
3439 itrig=itt+1
3440 cr2=trig(1,itrig)
3441 ci2=trig(2,itrig)
3442 itrig=itrig+itt
3443 cr3=trig(1,itrig)
3444 ci3=trig(2,itrig)
3445 itrig=itrig+itt
3446 cr4=trig(1,itrig)
3447 ci4=trig(2,itrig)
3448 itrig=itrig+itt
3449 cr5=trig(1,itrig)
3450 ci5=trig(2,itrig)
3451 nin1=ia-after
3452 nout1=ia-atn
3453 do ib=1,before
3454 nin1=nin1+after
3455 nin2=nin1+atb
3456 nin3=nin2+atb
3457 nin4=nin3+atb
3458 nin5=nin4+atb
3459 nout1=nout1+atn
3460 nout2=nout1+after
3461 nout3=nout2+after
3462 nout4=nout3+after
3463 nout5=nout4+after
3464 do j=1,nfft
3465 r1=zin(1,j,nin1)
3466 s1=zin(2,j,nin1)
3467 r=zin(1,j,nin2)
3468 s=zin(2,j,nin2)
3469 r2=r*cr2 - s*ci2
3470 s2=r*ci2 + s*cr2
3471 r=zin(1,j,nin3)
3472 s=zin(2,j,nin3)
3473 r3=r*cr3 - s*ci3
3474 s3=r*ci3 + s*cr3
3475 r=zin(1,j,nin4)
3476 s=zin(2,j,nin4)
3477 r4=r*cr4 - s*ci4
3478 s4=r*ci4 + s*cr4
3479 r=zin(1,j,nin5)
3480 s=zin(2,j,nin5)
3481 r5=r*cr5 - s*ci5
3482 s5=r*ci5 + s*cr5
3483 r25 = r2 + r5
3484 r34 = r3 + r4
3485 s25 = s2 - s5
3486 s34 = s3 - s4
3487 zout(1,nout1,j) = r1 + r25 + r34
3488 r = r1 + cos2*r25 + cos4*r34
3489 s = sin2*s25 + sin4*s34
3490 zout(1,nout2,j) = r - s
3491 zout(1,nout5,j) = r + s
3492 r = r1 + cos4*r25 + cos2*r34
3493 s = sin4*s25 - sin2*s34
3494 zout(1,nout3,j) = r - s
3495 zout(1,nout4,j) = r + s
3496 r25 = r2 - r5
3497 r34 = r3 - r4
3498 s25 = s2 + s5
3499 s34 = s3 + s4
3500 zout(2,nout1,j) = s1 + s25 + s34
3501 r = s1 + cos2*s25 + cos4*s34
3502 s = sin2*r25 + sin4*r34
3503 zout(2,nout2,j) = r + s
3504 zout(2,nout5,j) = r - s
3505 r = s1 + cos4*s25 + cos2*s34
3506 s = sin4*r25 - sin2*r34
3507 zout(2,nout3,j) = r + s
3508 zout(2,nout4,j) = r - s
3509 end do
3510 end do
3511 end if
3512 end do
3513 else if (now == 6) then
3514! .5_real64*sqrt(3._real64)
3515 bb=isign*0.8660254037844387_real64
3516
3517 ia=1
3518 nin1=ia-after
3519 nout1=ia-atn
3520 do ib=1,before
3521 nin1=nin1+after
3522 nin2=nin1+atb
3523 nin3=nin2+atb
3524 nin4=nin3+atb
3525 nin5=nin4+atb
3526 nin6=nin5+atb
3527 nout1=nout1+atn
3528 nout2=nout1+after
3529 nout3=nout2+after
3530 nout4=nout3+after
3531 nout5=nout4+after
3532 nout6=nout5+after
3533 do j=1,nfft
3534 r2=zin(1,j,nin3)
3535 s2=zin(2,j,nin3)
3536 r3=zin(1,j,nin5)
3537 s3=zin(2,j,nin5)
3538 r=r2 + r3
3539 s=s2 + s3
3540 r1=zin(1,j,nin1)
3541 s1=zin(2,j,nin1)
3542 ur1 = r + r1
3543 ui1 = s + s1
3544 r1=r1 - .5_real64*r
3545 s1=s1 - .5_real64*s
3546 r=r2-r3
3547 s=s2-s3
3548 ur2 = r1 - s*bb
3549 ui2 = s1 + r*bb
3550 ur3 = r1 + s*bb
3551 ui3 = s1 - r*bb
3552
3553 r2=zin(1,j,nin6)
3554 s2=zin(2,j,nin6)
3555 r3=zin(1,j,nin2)
3556 s3=zin(2,j,nin2)
3557 r=r2 + r3
3558 s=s2 + s3
3559 r1=zin(1,j,nin4)
3560 s1=zin(2,j,nin4)
3561 vr1 = r + r1
3562 vi1 = s + s1
3563 r1=r1 - .5_real64*r
3564 s1=s1 - .5_real64*s
3565 r=r2-r3
3566 s=s2-s3
3567 vr2 = r1 - s*bb
3568 vi2 = s1 + r*bb
3569 vr3 = r1 + s*bb
3570 vi3 = s1 - r*bb
3571
3572 zout(1,nout1,j)=ur1+vr1
3573 zout(2,nout1,j)=ui1+vi1
3574 zout(1,nout5,j)=ur2+vr2
3575 zout(2,nout5,j)=ui2+vi2
3576 zout(1,nout3,j)=ur3+vr3
3577 zout(2,nout3,j)=ui3+vi3
3578 zout(1,nout4,j)=ur1-vr1
3579 zout(2,nout4,j)=ui1-vi1
3580 zout(1,nout2,j)=ur2-vr2
3581 zout(2,nout2,j)=ui2-vi2
3582 zout(1,nout6,j)=ur3-vr3
3583 zout(2,nout6,j)=ui3-vi3
3584 end do
3585 end do
3586
3587 else
3588 stop 'error fftrot'
3589 end if
3590
3591 end subroutine
3592
3593
3594 ! FFT PART RELATED TO THE CONVOLUTION--------------------------------------------
3595
3596 integer function ncache_optimal()
3597 ncache_optimal = 8*1024
3598 end function ncache_optimal
3599
3600!!!HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
3601
3602 !!****h* BigDFT/convolxc_off
3603 !! NAME
3604 !! convolxc_off
3605 !!
3606 !! FUNCTION
3607 !! (Based on suitable modifications of S.Goedecker routines)
3608 !! Applies the local FFT space Kernel to the density in Real space.
3609 !! Does NOT calculate the LDA exchange-correlation terms
3610 !!
3611 !! SYNOPSIS
3612 !! zf: Density (input/output)
3613 !! ZF(i1,i3,i2)
3614 !! i1=1,md1 , i2=1,md2/nproc , i3=1,md3
3615 !! pot: Kernel, only the distributed part (REAL)
3616 !! POT(i1,i2,i3)
3617 !! i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
3618 !! nproc: number of processors used as returned by MPI_COMM_SIZE
3619 !! iproc: [0:nproc-1] number of processor as returned by MPI_COMM_RANK
3620 !! n1,n2,n3: logical dimension of the transform. As transform lengths
3621 !! most products of the prime factors 2,3,5 are allowed.
3622 !! The detailed table with allowed transform lengths can
3623 !! be found in subroutine CTRIG
3624 !! md1,md2,md3: Dimension of ZF
3625 !! nd1,nd2,nd3: Dimension of POT
3626 !! scal: factor of renormalization of the FFT in order to acheve unitarity
3627 !! and the correct dimension
3628 !! hgrid: grid spacing, used for integrating eharthree
3629 !! comm: MPI communicator to use
3630 !!
3631 !! RESTRICTIONS on USAGE
3632 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
3633 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
3634 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
3635 !! This file is distributed under the terms of the
3636 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
3637 !!
3638 !! AUTHORS
3639 !! S. Goedecker, L. Genovese
3640 !!
3641 !! CREATION DATE
3642 !! February 2006
3643 !!
3644 !! SOURCE
3645 !!
3646 subroutine convolxc_off(n1,n2,n3,nd1,nd2,nd3,md1,md2,md3, &
3647 nproc,iproc,pot,zf,scal,comm)
3648 integer, intent(in) :: n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc
3649 real(real64), intent(in) :: scal
3650 real(real64), dimension(nd1,nd2,nd3/nproc), intent(in) :: pot
3651 real(real64), dimension(md1,md3,md2/nproc), intent(inout) :: zf
3652 type(mpi_comm), intent(in) :: comm
3653
3654 integer :: ncache,lzt,lot,ma,mb,nfft,ic1,ic2,ic3,Jp2stb,J2stb,Jp2stf,J2stf
3655 integer :: j2,j3,i1,i3,i,j,inzee
3656#if defined(HAVE_MPI)
3657 integer :: ierr
3658#endif
3659 real(real64) :: twopion
3660 !work arrays for transpositions
3661 real(real64), allocatable :: zt(:,:,:)
3662 !work arrays for MPI
3663 real(real64), allocatable :: zmpi1(:,:,:,:,:)
3664 real(real64), allocatable :: zmpi2(:,:,:,:)
3665 !cache work array
3666 real(real64), allocatable :: zw(:,:,:)
3667 !FFT work arrays
3668 real(real64), allocatable :: cosinarr(:,:)
3669 real(real64) :: btrig1(2,8192)
3670 real(real64) :: ftrig1(2,8192)
3671 integer :: after1(7)
3672 integer :: now1(7)
3673 integer :: before1(7)
3674 real(real64) :: btrig2(2,8192)
3675 real(real64) :: ftrig2(2,8192)
3676 integer :: after2(7)
3677 integer :: now2(7)
3678 integer :: before2(7)
3679 real(real64) :: btrig3(2,8192)
3680 real(real64) :: ftrig3(2,8192)
3681 integer :: after3(7)
3682 integer :: now3(7)
3683 integer :: before3(7)
3684
3685 call profiling_in("SG_PCONV")
3686
3687 ! check input
3688 if (mod(n1,2) /= 0) stop 'Parallel convolution:ERROR:n1'
3689 if (mod(n2,2) /= 0) stop 'Parallel convolution:ERROR:n2'
3690 if (mod(n3,2) /= 0) stop 'Parallel convolution:ERROR:n3'
3691 if (nd1 < n1/2+1) stop 'Parallel convolution:ERROR:nd1'
3692 if (nd2 < n2/2+1) stop 'Parallel convolution:ERROR:nd2'
3693 if (nd3 < n3/2+1) stop 'Parallel convolution:ERROR:nd3'
3694 if (md1 < n1/2) stop 'Parallel convolution:ERROR:md1'
3695 if (md2 < n2/2) stop 'Parallel convolution:ERROR:md2'
3696 if (md3 < n3/2) stop 'Parallel convolution:ERROR:md3'
3697 if (mod(nd3,nproc) /= 0) stop 'Parallel convolution:ERROR:nd3'
3698 if (mod(md2,nproc) /= 0) stop 'Parallel convolution:ERROR:md2'
3699
3700 !defining work arrays dimensions
3701 ncache = ncache_optimal()
3702 if (ncache <= max(n1,n2,n3/2)*4) ncache=max(n1,n2,n3/2)*4
3703
3704 ! if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
3705
3706 lzt = n2/2
3707 if (mod(n2/2,2) == 0) lzt=lzt+1
3708 if (mod(n2/2,4) == 0) lzt=lzt+1
3709
3710 safe_allocate(zw(1:2, 1:ncache/4, 1:2))
3711 safe_allocate(zt(1:2, 1:lzt, 1:n1))
3712 safe_allocate(zmpi2(1:2, 1:n1, 1:md2/nproc, 1:nd3))
3713 safe_allocate(cosinarr(1:2, 1:n3/2))
3714
3715 if (nproc > 1) then
3716 safe_allocate(zmpi1(1:2, 1:n1, 1:md2/nproc, 1:nd3/nproc, 1:nproc))
3717 end if
3718
3719 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
3720 call ctrig(n3/2,btrig3,after3,before3,now3,1,ic3)
3721 call ctrig(n1,btrig1,after1,before1,now1,1,ic1)
3722 call ctrig(n2,btrig2,after2,before2,now2,1,ic2)
3723 do j=1,n1
3724 ftrig1(1,j)= btrig1(1,j)
3725 ftrig1(2,j)=-btrig1(2,j)
3726 end do
3727 do j=1,n2
3728 ftrig2(1,j)= btrig2(1,j)
3729 ftrig2(2,j)=-btrig2(2,j)
3730 end do
3731 do j=1,n3
3732 ftrig3(1,j)= btrig3(1,j)
3733 ftrig3(2,j)=-btrig3(2,j)
3734 end do
3735
3736 !Calculating array of phases for HalFFT decoding
3737 twopion=8._real64*datan(1.0_8)/real(n3, real64)
3738 do i3 = 1,n3/2
3739 cosinarr(1,i3)=dcos(twopion*(i3-1))
3740 cosinarr(2,i3)=-dsin(twopion*(i3-1))
3741 end do
3742
3743 !initializing integral
3744!!! ehartree=0.0_8
3745
3746 ! transform along z axis
3747 lot=ncache/(2*n3)
3748 if (lot < 1) then
3749 write(6,*) &
3750 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3751 'least one 1-d FFT of this size even though this will' // &
3752 'reduce the performance for shorter transform lengths'
3753 stop
3754 end if
3755
3756 do j2=1,md2/nproc
3757 !this condition ensures that we manage only the interesting part for the FFT
3758 if (iproc*(md2/nproc)+j2 <= n2/2) then
3759 do i1 = 1,(n1/2),lot
3760 ma=i1
3761 mb=min(i1+(lot-1),(n1/2))
3762 nfft=mb-ma+1
3763
3764 !inserting real data into complex array of half length
3765 call halfill_upcorn(md1,md3,lot,nfft,n3,zf(i1,1,j2),zw(1,1,1))
3766
3767 !performing FFT
3768 !input: I1,I3,J2,(Jp2)
3769 inzee=1
3770 do i = 1,ic3
3771 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
3772 btrig3,after3(i),now3(i),before3(i),1)
3773 inzee=3-inzee
3774 end do
3775 !output: I1,i3,J2,(Jp2)
3776
3777 !unpacking FFT in order to restore correct result,
3778 !while exchanging components
3779 !input: I1,i3,J2,(Jp2)
3780 call scramble_unpack(i1,j2,lot,nfft,n1/2,n3,md2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr)
3781 !output: I1,J2,i3,(Jp2)
3782 end do
3783 end if
3784 end do
3785
3786 !Interprocessor data transposition
3787 !input: I1,J2,j3,jp3,(Jp2)
3788 if (nproc > 1) then
3789 call profiling_in("SG_ALLTOALL")
3790 !communication scheduling
3791#if defined(HAVE_MPI)
3792 call mpi_alltoall(zmpi2(1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3793 mpi_double_precision, &
3794 zmpi1(1, 1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3795 mpi_double_precision, comm, ierr)
3796#endif
3797 call profiling_out("SG_ALLTOALL")
3798 end if
3799 !output: I1,J2,j3,Jp2,(jp3)
3800
3801 !now each process perform complete convolution of its planes
3802 do j3=1,nd3/nproc
3803 !this condition ensures that we manage only the interesting part for the FFT
3804 if (iproc*(nd3/nproc)+j3 <= n3/2+1) then
3805 jp2stb=1
3806 j2stb=1
3807 jp2stf=1
3808 j2stf=1
3809
3810 ! transform along x axis
3811 lot=ncache/(4*n1)
3812 if (lot < 1) then
3813 write(6,*) &
3814 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3815 'least one 1-d FFT of this size even though this will' // &
3816 'reduce the performance for shorter transform lengths'
3817 stop
3818 end if
3819
3820 do j = 1,n2/2,lot
3821 ma=j
3822 mb=min(j+(lot-1),n2/2)
3823 nfft=mb-ma+1
3824
3825 !reverse index ordering, leaving the planes to be transformed at the end
3826 !input: I1,J2,j3,Jp2,(jp3)
3827 if (nproc == 1) then
3828 call mpiswitch_upcorn(j3,nfft,jp2stb,j2stb,lot,n1,md2,nd3,nproc,zmpi2,zw(1,1,1))
3829 else
3830 call mpiswitch_upcorn(j3,nfft,jp2stb,j2stb,lot,n1,md2,nd3,nproc,zmpi1,zw(1,1,1))
3831 end if
3832 !output: J2,Jp2,I1,j3,(jp3)
3833
3834 !performing FFT
3835 !input: I2,I1,j3,(jp3)
3836 inzee=1
3837 do i = 1,ic1-1
3838 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
3839 btrig1,after1(i),now1(i),before1(i),1)
3840 inzee=3-inzee
3841 end do
3842
3843 !storing the last step into zt array
3844 i=ic1
3845 call fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), &
3846 btrig1,after1(i),now1(i),before1(i),1)
3847 !output: I2,i1,j3,(jp3)
3848 end do
3849
3850 !transform along y axis
3851 lot=ncache/(4*n2)
3852 if (lot < 1) then
3853 write(6,*) &
3854 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3855 'least one 1-d FFT of this size even though this will' // &
3856 'reduce the performance for shorter transform lengths'
3857 stop
3858 end if
3859
3860 do j = 1,n1,lot
3861 ma=j
3862 mb=min(j+(lot-1),n1)
3863 nfft=mb-ma+1
3864
3865 !reverse ordering
3866 !input: I2,i1,j3,(jp3)
3867 call switch_upcorn(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1))
3868 !output: i1,I2,j3,(jp3)
3869
3870 !performing FFT
3871 !input: i1,I2,j3,(jp3)
3872 inzee=1
3873 do i = 1,ic2
3874 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
3875 btrig2,after2(i),now2(i),before2(i),1)
3876 inzee=3-inzee
3877 end do
3878 !output: i1,i2,j3,(jp3)
3879
3880 !Multiply with kernel in fourier space
3881 call multkernel(nd1,nd2,n1,n2,lot,nfft,j,pot(1,1,j3),zw(1,1,inzee))
3882
3883 !TRANSFORM BACK IN REAL SPACE
3884
3885 !transform along y axis
3886 !input: i1,i2,j3,(jp3)
3887 do i = 1,ic2
3888 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
3889 ftrig2,after2(i),now2(i),before2(i),-1)
3890 inzee=3-inzee
3891 end do
3892
3893 !reverse ordering
3894 !input: i1,I2,j3,(jp3)
3895 call unswitch_downcorn(nfft,n2,lot,n1,lzt,zw(1,1,inzee),zt(1,1,j))
3896 !output: I2,i1,j3,(jp3)
3897 end do
3898
3899 !transform along x axis
3900 !input: I2,i1,j3,(jp3)
3901 lot=ncache/(4*n1)
3902 do j = 1,n2/2,lot
3903 ma=j
3904 mb=min(j+(lot-1),n2/2)
3905 nfft=mb-ma+1
3906
3907 !performing FFT
3908 i=1
3909 call fftstp(lzt,nfft,n1,lot,n1,zt(1,j,1),zw(1,1,1), &
3910 ftrig1,after1(i),now1(i),before1(i),-1)
3911
3912 inzee=1
3913 do i=2,ic1
3914 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
3915 ftrig1,after1(i),now1(i),before1(i),-1)
3916 inzee=3-inzee
3917 end do
3918 !output: I2,I1,j3,(jp3)
3919
3920 !reverse ordering
3921 !input: J2,Jp2,I1,j3,(jp3)
3922 if (nproc == 1) then
3923 call unmpiswitch_downcorn(j3,nfft,jp2stf,j2stf,lot,n1,md2,nd3,nproc,zw(1,1,inzee),zmpi2)
3924 else
3925 call unmpiswitch_downcorn(j3,nfft,jp2stf,j2stf,lot,n1,md2,nd3,nproc,zw(1,1,inzee),zmpi1)
3926 end if
3927 ! output: I1,J2,j3,Jp2,(jp3)
3928 end do
3929 end if
3930 end do
3931
3932
3933 !Interprocessor data transposition
3934 !input: I1,J2,j3,Jp2,(jp3)
3935 if (nproc > 1) then
3936 call profiling_in("SG_ALLTOALL")
3937 !communication scheduling
3938#if defined(HAVE_MPI)
3939 call mpi_alltoall(zmpi1(1, 1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3940 mpi_double_precision, &
3941 zmpi2(1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3942 mpi_double_precision, comm, ierr)
3943 !output: I1,J2,j3,jp3,(Jp2)
3944#endif
3945 call profiling_out("SG_ALLTOALL")
3946 end if
3947
3948 !transform along z axis
3949 !input: I1,J2,i3,(Jp2)
3950 lot=ncache/(2*n3)
3951 do j2=1,md2/nproc
3952 !this condition ensures that we manage only the interesting part for the FFT
3953 if (iproc*(md2/nproc)+j2 <= n2/2) then
3954 do i1 = 1,(n1/2),lot
3955 ma=i1
3956 mb=min(i1+(lot-1),(n1/2))
3957 nfft=mb-ma+1
3958
3959 !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
3960 !input: I1,J2,i3,(Jp2)
3961 call unscramble_pack(i1,j2,lot,nfft,n1/2,n3,md2,nproc,nd3,zmpi2,zw(1,1,1),cosinarr)
3962 !output: I1,i3,J2,(Jp2)
3963
3964 !performing FFT
3965 !input: I1,i3,J2,(Jp2)
3966 inzee=1
3967 do i = 1,ic3
3968 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
3969 ftrig3,after3(i),now3(i),before3(i),-1)
3970 inzee=3-inzee
3971 end do
3972 !output: I1,I3,J2,(Jp2)
3973
3974 !calculates the Hartree energy locally and rebuild the output array
3975 call unfill_downcorn(md1, md3, lot, nfft, n3, zw(1,1,inzee), zf(i1,1,j2), scal)
3976
3977 end do
3978 end if
3979 end do
3980
3981 safe_deallocate_a(zmpi2)
3982 safe_deallocate_a(zw)
3983 safe_deallocate_a(zt)
3984 safe_deallocate_a(cosinarr)
3985 safe_deallocate_a(zmpi1)
3986
3987 call profiling_out("SG_PCONV")
3988
3989
3990 end subroutine convolxc_off
3991
3992
3993 !!****h* BigDFT/multkernel
3994 !! NAME
3995 !! multkernel
3996 !!
3997 !! FUNCTION
3998 !! (Based on suitable modifications of S.Goedecker routines)
3999 !! Multiply with the kernel taking into account its symmetry
4000 !! Conceived to be used into convolution loops
4001 !!
4002 !! SYNOPSIS
4003 !! pot: Kernel, symmetric and real, half the length
4004 !! zw: Work array (input/output)
4005 !! n1,n2: logical dimension of the FFT transform, reference for zw
4006 !! nd1,nd2: Dimensions of POT
4007 !! jS, nfft: starting point of the plane and number of remaining lines
4008 !!
4009 !! RESTRICTIONS on USAGE
4010 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4011 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4012 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4013 !! This file is distributed under the terms of the
4014 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4015 !!
4016 !! AUTHORS
4017 !! S. Goedecker, L. Genovese
4018 !!
4019 !! CREATION DATE
4020 !! February 2006
4021 !!
4022 !! SOURCE
4023 !!
4024 subroutine multkernel(nd1,nd2,n1,n2,lot,nfft,jS,pot,zw)
4025 integer, intent(in) :: nd1,nd2,n1,n2,lot,nfft,js
4026 real(real64), dimension(nd1,nd2), intent(in) :: pot
4027 real(real64), dimension(2,lot,n2), intent(inout) :: zw
4028
4029 !Local variables
4030 integer :: j,j1,i2,j2
4031
4032 !case i2=1
4033 do j = 1,nfft
4034 j1=n1/2+1-abs(n1/2+2-js-j)!this stands for j1=min(jS-1+j,n1+3-jS-j)
4035 zw(1,j,1)=zw(1,j,1)*pot(j1,1)
4036 zw(2,j,1)=zw(2,j,1)*pot(j1,1)
4037 end do
4038
4039 !generic case
4040 do i2=2,n2/2
4041 do j = 1,nfft
4042 j1=n1/2+1-abs(n1/2+2-js-j)
4043 j2=n2+2-i2
4044 zw(1,j,i2)=zw(1,j,i2)*pot(j1,i2)
4045 zw(2,j,i2)=zw(2,j,i2)*pot(j1,i2)
4046 zw(1,j,j2)=zw(1,j,j2)*pot(j1,i2)
4047 zw(2,j,j2)=zw(2,j,j2)*pot(j1,i2)
4048 end do
4049 end do
4050
4051 !case i2=n2/2+1
4052 do j = 1,nfft
4053 j1=n1/2+1-abs(n1/2+2-js-j)
4054 j2=n2/2+1
4055 zw(1,j,j2)=zw(1,j,j2)*pot(j1,j2)
4056 zw(2,j,j2)=zw(2,j,j2)*pot(j1,j2)
4057 end do
4058
4059 end subroutine multkernel
4060
4061
4062 subroutine switch_upcorn(nfft,n2,lot,n1,lzt,zt,zw)
4063 integer :: lot, n1, n2, lzt, j, nfft, i
4064 real(real64) :: zw(2,lot,n2), zt(2,lzt,n1)
4065
4066 ! WARNING: Assuming that high frequencies are in the corners
4067 ! and that n2 is multiple of 2
4068
4069 ! Low frequencies
4070 do j = 1, nfft
4071 do i = n2/2 + 1, n2
4072 zw(1,j,i)=zt(1,i-n2/2,j)
4073 zw(2,j,i)=zt(2,i-n2/2,j)
4074 end do
4075 end do
4076
4077 ! High frequencies
4078 do i = 1, n2/2
4079 do j = 1, nfft
4080 zw(1,j,i)=0.0_8
4081 zw(2,j,i)=0.0_8
4082 end do
4083 end do
4084
4085 end subroutine switch_upcorn
4086
4087 ! ----------------------------------------------------------------------------
4088
4089 subroutine mpiswitch_upcorn(j3,nfft,Jp2stb,J2stb,lot,n1,md2,nd3,nproc,zmpi1,zw)
4090 integer :: n1, nd3, nproc, lot, mfft, jp2, jp2stb, j2
4091 integer :: j2stb, nfft, i1, j3, md2
4092 real(real64) :: zmpi1(2,n1/2,md2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4093
4094 ! WARNING: Assuming that high frequencies are in the corners
4095 ! and that n1 is multiple of 2
4096
4097 mfft=0
4098 do jp2 = jp2stb, nproc
4099 do j2 = j2stb, md2/nproc
4100 mfft = mfft + 1
4101 if (mfft > nfft) then
4102 jp2stb=jp2
4103 j2stb=j2
4104 return
4105 end if
4106 do i1=1,n1/2
4107 zw(1,mfft,i1)=0.0_8
4108 zw(2,mfft,i1)=0.0_8
4109 end do
4110 do i1=n1/2+1,n1
4111 zw(1,mfft,i1)=zmpi1(1,i1-n1/2,j2,j3,jp2)
4112 zw(2,mfft,i1)=zmpi1(2,i1-n1/2,j2,j3,jp2)
4113 end do
4114 end do
4115 j2stb=1
4116 end do
4118 end subroutine mpiswitch_upcorn
4119
4120 ! -------------------------------------------------------------------
4121
4122 subroutine halfill_upcorn(md1,md3,lot,nfft,n3,zf,zw)
4123 integer :: lot, n3, md1, md3, i3, i1, nfft
4124 real(real64) :: zw(2,lot,n3/2),zf(md1,md3)
4125
4126 ! WARNING: Assuming that high frequencies are in the corners
4127 ! and that n3 is multiple of 4
4128 !in principle we can relax this condition
4129
4130 do i3 = 1, n3/4
4131 do i1 = 1,nfft
4132 zw(1,i1,i3)=0.0_8
4133 zw(2,i1,i3)=0.0_8
4134 end do
4135 end do
4136
4137 do i3 = n3/4+1,n3/2
4138 do i1 = 1,nfft
4139 zw(1,i1,i3)=zf(i1,2*i3-1-n3/2)
4140 zw(2,i1,i3)=zf(i1,2*i3-n3/2)
4141 end do
4142 end do
4143
4144 end subroutine halfill_upcorn
4145
4146 ! -------------------------------------------------------------------
4147 !!****h* BigDFT/scramble_unpack
4148 !! NAME
4149 !! scramble_unpack
4150 !!
4151 !! FUNCTION
4152 !! (Based on suitable modifications of S.Goedecker routines)
4153 !! Assign the correct planes to the work array zmpi2
4154 !! in order to prepare for interprocessor data transposition.
4155 !! In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
4156 !! multiplication with the kernel
4157 !!
4158 !! SYNOPSIS
4159 !! zmpi2: Work array for multiprocessor manipulation (output)
4160 !! zw: Work array (input)
4161 !! cosinarr: Array of the phases needed for unpacking
4162 !! n1,n3: logical dimension of the FFT transform, reference for work arrays
4163 !! md2,nd3: Dimensions of real grid and of the kernel, respectively
4164 !! i1,j2,lot,nfft: Starting points of the plane and number of remaining lines
4165 !!
4166 !! RESTRICTIONS on USAGE
4167 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4168 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4169 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4170 !! This file is distributed under the terms of the
4171 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4172 !!
4173 !! AUTHORS
4174 !! S. Goedecker, L. Genovese
4175 !!
4176 !! CREATION DATE
4177 !! February 2006
4178 !!
4179 !! SOURCE
4180 !!
4181 subroutine scramble_unpack(i1,j2,lot,nfft,n1,n3,md2,nproc,nd3,zw,zmpi2,cosinarr)
4182 integer, intent(in) :: i1,j2,lot,nfft,n1,n3,md2,nproc,nd3
4183 real(real64), dimension(2,lot,n3/2), intent(in) :: zw
4184 real(real64), dimension(2,n3/2), intent(in) :: cosinarr
4185 real(real64), dimension(2,n1,md2/nproc,nd3), intent(out) :: zmpi2
4186
4187 integer :: i3,i,ind1,ind2
4188 real(real64) :: a,b,c,d,cp,sp,feR,feI,foR,foI,fR,fI
4189
4190 !case i3=1 and i3=n3/2+1
4191 do i=0,nfft-1
4192 a=zw(1,i+1,1)
4193 b=zw(2,i+1,1)
4194 zmpi2(1,i1+i,j2,1)=a+b
4195 zmpi2(2,i1+i,j2,1)=0.0_8
4196 zmpi2(1,i1+i,j2,n3/2+1)=a-b
4197 zmpi2(2,i1+i,j2,n3/2+1)=0.0_8
4198 end do
4199
4200 !case 2<=i3<=n3/2
4201 do i3 = 2,n3/2
4202 ind1 = i3
4203 ind2 = n3/2-i3+2
4204 cp = cosinarr(1,i3)
4205 sp = cosinarr(2,i3)
4206 do i = 0, nfft-1
4207 a = zw(1,i+1,ind1)
4208 b = zw(2,i+1,ind1)
4209 c = zw(1,i+1,ind2)
4210 d = zw(2,i+1,ind2)
4211 fer = .5_real64*(a+c)
4212 fei = .5_real64*(b-d)
4213 for = .5_real64*(a-c)
4214 foi = .5_real64*(b+d)
4215 fr = fer+cp*foi-sp*for
4216 fi = fei-cp*for-sp*foi
4217 zmpi2(1,i1+i,j2,ind1) = fr
4218 zmpi2(2,i1+i,j2,ind1) = fi
4219 end do
4220 end do
4221
4222 end subroutine scramble_unpack
4223
4224
4225 !!****h* BigDFT/unscramble_pack
4226 !! NAME
4227 !! unscramble_pack
4228 !!
4229 !! FUNCTION
4230 !! (Based on suitable modifications of S.Goedecker routines)
4231 !! Insert the correct planes of the work array zmpi2
4232 !! in order to prepare for backward FFT transform
4233 !! In the meanwhile, it packs the data in order to be transformed with the HalFFT
4234 !! procedure
4235 !!
4236 !! SYNOPSIS
4237 !! zmpi2: Work array for multiprocessor manipulation (input)
4238 !! zw: Work array (output)
4239 !! cosinarr: Array of the phases needed for packing
4240 !! n1,n3: logical dimension of the FFT transform, reference for work arrays
4241 !! md2,nd3: Dimensions of real grid and of the kernel, respectively
4242 !! i1,j2,lot,nfft: Starting points of the plane and number of remaining lines
4243 !!
4244 !! RESTRICTIONS on USAGE
4245 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4246 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4247 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4248 !! This file is distributed under the terms of the
4249 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4250 !!
4251 !! AUTHORS
4252 !! S. Goedecker, L. Genovese
4253 !!
4254 !! CREATION DATE
4255 !! February 2006
4256 !!
4257 !! SOURCE
4258 !!
4259 subroutine unscramble_pack(i1,j2,lot,nfft,n1,n3,md2,nproc,nd3,zmpi2,zw,cosinarr)
4260 integer, intent(in) :: i1,j2,lot,nfft,n1,n3,md2,nproc,nd3
4261 real(real64), dimension(2,lot,n3/2), intent(out) :: zw
4262 real(real64), dimension(2,n3/2), intent(in) :: cosinarr
4263 real(real64), dimension(2,n1,md2/nproc,nd3), intent(in) :: zmpi2
4264
4265 integer :: i3,i,indA,indB
4266 real(real64) :: a,b,c,d,cp,sp,re,ie,ro,io,rh,ih
4267
4268 do i3 = 1,n3/2
4269 inda=i3
4270 indb=n3/2+2-i3
4271 cp=cosinarr(1,i3)
4272 sp=cosinarr(2,i3)
4273 do i=0,nfft-1
4274 a=zmpi2(1,i1+i,j2,inda)
4275 b=zmpi2(2,i1+i,j2,inda)
4276 c= zmpi2(1,i1+i,j2,indb)
4277 d=-zmpi2(2,i1+i,j2,indb)
4278 re=(a+c)
4279 ie=(b+d)
4280 ro=(a-c)*cp-(b-d)*sp
4281 io=(a-c)*sp+(b-d)*cp
4282 rh=re-io
4283 ih=ie+ro
4284 zw(1,i+1,inda)=rh
4285 zw(2,i+1,inda)=ih
4286 end do
4287 end do
4288
4289 end subroutine unscramble_pack
4290
4291 ! --------------------------------------------------------------------
4292
4293 subroutine unswitch_downcorn(nfft,n2,lot,n1,lzt,zw,zt)
4294 integer :: lot, n2, lzt, n1, j, nfft, i
4295 real(real64) :: zw(2,lot,n2),zt(2,lzt,n1)
4296 ! WARNING: Assuming that high frequencies are in the corners
4297 ! and that n2 is multiple of 2
4298
4299 ! Low frequencies
4300 do j = 1, nfft
4301 do i = 1, n2/2
4302 zt(1,i,j)=zw(1,j,i)
4303 zt(2,i,j)=zw(2,j,i)
4304 end do
4305 end do
4306
4307 end subroutine unswitch_downcorn
4308
4309
4310 subroutine unmpiswitch_downcorn(j3,nfft,Jp2stf,J2stf,lot,n1,md2,nd3,nproc,zw,zmpi1)
4311 integer :: n1, md2, nproc, nd3, lot, mfft, i1, j3
4312 integer :: jp2, j2, nfft, jp2stf, j2stf
4313 real(real64) :: zmpi1(2,n1/2,md2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4314 ! WARNING: Assuming that high frequencies are in the corners
4315 ! and that n1 is multiple of 2
4316
4317 mfft=0
4318 do jp2=jp2stf,nproc
4319 do j2 = j2stf, md2/nproc
4320 mfft=mfft+1
4321 if (mfft > nfft) then
4322 jp2stf=jp2
4323 j2stf=j2
4324 return
4325 end if
4326 do i1 = 1, n1/2
4327 zmpi1(1,i1,j2,j3,jp2)=zw(1,mfft,i1)
4328 zmpi1(2,i1,j2,j3,jp2)=zw(2,mfft,i1)
4329 end do
4330 end do
4331 j2stf=1
4332 end do
4333 end subroutine unmpiswitch_downcorn
4334
4335
4336 !!****h* BigDFT/unfill_downcorn
4337 !! NAME
4338 !! unfill_downcorn
4339 !!
4340 !! FUNCTION
4341 !! (Based on suitable modifications of S.Goedecker routines)
4342 !! Restore data into output array, calculating in the meanwhile
4343 !! Hartree energy of the potential
4344 !!
4345 !! SYNOPSIS
4346 !! zf: Original distributed density as well as
4347 !! Distributed solution of the poisson equation (inout)
4348 !! zw: FFT work array
4349 !! n3: (twice the) dimension of the last FFTtransform.
4350 !! md1,md3: Dimensions of the undistributed part of the real grid
4351 !! nfft: number of planes
4352 !! scal: Needed to achieve unitarity and correct dimensions
4353 !!
4354 !! WARNING
4355 !! Assuming that high frequencies are in the corners
4356 !! and that n3 is multiple of 4
4357 !!
4358 !! RESTRICTIONS on USAGE
4359 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4360 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4361 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4362 !! This file is distributed under the terms of the
4363 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4364 !!
4365 !! AUTHORS
4366 !! S. Goedecker, L. Genovese
4367 !!
4368 !! CREATION DATE
4369 !! February 2006
4370 !!
4371 !! SOURCE
4372 !!
4373 subroutine unfill_downcorn(md1,md3,lot,nfft,n3,zw,zf, scal)
4374 integer, intent(in) :: md1,md3,lot,nfft,n3
4375 real(real64), dimension(2,lot,n3/2), intent(in) :: zw
4376 real(real64), dimension(md1,md3),intent(inout) :: zf
4377 real(real64), intent(in) :: scal
4378
4379 integer :: i3,i1
4380 real(real64) :: pot1
4381
4382 do i3 = 1,n3/4
4383 do i1 = 1,nfft
4384 pot1 = scal*zw(1,i1,i3)
4385 zf(i1, 2*i3 - 1) = pot1
4386 pot1 = scal*zw(2, i1, i3)
4387 zf(i1, 2*i3) = pot1
4388 end do
4389 end do
4390
4391 end subroutine unfill_downcorn
4392
4393! FFT PART RELATED TO THE KERNEL -----------------------------------------------------------------
4394!!****h* BigDFT/kernelfft
4395!! NAME
4396!! kernelfft
4397!!
4398!! FUNCTION
4399!! (Based on suitable modifications of S.Goedecker routines)
4400!! Calculates the FFT of the distributed kernel
4401!!
4402!! SYNOPSIS
4403!! zf: Real kernel (input)
4404!! zf(i1,i2,i3)
4405!! i1=1,nd1 , i2=1,nd2/nproc , i3=1,nd3
4406!! zr: Distributed Kernel FFT
4407!! zr(2,i1,i2,i3)
4408!! i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
4409!! nproc: number of processors used as returned by MPI_COMM_SIZE
4410!! iproc: [0:nproc-1] number of processor as returned by MPI_COMM_RANK
4411!! n1,n2,n3: logical dimension of the transform. As transform lengths
4412!! most products of the prime factors 2,3,5 are allowed.
4413!! The detailed table with allowed transform lengths can
4414!! be found in subroutine CTRIG
4415!! nd1,nd2,nd3: Dimensions of zr
4416!! comm: MPI communicator to use
4417!!
4418!! RESTRICTIONS on USAGE
4419!! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4420!! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4421!! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4422!! This file is distributed under the terms of the
4423!! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4424!!
4425!! AUTHORS
4426!! S. Goedecker, L. Genovese
4427!!
4428!! CREATION DATE
4429!! February 2006
4430!!
4431!! SOURCE
4432!!
4433 subroutine kernelfft(n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zf,zr,comm)
4434 integer, intent(in) :: n1,n2,n3,nd1,nd2,nd3,nproc,iproc
4435 real(real64), dimension(nd1,n3,nd2/nproc), intent(in) :: zf
4436 real(real64), dimension(2,nd1,nd2,nd3/nproc), intent(out) :: zr
4437 type(mpi_comm), intent(in) :: comm
4438
4439 !Local variables
4440 integer :: ncache,lzt,lot,ma,mb,nfft,ic1,ic2,ic3,Jp2st,J2st
4441 integer :: j2,j3,i1,i3,i,j,inzee
4442#if defined(HAVE_MPI)
4443 integer :: ierr
4444#endif
4445 real(real64) :: twopion
4446 !work arrays for transpositions
4447 real(real64), dimension(:,:,:), allocatable :: zt
4448 !work arrays for MPI
4449 real(real64), dimension(:,:,:,:,:), allocatable :: zmpi1
4450 real(real64), dimension(:,:,:,:), allocatable :: zmpi2
4451 !cache work array
4452 real(real64), dimension(:,:,:), allocatable :: zw
4453 !FFT work arrays
4454 real(real64), dimension(:,:), allocatable :: trig1,trig2,trig3,cosinarr
4455 integer, dimension(:), allocatable :: after1,now1,before1, &
4456 after2,now2,before2,after3,now3,before3
4457
4458 !check input
4459 if (nd1 < n1) stop 'ERROR:nd1'
4460 if (nd2 < n2) stop 'ERROR:nd2'
4461 if (nd3 < n3/2+1) stop 'ERROR:nd3'
4462 if (mod(nd3,nproc) /= 0) stop 'ERROR:nd3'
4463 if (mod(nd2,nproc) /= 0) stop 'ERROR:nd2'
4464
4465 !defining work arrays dimensions
4467 if (ncache <= max(n1,n2,n3/2)*4) ncache=max(n1,n2,n3/2)*4
4468 lzt=n2
4469 if (mod(n2,2) == 0) lzt=lzt+1
4470 if (mod(n2,4) == 0) lzt=lzt+1
4471
4472 !Allocations
4473 safe_allocate(trig1(1:2,1:8192))
4474 safe_allocate(after1(1:7))
4475 safe_allocate(now1(1:7))
4476 safe_allocate(before1(1:7))
4477 safe_allocate(trig2(1:2,1:8192))
4478 safe_allocate(after2(1:7))
4479 safe_allocate(now2(1:7))
4480 safe_allocate(before2(1:7))
4481 safe_allocate(trig3(1:2,1:8192))
4482 safe_allocate(after3(1:7))
4483 safe_allocate(now3(1:7))
4484 safe_allocate(before3(1:7))
4485 safe_allocate(zw(1:2,1:ncache/4,1:2))
4486 safe_allocate(zt(1:2,1:lzt,1:n1))
4487 safe_allocate(zmpi2(1:2,1:n1,1:nd2/nproc,1:nd3))
4488 safe_allocate(cosinarr(1:2,1:n3/2))
4489 if (nproc > 1) then
4490 safe_allocate(zmpi1(1:2,1:n1,1:nd2/nproc,1:nd3/nproc,1:nproc))
4491 end if
4492
4493 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
4494 call ctrig(n3/2,trig3,after3,before3,now3,1,ic3)
4495 call ctrig(n1,trig1,after1,before1,now1,1,ic1)
4496 call ctrig(n2,trig2,after2,before2,now2,1,ic2)
4497
4498 !Calculating array of phases for HalFFT decoding
4499 twopion=8._real64*datan(1.0_8)/real(n3, real64)
4500 do i3 = 1,n3/2
4501 cosinarr(1,i3)=dcos(twopion*(i3-1))
4502 cosinarr(2,i3)=-dsin(twopion*(i3-1))
4503 end do
4504
4505 !transform along z axis
4506
4507 lot=ncache/(2*n3)
4508 if (lot < 1) stop 'kernelfft:enlarge ncache for z'
4509
4510 do j2=1,nd2/nproc
4511 !this condition ensures that we manage only the interesting part for the FFT
4512 ! if (iproc*(nd2/nproc)+j2 <= n2) then
4513 do i1 = 1,n1,lot
4514 ma=i1
4515 mb=min(i1+(lot-1),n1)
4516 nfft = mb-ma+1
4517
4518 !inserting real data into complex array of half length
4519 !input: I1,I3,J2,(Jp2)
4520 call inserthalf(nd1,lot,nfft,n3,zf(i1,1,j2),zw(1,1,1))
4521
4522 !performing FFT
4523 inzee = 1
4524 do i = 1, ic3
4525 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
4526 trig3,after3(i),now3(i),before3(i),1)
4527 inzee = 3 - inzee
4528 end do
4529 !output: I1,i3,J2,(Jp2)
4530
4531 !unpacking FFT in order to restore correct result,
4532 !while exchanging components
4533 !input: I1,i3,J2,(Jp2)
4534 call scramble_unpack(i1,j2,lot,nfft,n1,n3,nd2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr)
4535 !output: I1,J2,i3,(Jp2)
4536 end do
4537! end if
4538 end do
4539
4540 !Interprocessor data transposition
4541 !input: I1,J2,j3,jp3,(Jp2)
4542 if (nproc > 1) then
4543 !communication scheduling
4544#if defined(HAVE_MPI)
4545 call mpi_alltoall(zmpi2(:, 1, 1, 1),2*n1*(nd2/nproc)*(nd3/nproc), &
4546 mpi_double_precision, &
4547 zmpi1(:, 1, 1, 1, 1),2*n1*(nd2/nproc)*(nd3/nproc), &
4548 mpi_double_precision,comm,ierr)
4549 ! output: I1,J2,j3,Jp2,(jp3)
4550#endif
4551 end if
4552
4553
4554 do j3 = 1, nd3/nproc
4555 !this condition ensures that we manage only the interesting part for the FFT
4556 if (iproc*(nd3/nproc)+j3 <= n3/2+1) then
4557 jp2st = 1
4558 j2st = 1
4559
4560 !transform along x axis
4561 lot = ncache/(4*n1)
4562 if (lot < 1) stop 'kernelfft:enlarge ncache for x'
4563
4564 do j = 1, n2, lot
4565 ma = j
4566 mb = min(j+(lot-1),n2)
4567 nfft = mb-ma+1
4568
4569 !reverse ordering
4570 !input: I1,J2,j3,Jp2,(jp3)
4571 if (nproc == 1) then
4572 call mpiswitch(j3,nfft,jp2st,j2st,lot,n1,nd2,nd3,nproc,zmpi2,zw(1,1,1))
4573 else
4574 call mpiswitch(j3,nfft,jp2st,j2st,lot,n1,nd2,nd3,nproc,zmpi1,zw(1,1,1))
4575 end if
4576 !output: J2,Jp2,I1,j3,(jp3)
4577
4578 !performing FFT
4579 !input: I2,I1,j3,(jp3)
4580 inzee = 1
4581 do i = 1, ic1-1
4582 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
4583 trig1,after1(i),now1(i),before1(i),1)
4584 inzee = 3-inzee
4585 end do
4586 !storing the last step into zt
4587 i = ic1
4588 call fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), &
4589 trig1,after1(i),now1(i),before1(i),1)
4590 !output: I2,i1,j3,(jp3)
4591 end do
4592
4593 !transform along y axis
4594 lot = ncache/(4*n2)
4595 if (lot < 1) stop 'kernelfft:enlarge ncache for y'
4596
4597 do j = 1, n1, lot
4598 ma = j
4599 mb = min(j+(lot-1), n1)
4600 nfft = mb - ma + 1
4601
4602 !reverse ordering
4603 !input: I2,i1,j3,(jp3)
4604 call switch(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1))
4605 !output: i1,I2,j3,(jp3)
4606
4607 !performing FFT
4608 !input: i1,I2,j3,(jp3)
4609 inzee = 1
4610 do i = 1, ic2-1
4611 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
4612 trig2,after2(i),now2(i),before2(i),1)
4613 inzee = 3 - inzee
4614 end do
4615 !storing the last step into output array
4616 i = ic2
4617 call fftstp(lot,nfft,n2,nd1,nd2,zw(1,1,inzee),zr(1,j,1,j3), &
4618 trig2,after2(i),now2(i),before2(i),1)
4619
4620 end do
4621 !output: i1,i2,j3,(jp3)
4622 end if
4623 end do
4624
4625 !De-allocations
4626 safe_deallocate_a(trig1)
4627 safe_deallocate_a(after1)
4628 safe_deallocate_a(now1)
4629 safe_deallocate_a(before1)
4630 safe_deallocate_a(trig2)
4631 safe_deallocate_a(after2)
4632 safe_deallocate_a(now2)
4633 safe_deallocate_a(before2)
4634 safe_deallocate_a(trig3)
4635 safe_deallocate_a(after3)
4636 safe_deallocate_a(now3)
4637 safe_deallocate_a(before3)
4638 safe_deallocate_a(zmpi2)
4639 safe_deallocate_a(zw)
4640 safe_deallocate_a(zt)
4641 safe_deallocate_a(cosinarr)
4642 if (nproc > 1) then
4643 safe_deallocate_a(zmpi1)
4644 end if
4645 end subroutine kernelfft
4646
4647 ! ---------------------------------------------------------------
4648
4649 subroutine switch(nfft,n2,lot,n1,lzt,zt,zw)
4650 integer :: lot, n2, lzt, n1, j, nfft, i
4651 real(real64) :: zw(1:2, 1:lot, 1:n2), zt(1:2, 1:lzt, 1:n1)
4652
4653 do j = 1, nfft
4654 do i = 1, n2
4655 zw(1,j,i) = zt(1,i,j)
4656 zw(2,j,i) = zt(2,i,j)
4657 end do
4658 end do
4659 end subroutine switch
4660
4661 ! ---------------------------------------------------------------
4662
4663 subroutine mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw)
4664 integer :: n1, nd2, nproc, nd3, lot, mfft, jp2, jp2st
4665 integer :: j2st, nfft, i1, j3, j2
4666 real(real64) :: zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4667
4668 mfft = 0
4669 do jp2 = jp2st, nproc
4670 do j2 = j2st, nd2/nproc
4671 mfft = mfft + 1
4672 if (mfft > nfft) then
4673 jp2st = jp2
4674 j2st = j2
4675 return
4676 end if
4677 do i1 = 1, n1
4678 zw(1,mfft,i1) = zmpi1(1, i1, j2, j3, jp2)
4679 zw(2,mfft,i1) = zmpi1(2, i1, j2, j3, jp2)
4680 end do
4681 end do
4682 j2st = 1
4683 end do
4684 end subroutine mpiswitch
4685
4686 ! ---------------------------------------------------------------
4687
4688 subroutine inserthalf(nd1,lot,nfft,n3,zf,zw)
4689 integer :: lot, n3, nd1, i3, i1, nfft
4690 real(real64) :: zw(1:2, 1:lot, 1:n3/2), zf(1:nd1, 1:n3)
4691
4692 do i3 = 1, n3/2
4693 do i1 = 1, nfft
4694 zw(1, i1, i3) = zf(i1, 2*i3 - 1)
4695 zw(2, i1, i3) = zf(i1, 2*i3)
4696 end do
4697 end do
4698
4699 end subroutine inserthalf
4700
4701end module sgfft_oct_m
4702
4703!! Local Variables:
4704!! mode: f90
4705!! coding: utf-8
4706!! End:
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:623
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:552
These routines are part of the ISF poisson solver, eventually they will be integrated with the other ...
Definition: sgfft.F90:117
subroutine, public fourier_dim(n, n_next)
Definition: sgfft.F90:149
subroutine mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
Definition: sgfft.F90:4183
subroutine multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
Definition: sgfft.F90:4118
subroutine ctrig(n, trig, after, before, now, isign, ic)
Definition: sgfft.F90:562
subroutine switch(nfft, n2, lot, n1, lzt, zt, zw)
Definition: sgfft.F90:4743
subroutine, public fft(n1, n2, n3, nd1, nd2, nd3, z, isign, inzee)
Definition: sgfft.F90:271
subroutine switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
Definition: sgfft.F90:4156
subroutine halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
Definition: sgfft.F90:4216
subroutine unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
Definition: sgfft.F90:4404
subroutine fftrot(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
Definition: sgfft.F90:2205
subroutine unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
Definition: sgfft.F90:4387
subroutine unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
Definition: sgfft.F90:4353
subroutine fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
Definition: sgfft.F90:720
subroutine, public convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, comm)
Definition: sgfft.F90:3741
subroutine unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
Definition: sgfft.F90:4467
subroutine scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
Definition: sgfft.F90:4275
subroutine mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
Definition: sgfft.F90:4757
subroutine, public kernelfft(n1, n2, n3, nd1, nd2, nd3, nproc, iproc, zf, zr, comm)
Definition: sgfft.F90:4527
subroutine inserthalf(nd1, lot, nfft, n3, zf, zw)
Definition: sgfft.F90:4782
integer function ncache_optimal()
Definition: sgfft.F90:3690