Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
26 use comm_oct_m
27 use debug_oct_m
31 use global_oct_m
32 use grid_oct_m
35 use math_oct_m
36 use mesh_oct_m
38 use mpi_oct_m
40 use space_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50
51 public :: &
53
84 type phase_t
85 private
86 complex(real64), allocatable :: phase(:, :)
89 complex(real64), public, allocatable :: phase_corr(:,:)
92 complex(real64), allocatable :: phase_spiral(:,:)
95 type(accel_mem_t) :: buff_phase
96 type(accel_mem_t) :: buff_phase_spiral
97 type(accel_mem_t), public :: buff_phase_corr
98 integer :: buff_phase_qn_start
99 real(real64), public, pointer :: spin(:,:,:) => null()
100 contains
101 procedure :: init => phase_init_phases
102
103 procedure :: update => phase_update_phases
104
105 procedure :: end => phase_end
106
107 procedure :: set_phase_corr => phase_set_phase_corr
108
109 procedure :: unset_phase_corr => phase_unset_phase_corr
110
111 procedure :: apply_to => phase_apply_batch
112
113 procedure :: apply_to_single => phase_apply_mf
114
115 procedure :: apply_phase_spiral => phase_phase_spiral
116
117 procedure :: is_allocated => phase_is_allocated
118 end type phase_t
119
120contains
121
122 ! ---------------------------------------------------------
124 subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
125 class(phase_t), intent(inout) :: phase
126 type(grid_t), intent(in) :: gr
127 type(distributed_t), intent(in) :: kpt
128 type(kpoints_t), intent(in) :: kpoints
129 type(states_elec_dim_t), intent(in) :: d
130 type(space_t), intent(in) :: space
131
132 integer :: ip, ik, sp
133 integer(int64) :: ip_inner_global
134 real(real64) :: kpoint(space%dim), x_global(space%dim)
135
136 push_sub(phase_init_phases)
137
138 ! no e^ik phase needed for Gamma-point-only periodic calculations
139 ! unless for velocity-gauge for lasers
140 if (accel_is_enabled()) then
141 phase%buff_phase_qn_start = kpt%start
142 end if
143 if(kpoints%gamma_only()) then
144 pop_sub(phase_init_phases)
145 return
146 end if
147
148 safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
149 safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
150 !$omp parallel private(ip)
151 do ik = kpt%start, kpt%end
152 !$omp do
153 do ip = gr%np + 1, gr%np_part
154 phase%phase_corr(ip, ik) = m_one
155 end do
156 !$omp end do nowait
157 end do
158 !$omp end parallel
159
160 if (gr%der%boundaries%spiralBC) then
161 sp = gr%np
162 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
163
164 ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
165 ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
166 ! We will also update phase_corr and possible other similar arrays.
167
168 safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))
169
170 ! loop over boundary points
171 do ip = sp + 1, gr%np_part
172 ! get corresponding inner point
173 ip_inner_global = mesh_periodic_point(gr, space, ip)
174 x_global = mesh_x_global(gr, ip_inner_global)
175 phase%phase_spiral(ip-sp, 1) = &
176 exp(m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
177 phase%phase_spiral(ip-sp, 2) = &
178 exp(-m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
179 end do
180
182 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
183 call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
184 end if
185 end if
186
188 kpoint(1:space%dim) = m_zero
189
190 sp = gr%np
191 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
193 do ik = kpt%start, kpt%end
194 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
195 !$omp parallel private(ip, ip_inner_global, x_global)
196 !$omp do
197 do ip = 1, gr%np_part
198 phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(ip, 1:space%dim) * kpoint(1:space%dim)))
199 end do
200 !$omp end do
201
202 ! loop over boundary points
203 !$omp do
204 do ip = sp + 1, gr%np_part
205 ! get corresponding inner point
206 ip_inner_global = mesh_periodic_point(gr, space, ip)
207
208 ! compute phase correction from global coordinate (opposite sign!)
209 x_global = mesh_x_global(gr, ip_inner_global)
210 phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
211 exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
212 end do
213 !$omp end do nowait
214 !$omp end parallel
215 end do
216
217 if (accel_is_enabled()) then
218 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
219 call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
220 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
221 call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
222 end if
223
224 pop_sub(phase_init_phases)
225 end subroutine phase_init_phases
226
227 ! ----------------------------------------------------------------------------------
229 subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
230 class(phase_t), intent(inout) :: phase
231 class(mesh_t), intent(in) :: mesh
232 type(distributed_t), intent(in) :: kpt
233 type(kpoints_t), intent(in) :: kpoints
234 type(states_elec_dim_t), intent(in) :: d
235 type(space_t), intent(in) :: space
236 real(real64), allocatable, intent(in) :: uniform_vector_potential(:)
237
238 integer :: ik, ip, sp, wgsize
239 integer(int64) :: ip_inner_global
240 real(real64) :: kpoint(space%dim)
241 real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:), x(:,:)
242 type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
243 type(accel_kernel_t), save :: kernel
244 real(real64) :: tmp_sum
245
246 if (.not. allocated(uniform_vector_potential)) return
247
248 push_sub_with_profile(phase_update_phases)
249
250
251 if (.not. allocated(phase%phase)) then
252 safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
253 if (accel_is_enabled()) then
254 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
255 mesh%np_part*kpt%nlocal)
256 end if
257 end if
258
259 if (.not. allocated(phase%phase_corr)) then
260 safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
261 if (accel_is_enabled()) then
262 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
263 (mesh%np_part - mesh%np)*kpt%nlocal)
264 end if
265 end if
266
267 ! TODO: We should not recompute this every time-step. We should store it.
268
269 ! loop over boundary points
270 sp = mesh%np
271 ! skip ghost points
272 if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
273
274 safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))
275
276 ! TODO: Introduce a permanent value and start replacing mesh%x by mesh%x_trans
277 safe_allocate(x(1:space%dim, 1:mesh%np_part))
278 x = transpose(mesh%x)
279
280 !$omp parallel do schedule(static) private(ip_inner_global)
281 do ip = sp + 1, mesh%np_part
282 ! get corresponding inner point
283 ip_inner_global = mesh_periodic_point(mesh, space, ip)
284 ! compute the difference between the global coordinate and the local coordinate
285 x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(ip, 1:space%dim)
286 end do
287
288
289 if (.not. accel_is_enabled()) then
290 !$omp parallel private(ik, ip, kpoint, tmp_sum)
291 do ik = kpt%start, kpt%end
292 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
293 !We add the vector potential
294 kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
295
296 !$omp do schedule(static)
297 do ip = 1, mesh%np_part
298 tmp_sum = sum(x(1:space%dim, ip)*kpoint(1:space%dim))
299 phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
300 end do
301 !$omp end do
302
303 !$omp do schedule(static)
304 do ip = sp + 1, mesh%np_part
305 tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
306 phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
307 end do
308 !$omp end do nowait
309 end do
310 !$omp end parallel
311
312 else !accel_is enabled
313
314 call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
315 safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
316 do ik = kpt%start, kpt%end
317 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
318 kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
319 end do
320 call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)
321
322 ! Note: this should be globally stored
323 call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
324 call accel_write_buffer(buff_x, space%dim, mesh%np_part, x, async=.true.)
325
326 call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
327 call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)
328
329 call accel_kernel_start_call(kernel, 'phase.cl', 'update_phases')
330
331 call accel_set_kernel_arg(kernel, 0, space%dim)
332 call accel_set_kernel_arg(kernel, 1, mesh%np)
333 call accel_set_kernel_arg(kernel, 2, mesh%np_part)
334 call accel_set_kernel_arg(kernel, 3, kpt%start)
335 call accel_set_kernel_arg(kernel, 4, kpt%end)
336 call accel_set_kernel_arg(kernel, 5, sp)
337 call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
338 call accel_set_kernel_arg(kernel, 7, buff_x)
339 call accel_set_kernel_arg(kernel, 8, buff_x_global)
340 call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
341 call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)
342
343 wgsize = accel_kernel_workgroup_size(kernel)
344
345 call accel_kernel_run(kernel, (/pad(mesh%np_part, wgsize), kpt%nlocal/), (/wgsize, 1/))
346
347 call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
348 call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)
349
350 call accel_release_buffer(buff_vec_pot)
351 call accel_release_buffer(buff_x)
352 call accel_release_buffer(buff_x_global)
353 safe_deallocate_a(kpt_vec_pot)
354 end if
355
356 safe_deallocate_a(x_global)
357 safe_deallocate_a(x)
358
359 pop_sub_with_profile(phase_update_phases)
360 end subroutine phase_update_phases
361
362 ! ----------------------------------------------------------------------------------
364 subroutine phase_end(phase)
365 class(phase_t), intent(inout) :: phase
366
367 push_sub(phase_end)
368
369 if (phase%is_allocated() .and. accel_is_enabled()) then
370 call accel_release_buffer(phase%buff_phase)
371 call accel_release_buffer(phase%buff_phase_corr)
372 end if
373
374 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
375 call accel_release_buffer(phase%buff_phase_spiral)
376 end if
377
378 safe_deallocate_a(phase%phase)
379 safe_deallocate_a(phase%phase_corr)
380 safe_deallocate_a(phase%phase_spiral)
381
382 pop_sub(phase_end)
383 end subroutine phase_end
384
385 ! ----------------------------------------------------------------------------------
387 !
388 subroutine phase_set_phase_corr(phase, mesh, psib, async)
389 class(phase_t), intent(in) :: phase
390 class(mesh_t), intent(in) :: mesh
391 type(wfs_elec_t), intent(inout) :: psib
392 logical, optional, intent(in) :: async
393
394
395 logical :: phase_correction
396
397 push_sub(phase_set_phase_corr)
398
399 ! check if we only want a phase correction for the boundary points
400 phase_correction = phase%is_allocated()
401
402 !We apply the phase only to np points, and the phase for the np+1 to np_part points
403 !will be treated as a phase correction in the Hamiltonian
404 if (phase_correction) then
405 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
406 end if
407
408 pop_sub(phase_set_phase_corr)
409 end subroutine phase_set_phase_corr
410
411 ! ----------------------------------------------------------------------------------
413 !
414 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
415 class(phase_t), intent(in) :: phase
416 class(mesh_t), intent(in) :: mesh
417 type(wfs_elec_t), intent(inout) :: psib
418 logical, optional, intent(in) :: async
419
420 logical :: phase_correction
421
422 push_sub(phase_unset_phase_corr)
423
424 ! check if we only want a phase correction for the boundary points
425 phase_correction = phase%is_allocated()
426
427 !We apply the phase only to np points, and the phase for the np+1 to np_part points
428 !will be treated as a phase correction in the Hamiltonian
429 if (phase_correction) then
430 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
431 end if
432
434 end subroutine phase_unset_phase_corr
435
436 ! ---------------------------------------------------------------------------------------
438 !
439 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
440 class(phase_t), intent(in) :: this
441 class(mesh_t), intent(in) :: mesh
442 integer, intent(in) :: np
443 logical, intent(in) :: conjugate
444 type(wfs_elec_t), target, intent(inout) :: psib
445 type(wfs_elec_t), optional, target, intent(in) :: src
446 logical, optional, intent(in) :: async
447
448 integer :: ip, ii, sp
449 type(wfs_elec_t), pointer :: src_
450 complex(real64) :: phase
451 integer(int64) :: wgsize, dim2, dim3
452 type(accel_kernel_t), save :: ker_phase
453
454 push_sub(phase_apply_batch)
455 call profiling_in("PHASE_APPLY_BATCH")
456
457 call profiling_count_operations(6*np*psib%nst_linear)
458
459 assert(np <= mesh%np_part)
460 assert(psib%type() == type_cmplx)
461 assert(psib%ik >= lbound(this%phase, dim=2))
462 assert(psib%ik <= ubound(this%phase, dim=2))
463
464 src_ => psib
465 if (present(src)) src_ => src
466
467 assert(src_%has_phase .eqv. conjugate)
468 assert(src_%ik == psib%ik)
469 assert(src_%type() == type_cmplx)
470
471 ! We want to skip the ghost points for setting the phase
472 sp = min(np, mesh%np)
473 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
474
475 select case (psib%status())
476 case (batch_packed)
477
478 if (conjugate) then
479
480 !$omp parallel private(ii, phase)
481 !$omp do
482 do ip = 1, min(mesh%np, np)
483 phase = conjg(this%phase(ip, psib%ik))
484 !$omp simd
485 do ii = 1, psib%nst_linear
486 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
487 end do
488 end do
489 !$omp end do nowait
490
491 ! Boundary points, if requested
492 !$omp do
493 do ip = sp+1, np
494 phase = conjg(this%phase(ip, psib%ik))
495 !$omp simd
496 do ii = 1, psib%nst_linear
497 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
498 end do
499 end do
500 !$omp end parallel
501
502 else
503
504 !$omp parallel private(ii, phase)
505 !$omp do
506 do ip = 1, min(mesh%np, np)
507 phase = this%phase(ip, psib%ik)
508 !$omp simd
509 do ii = 1, psib%nst_linear
510 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
511 end do
512 end do
513 !$omp end do nowait
514
515 ! Boundary points, if requested
516 !$omp do
517 do ip = sp+1, np
518 phase = this%phase(ip, psib%ik)
519 !$omp simd
520 do ii = 1, psib%nst_linear
521 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
522 end do
523 end do
524 !$omp end parallel
525
526 end if
527
528 case (batch_not_packed)
529
530 if (conjugate) then
531
532 !$omp parallel private(ii, ip)
533 do ii = 1, psib%nst_linear
534 !$omp do simd
535 do ip = 1, min(mesh%np, np)
536 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
537 end do
538 !$omp end do simd nowait
539
540 ! Boundary points, if requested
541 !$omp do simd
542 do ip = sp+1, np
543 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
544 end do
545 !$omp end do simd nowait
546 end do
547 !$omp end parallel
548
549 else
550 !$omp parallel private(ii, ip)
551 do ii = 1, psib%nst_linear
552 !$omp do simd
553 do ip = 1, min(mesh%np, np)
554 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
555 end do
556 !$omp end do simd nowait
557
558 ! Boundary points, if requested
559 !$omp do simd
560 do ip = sp+1, np
561 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
562 end do
563 !$omp end do simd nowait
564
565 end do
566 !$omp end parallel
567
568 end if
569
571 call accel_kernel_start_call(ker_phase, 'phase.cl', 'phase_hamiltonian')
572
573 if (conjugate) then
574 call accel_set_kernel_arg(ker_phase, 0, 1_4)
575 else
576 call accel_set_kernel_arg(ker_phase, 0, 0_4)
577 end if
578
579 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
580 call accel_set_kernel_arg(ker_phase, 2, np)
581 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
582 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
583 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
584 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
585 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
586
587 wgsize = accel_kernel_workgroup_size(ker_phase)/psib%pack_size(1)
588
589 dim3 = np/(accel_max_size_per_dim(2)*wgsize) + 1
590 dim2 = min(accel_max_size_per_dim(2)*wgsize, pad(np, wgsize))
591
592 call accel_kernel_run(ker_phase, (/psib%pack_size(1), dim2, dim3/), (/psib%pack_size(1), wgsize, 1_int64/))
593
594 if(.not. optional_default(async, .false.)) call accel_finish()
595 end select
596
597 psib%has_phase = .not. conjugate
598
599 call profiling_out("PHASE_APPLY_BATCH")
600 pop_sub(phase_apply_batch)
601 end subroutine phase_apply_batch
602
608 !
609 subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
610 class(phase_t), intent(in) :: this
611 complex(real64), intent(inout) :: psi(:, :)
612 integer, intent(in) :: np
613 integer, intent(in) :: dim
614 integer, intent(in) :: ik
615 logical, intent(in) :: conjugate
616
617 integer :: idim, ip
618
619 push_sub(phase_apply_mf)
620
621 assert(ik >= lbound(this%phase, dim=2))
622 assert(ik <= ubound(this%phase, dim=2))
623
624 call profiling_in("PHASE_APPLY_SINGLE")
625
626 if (conjugate) then
627 ! Apply the phase that contains both the k-point and vector-potential terms.
628 do idim = 1, dim
629 !$omp parallel do
630 do ip = 1, np
631 psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
632 end do
633 !$omp end parallel do
634 end do
635 else
636 ! Apply the conjugate of (i.e. remove) the phase that contains both the k-point and vector-potential terms.
637 do idim = 1, dim
638 !$omp parallel do
639 do ip = 1, np
640 psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
641 end do
642 !$omp end parallel do
643 end do
644 end if
645
646 call profiling_out("PHASE_APPLY_SINGLE")
647
648 pop_sub(phase_apply_mf)
649 end subroutine phase_apply_mf
650
651
652 ! ---------------------------------------------------------------------------------------
654 !
655 subroutine phase_phase_spiral(this, der, psib)
656 class(phase_t), intent(in) :: this
657 type(derivatives_t), intent(in) :: der
658 class(wfs_elec_t), intent(inout) :: psib
659
660 integer :: ip, ii, sp
661 integer, allocatable :: spin_label(:)
662 type(accel_mem_t) :: spin_label_buffer
663 integer(int64) :: wgsize
664
665 push_sub(phase_phase_spiral)
666 call profiling_in("PBC_PHASE_SPIRAL")
667
668 call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)
669
670 assert(der%boundaries%spiral)
671 assert(psib%type() == type_cmplx)
672
673 sp = der%mesh%np
674 if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost
675
676
677 select case (psib%status())
678 case (batch_packed)
679
680 !$omp parallel do private(ip, ii)
681 do ip = sp + 1, der%mesh%np_part
682 do ii = 1, psib%nst_linear, 2
683 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
684 psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
685 else
686 psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
687 end if
688 end do
689 end do
690 !$omp end parallel do
691
692 case (batch_not_packed)
693
694 !$omp parallel private(ii, ip)
695 do ii = 1, psib%nst_linear, 2
696 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
697 !$omp do
698 do ip = sp + 1, der%mesh%np_part
699 psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
700 end do
701 !$omp end do nowait
702 else
703 !$omp do
704 do ip = sp + 1, der%mesh%np_part
705 psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
706 end do
707 !$omp end do nowait
708 end if
709 end do
710 !$omp end parallel
711
713
714 assert(accel_is_enabled())
715
716 ! generate array of offsets for access of psib and phase_spiral:
717 ! TODO: Move this to the routine where spin(:,:,:) is generated
718 ! and also move the buffer to the GPU at this point to
719 ! avoid unecessary latency here!
720
721 safe_allocate(spin_label(1:psib%nst_linear))
722 spin_label = 0
723 do ii = 1, psib%nst_linear, 2
724 if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
725 end do
726
727 call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
728 call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)
729
730 call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cl', 'phase_spiral_apply')
731
734 call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
735 call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
736 call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
737 call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
738 call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)
739
740 wgsize = accel_kernel_workgroup_size(kernel_phase_spiral)/psib%pack_size(1)
741
743 (/psib%pack_size(1)/2, pad(der%mesh%np_part - sp, 2*wgsize)/), &
744 (/psib%pack_size(1)/2, 2*wgsize/))
745
746 call accel_finish()
747
748 call accel_release_buffer(spin_label_buffer)
749
750 safe_deallocate_a(spin_label)
751
752 end select
753
754 call profiling_out("PBC_PHASE_SPIRAL")
755 pop_sub(phase_phase_spiral)
756 end subroutine phase_phase_spiral
757
758
759 ! ---------------------------------------------------------------------------------------
760 logical pure function phase_is_allocated(this)
761 class(phase_t), intent(in) :: this
762
763 phase_is_allocated = allocated(this%phase)
764 end function phase_is_allocated
765
766end module phase_oct_m
767
768!! Local Variables:
769!! mode: f90
770!! coding: utf-8
771!! End:
double exp(double __x) __attribute__((__nothrow__
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1377
subroutine, public accel_finish()
Definition: accel.F90:986
integer, parameter, public accel_mem_read_write
Definition: accel.F90:196
integer pure function, public accel_max_size_per_dim(dim)
Definition: accel.F90:1412
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:296
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:920
pure logical function, public accel_is_enabled()
Definition: accel.F90:419
integer function, public accel_kernel_workgroup_size(kernel)
Definition: accel.F90:1106
integer, parameter, public accel_mem_read_only
Definition: accel.F90:196
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:191
complex(real64), parameter, public m_zi
Definition: global.F90:205
real(real64), parameter, public m_one
Definition: global.F90:192
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:723
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Given a global point index, this function returns the coordinates of the point.
Definition: mesh.F90:816
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:751
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:510
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:220
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:460
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:325
logical pure function phase_is_allocated(this)
Definition: phase.F90:856
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:535
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:484
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:705
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:625
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_cmplx
Definition: types.F90:136
type(type_t), public type_integer
Definition: types.F90:137
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Describes mesh distribution to nodes.
Definition: mesh.F90:187
A container for the phase.
Definition: phase.F90:179
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)