phase.F90
!! Copyright (C) 2009 X. Andrade
!! Copyright (C) 2024 N. Tancogne-Dejean
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
!! 02110-1301, USA.
!!

#include "global.h"

module phase_oct_m
  use accel_oct_m
  use batch_oct_m
  use boundaries_oct_m
  use comm_oct_m
  use debug_oct_m
  use derivatives_oct_m
  use distributed_oct_m
  use global_oct_m
  use grid_oct_m
  use kpoints_oct_m
  use math_oct_m
  use mesh_oct_m
  use mpi_oct_m
  use profiling_oct_m
  use space_oct_m
  use states_elec_dim_oct_m
  use types_oct_m
  use wfs_elec_oct_m

  implicit none

  private

  public ::      &
    phase_t
  !> A container for the phase.
  !!
  !! The phase is defined as exp(-i k.r), where k is the k-point (possibly
  !! shifted by a uniform vector potential) and r the coordinate of a grid point.
  type phase_t
    private
    complex(real64), allocatable :: phase(:, :)         !< Bloch phase for each grid point and local k-point
    complex(real64), public, allocatable :: phase_corr(:,:)   !< phase correction for the boundary points
    complex(real64), allocatable :: phase_spiral(:,:)   !< additional phases for spiral boundary conditions
    type(accel_mem_t) :: buff_phase                     !< device buffer holding phase
    type(accel_mem_t) :: buff_phase_spiral              !< device buffer holding phase_spiral
    type(accel_mem_t), public :: buff_phase_corr        !< device buffer holding phase_corr
    integer :: buff_phase_qn_start                      !< first local k-point index stored in the device buffers
    real(real64), public, pointer :: spin(:,:,:) => null() !< spin components for each state and k-point, used for the spiral phase
  contains
    procedure :: init => phase_init_phases                    !< initialize the phase arrays and copy the data to the GPU
    procedure :: update => phase_update_phases                !< update the phases
    procedure :: end => phase_end                             !< release the memory of the phase object
    procedure :: set_phase_corr => phase_set_phase_corr       !< set the phase correction (if necessary)
    procedure :: unset_phase_corr => phase_unset_phase_corr   !< unset the phase correction (if necessary)
    procedure :: apply_to => phase_apply_batch                !< apply (remove) the phase to a batch of wave functions
    procedure :: apply_to_single => phase_apply_mf            !< apply (or remove) the phase to a single wave function
    procedure :: apply_phase_spiral => phase_phase_spiral     !< apply the spiral phase
    procedure :: is_allocated => phase_is_allocated           !< whether the phase arrays are allocated
    procedure :: copy_and_set_phase => phase_copy_and_set_phase !< copy a batch and apply the Bloch phase to it
  end type phase_t

contains

  ! ---------------------------------------------------------
  !> Initialize the phase arrays and copy the data to the GPU.
  subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
    class(phase_t),          intent(inout) :: phase
    class(mesh_t),           intent(in)    :: gr
    type(distributed_t),     intent(in)    :: kpt
    type(kpoints_t),         intent(in)    :: kpoints
    type(states_elec_dim_t), intent(in)    :: d
    type(space_t),           intent(in)    :: space

    integer :: ip, ik, sp
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim), x_global(space%dim)

    PUSH_SUB(phase_init_phases)

    ! No e^(ik.r) phase is needed for Gamma-point-only periodic calculations,
    ! except when lasers are treated in the velocity gauge.
    if (accel_is_enabled()) then
      phase%buff_phase_qn_start = kpt%start
    end if
    if (kpoints%gamma_only()) then
      POP_SUB(phase_init_phases)
      return
    end if

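    ! For each local k-point ik we store:
    !  - phase(ip, ik)      = exp(-i k . x(ip))  for all points, including boundary points;
    !  - phase_corr(ip, ik) = exp(-i k . (x(ip) - x_global(ip)))  for boundary points only,
    !    where x_global(ip) is the periodic image of the boundary point inside the
    !    simulation box. Since x(ip) - x_global(ip) is a lattice vector, phase_corr is
    !    the Bloch phase picked up when wrapping around the periodic boundary.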
    SAFE_ALLOCATE(phase%phase(1:gr%np_part, kpt%start:kpt%end))
    SAFE_ALLOCATE(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
    !$omp parallel private(ip)
    do ik = kpt%start, kpt%end
      !$omp do
      do ip = gr%np + 1, gr%np_part
        phase%phase_corr(ip, ik) = M_ONE
      end do
      !$omp end do nowait
    end do
    !$omp end parallel

    ! Only when gr is of type grid_t can we access gr%der.
    select type (gr)
    class is (grid_t)
      if (gr%der%boundaries%spiralBC) then
        sp = gr%np
        if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost

        ! We allocate the array from 1:np_part-sp, as this is less error prone when passing
        ! the array to other routines, and in particular when creating a C-style pointer from
        ! phase_spiral(1,1). We will also update phase_corr and possibly other similar arrays.

        SAFE_ALLOCATE(phase%phase_spiral(1:gr%np_part-sp, 1:2))

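        ! phase_spiral(ip-sp, 1) = exp(+i q.(x(ip) - x_global(ip))) and
        ! phase_spiral(ip-sp, 2) = exp(-i q.(x(ip) - x_global(ip))), where q is the
        ! spiral wave vector and x_global the periodic image of the boundary point;
        ! column 1 (2) is applied to the down (up) spinor components in phase_phase_spiral.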
        ! loop over boundary points
        do ip = sp + 1, gr%np_part
          ! get the corresponding inner point
          ip_inner_global = mesh_periodic_point(gr, space, ip)
          x_global = mesh_x_global(gr, ip_inner_global)
          phase%phase_spiral(ip-sp, 1) = &
            exp(M_zI * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
          phase%phase_spiral(ip-sp, 2) = &
            exp(-M_zI * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
        end do

        if (accel_is_enabled()) then
          call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
          call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
        end if
      end if
    class default
      ! do nothing
    end select

    kpoint(1:space%dim) = M_ZERO

    sp = gr%np
    if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost

    !$omp parallel private(ip, ip_inner_global, x_global, kpoint)
    do ik = kpt%start, kpt%end
      kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
      !$omp do
      do ip = 1, gr%np_part
        phase%phase(ip, ik) = exp(-M_zI * sum(gr%x(1:space%dim, ip) * kpoint(1:space%dim)))
      end do
      !$omp end do

      ! loop over boundary points
      !$omp do
      do ip = sp + 1, gr%np_part
        ! get the corresponding inner point
        ip_inner_global = mesh_periodic_point(gr, space, ip)

        ! compute the phase correction from the global coordinate (note the opposite sign)
        x_global = mesh_x_global(gr, ip_inner_global)
        phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
          exp(M_zI * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
      end do
      !$omp end do nowait
    end do
    !$omp end parallel


    if (accel_is_enabled()) then
      call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
      call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
    end if

    POP_SUB(phase_init_phases)
  end subroutine phase_init_phases

  ! ----------------------------------------------------------------------------------
  !> Update the phases, e.g. when a uniform vector potential shifts the effective k-points.
  subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
    class(phase_t),            intent(inout) :: phase
    class(mesh_t),             intent(in)    :: mesh
    type(distributed_t),       intent(in)    :: kpt
    type(kpoints_t),           intent(in)    :: kpoints
    type(states_elec_dim_t),   intent(in)    :: d
    type(space_t),             intent(in)    :: space
    real(real64), allocatable, intent(in)    :: uniform_vector_potential(:)

    integer :: ik, ip, sp, wgsize
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim)
    real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:)
    type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
    type(accel_kernel_t), save :: kernel
    real(real64) :: tmp_sum

    if (.not. allocated(uniform_vector_potential)) return

    PUSH_SUB_WITH_PROFILE(phase_update_phases)

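    ! With a uniform vector potential A(t), the effective k-point is shifted to
    ! k + A(t), so the phase exp(-i (k + A(t)) . r) must be recomputed whenever
    ! A(t) changes.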
    if (.not. allocated(phase%phase)) then
      SAFE_ALLOCATE(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
          mesh%np_part*kpt%nlocal)
      end if
    end if

    if (.not. allocated(phase%phase_corr)) then
      SAFE_ALLOCATE(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
          (mesh%np_part - mesh%np)*kpt%nlocal)
      end if
    end if

    ! TODO: We should not recompute this at every time step; it should be stored.

    ! loop over boundary points
    sp = mesh%np
    ! skip ghost points
    if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
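    ! In domain-parallel runs the ghost points are stored between the local points
    ! (1:np) and the boundary points (up to np_part), so they are skipped here.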

    SAFE_ALLOCATE(x_global(1:space%dim, (sp + 1):mesh%np_part))

    !$omp parallel do schedule(static) private(ip_inner_global)
    do ip = sp + 1, mesh%np_part
      ! get the corresponding inner point
      ip_inner_global = mesh_periodic_point(mesh, space, ip)
      ! compute the difference between the global (inner) coordinate and the local coordinate
      x_global(:, ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(1:space%dim, ip)
    end do

    if (.not. accel_is_enabled()) then
      !$omp parallel private(ik, ip, kpoint, tmp_sum)
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        ! add the vector potential to the k-point
        kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)

        !$omp do schedule(static)
        do ip = 1, mesh%np_part
          tmp_sum = sum(mesh%x(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
        end do
        !$omp end do

        !$omp do schedule(static)
        do ip = sp + 1, mesh%np_part
          tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
        end do
        !$omp end do nowait
      end do
      !$omp end parallel

    else ! accel is enabled

      call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
      SAFE_ALLOCATE(kpt_vec_pot(1:space%dim, kpt%start:kpt%end))
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
      end do
      call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)

      ! Note: this should be globally stored.
      call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
      call accel_write_buffer(buff_x, space%dim, mesh%np_part, mesh%x, async=.true.)

      call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
      call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim, (sp + 1):mesh%np_part), async=.true.)

      call accel_kernel_start_call(kernel, 'phase.cu', 'update_phases')

      call accel_set_kernel_arg(kernel, 0, space%dim)
      call accel_set_kernel_arg(kernel, 1, mesh%np)
      call accel_set_kernel_arg(kernel, 2, mesh%np_part)
      call accel_set_kernel_arg(kernel, 3, kpt%start)
      call accel_set_kernel_arg(kernel, 4, kpt%end)
      call accel_set_kernel_arg(kernel, 5, sp)
      call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
      call accel_set_kernel_arg(kernel, 7, buff_x)
      call accel_set_kernel_arg(kernel, 8, buff_x_global)
      call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
      call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)

      wgsize = accel_kernel_workgroup_size(kernel)

      call accel_kernel_run(kernel, (/pad(mesh%np_part, wgsize), kpt%nlocal/), (/wgsize, 1/))

      call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
      call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)

      call accel_release_buffer(buff_vec_pot)
      call accel_release_buffer(buff_x)
      call accel_release_buffer(buff_x_global)
      SAFE_DEALLOCATE_A(kpt_vec_pot)
    end if

    SAFE_DEALLOCATE_A(x_global)

    POP_SUB_WITH_PROFILE(phase_update_phases)
  end subroutine phase_update_phases

  ! ----------------------------------------------------------------------------------
  !> Releases the memory of the phase object.
  subroutine phase_end(phase)
    class(phase_t), intent(inout) :: phase

    PUSH_SUB(phase_end)

    if (phase%is_allocated() .and. accel_is_enabled()) then
      call accel_release_buffer(phase%buff_phase)
      call accel_release_buffer(phase%buff_phase_corr)
    end if

    if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
      call accel_release_buffer(phase%buff_phase_spiral)
    end if

    SAFE_DEALLOCATE_A(phase%phase)
    SAFE_DEALLOCATE_A(phase%phase_corr)
    SAFE_DEALLOCATE_A(phase%phase_spiral)

    POP_SUB(phase_end)
  end subroutine phase_end

  ! ----------------------------------------------------------------------------------
  !> Set the phase correction (if necessary).
  subroutine phase_set_phase_corr(phase, mesh, psib, async)
    class(phase_t),    intent(in)    :: phase
    class(mesh_t),     intent(in)    :: mesh
    type(wfs_elec_t),  intent(inout) :: psib
    logical, optional, intent(in)    :: async

    logical :: phase_correction

    PUSH_SUB(phase_set_phase_corr)

    ! check whether we only want a phase correction for the boundary points
    phase_correction = phase%is_allocated()

    ! We apply the phase only to the first np points; the phase for the points
    ! np+1 to np_part is treated as a phase correction in the Hamiltonian.
    if (phase_correction) then
      call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
    end if

    POP_SUB(phase_set_phase_corr)
  end subroutine phase_set_phase_corr

  ! ----------------------------------------------------------------------------------
  !> Unset the phase correction (if necessary).
  subroutine phase_unset_phase_corr(phase, mesh, psib, async)
    class(phase_t),    intent(in)    :: phase
    class(mesh_t),     intent(in)    :: mesh
    type(wfs_elec_t),  intent(inout) :: psib
    logical, optional, intent(in)    :: async

    logical :: phase_correction

    PUSH_SUB(phase_unset_phase_corr)

    ! check whether we only want a phase correction for the boundary points
    phase_correction = phase%is_allocated()

    ! We remove the phase only from the first np points; the phase for the points
    ! np+1 to np_part is treated as a phase correction in the Hamiltonian.
    if (phase_correction) then
      call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
    end if

    POP_SUB(phase_unset_phase_corr)
  end subroutine phase_unset_phase_corr

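  ! Typical usage of apply_to (a sketch; the actual calls are made where the
  ! Hamiltonian is applied):
  !   call phase%apply_to(mesh, mesh%np, conjugate=.false., psib=psib)  ! multiply by exp(-i k.r)
  !   ! ... apply the Hamiltonian to psib ...
  !   call phase%apply_to(mesh, mesh%np, conjugate=.true.,  psib=psib)  ! remove the phase again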
  ! ---------------------------------------------------------------------------------------
  !> Apply (remove) the phase to the wave functions before (after) applying the Hamiltonian.
  subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
    class(phase_t),           intent(in)    :: this
    class(mesh_t),            intent(in)    :: mesh
    integer,                  intent(in)    :: np
    logical,                  intent(in)    :: conjugate
    type(wfs_elec_t), target, intent(inout) :: psib
    type(wfs_elec_t), optional, target, intent(in) :: src
    logical, optional,        intent(in)    :: async

    integer :: ip, ii, sp
    type(wfs_elec_t), pointer :: src_
    complex(real64) :: phase
    integer(int64) :: wgsize, dim2, dim3
    type(accel_kernel_t), save :: ker_phase

    PUSH_SUB(phase_apply_batch)
    call profiling_in("PHASE_APPLY_BATCH")

    call profiling_count_operations(6*np*psib%nst_linear)

    ASSERT(np <= mesh%np_part)
    ASSERT(psib%type() == type_cmplx)
    ASSERT(psib%ik >= lbound(this%phase, dim=2))
    ASSERT(psib%ik <= ubound(this%phase, dim=2))

    src_ => psib
    if (present(src)) src_ => src

    ASSERT(src_%has_phase .eqv. conjugate)
    ASSERT(src_%ik == psib%ik)
    ASSERT(src_%type() == type_cmplx)
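    ! A batch that already carries the phase can only have it removed (conjugate = .true.),
    ! and a batch without the phase can only have it applied (conjugate = .false.).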

    ! We want to skip the ghost points when setting the phase.
    sp = min(np, mesh%np)
    if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost

    select case (psib%status())
    case (batch_packed)

      if (conjugate) then

        !$omp parallel private(ii, phase)
        !$omp do
        do ip = 1, min(mesh%np, np)
          phase = conjg(this%phase(ip, psib%ik))
          !$omp simd
          do ii = 1, psib%nst_linear
            psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
          end do
        end do
        !$omp end do nowait

        ! boundary points, if requested
        !$omp do
        do ip = sp+1, np
          phase = conjg(this%phase(ip, psib%ik))
          !$omp simd
          do ii = 1, psib%nst_linear
            psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
          end do
        end do
        !$omp end parallel

      else

        !$omp parallel private(ii, phase)
        !$omp do
        do ip = 1, min(mesh%np, np)
          phase = this%phase(ip, psib%ik)
          !$omp simd
          do ii = 1, psib%nst_linear
            psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
          end do
        end do
        !$omp end do nowait

        ! boundary points, if requested
        !$omp do
        do ip = sp+1, np
          phase = this%phase(ip, psib%ik)
          !$omp simd
          do ii = 1, psib%nst_linear
            psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
          end do
        end do
        !$omp end parallel

      end if

    case (batch_not_packed)

      if (conjugate) then

        !$omp parallel private(ii, ip)
        do ii = 1, psib%nst_linear
          !$omp do simd
          do ip = 1, min(mesh%np, np)
            psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
          end do
          !$omp end do simd nowait

          ! boundary points, if requested
          !$omp do simd
          do ip = sp+1, np
            psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
          end do
          !$omp end do simd nowait
        end do
        !$omp end parallel

      else

        !$omp parallel private(ii, ip)
        do ii = 1, psib%nst_linear
          !$omp do simd
          do ip = 1, min(mesh%np, np)
            psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
          end do
          !$omp end do simd nowait

          ! boundary points, if requested
          !$omp do simd
          do ip = sp+1, np
            psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
          end do
          !$omp end do simd nowait

        end do
        !$omp end parallel

      end if

    case (batch_device_packed)

      call accel_kernel_start_call(ker_phase, 'phase.cu', 'phase_hamiltonian')

      if (conjugate) then
        call accel_set_kernel_arg(ker_phase, 0, 1_4)
      else
        call accel_set_kernel_arg(ker_phase, 0, 0_4)
      end if

      call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
      call accel_set_kernel_arg(ker_phase, 2, np)
      call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
      call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
      call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
      call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
      call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))

      wgsize = accel_kernel_workgroup_size(ker_phase)/psib%pack_size(1)

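      ! If np exceeds the maximum grid size the device allows in dimension 2, the
      ! work is split over a third grid dimension.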
      dim3 = np/(accel_max_size_per_dim(2)*wgsize) + 1
      dim2 = min(accel_max_size_per_dim(2)*wgsize, pad(np, wgsize))

      call accel_kernel_run(ker_phase, (/psib%pack_size(1), dim2, dim3/), (/psib%pack_size(1), wgsize, 1_int64/))

      if (.not. optional_default(async, .false.)) call accel_finish()
    end select

    psib%has_phase = .not. conjugate

    call profiling_out("PHASE_APPLY_BATCH")
    POP_SUB(phase_apply_batch)
  end subroutine phase_apply_batch

  ! ---------------------------------------------------------------------------------------
  !> Apply (or remove) the phase to a wave function psi.
  subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
    class(phase_t),  intent(in)    :: this
    complex(real64), intent(inout) :: psi(:, :)
    integer,         intent(in)    :: np
    integer,         intent(in)    :: dim
    integer,         intent(in)    :: ik
    logical,         intent(in)    :: conjugate

    integer :: idim, ip

    PUSH_SUB(phase_apply_mf)

    ASSERT(ik >= lbound(this%phase, dim=2))
    ASSERT(ik <= ubound(this%phase, dim=2))

    call profiling_in("PHASE_APPLY_SINGLE")

    if (conjugate) then
      ! Remove the phase that contains both the k-point and vector-potential terms.
      do idim = 1, dim
        !$omp parallel do
        do ip = 1, np
          psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
        end do
        !$omp end parallel do
      end do
    else
      ! Apply the phase that contains both the k-point and vector-potential terms.
      do idim = 1, dim
        !$omp parallel do
        do ip = 1, np
          psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
        end do
        !$omp end parallel do
      end do
    end if

    call profiling_out("PHASE_APPLY_SINGLE")

    POP_SUB(phase_apply_mf)
  end subroutine phase_apply_mf

  ! ---------------------------------------------------------------------------------------
  !> Apply the spiral phase.
  subroutine phase_phase_spiral(this, der, psib)
    class(phase_t),      intent(in)    :: this
    type(derivatives_t), intent(in)    :: der
    class(wfs_elec_t),   intent(inout) :: psib

    integer :: ip, ii, sp
    integer, allocatable :: spin_label(:)
    type(accel_mem_t) :: spin_label_buffer
    integer(int64) :: wgsize

    PUSH_SUB(phase_phase_spiral)
    call profiling_in("PBC_PHASE_SPIRAL")

    call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)

    ASSERT(der%boundaries%spiral)
    ASSERT(psib%type() == type_cmplx)

    sp = der%mesh%np
    if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost

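    ! Generalized Bloch theorem: the two spinor components acquire opposite spiral
    ! phases. For states with positive spin projection, the second (down) component
    ! is multiplied by phase_spiral(:, 1) = exp(+i q.dx); otherwise the first (up)
    ! component is multiplied by phase_spiral(:, 2) = exp(-i q.dx).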

    select case (psib%status())
    case (batch_packed)

      !$omp parallel do private(ip, ii)
      do ip = sp + 1, der%mesh%np_part
        do ii = 1, psib%nst_linear, 2
          if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) then
            psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
          else
            psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
          end if
        end do
      end do
      !$omp end parallel do

    case (batch_not_packed)

      !$omp parallel private(ii, ip)
      do ii = 1, psib%nst_linear, 2
        if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) then
          !$omp do
          do ip = sp + 1, der%mesh%np_part
            psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
          end do
          !$omp end do nowait
        else
          !$omp do
          do ip = sp + 1, der%mesh%np_part
            psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
          end do
          !$omp end do nowait
        end if
      end do
      !$omp end parallel

    case (batch_device_packed)

      ASSERT(accel_is_enabled())

      ! Generate the array of offsets for the access of psib and phase_spiral.
      ! TODO: Move this to the routine where spin(:,:,:) is generated,
      ! and also move the buffer to the GPU at that point, to
      ! avoid unnecessary latency here!

      SAFE_ALLOCATE(spin_label(1:psib%nst_linear))
      spin_label = 0
      do ii = 1, psib%nst_linear, 2
        if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii) = 1
      end do

      call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
      call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)

      call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cu', 'phase_spiral_apply')

      call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
      call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
      call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
      call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
      call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)

      wgsize = accel_kernel_workgroup_size(kernel_phase_spiral)/psib%pack_size(1)

      call accel_kernel_run(kernel_phase_spiral, &
        (/psib%pack_size(1)/2, pad(der%mesh%np_part - sp, 2*wgsize)/), &
        (/psib%pack_size(1)/2, 2*wgsize/))

      call accel_finish()

      call accel_release_buffer(spin_label_buffer)

      SAFE_DEALLOCATE_A(spin_label)

    end select

    call profiling_out("PBC_PHASE_SPIRAL")
    POP_SUB(phase_phase_spiral)
  end subroutine phase_phase_spiral

  ! ---------------------------------------------------------------------------------------
  !> Returns whether the phase arrays are allocated.
  logical pure function phase_is_allocated(this)
    class(phase_t), intent(in) :: this

    phase_is_allocated = allocated(this%phase)
  end function phase_is_allocated

  !----------------------------------------------------------
  !> Copy a batch to another batch and apply the Bloch phase to it.
  subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
    class(phase_t),      intent(in)  :: phase
    type(grid_t),        intent(in)  :: gr
    type(distributed_t), intent(in)  :: kpt
    type(wfs_elec_t),    intent(in)  :: psib
    type(wfs_elec_t),    intent(out) :: psib_with_phase

    integer :: k_offset, n_boundary_points

    PUSH_SUB(phase_copy_and_set_phase)

    call psib%copy_to(psib_with_phase)
    if (phase%is_allocated()) then
      call phase%apply_to(gr, gr%np, conjugate = .false., psib = psib_with_phase, src = psib, async=.true.)
      ! Apply the phase correction while setting the boundary, so that the memory
      ! needs to be accessed only once.
      k_offset = psib%ik - kpt%start
      n_boundary_points = int(gr%np_part - gr%np)
      call boundaries_set(gr%der%boundaries, gr, psib_with_phase, phase_correction = phase%phase_corr(:, psib%ik), &
        buff_phase_corr = phase%buff_phase_corr, offset = k_offset*n_boundary_points, async=.true.)
    else
      call psib%copy_data_to(gr%np, psib_with_phase)
      call boundaries_set(gr%der%boundaries, gr, psib_with_phase)
    end if

    call psib_with_phase%do_pack(copy = .true.)

    POP_SUB(phase_copy_and_set_phase)
  end subroutine phase_copy_and_set_phase

end module phase_oct_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End: