Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
26 use comm_oct_m
27 use debug_oct_m
31 use global_oct_m
32 use grid_oct_m
35 use math_oct_m
36 use mesh_oct_m
38 use mpi_oct_m
40 use space_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50
51 public :: &
53
84 type phase_t
85 private
86 complex(real64), allocatable :: phase(:, :)
89 complex(real64), public, allocatable :: phase_corr(:,:)
92 complex(real64), allocatable :: phase_spiral(:,:)
95 type(accel_mem_t) :: buff_phase
96 type(accel_mem_t) :: buff_phase_spiral
97 type(accel_mem_t), public :: buff_phase_corr
98 integer :: buff_phase_qn_start
99 real(real64), public, pointer :: spin(:,:,:) => null()
100 contains
101 procedure :: init => phase_init_phases
102
103 procedure :: update => phase_update_phases
104
105 procedure :: end => phase_end
106
107 procedure :: set_phase_corr => phase_set_phase_corr
108
109 procedure :: unset_phase_corr => phase_unset_phase_corr
110
111 procedure :: apply_to => phase_apply_batch
112
113 procedure :: apply_to_single => phase_apply_mf
114
115 procedure :: apply_phase_spiral => phase_phase_spiral
116
117 procedure :: is_allocated => phase_is_allocated
118 end type phase_t
119
120contains
121
122 ! ---------------------------------------------------------
124 subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
125 class(phase_t), intent(inout) :: phase
126 type(grid_t), intent(in) :: gr
127 type(distributed_t), intent(in) :: kpt
128 type(kpoints_t), intent(in) :: kpoints
129 type(states_elec_dim_t), intent(in) :: d
130 type(space_t), intent(in) :: space
131
132 integer :: ip, ik, sp
133 integer(int64) :: ip_inner_global
134 real(real64) :: kpoint(space%dim), x_global(space%dim)
135
136 push_sub(phase_init_phases)
137
138 ! no e^ik phase needed for Gamma-point-only periodic calculations
139 ! unless for velocity-gauge for lasers
140 if (accel_is_enabled()) then
141 phase%buff_phase_qn_start = kpt%start
142 end if
143 if(kpoints%gamma_only()) then
144 pop_sub(phase_init_phases)
145 return
146 end if
147
148 safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
149 safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
150 !$omp parallel private(ip)
151 do ik = kpt%start, kpt%end
152 !$omp do
153 do ip = gr%np + 1, gr%np_part
154 phase%phase_corr(ip, ik) = m_one
155 end do
156 !$omp end do nowait
157 end do
158 !$omp end parallel
159
160 if (gr%der%boundaries%spiralBC) then
161 sp = gr%np
162 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
163
164 ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
165 ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
166 ! We will also update phase_corr and possible other similar arrays.
167
168 safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))
169
170 ! loop over boundary points
171 do ip = sp + 1, gr%np_part
172 ! get corresponding inner point
173 ip_inner_global = mesh_periodic_point(gr, space, ip)
174 x_global = mesh_x_global(gr, ip_inner_global)
175 phase%phase_spiral(ip-sp, 1) = &
176 exp(m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
177 phase%phase_spiral(ip-sp, 2) = &
178 exp(-m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
179 end do
180
182 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
183 call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
184 end if
185 end if
186
188 kpoint(1:space%dim) = m_zero
189
190 sp = gr%np
191 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
193 do ik = kpt%start, kpt%end
194 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
195 !$omp parallel private(ip, ip_inner_global, x_global)
196 !$omp do
197 do ip = 1, gr%np_part
198 phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(ip, 1:space%dim) * kpoint(1:space%dim)))
199 end do
200 !$omp end do
201
202 ! loop over boundary points
203 !$omp do
204 do ip = sp + 1, gr%np_part
205 ! get corresponding inner point
206 ip_inner_global = mesh_periodic_point(gr, space, ip)
207
208 ! compute phase correction from global coordinate (opposite sign!)
209 x_global = mesh_x_global(gr, ip_inner_global)
210 phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
211 exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
212 end do
213 !$omp end do nowait
214 !$omp end parallel
215 end do
216
217 if (accel_is_enabled()) then
218 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
219 call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
220 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
221 call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
222 end if
223
224 pop_sub(phase_init_phases)
225 end subroutine phase_init_phases
226
227 ! ----------------------------------------------------------------------------------
229 subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
230 class(phase_t), intent(inout) :: phase
231 class(mesh_t), intent(in) :: mesh
232 type(distributed_t), intent(in) :: kpt
233 type(kpoints_t), intent(in) :: kpoints
234 type(states_elec_dim_t), intent(in) :: d
235 type(space_t), intent(in) :: space
236 real(real64), allocatable, intent(in) :: uniform_vector_potential(:)
237
238 integer :: ik, ip, sp, wgsize
239 integer(int64) :: ip_inner_global
240 real(real64) :: kpoint(space%dim)
241 real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:)
242 type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
243 type(accel_kernel_t), save :: kernel
244
245 if (.not. allocated(uniform_vector_potential)) return
246
247 push_sub_with_profile(phase_update_phases)
248
249
250 if (.not. allocated(phase%phase)) then
251 safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
252 if (accel_is_enabled()) then
253 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
254 mesh%np_part*kpt%nlocal)
255 end if
256 end if
257
258 if (.not. allocated(phase%phase_corr)) then
259 safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
260 if (accel_is_enabled()) then
261 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
262 (mesh%np_part - mesh%np)*kpt%nlocal)
263 end if
264 end if
265
266 ! TODO: We should not recompute this every time-step. We should store it.
267
268 ! loop over boundary points
269 sp = mesh%np
270 ! skip ghost points
271 if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
272
273 safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))
274 !$omp parallel do schedule(static) private(ip_inner_global)
275 do ip = sp + 1, mesh%np_part
276 ! get corresponding inner point
277 ip_inner_global = mesh_periodic_point(mesh, space, ip)
278
279 ! compute the difference between the global coordinate and the local coordinate
280 x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(ip, 1:space%dim)
281 end do
282
283 if (.not. accel_is_enabled()) then
284 !$omp parallel private(ik, ip, kpoint)
285 do ik = kpt%start, kpt%end
286 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
287 !We add the vector potential
288 kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
289
290 !$omp do schedule(static)
291 do ip = 1, mesh%np_part
292 phase%phase(ip, ik) = exp(cmplx(m_zero, -sum(mesh%x(ip, 1:space%dim)*kpoint(1:space%dim)), real64))
293 end do
294 !$omp end do
295
296 !$omp do schedule(static)
297 do ip = sp + 1, mesh%np_part
298 phase%phase_corr(ip, ik) = exp(cmplx(m_zero, sum(x_global(1:space%dim, ip) * kpoint(1:space%dim)), real64))
299 end do
300 !$omp end do nowait
301 end do
302 !$omp end parallel
303
304 else !accel_is enabled
305
306 call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
307 safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
308 do ik = kpt%start, kpt%end
309 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
310 kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
311 end do
312 call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)
313
314 ! Note: this should be globally stored
315 call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
316 call accel_write_buffer(buff_x, mesh%np_part, space%dim, mesh%x, async=.true.)
317
318 call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
319 call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)
320
321 call accel_kernel_start_call(kernel, 'phase.cl', 'update_phases')
322
323 call accel_set_kernel_arg(kernel, 0, space%dim)
324 call accel_set_kernel_arg(kernel, 1, mesh%np)
325 call accel_set_kernel_arg(kernel, 2, mesh%np_part)
326 call accel_set_kernel_arg(kernel, 3, kpt%start)
327 call accel_set_kernel_arg(kernel, 4, kpt%end)
328 call accel_set_kernel_arg(kernel, 5, sp)
329 call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
330 call accel_set_kernel_arg(kernel, 7, buff_x)
331 call accel_set_kernel_arg(kernel, 8, buff_x_global)
332 call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
333 call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)
334
335 wgsize = accel_kernel_workgroup_size(kernel)
336
337 call accel_kernel_run(kernel, (/pad(mesh%np_part, wgsize), kpt%nlocal/), (/wgsize, 1/))
338
339 call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
340 call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)
341
342 call accel_release_buffer(buff_vec_pot)
343 call accel_release_buffer(buff_x)
344 call accel_release_buffer(buff_x_global)
345 safe_deallocate_a(kpt_vec_pot)
346 end if
347
348 safe_deallocate_a(x_global)
349
350 pop_sub_with_profile(phase_update_phases)
351 end subroutine phase_update_phases
352
353 ! ----------------------------------------------------------------------------------
355 subroutine phase_end(phase)
356 class(phase_t), intent(inout) :: phase
357
358 push_sub(phase_end)
359
360 if (phase%is_allocated() .and. accel_is_enabled()) then
361 call accel_release_buffer(phase%buff_phase)
362 call accel_release_buffer(phase%buff_phase_corr)
363 end if
364
365 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
366 call accel_release_buffer(phase%buff_phase_spiral)
367 end if
368
369 safe_deallocate_a(phase%phase)
370 safe_deallocate_a(phase%phase_corr)
371 safe_deallocate_a(phase%phase_spiral)
372
373 pop_sub(phase_end)
374 end subroutine phase_end
375
376 ! ----------------------------------------------------------------------------------
378 !
379 subroutine phase_set_phase_corr(phase, mesh, psib, async)
380 class(phase_t), intent(in) :: phase
381 class(mesh_t), intent(in) :: mesh
382 type(wfs_elec_t), intent(inout) :: psib
383 logical, optional, intent(in) :: async
384
385
386 logical :: phase_correction
387
388 push_sub(phase_set_phase_corr)
389
390 ! check if we only want a phase correction for the boundary points
391 phase_correction = phase%is_allocated()
392
393 !We apply the phase only to np points, and the phase for the np+1 to np_part points
394 !will be treated as a phase correction in the Hamiltonian
395 if (phase_correction) then
396 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
397 end if
398
399 pop_sub(phase_set_phase_corr)
400 end subroutine phase_set_phase_corr
401
402 ! ----------------------------------------------------------------------------------
404 !
405 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
406 class(phase_t), intent(in) :: phase
407 class(mesh_t), intent(in) :: mesh
408 type(wfs_elec_t), intent(inout) :: psib
409 logical, optional, intent(in) :: async
410
411 logical :: phase_correction
412
413 push_sub(phase_unset_phase_corr)
414
415 ! check if we only want a phase correction for the boundary points
416 phase_correction = phase%is_allocated()
417
418 !We apply the phase only to np points, and the phase for the np+1 to np_part points
419 !will be treated as a phase correction in the Hamiltonian
420 if (phase_correction) then
421 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
422 end if
423
425 end subroutine phase_unset_phase_corr
426
427 ! ---------------------------------------------------------------------------------------
429 !
430 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
431 class(phase_t), intent(in) :: this
432 class(mesh_t), intent(in) :: mesh
433 integer, intent(in) :: np
434 logical, intent(in) :: conjugate
435 type(wfs_elec_t), target, intent(inout) :: psib
436 type(wfs_elec_t), optional, target, intent(in) :: src
437 logical, optional, intent(in) :: async
438
439 integer :: ip, ii, sp
440 type(wfs_elec_t), pointer :: src_
441 complex(real64) :: phase
442 integer(int64) :: wgsize, dim2, dim3
443 type(accel_kernel_t), save :: ker_phase
444
445 push_sub(phase_apply_batch)
446 call profiling_in("PHASE_APPLY_BATCH")
447
448 call profiling_count_operations(6*np*psib%nst_linear)
449
450 assert(np <= mesh%np_part)
451 assert(psib%type() == type_cmplx)
452 assert(psib%ik >= lbound(this%phase, dim=2))
453 assert(psib%ik <= ubound(this%phase, dim=2))
454
455 src_ => psib
456 if (present(src)) src_ => src
457
458 assert(src_%has_phase .eqv. conjugate)
459 assert(src_%ik == psib%ik)
460 assert(src_%type() == type_cmplx)
461
462 ! We want to skip the ghost points for setting the phase
463 sp = min(np, mesh%np)
464 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
465
466 select case (psib%status())
467 case (batch_packed)
468
469 if (conjugate) then
470
471 !$omp parallel private(ii, phase)
472 !$omp do
473 do ip = 1, min(mesh%np, np)
474 phase = conjg(this%phase(ip, psib%ik))
475 !$omp simd
476 do ii = 1, psib%nst_linear
477 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
478 end do
479 end do
480 !$omp end do nowait
481
482 ! Boundary points, if requested
483 !$omp do
484 do ip = sp+1, np
485 phase = conjg(this%phase(ip, psib%ik))
486 !$omp simd
487 do ii = 1, psib%nst_linear
488 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
489 end do
490 end do
491 !$omp end parallel
492
493 else
494
495 !$omp parallel private(ii, phase)
496 !$omp do
497 do ip = 1, min(mesh%np, np)
498 phase = this%phase(ip, psib%ik)
499 !$omp simd
500 do ii = 1, psib%nst_linear
501 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
502 end do
503 end do
504 !$omp end do nowait
505
506 ! Boundary points, if requested
507 !$omp do
508 do ip = sp+1, np
509 phase = this%phase(ip, psib%ik)
510 !$omp simd
511 do ii = 1, psib%nst_linear
512 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
513 end do
514 end do
515 !$omp end parallel
516
517 end if
518
519 case (batch_not_packed)
520
521 if (conjugate) then
522
523 !$omp parallel private(ii, ip)
524 do ii = 1, psib%nst_linear
525 !$omp do simd
526 do ip = 1, min(mesh%np, np)
527 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
528 end do
529 !$omp end do simd nowait
530
531 ! Boundary points, if requested
532 !$omp do simd
533 do ip = sp+1, np
534 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
535 end do
536 !$omp end do simd nowait
537 end do
538 !$omp end parallel
539
540 else
541 !$omp parallel private(ii, ip)
542 do ii = 1, psib%nst_linear
543 !$omp do simd
544 do ip = 1, min(mesh%np, np)
545 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
546 end do
547 !$omp end do simd nowait
548
549 ! Boundary points, if requested
550 !$omp do simd
551 do ip = sp+1, np
552 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
553 end do
554 !$omp end do simd nowait
555
556 end do
557 !$omp end parallel
558
559 end if
560
562 call accel_kernel_start_call(ker_phase, 'phase.cl', 'phase_hamiltonian')
563
564 if (conjugate) then
565 call accel_set_kernel_arg(ker_phase, 0, 1_4)
566 else
567 call accel_set_kernel_arg(ker_phase, 0, 0_4)
568 end if
569
570 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
571 call accel_set_kernel_arg(ker_phase, 2, np)
572 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
573 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
574 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
575 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
576 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
577
578 wgsize = accel_kernel_workgroup_size(ker_phase)/psib%pack_size(1)
579
580 dim3 = np/(accel_max_size_per_dim(2)*wgsize) + 1
581 dim2 = min(accel_max_size_per_dim(2)*wgsize, pad(np, wgsize))
582
583 call accel_kernel_run(ker_phase, (/psib%pack_size(1), dim2, dim3/), (/psib%pack_size(1), wgsize, 1_int64/))
584
585 if(.not. optional_default(async, .false.)) call accel_finish()
586 end select
587
588 psib%has_phase = .not. conjugate
589
590 call profiling_out("PHASE_APPLY_BATCH")
591 pop_sub(phase_apply_batch)
592 end subroutine phase_apply_batch
593
599 !
600 subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
601 class(phase_t), intent(in) :: this
602 complex(real64), intent(inout) :: psi(:, :)
603 integer, intent(in) :: np
604 integer, intent(in) :: dim
605 integer, intent(in) :: ik
606 logical, intent(in) :: conjugate
607
608 integer :: idim, ip
609
610 push_sub(phase_apply_mf)
611
612 assert(ik >= lbound(this%phase, dim=2))
613 assert(ik <= ubound(this%phase, dim=2))
614
615 call profiling_in("PHASE_APPLY_SINGLE")
616
617 if (conjugate) then
618 ! Apply the phase that contains both the k-point and vector-potential terms.
619 do idim = 1, dim
620 !$omp parallel do
621 do ip = 1, np
622 psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
623 end do
624 !$omp end parallel do
625 end do
626 else
627 ! Apply the conjugate of (i.e. remove) the phase that contains both the k-point and vector-potential terms.
628 do idim = 1, dim
629 !$omp parallel do
630 do ip = 1, np
631 psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
632 end do
633 !$omp end parallel do
634 end do
635 end if
636
637 call profiling_out("PHASE_APPLY_SINGLE")
638
639 pop_sub(phase_apply_mf)
640 end subroutine phase_apply_mf
641
642
643 ! ---------------------------------------------------------------------------------------
645 !
646 subroutine phase_phase_spiral(this, der, psib)
647 class(phase_t), intent(in) :: this
648 type(derivatives_t), intent(in) :: der
649 class(wfs_elec_t), intent(inout) :: psib
650
651 integer :: ip, ii, sp
652 integer, allocatable :: spin_label(:)
653 type(accel_mem_t) :: spin_label_buffer
654 integer(int64) :: wgsize
655
656 push_sub(phase_phase_spiral)
657 call profiling_in("PBC_PHASE_SPIRAL")
658
659 call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)
660
661 assert(der%boundaries%spiral)
662 assert(psib%type() == type_cmplx)
663
664 sp = der%mesh%np
665 if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost
666
667
668 select case (psib%status())
669 case (batch_packed)
670
671 !$omp parallel do private(ip, ii)
672 do ip = sp + 1, der%mesh%np_part
673 do ii = 1, psib%nst_linear, 2
674 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
675 psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
676 else
677 psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
678 end if
679 end do
680 end do
681 !$omp end parallel do
682
683 case (batch_not_packed)
684
685 !$omp parallel private(ii, ip)
686 do ii = 1, psib%nst_linear, 2
687 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
688 !$omp do
689 do ip = sp + 1, der%mesh%np_part
690 psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
691 end do
692 !$omp end do nowait
693 else
694 !$omp do
695 do ip = sp + 1, der%mesh%np_part
696 psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
697 end do
698 !$omp end do nowait
699 end if
700 end do
701 !$omp end parallel
702
704
705 assert(accel_is_enabled())
706
707 ! generate array of offsets for access of psib and phase_spiral:
708 ! TODO: Move this to the routine where spin(:,:,:) is generated
709 ! and also move the buffer to the GPU at this point to
710 ! avoid unecessary latency here!
711
712 safe_allocate(spin_label(1:psib%nst_linear))
713 spin_label = 0
714 do ii = 1, psib%nst_linear, 2
715 if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
716 end do
717
718 call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
719 call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)
720
721 call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cl', 'phase_spiral_apply')
722
725 call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
726 call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
727 call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
728 call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
729 call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)
730
731 wgsize = accel_kernel_workgroup_size(kernel_phase_spiral)/psib%pack_size(1)
732
734 (/psib%pack_size(1)/2, pad(der%mesh%np_part - sp, 2*wgsize)/), &
735 (/psib%pack_size(1)/2, 2*wgsize/))
736
737 call accel_finish()
738
739 call accel_release_buffer(spin_label_buffer)
740
741 safe_deallocate_a(spin_label)
742
743 end select
744
745 call profiling_out("PBC_PHASE_SPIRAL")
746 pop_sub(phase_phase_spiral)
747 end subroutine phase_phase_spiral
748
749
750 ! ---------------------------------------------------------------------------------------
751 logical pure function phase_is_allocated(this)
752 class(phase_t), intent(in) :: this
753
754 phase_is_allocated = allocated(this%phase)
755 end function phase_is_allocated
756
757end module phase_oct_m
758
759!! Local Variables:
760!! mode: f90
761!! coding: utf-8
762!! End:
double exp(double __x) __attribute__((__nothrow__
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1375
subroutine, public accel_finish()
Definition: accel.F90:984
integer, parameter, public accel_mem_read_write
Definition: accel.F90:195
integer pure function, public accel_max_size_per_dim(dim)
Definition: accel.F90:1410
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:295
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:918
pure logical function, public accel_is_enabled()
Definition: accel.F90:418
integer function, public accel_kernel_workgroup_size(kernel)
Definition: accel.F90:1104
integer, parameter, public accel_mem_read_only
Definition: accel.F90:195
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:190
complex(real64), parameter, public m_zi
Definition: global.F90:204
real(real64), parameter, public m_one
Definition: global.F90:191
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:722
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Definition: mesh.F90:814
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:742
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:501
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:220
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:451
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:325
logical pure function phase_is_allocated(this)
Definition: phase.F90:847
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:526
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:475
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:696
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:625
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_cmplx
Definition: types.F90:136
type(type_t), public type_integer
Definition: types.F90:137
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Describes mesh distribution to nodes.
Definition: mesh.F90:188
A container for the phase.
Definition: phase.F90:179
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)