Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
26 use comm_oct_m
27 use debug_oct_m
31 use global_oct_m
32 use grid_oct_m
35 use math_oct_m
36 use mesh_oct_m
38 use mpi_oct_m
40 use space_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50
51 public :: &
53
84 type phase_t
85 private
86 complex(real64), allocatable :: phase(:, :)
89 complex(real64), public, allocatable :: phase_corr(:,:)
92 complex(real64), allocatable :: phase_spiral(:,:)
95 type(accel_mem_t) :: buff_phase
96 type(accel_mem_t) :: buff_phase_spiral
97 type(accel_mem_t), public :: buff_phase_corr
98 integer :: buff_phase_qn_start
99 real(real64), public, pointer :: spin(:,:,:) => null()
100 contains
101 procedure :: init => phase_init_phases
102
103 procedure :: update => phase_update_phases
104
105 procedure :: end => phase_end
106
107 procedure :: set_phase_corr => phase_set_phase_corr
108
109 procedure :: unset_phase_corr => phase_unset_phase_corr
110
111 procedure :: apply_to => phase_apply_batch
112
113 procedure :: apply_to_single => phase_apply_mf
114
115 procedure :: apply_phase_spiral => phase_phase_spiral
116
117 procedure :: is_allocated => phase_is_allocated
118 end type phase_t
119
120contains
121
122 ! ---------------------------------------------------------
124 subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
125 class(phase_t), intent(inout) :: phase
126 class(mesh_t), intent(in) :: gr
127 type(distributed_t), intent(in) :: kpt
128 type(kpoints_t), intent(in) :: kpoints
129 type(states_elec_dim_t), intent(in) :: d
130 type(space_t), intent(in) :: space
131
132 integer :: ip, ik, sp
133 integer(int64) :: ip_inner_global
134 real(real64) :: kpoint(space%dim), x_global(space%dim)
135
136 push_sub(phase_init_phases)
137
138 ! no e^ik phase needed for Gamma-point-only periodic calculations
139 ! unless for velocity-gauge for lasers
140 if (accel_is_enabled()) then
141 phase%buff_phase_qn_start = kpt%start
142 end if
143 if(kpoints%gamma_only()) then
144 pop_sub(phase_init_phases)
145 return
146 end if
147
148 safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
149 safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
150 !$omp parallel private(ip)
151 do ik = kpt%start, kpt%end
152 !$omp do
153 do ip = gr%np + 1, gr%np_part
154 phase%phase_corr(ip, ik) = m_one
155 end do
156 !$omp end do nowait
157 end do
158 !$omp end parallel
159
160 ! Only when gr is a grid_t type, we can access gr%der
161 select type(gr)
162 class is(grid_t)
163 if (gr%der%boundaries%spiralBC) then
164 sp = gr%np
165 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
166
167 ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
168 ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
169 ! We will also update phase_corr and possible other similar arrays.
170
171 safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))
172
173 ! loop over boundary points
174 do ip = sp + 1, gr%np_part
175 ! get corresponding inner point
176 ip_inner_global = mesh_periodic_point(gr, space, ip)
177 x_global = mesh_x_global(gr, ip_inner_global)
178 phase%phase_spiral(ip-sp, 1) = &
179 exp(m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
180 phase%phase_spiral(ip-sp, 2) = &
181 exp(-m_zi * sum((gr%x(ip, 1:space%dim)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
182 end do
183
185 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
186 call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
187 end if
188 end if
189 class default
190 ! Do nothing
191 end select
194 kpoint(1:space%dim) = m_zero
195
196 sp = gr%np
197 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
199 !$omp parallel private(ip, ip_inner_global, x_global, kpoint)
200 do ik = kpt%start, kpt%end
201 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
202 !$omp do
203 do ip = 1, gr%np_part
204 phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(ip, 1:space%dim) * kpoint(1:space%dim)))
205 end do
206 !$omp end do
207
208 ! loop over boundary points
209 !$omp do
210 do ip = sp + 1, gr%np_part
211 ! get corresponding inner point
212 ip_inner_global = mesh_periodic_point(gr, space, ip)
213
214 ! compute phase correction from global coordinate (opposite sign!)
215 x_global = mesh_x_global(gr, ip_inner_global)
216 phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
217 exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
218 end do
219 !$omp end do nowait
220 end do
221 !$omp end parallel
222
223 if (accel_is_enabled()) then
224 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
225 call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
226 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
227 call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
228 end if
229
230 pop_sub(phase_init_phases)
231 end subroutine phase_init_phases
232
233 ! ----------------------------------------------------------------------------------
235 subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
236 class(phase_t), intent(inout) :: phase
237 class(mesh_t), intent(in) :: mesh
238 type(distributed_t), intent(in) :: kpt
239 type(kpoints_t), intent(in) :: kpoints
240 type(states_elec_dim_t), intent(in) :: d
241 type(space_t), intent(in) :: space
242 real(real64), allocatable, intent(in) :: uniform_vector_potential(:)
243
244 integer :: ik, ip, sp, wgsize
245 integer(int64) :: ip_inner_global
246 real(real64) :: kpoint(space%dim)
247 real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:), x(:,:)
248 type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
249 type(accel_kernel_t), save :: kernel
250 real(real64) :: tmp_sum
251
252 if (.not. allocated(uniform_vector_potential)) return
253
254 push_sub_with_profile(phase_update_phases)
255
256
257 if (.not. allocated(phase%phase)) then
258 safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
259 if (accel_is_enabled()) then
260 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
261 mesh%np_part*kpt%nlocal)
262 end if
263 end if
264
265 if (.not. allocated(phase%phase_corr)) then
266 safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
267 if (accel_is_enabled()) then
268 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
269 (mesh%np_part - mesh%np)*kpt%nlocal)
270 end if
271 end if
272
273 ! TODO: We should not recompute this every time-step. We should store it.
274
275 ! loop over boundary points
276 sp = mesh%np
277 ! skip ghost points
278 if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
279
280 safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))
281
282 ! TODO: Introduce a permanent value and start replacing mesh%x by mesh%x_trans
283 safe_allocate(x(1:space%dim, 1:mesh%np_part))
284 x = transpose(mesh%x)
285
286 !$omp parallel do schedule(static) private(ip_inner_global)
287 do ip = sp + 1, mesh%np_part
288 ! get corresponding inner point
289 ip_inner_global = mesh_periodic_point(mesh, space, ip)
290 ! compute the difference between the global coordinate and the local coordinate
291 x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(ip, 1:space%dim)
292 end do
293
294
295 if (.not. accel_is_enabled()) then
296 !$omp parallel private(ik, ip, kpoint, tmp_sum)
297 do ik = kpt%start, kpt%end
298 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
299 !We add the vector potential
300 kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
301
302 !$omp do schedule(static)
303 do ip = 1, mesh%np_part
304 tmp_sum = sum(x(1:space%dim, ip)*kpoint(1:space%dim))
305 phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
306 end do
307 !$omp end do
308
309 !$omp do schedule(static)
310 do ip = sp + 1, mesh%np_part
311 tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
312 phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
313 end do
314 !$omp end do nowait
315 end do
316 !$omp end parallel
317
318 else !accel_is enabled
319
320 call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
321 safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
322 do ik = kpt%start, kpt%end
323 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
324 kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
325 end do
326 call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)
327
328 ! Note: this should be globally stored
329 call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
330 call accel_write_buffer(buff_x, space%dim, mesh%np_part, x, async=.true.)
331
332 call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
333 call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)
334
335 call accel_kernel_start_call(kernel, 'phase.cl', 'update_phases')
336
337 call accel_set_kernel_arg(kernel, 0, space%dim)
338 call accel_set_kernel_arg(kernel, 1, mesh%np)
339 call accel_set_kernel_arg(kernel, 2, mesh%np_part)
340 call accel_set_kernel_arg(kernel, 3, kpt%start)
341 call accel_set_kernel_arg(kernel, 4, kpt%end)
342 call accel_set_kernel_arg(kernel, 5, sp)
343 call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
344 call accel_set_kernel_arg(kernel, 7, buff_x)
345 call accel_set_kernel_arg(kernel, 8, buff_x_global)
346 call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
347 call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)
348
349 wgsize = accel_kernel_workgroup_size(kernel)
350
351 call accel_kernel_run(kernel, (/pad(mesh%np_part, wgsize), kpt%nlocal/), (/wgsize, 1/))
352
353 call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
354 call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)
355
356 call accel_release_buffer(buff_vec_pot)
357 call accel_release_buffer(buff_x)
358 call accel_release_buffer(buff_x_global)
359 safe_deallocate_a(kpt_vec_pot)
360 end if
361
362 safe_deallocate_a(x_global)
363 safe_deallocate_a(x)
364
365 pop_sub_with_profile(phase_update_phases)
366 end subroutine phase_update_phases
367
368 ! ----------------------------------------------------------------------------------
370 subroutine phase_end(phase)
371 class(phase_t), intent(inout) :: phase
372
373 push_sub(phase_end)
374
375 if (phase%is_allocated() .and. accel_is_enabled()) then
376 call accel_release_buffer(phase%buff_phase)
377 call accel_release_buffer(phase%buff_phase_corr)
378 end if
379
380 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
381 call accel_release_buffer(phase%buff_phase_spiral)
382 end if
383
384 safe_deallocate_a(phase%phase)
385 safe_deallocate_a(phase%phase_corr)
386 safe_deallocate_a(phase%phase_spiral)
387
388 pop_sub(phase_end)
389 end subroutine phase_end
390
391 ! ----------------------------------------------------------------------------------
393 !
394 subroutine phase_set_phase_corr(phase, mesh, psib, async)
395 class(phase_t), intent(in) :: phase
396 class(mesh_t), intent(in) :: mesh
397 type(wfs_elec_t), intent(inout) :: psib
398 logical, optional, intent(in) :: async
399
400
401 logical :: phase_correction
402
403 push_sub(phase_set_phase_corr)
404
405 ! check if we only want a phase correction for the boundary points
406 phase_correction = phase%is_allocated()
407
408 !We apply the phase only to np points, and the phase for the np+1 to np_part points
409 !will be treated as a phase correction in the Hamiltonian
410 if (phase_correction) then
411 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
412 end if
413
414 pop_sub(phase_set_phase_corr)
415 end subroutine phase_set_phase_corr
416
417 ! ----------------------------------------------------------------------------------
419 !
420 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
421 class(phase_t), intent(in) :: phase
422 class(mesh_t), intent(in) :: mesh
423 type(wfs_elec_t), intent(inout) :: psib
424 logical, optional, intent(in) :: async
425
426 logical :: phase_correction
427
428 push_sub(phase_unset_phase_corr)
429
430 ! check if we only want a phase correction for the boundary points
431 phase_correction = phase%is_allocated()
432
433 !We apply the phase only to np points, and the phase for the np+1 to np_part points
434 !will be treated as a phase correction in the Hamiltonian
435 if (phase_correction) then
436 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
437 end if
438
440 end subroutine phase_unset_phase_corr
441
442 ! ---------------------------------------------------------------------------------------
444 !
445 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
446 class(phase_t), intent(in) :: this
447 class(mesh_t), intent(in) :: mesh
448 integer, intent(in) :: np
449 logical, intent(in) :: conjugate
450 type(wfs_elec_t), target, intent(inout) :: psib
451 type(wfs_elec_t), optional, target, intent(in) :: src
452 logical, optional, intent(in) :: async
453
454 integer :: ip, ii, sp
455 type(wfs_elec_t), pointer :: src_
456 complex(real64) :: phase
457 integer(int64) :: wgsize, dim2, dim3
458 type(accel_kernel_t), save :: ker_phase
459
460 push_sub(phase_apply_batch)
461 call profiling_in("PHASE_APPLY_BATCH")
462
463 call profiling_count_operations(6*np*psib%nst_linear)
464
465 assert(np <= mesh%np_part)
466 assert(psib%type() == type_cmplx)
467 assert(psib%ik >= lbound(this%phase, dim=2))
468 assert(psib%ik <= ubound(this%phase, dim=2))
469
470 src_ => psib
471 if (present(src)) src_ => src
472
473 assert(src_%has_phase .eqv. conjugate)
474 assert(src_%ik == psib%ik)
475 assert(src_%type() == type_cmplx)
476
477 ! We want to skip the ghost points for setting the phase
478 sp = min(np, mesh%np)
479 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
480
481 select case (psib%status())
482 case (batch_packed)
483
484 if (conjugate) then
485
486 !$omp parallel private(ii, phase)
487 !$omp do
488 do ip = 1, min(mesh%np, np)
489 phase = conjg(this%phase(ip, psib%ik))
490 !$omp simd
491 do ii = 1, psib%nst_linear
492 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
493 end do
494 end do
495 !$omp end do nowait
496
497 ! Boundary points, if requested
498 !$omp do
499 do ip = sp+1, np
500 phase = conjg(this%phase(ip, psib%ik))
501 !$omp simd
502 do ii = 1, psib%nst_linear
503 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
504 end do
505 end do
506 !$omp end parallel
507
508 else
509
510 !$omp parallel private(ii, phase)
511 !$omp do
512 do ip = 1, min(mesh%np, np)
513 phase = this%phase(ip, psib%ik)
514 !$omp simd
515 do ii = 1, psib%nst_linear
516 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
517 end do
518 end do
519 !$omp end do nowait
520
521 ! Boundary points, if requested
522 !$omp do
523 do ip = sp+1, np
524 phase = this%phase(ip, psib%ik)
525 !$omp simd
526 do ii = 1, psib%nst_linear
527 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
528 end do
529 end do
530 !$omp end parallel
531
532 end if
533
534 case (batch_not_packed)
535
536 if (conjugate) then
537
538 !$omp parallel private(ii, ip)
539 do ii = 1, psib%nst_linear
540 !$omp do simd
541 do ip = 1, min(mesh%np, np)
542 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
543 end do
544 !$omp end do simd nowait
545
546 ! Boundary points, if requested
547 !$omp do simd
548 do ip = sp+1, np
549 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
550 end do
551 !$omp end do simd nowait
552 end do
553 !$omp end parallel
554
555 else
556 !$omp parallel private(ii, ip)
557 do ii = 1, psib%nst_linear
558 !$omp do simd
559 do ip = 1, min(mesh%np, np)
560 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
561 end do
562 !$omp end do simd nowait
563
564 ! Boundary points, if requested
565 !$omp do simd
566 do ip = sp+1, np
567 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
568 end do
569 !$omp end do simd nowait
570
571 end do
572 !$omp end parallel
573
574 end if
575
577 call accel_kernel_start_call(ker_phase, 'phase.cl', 'phase_hamiltonian')
578
579 if (conjugate) then
580 call accel_set_kernel_arg(ker_phase, 0, 1_4)
581 else
582 call accel_set_kernel_arg(ker_phase, 0, 0_4)
583 end if
584
585 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
586 call accel_set_kernel_arg(ker_phase, 2, np)
587 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
588 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
589 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
590 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
591 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
592
593 wgsize = accel_kernel_workgroup_size(ker_phase)/psib%pack_size(1)
594
595 dim3 = np/(accel_max_size_per_dim(2)*wgsize) + 1
596 dim2 = min(accel_max_size_per_dim(2)*wgsize, pad(np, wgsize))
597
598 call accel_kernel_run(ker_phase, (/psib%pack_size(1), dim2, dim3/), (/psib%pack_size(1), wgsize, 1_int64/))
599
600 if(.not. optional_default(async, .false.)) call accel_finish()
601 end select
602
603 psib%has_phase = .not. conjugate
604
605 call profiling_out("PHASE_APPLY_BATCH")
606 pop_sub(phase_apply_batch)
607 end subroutine phase_apply_batch
608
614 !
615 subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
616 class(phase_t), intent(in) :: this
617 complex(real64), intent(inout) :: psi(:, :)
618 integer, intent(in) :: np
619 integer, intent(in) :: dim
620 integer, intent(in) :: ik
621 logical, intent(in) :: conjugate
622
623 integer :: idim, ip
624
625 push_sub(phase_apply_mf)
626
627 assert(ik >= lbound(this%phase, dim=2))
628 assert(ik <= ubound(this%phase, dim=2))
629
630 call profiling_in("PHASE_APPLY_SINGLE")
631
632 if (conjugate) then
633 ! Apply the phase that contains both the k-point and vector-potential terms.
634 do idim = 1, dim
635 !$omp parallel do
636 do ip = 1, np
637 psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
638 end do
639 !$omp end parallel do
640 end do
641 else
642 ! Apply the conjugate of (i.e. remove) the phase that contains both the k-point and vector-potential terms.
643 do idim = 1, dim
644 !$omp parallel do
645 do ip = 1, np
646 psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
647 end do
648 !$omp end parallel do
649 end do
650 end if
651
652 call profiling_out("PHASE_APPLY_SINGLE")
653
654 pop_sub(phase_apply_mf)
655 end subroutine phase_apply_mf
656
657
658 ! ---------------------------------------------------------------------------------------
660 !
661 subroutine phase_phase_spiral(this, der, psib)
662 class(phase_t), intent(in) :: this
663 type(derivatives_t), intent(in) :: der
664 class(wfs_elec_t), intent(inout) :: psib
665
666 integer :: ip, ii, sp
667 integer, allocatable :: spin_label(:)
668 type(accel_mem_t) :: spin_label_buffer
669 integer(int64) :: wgsize
670
671 push_sub(phase_phase_spiral)
672 call profiling_in("PBC_PHASE_SPIRAL")
673
674 call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)
675
676 assert(der%boundaries%spiral)
677 assert(psib%type() == type_cmplx)
678
679 sp = der%mesh%np
680 if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost
681
682
683 select case (psib%status())
684 case (batch_packed)
685
686 !$omp parallel do private(ip, ii)
687 do ip = sp + 1, der%mesh%np_part
688 do ii = 1, psib%nst_linear, 2
689 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
690 psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
691 else
692 psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
693 end if
694 end do
695 end do
696 !$omp end parallel do
697
698 case (batch_not_packed)
699
700 !$omp parallel private(ii, ip)
701 do ii = 1, psib%nst_linear, 2
702 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
703 !$omp do
704 do ip = sp + 1, der%mesh%np_part
705 psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
706 end do
707 !$omp end do nowait
708 else
709 !$omp do
710 do ip = sp + 1, der%mesh%np_part
711 psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
712 end do
713 !$omp end do nowait
714 end if
715 end do
716 !$omp end parallel
717
719
720 assert(accel_is_enabled())
721
722 ! generate array of offsets for access of psib and phase_spiral:
723 ! TODO: Move this to the routine where spin(:,:,:) is generated
724 ! and also move the buffer to the GPU at this point to
725 ! avoid unecessary latency here!
726
727 safe_allocate(spin_label(1:psib%nst_linear))
728 spin_label = 0
729 do ii = 1, psib%nst_linear, 2
730 if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
731 end do
732
733 call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
734 call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)
735
736 call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cl', 'phase_spiral_apply')
737
740 call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
741 call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
742 call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
743 call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
744 call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)
745
746 wgsize = accel_kernel_workgroup_size(kernel_phase_spiral)/psib%pack_size(1)
747
749 (/psib%pack_size(1)/2, pad(der%mesh%np_part - sp, 2*wgsize)/), &
750 (/psib%pack_size(1)/2, 2*wgsize/))
751
752 call accel_finish()
753
754 call accel_release_buffer(spin_label_buffer)
755
756 safe_deallocate_a(spin_label)
757
758 end select
759
760 call profiling_out("PBC_PHASE_SPIRAL")
761 pop_sub(phase_phase_spiral)
762 end subroutine phase_phase_spiral
763
764
765 ! ---------------------------------------------------------------------------------------
766 logical pure function phase_is_allocated(this)
767 class(phase_t), intent(in) :: this
768
769 phase_is_allocated = allocated(this%phase)
770 end function phase_is_allocated
771
772end module phase_oct_m
773
774!! Local Variables:
775!! mode: f90
776!! coding: utf-8
777!! End:
double exp(double __x) __attribute__((__nothrow__
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1381
subroutine, public accel_finish()
Definition: accel.F90:990
integer, parameter, public accel_mem_read_write
Definition: accel.F90:196
integer pure function, public accel_max_size_per_dim(dim)
Definition: accel.F90:1416
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:296
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:920
pure logical function, public accel_is_enabled()
Definition: accel.F90:419
integer function, public accel_kernel_workgroup_size(kernel)
Definition: accel.F90:1110
integer, parameter, public accel_mem_read_only
Definition: accel.F90:196
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:191
complex(real64), parameter, public m_zi
Definition: global.F90:205
real(real64), parameter, public m_one
Definition: global.F90:192
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:723
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Given a global point index, this function returns the coordinates of the point.
Definition: mesh.F90:816
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:757
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:516
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:220
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:466
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:331
logical pure function phase_is_allocated(this)
Definition: phase.F90:862
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:541
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:490
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:711
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_cmplx
Definition: types.F90:136
type(type_t), public type_integer
Definition: types.F90:137
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
Describes mesh distribution to nodes.
Definition: mesh.F90:187
A container for the phase.
Definition: phase.F90:179
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)