Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
27 use debug_oct_m
31 use global_oct_m
32 use grid_oct_m
35 use math_oct_m
36 use mesh_oct_m
38 use mpi_oct_m
40 use space_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50
51 public :: &
52 phase_t, &
54
85 type phase_t
86 private
87 complex(real64), allocatable :: phase(:, :)
90 complex(real64), public, allocatable :: phase_corr(:,:)
93 complex(real64), allocatable :: phase_spiral(:,:)
96 type(accel_mem_t) :: buff_phase
97 type(accel_mem_t) :: buff_phase_spiral
98 type(accel_mem_t), public :: buff_phase_corr
99 integer :: buff_phase_qn_start
100 real(real64), public, pointer :: spin(:,:,:) => null()
101 contains
102 procedure :: init => phase_init_phases
103
104 procedure :: update => phase_update_phases
105
106 procedure :: end => phase_end
107
108 procedure :: set_phase_corr => phase_set_phase_corr
109
110 procedure :: unset_phase_corr => phase_unset_phase_corr
111
112 procedure :: apply_to => phase_apply_batch
113
114 procedure :: apply_to_single => phase_apply_mf
115
116 procedure :: apply_phase_spiral => phase_phase_spiral
118 procedure :: is_allocated => phase_is_allocated
119
120 procedure :: copy_and_set_phase => phase_copy_and_set_phase
121 end type phase_t
122
123contains
124
125 ! ---------------------------------------------------------
127 subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
128 class(phase_t), intent(inout) :: phase
129 class(mesh_t), intent(in) :: gr
130 type(distributed_t), intent(in) :: kpt
131 type(kpoints_t), intent(in) :: kpoints
132 type(states_elec_dim_t), intent(in) :: d
133 type(space_t), intent(in) :: space
134
135 integer :: ip, ik, sp
136 integer(int64) :: ip_inner_global
137 real(real64) :: kpoint(space%dim), x_global(space%dim)
138
139 push_sub(phase_init_phases)
140
141 ! no e^ik phase needed for Gamma-point-only periodic calculations
142 ! unless for velocity-gauge for lasers
143 if (accel_is_enabled()) then
144 phase%buff_phase_qn_start = kpt%start
145 end if
146 if(kpoints%gamma_only()) then
147 pop_sub(phase_init_phases)
148 return
149 end if
150
151 safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
152 safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
153 !$omp parallel private(ip)
154 do ik = kpt%start, kpt%end
155 !$omp do
156 do ip = gr%np + 1, gr%np_part
157 phase%phase_corr(ip, ik) = m_one
158 end do
159 !$omp end do nowait
160 end do
161 !$omp end parallel
162
163 ! Only when gr is a grid_t type, we can access gr%der
164 select type(gr)
165 class is(grid_t)
166 if (gr%der%boundaries%spiralBC) then
167 sp = gr%np
168 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
169
170 ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
171 ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
172 ! We will also update phase_corr and possible other similar arrays.
173
174 safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))
175
176 ! loop over boundary points
177 do ip = sp + 1, gr%np_part
178 ! get corresponding inner point
179 ip_inner_global = mesh_periodic_point(gr, space, ip)
180 x_global = mesh_x_global(gr, ip_inner_global)
181 phase%phase_spiral(ip-sp, 1) = &
182 exp(m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
183 phase%phase_spiral(ip-sp, 2) = &
184 exp(-m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
185 end do
186
187 if (accel_is_enabled()) then
188 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
189 call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
190 end if
191 end if
192 class default
193 ! Do nothing
194 end select
196
197 kpoint(1:space%dim) = m_zero
198
199 sp = gr%np
200 if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
202 !$omp parallel private(ip, ip_inner_global, x_global, kpoint)
203 do ik = kpt%start, kpt%end
204 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
205 !$omp do
206 do ip = 1, gr%np_part
207 phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(1:space%dim, ip) * kpoint(1:space%dim)))
208 end do
209 !$omp end do
210
211 ! loop over boundary points
212 !$omp do
213 do ip = sp + 1, gr%np_part
214 ! get corresponding inner point
215 ip_inner_global = mesh_periodic_point(gr, space, ip)
216
217 ! compute phase correction from global coordinate (opposite sign!)
218 x_global = mesh_x_global(gr, ip_inner_global)
219 phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
220 exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
221 end do
222 !$omp end do nowait
223 end do
224 !$omp end parallel
225
226 if (accel_is_enabled()) then
227 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
228 call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
229 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
230 call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
231 end if
232
233 pop_sub(phase_init_phases)
234 end subroutine phase_init_phases
235
236 ! ----------------------------------------------------------------------------------
238 subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
239 class(phase_t), intent(inout) :: phase
240 class(mesh_t), intent(in) :: mesh
241 type(distributed_t), intent(in) :: kpt
242 type(kpoints_t), intent(in) :: kpoints
243 type(states_elec_dim_t), intent(in) :: d
244 type(space_t), intent(in) :: space
245 real(real64), allocatable, intent(in) :: uniform_vector_potential(:)
246
247 integer :: ik, ip, sp
248 integer(int64), dimension(2) :: np, gsize, bsize
249 integer(int64) :: ip_inner_global
250 real(real64) :: kpoint(space%dim)
251 real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:)
252 type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
253 type(accel_kernel_t), save :: kernel
254 real(real64) :: tmp_sum
255
256 if (.not. allocated(uniform_vector_potential)) return
257
258 push_sub_with_profile(phase_update_phases)
259
260
261 if (.not. allocated(phase%phase)) then
262 safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
263 if (accel_is_enabled()) then
264 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
265 mesh%np_part*kpt%nlocal)
266 end if
267 end if
268
269 if (.not. allocated(phase%phase_corr)) then
270 safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
271 if (accel_is_enabled()) then
272 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
273 (mesh%np_part - mesh%np)*kpt%nlocal)
274 end if
275 end if
276
277 ! TODO: We should not recompute this every time-step. We should store it.
278
279 ! loop over boundary points
280 sp = mesh%np
281 ! skip ghost points
282 if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
283
284 safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))
285
286 !$omp parallel do schedule(static) private(ip_inner_global)
287 do ip = sp + 1, mesh%np_part
288 ! get corresponding inner point
289 ip_inner_global = mesh_periodic_point(mesh, space, ip)
290 ! compute the difference between the global coordinate and the local coordinate
291 x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(1:space%dim, ip)
292 end do
293
294
295 if (.not. accel_is_enabled()) then
296 !$omp parallel private(ik, ip, kpoint, tmp_sum)
297 do ik = kpt%start, kpt%end
298 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
299 !We add the vector potential
300 kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
301
302 !$omp do schedule(static)
303 do ip = 1, mesh%np_part
304 tmp_sum = sum(mesh%x(1:space%dim, ip)*kpoint(1:space%dim))
305 phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
306 end do
307 !$omp end do
308
309 !$omp do schedule(static)
310 do ip = sp + 1, mesh%np_part
311 tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
312 phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
313 end do
314 !$omp end do nowait
315 end do
316 !$omp end parallel
317
318 else !accel_is enabled
319
320 call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
321 safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
322 do ik = kpt%start, kpt%end
323 kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
324 kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
325 end do
326 call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)
327
328 ! Note: this should be globally stored
329 call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
330 call accel_write_buffer(buff_x, space%dim, mesh%np_part, mesh%x, async=.true.)
331
332 call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
333 call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)
334
335 call accel_kernel_start_call(kernel, 'phase.cu', 'update_phases')
336
337 call accel_set_kernel_arg(kernel, 0, space%dim)
338 call accel_set_kernel_arg(kernel, 1, mesh%np)
339 call accel_set_kernel_arg(kernel, 2, mesh%np_part)
340 call accel_set_kernel_arg(kernel, 3, kpt%start)
341 call accel_set_kernel_arg(kernel, 4, kpt%end)
342 call accel_set_kernel_arg(kernel, 5, sp)
343 call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
344 call accel_set_kernel_arg(kernel, 7, buff_x)
345 call accel_set_kernel_arg(kernel, 8, buff_x_global)
346 call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
347 call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)
348
349 ! Compute the grid size
350 np = (/mesh%np_part, kpt%nlocal/)
351 bsize = (/accel_kernel_block_size(kernel), 1/)
352 call accel_grid_size(np, bsize, gsize)
353
354 call accel_kernel_run(kernel, gsize, bsize)
355
356 call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
357 call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)
358
359 call accel_free_buffer(buff_vec_pot)
360 call accel_free_buffer(buff_x)
361 call accel_free_buffer(buff_x_global)
362 safe_deallocate_a(kpt_vec_pot)
363 end if
364
365 safe_deallocate_a(x_global)
366
367 pop_sub_with_profile(phase_update_phases)
368 end subroutine phase_update_phases
369
370 ! ----------------------------------------------------------------------------------
372 subroutine phase_end(phase)
373 class(phase_t), intent(inout) :: phase
374
375 push_sub(phase_end)
376
377 if (phase%is_allocated() .and. accel_is_enabled()) then
378 call accel_free_buffer(phase%buff_phase)
379 call accel_free_buffer(phase%buff_phase_corr)
380 end if
381
382 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
383 call accel_free_buffer(phase%buff_phase_spiral)
384 end if
385
386 safe_deallocate_a(phase%phase)
387 safe_deallocate_a(phase%phase_corr)
388 safe_deallocate_a(phase%phase_spiral)
389
390 pop_sub(phase_end)
391 end subroutine phase_end
392
393 ! ----------------------------------------------------------------------------------
395 subroutine phase_accel_rebuild(phase, mesh, kpt)
396 class(phase_t), intent(inout) :: phase
397 class(mesh_t), intent(in) :: mesh
398 type(distributed_t), intent(in) :: kpt
399
400 integer :: nlocal
401
402 push_sub(phase_accel_rebuild)
403
404 if (.not. accel_is_enabled()) then
405 pop_sub(phase_accel_rebuild)
406 return
407 end if
408
409 phase%buff_phase_qn_start = kpt%start
410
411 call accel_detach_buffer(phase%buff_phase)
412 call accel_detach_buffer(phase%buff_phase_corr)
413 call accel_detach_buffer(phase%buff_phase_spiral)
414
415 if (allocated(phase%phase)) then
416 assert(size(phase%phase, 1) == mesh%np_part)
417 nlocal = ubound(phase%phase, dim=2) - lbound(phase%phase, dim=2) + 1
418 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, size(phase%phase, 1)*nlocal)
419 call accel_write_buffer(phase%buff_phase, size(phase%phase, 1), nlocal, phase%phase)
420 end if
421
422 if (allocated(phase%phase_corr)) then
423 assert(size(phase%phase_corr, 1) == mesh%np_part - mesh%np)
424 nlocal = ubound(phase%phase_corr, dim=2) - lbound(phase%phase_corr, dim=2) + 1
425 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
426 size(phase%phase_corr, 1)*nlocal)
427 call accel_write_buffer(phase%buff_phase_corr, size(phase%phase_corr, 1), nlocal, phase%phase_corr)
428 end if
429
430 if (allocated(phase%phase_spiral)) then
431 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, &
432 size(phase%phase_spiral, 1)*size(phase%phase_spiral, 2))
433 call accel_write_buffer(phase%buff_phase_spiral, size(phase%phase_spiral, 1), &
434 size(phase%phase_spiral, 2), phase%phase_spiral)
435 end if
436
437 pop_sub(phase_accel_rebuild)
438 end subroutine phase_accel_rebuild
439
440 ! ----------------------------------------------------------------------------------
442 !
443 subroutine phase_set_phase_corr(phase, mesh, psib, async)
444 class(phase_t), intent(in) :: phase
445 class(mesh_t), intent(in) :: mesh
446 type(wfs_elec_t), intent(inout) :: psib
447 logical, optional, intent(in) :: async
448
449
450 logical :: phase_correction
451
452 push_sub(phase_set_phase_corr)
453
454 ! check if we only want a phase correction for the boundary points
455 phase_correction = phase%is_allocated()
456
457 !We apply the phase only to np points, and the phase for the np+1 to np_part points
458 !will be treated as a phase correction in the Hamiltonian
459 if (phase_correction) then
460 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
461 end if
462
463 pop_sub(phase_set_phase_corr)
464 end subroutine phase_set_phase_corr
465
466 ! ----------------------------------------------------------------------------------
468 !
469 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
470 class(phase_t), intent(in) :: phase
471 class(mesh_t), intent(in) :: mesh
472 type(wfs_elec_t), intent(inout) :: psib
473 logical, optional, intent(in) :: async
474
475 logical :: phase_correction
476
477 push_sub(phase_unset_phase_corr)
478
479 ! check if we only want a phase correction for the boundary points
480 phase_correction = phase%is_allocated()
481
482 !We apply the phase only to np points, and the phase for the np+1 to np_part points
483 !will be treated as a phase correction in the Hamiltonian
484 if (phase_correction) then
485 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
486 end if
487
489 end subroutine phase_unset_phase_corr
491 ! ---------------------------------------------------------------------------------------
493 !
494 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
495 class(phase_t), intent(in) :: this
496 class(mesh_t), intent(in) :: mesh
497 integer, intent(in) :: np
498 logical, intent(in) :: conjugate
499 type(wfs_elec_t), target, intent(inout) :: psib
500 type(wfs_elec_t), optional, target, intent(in) :: src
501 logical, optional, intent(in) :: async
502
503 integer :: ip, ii, sp
504 type(wfs_elec_t), pointer :: src_
505 complex(real64) :: phase
506 integer(int64), dimension(3) :: gsizes, bsizes
507 type(accel_kernel_t), save :: ker_phase
508
509 push_sub(phase_apply_batch)
510 call profiling_in("PHASE_APPLY_BATCH")
511
512 call profiling_count_operations(6*np*psib%nst_linear)
513
514 assert(np <= mesh%np_part)
515 assert(psib%type() == type_cmplx)
516 assert(psib%ik >= lbound(this%phase, dim=2))
517 assert(psib%ik <= ubound(this%phase, dim=2))
518
519 src_ => psib
520 if (present(src)) src_ => src
521
522 assert(src_%has_phase .eqv. conjugate)
523 assert(src_%ik == psib%ik)
524 assert(src_%type() == type_cmplx)
525
526 ! We want to skip the ghost points for setting the phase
527 sp = min(np, mesh%np)
528 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
529
530 select case (psib%status())
531 case (batch_packed)
532
533 if (conjugate) then
534
535 !$omp parallel private(ii, phase)
536 !$omp do
537 do ip = 1, min(mesh%np, np)
538 phase = conjg(this%phase(ip, psib%ik))
539 !$omp simd
540 do ii = 1, psib%nst_linear
541 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
542 end do
543 end do
544 !$omp end do nowait
545
546 ! Boundary points, if requested
547 !$omp do
548 do ip = sp+1, np
549 phase = conjg(this%phase(ip, psib%ik))
550 !$omp simd
551 do ii = 1, psib%nst_linear
552 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
553 end do
554 end do
555 !$omp end parallel
556
557 else
558
559 !$omp parallel private(ii, phase)
560 !$omp do
561 do ip = 1, min(mesh%np, np)
562 phase = this%phase(ip, psib%ik)
563 !$omp simd
564 do ii = 1, psib%nst_linear
565 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
566 end do
567 end do
568 !$omp end do nowait
569
570 ! Boundary points, if requested
571 !$omp do
572 do ip = sp+1, np
573 phase = this%phase(ip, psib%ik)
574 !$omp simd
575 do ii = 1, psib%nst_linear
576 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
577 end do
578 end do
579 !$omp end parallel
580
581 end if
582
583 case (batch_not_packed)
584
585 if (conjugate) then
586
587 !$omp parallel private(ii, ip)
588 do ii = 1, psib%nst_linear
589 !$omp do simd
590 do ip = 1, min(mesh%np, np)
591 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
592 end do
593 !$omp end do simd nowait
594
595 ! Boundary points, if requested
596 !$omp do simd
597 do ip = sp+1, np
598 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
599 end do
600 !$omp end do simd nowait
601 end do
602 !$omp end parallel
603
604 else
605 !$omp parallel private(ii, ip)
606 do ii = 1, psib%nst_linear
607 !$omp do simd
608 do ip = 1, min(mesh%np, np)
609 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
610 end do
611 !$omp end do simd nowait
612
613 ! Boundary points, if requested
614 !$omp do simd
615 do ip = sp+1, np
616 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
617 end do
618 !$omp end do simd nowait
619
620 end do
621 !$omp end parallel
622
623 end if
624
626 call accel_kernel_start_call(ker_phase, 'phase.cu', 'phase_hamiltonian')
627
628 if (conjugate) then
629 call accel_set_kernel_arg(ker_phase, 0, 1_4)
630 else
631 call accel_set_kernel_arg(ker_phase, 0, 0_4)
632 end if
633
634 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
635 call accel_set_kernel_arg(ker_phase, 2, np)
636 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
637 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
638 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
639 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
640 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
641
642 ! Compute the grid (extend to another dimensions if the size of the problem is too big)
643 call accel_grid_size_extend_dim(int(np, int64), psib%pack_size(1), gsizes, bsizes, ker_phase)
644
645 call accel_kernel_run(ker_phase, gsizes, bsizes)
646
647 if(.not. optional_default(async, .false.)) call accel_finish()
648 end select
649
650 psib%has_phase = .not. conjugate
651
652 call profiling_out("PHASE_APPLY_BATCH")
653 pop_sub(phase_apply_batch)
654 end subroutine phase_apply_batch
655
661 !
662 subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
663 class(phase_t), intent(in) :: this
664 complex(real64), intent(inout) :: psi(:, :)
665 integer, intent(in) :: np
666 integer, intent(in) :: dim
667 integer, intent(in) :: ik
668 logical, intent(in) :: conjugate
669
670 integer :: idim, ip
671
672 push_sub(phase_apply_mf)
673
674 assert(ik >= lbound(this%phase, dim=2))
675 assert(ik <= ubound(this%phase, dim=2))
676
677 call profiling_in("PHASE_APPLY_SINGLE")
678
679 if (conjugate) then
680 ! Apply the phase that contains both the k-point and vector-potential terms.
681 do idim = 1, dim
682 !$omp parallel do
683 do ip = 1, np
684 psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
685 end do
686 !$omp end parallel do
687 end do
688 else
689 ! Apply the conjugate of (i.e. remove) the phase that contains both the k-point and vector-potential terms.
690 do idim = 1, dim
691 !$omp parallel do
692 do ip = 1, np
693 psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
694 end do
695 !$omp end parallel do
696 end do
697 end if
698
699 call profiling_out("PHASE_APPLY_SINGLE")
700
701 pop_sub(phase_apply_mf)
702 end subroutine phase_apply_mf
703
704
705 ! ---------------------------------------------------------------------------------------
707 !
708 subroutine phase_phase_spiral(this, der, psib)
709 class(phase_t), intent(in) :: this
710 type(derivatives_t), intent(in) :: der
711 class(wfs_elec_t), intent(inout) :: psib
712
713 integer :: ip, ii, sp
714 integer, allocatable :: spin_label(:)
715 type(accel_mem_t) :: spin_label_buffer
716 integer(int64) :: bsize
717 integer(int64), dimension(2) :: np, gsizes, bsizes
718
719 push_sub(phase_phase_spiral)
720 call profiling_in("PBC_PHASE_SPIRAL")
721
722 call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)
723
724 assert(der%boundaries%spiral)
725 assert(psib%type() == type_cmplx)
726
727 sp = der%mesh%np
728 if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost
729
730
731 select case (psib%status())
732 case (batch_packed)
733
734 !$omp parallel do private(ip, ii)
735 do ip = sp + 1, der%mesh%np_part
736 do ii = 1, psib%nst_linear, 2
737 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
738 psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
739 else
740 psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
741 end if
742 end do
743 end do
744 !$omp end parallel do
745
746 case (batch_not_packed)
747
748 !$omp parallel private(ii, ip)
749 do ii = 1, psib%nst_linear, 2
750 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
751 !$omp do
752 do ip = sp + 1, der%mesh%np_part
753 psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
754 end do
755 !$omp end do nowait
756 else
757 !$omp do
758 do ip = sp + 1, der%mesh%np_part
759 psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
760 end do
761 !$omp end do nowait
762 end if
763 end do
764 !$omp end parallel
765
767
768 assert(accel_is_enabled())
769
770 ! generate array of offsets for access of psib and phase_spiral:
771 ! TODO: Move this to the routine where spin(:,:,:) is generated
772 ! and also move the buffer to the GPU at this point to
773 ! avoid unecessary latency here!
774
775 safe_allocate(spin_label(1:psib%nst_linear))
776 spin_label = 0
777 do ii = 1, psib%nst_linear, 2
778 if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
779 end do
780
781 call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
782 call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)
783
784 call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cu', 'phase_spiral_apply')
785
788 call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
789 call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
790 call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
791 call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
792 call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)
793
794 ! Compute the grid size
795 bsize = accel_kernel_block_size(kernel_phase_spiral)/psib%pack_size(1)
796 np = (/psib%pack_size(1)/2_int64, int(der%mesh%np_part - sp, int64)/)
797 bsizes = (/psib%pack_size(1)/2, 2*bsize/)
798 call accel_grid_size(np, bsizes, gsizes)
799
800 call accel_kernel_run(kernel_phase_spiral, bsizes, gsizes)
801
802 call accel_finish()
804 call accel_free_buffer(spin_label_buffer)
805
806 safe_deallocate_a(spin_label)
807
808 end select
809
810 call profiling_out("PBC_PHASE_SPIRAL")
811 pop_sub(phase_phase_spiral)
812 end subroutine phase_phase_spiral
813
814
815 ! ---------------------------------------------------------------------------------------
816 logical pure function phase_is_allocated(this)
817 class(phase_t), intent(in) :: this
818
819 phase_is_allocated = allocated(this%phase)
820 end function phase_is_allocated
821
822 !----------------------------------------------------------
829 subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
830 class(phase_t), intent(in) :: phase
831 type(grid_t), intent(in) :: gr
832 type(distributed_t), intent(in) :: kpt
833 type(wfs_elec_t), intent(in) :: psib
834 type(wfs_elec_t), intent(out) :: psib_with_phase
835
836 integer :: k_offset, n_boundary_points
837
839
840 call psib%copy_to(psib_with_phase)
841 if (phase%is_allocated()) then
842 call phase%apply_to(gr, gr%np, conjugate = .false., psib = psib_with_phase, src = psib, async=.true.)
843 ! apply phase correction while setting boundary -> memory needs to be
844 ! accessed only once
845 k_offset = psib%ik - kpt%start
846 n_boundary_points = int(gr%np_part - gr%np)
847 call boundaries_set(gr%der%boundaries, gr, psib_with_phase, phase_correction = phase%phase_corr(:, psib%ik), &
848 buff_phase_corr = phase%buff_phase_corr, offset=k_offset*n_boundary_points, async=.true.)
849 else
850 call psib%copy_data_to(gr%np, psib_with_phase)
851 call boundaries_set(gr%der%boundaries, gr, psib_with_phase)
852 end if
853
854 call psib_with_phase%do_pack(copy = .true.)
855
857 end subroutine phase_copy_and_set_phase
858
859
860end module phase_oct_m
861
862!! Local Variables:
863!! mode: f90
864!! coding: utf-8
865!! End:
double exp(double __x) __attribute__((__nothrow__
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
integer function, public accel_kernel_block_size(kernel)
Definition: accel.F90:1214
subroutine, public accel_free_buffer(this, async)
Definition: accel.F90:1006
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1439
subroutine, public accel_finish()
Definition: accel.F90:1124
subroutine, public accel_detach_buffer(this)
Clear a buffer handle without freeing device memory.
Definition: accel.F90:1075
integer, parameter, public accel_mem_read_write
Definition: accel.F90:186
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:276
pure logical function, public accel_is_enabled()
Definition: accel.F90:403
integer, parameter, public accel_mem_read_only
Definition: accel.F90:186
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:200
complex(real64), parameter, public m_zi
Definition: global.F90:214
real(real64), parameter, public m_one
Definition: global.F90:201
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:724
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Given a global point index, this function returns the coordinates of the point.
Definition: mesh.F90:817
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:804
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:565
subroutine, public phase_accel_rebuild(phase, mesh, kpt)
Rebuild phase accelerator buffers after an intrinsic copy.
Definition: phase.F90:491
subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
Copy a batch to another batch and apply the Bloch phase to it.
Definition: phase.F90:925
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:223
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:468
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:334
logical pure function phase_is_allocated(this)
Definition: phase.F90:912
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:590
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:539
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:758
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), parameter, public type_cmplx
Definition: types.F90:136
type(type_t), parameter, public type_integer
Definition: types.F90:137
type(type_t), parameter, public type_float
Definition: types.F90:135
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
Describes mesh distribution to nodes.
Definition: mesh.F90:187
A container for the phase.
Definition: phase.F90:180
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)