Octopus
density.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2010 M. Marques, A. Castro, A. Rubio, G. Bertsch, X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
26!
27module density_oct_m
28 use accel_oct_m
29 use batch_oct_m
31 use iso_c_binding
32 use comm_oct_m
33 use debug_oct_m
36 use global_oct_m
37 use grid_oct_m
38 use, intrinsic :: iso_fortran_env
41 use math_oct_m
42 use mesh_oct_m
45 use mpi_oct_m
48 use parser_oct_m
50 use smear_oct_m
51 use space_oct_m
55 use types_oct_m
57
58 implicit none
59
60 private
61
62 public :: &
75
78 private
79 real(real64), contiguous, pointer :: density(:, :)
80 ! !! dimensions (1:np, 1:d%nspin)
81 type(states_elec_t), pointer :: st
82 type(grid_t), pointer :: gr
83 type(accel_mem_t) :: buff_density
84 integer(int64) :: pnp
85 logical :: packed
86 end type density_calc_t
87
88contains
89
94 !
95 subroutine density_calc_init(this, st, gr, density)
96 type(density_calc_t), intent(out) :: this
97 type(states_elec_t), target, intent(in) :: st
98 type(grid_t), target, intent(in) :: gr
99 real(real64), contiguous, target, intent(out) :: density(:, :)
100
101 integer :: ip, ispin
102 logical :: correct_size
103
104 push_sub(density_calc_init)
105
106 this%st => st
107 this%gr => gr
108
109 this%density => density
110 !$omp parallel private(ip)
111 do ispin = 1, st%d%nspin
112 !$omp do
113 do ip = 1, gr%np
114 this%density(ip, ispin) = m_zero
115 end do
116 !$omp end do nowait
117 end do
118 !$omp end parallel
119
120 this%packed = .false.
121
122 correct_size = ubound(this%density, dim = 1) == this%gr%np .or. &
123 ubound(this%density, dim = 1) == this%gr%np_part
124 assert(correct_size)
125
126 pop_sub(density_calc_init)
127 end subroutine density_calc_init
128
129 ! ---------------------------------------------------
131 !
132 subroutine density_calc_pack(this, async)
133 type(density_calc_t), intent(inout) :: this
134 logical, optional, intent(in) :: async
135
136 push_sub(density_calc_pack)
137
138 this%packed = .true.
139 this%pnp = accel_padded_size(this%gr%np)
140 call accel_create_buffer(this%buff_density, accel_mem_read_write, type_float, this%pnp*this%st%d%nspin)
141
142 ! set to zero
143 call accel_set_buffer_to_zero(this%buff_density, type_float, this%pnp*this%st%d%nspin, async=async)
144
145 pop_sub(density_calc_pack)
146 end subroutine density_calc_pack
147
148 ! ---------------------------------------------------
152 !
153 subroutine density_calc_state(this, psib, istin)
154 type(density_calc_t), intent(inout) :: this
155 type(wfs_elec_t), intent(in) :: psib
156 integer, intent(in) :: istin
157
158 integer :: ist, ip, ispin, istin_, bsize, gsize
159 complex(real64) :: term, psi1, psi2
160 real(real64), allocatable :: weight(:)
161 type(accel_mem_t) :: buff_weight
162 type(accel_kernel_t), pointer :: kernel
163
164 push_sub(density_calc_state)
165 call profiling_in("CALC_STATE_DENSITY")
166
167 ispin = this%st%d%get_spin_index(psib%ik)
168
169 istin_= 0
170
171 safe_allocate(weight(1:psib%nst))
172 do ist = 1, psib%nst
173 weight(ist) = this%st%kweights(psib%ik)*this%st%occ(psib%ist(ist), psib%ik)
174 if (psib%ist(ist) == istin) istin_ = ist
175 end do
177 assert(istin_ > 0)
179 if (abs(weight(istin_)) <= m_epsilon) then
180 call profiling_out("CALC_STATE_DENSITY")
181 pop_sub(density_calc_state)
182 return
183 end if
184
185 !fix ist for the remaining code
186 ist = istin_
187
188
189 select case (psib%status())
191 select case (this%st%d%ispin)
193 if (states_are_real(this%st)) then
194 !$omp parallel do simd schedule(static)
195 do ip = 1, this%gr%np
196 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff(ip, 1, ist)**2
197 end do
198 else
199 !$omp parallel do schedule(static)
200 do ip = 1, this%gr%np
201 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
202 real(conjg(psib%zff(ip, 1, ist))*psib%zff(ip, 1, ist), real64)
203 end do
204 end if
205 case (spinors)
206 !$omp parallel do schedule(static) private(psi1, psi2, term)
207 do ip = 1, this%gr%np
208 psi1 = psib%zff(ip, 1, ist)
209 psi2 = psib%zff(ip, 2, ist)
210 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
211 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
212 term = weight(ist)*psi1*conjg(psi2)
213 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
214 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
215 end do
216 end select
217
218 case (batch_packed)
219
220 select case (this%st%d%ispin)
222 if (states_are_real(this%st)) then
223 !$omp parallel do schedule(static)
224 do ip = 1, this%gr%np
225 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff_pack(ist, ip)**2
226 end do
227 else
228
229 !$omp parallel do schedule(static)
230 do ip = 1, this%gr%np
231 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
232 real(conjg(psib%zff_pack(ist, ip))*psib%zff_pack(ist, ip), real64)
233 end do
234 end if
235 case (spinors)
236 assert(mod(psib%nst_linear, 2) == 0)
237 !$omp parallel do schedule(static) private(psi1, psi2, term)
238 do ip = 1, this%gr%np
239 psi1 = psib%zff_pack(2*ist - 1, ip)
240 psi2 = psib%zff_pack(2*ist, ip)
241 term = weight(ist)*psi1*conjg(psi2)
242
243 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
244 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
245 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
246 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
247 end do
248 end select
249
251 if (.not. this%packed) call density_calc_pack(this, async=.true.)
252
253 call accel_create_buffer(buff_weight, accel_mem_read_only, type_float, psib%nst)
254 call accel_write_buffer(buff_weight, psib%nst, weight, async=.true.)
255
256 select case (this%st%d%ispin)
258 if (states_are_real(this%st)) then
259 kernel => kernel_density_real
260 else
261 kernel => kernel_density_complex
262 end if
263
264 call accel_set_kernel_arg(kernel, 0, psib%nst)
265 call accel_set_kernel_arg(kernel, 1, this%gr%np)
266 call accel_set_kernel_arg(kernel, 2, int(this%pnp*(ispin - 1), int32))
267 call accel_set_kernel_arg(kernel, 3, buff_weight)
268 call accel_set_kernel_arg(kernel, 4, psib%ff_device)
269 call accel_set_kernel_arg(kernel, 5, int(log2(psib%pack_size(1)), int32))
270 call accel_set_kernel_arg(kernel, 6, this%buff_density)
271
272 case (spinors)
273 kernel => kernel_density_spinors
274
275 call accel_set_kernel_arg(kernel, 0, psib%nst)
276 call accel_set_kernel_arg(kernel, 1, this%gr%np)
277 call accel_set_kernel_arg(kernel, 2, int(this%pnp, int32))
278 call accel_set_kernel_arg(kernel, 3, buff_weight)
279 call accel_set_kernel_arg(kernel, 4, psib%ff_device)
280 call accel_set_kernel_arg(kernel, 5, int(log2(psib%pack_size(1)), int32))
281 call accel_set_kernel_arg(kernel, 6, this%buff_density)
282 end select
283
284 ! Compute the grid
285 bsize = accel_kernel_block_size(kernel)
286 call accel_grid_size(this%gr%np, bsize, gsize)
287
288 call accel_kernel_run(kernel, (/gsize/), (/bsize/))
289
290 call accel_finish()
291 call accel_free_buffer(buff_weight)
292 end select
293
294 safe_deallocate_a(weight)
295
296 call profiling_out("CALC_STATE_DENSITY")
297
298 pop_sub(density_calc_state)
299 end subroutine density_calc_state
300
301 ! ---------------------------------------------------
303 !
304 subroutine density_calc_accumulate(this, psib, async)
305 type(density_calc_t), intent(inout) :: this
306 type(wfs_elec_t), intent(in) :: psib
307 logical, optional, intent(in) :: async
308
309 integer :: ist, ip, ispin, bsize, gsize
310 complex(real64) :: term, psi1, psi2
311 real(real64), allocatable :: weight(:)
312 type(accel_mem_t) :: buff_weight
313 type(accel_kernel_t), pointer :: kernel
314
316 call profiling_in("CALC_DENSITY")
317
318 ispin = this%st%d%get_spin_index(psib%ik)
319
320 safe_allocate(weight(1:psib%nst))
321 do ist = 1, psib%nst
322 weight(ist) = this%st%kweights(psib%ik)*this%st%occ(psib%ist(ist), psib%ik)
323 end do
324
325 select case (psib%status())
326 case (batch_not_packed)
327 select case (this%st%d%ispin)
329 if (states_are_real(this%st)) then
330 do ist = 1, psib%nst
331 if (abs(weight(ist)) <= m_epsilon) cycle
332 !$omp parallel do simd schedule(static)
333 do ip = 1, this%gr%np
334 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff(ip, 1, ist)**2
335 end do
336 end do
337 else
338 do ist = 1, psib%nst
339 if (abs(weight(ist)) <= m_epsilon) cycle
340 !$omp parallel do schedule(static)
341 do ip = 1, this%gr%np
342 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
343 real(conjg(psib%zff(ip, 1, ist))*psib%zff(ip, 1, ist), real64)
344 end do
345 end do
346 end if
347 case (spinors)
348 do ist = 1, psib%nst
349 if (abs(weight(ist)) <= m_epsilon) cycle
350 !$omp parallel do schedule(static) private(psi1, psi2, term)
351 do ip = 1, this%gr%np
352 psi1 = psib%zff(ip, 1, ist)
353 psi2 = psib%zff(ip, 2, ist)
354 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
355 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
356 term = weight(ist)*psi1*conjg(psi2)
357 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
358 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
359 end do
360 end do
361 end select
362
363 case (batch_packed)
364
365 select case (this%st%d%ispin)
367 if (states_are_real(this%st)) then
368 !$omp parallel do schedule(static) private(ist)
369 do ip = 1, this%gr%np
370 do ist = 1, psib%nst
371 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff_pack(ist, ip)**2
372 end do
373 end do
374 else
375 !$omp parallel do schedule(static) private(ist)
376 do ip = 1, this%gr%np
377 do ist = 1, psib%nst
378 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
379 real(conjg(psib%zff_pack(ist, ip))*psib%zff_pack(ist, ip), real64)
380 end do
381 end do
382 end if
383 case (spinors)
384 assert(mod(psib%nst_linear, 2) == 0)
385 !$omp parallel do schedule(static) private(ist, psi1, psi2, term)
386 do ip = 1, this%gr%np
387 do ist = 1, psib%nst
388 psi1 = psib%zff_pack(2*ist - 1, ip)
389 psi2 = psib%zff_pack(2*ist, ip)
390 term = weight(ist)*psi1*conjg(psi2)
391
392 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
393 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
394 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
395 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
396 end do
397 end do
398 end select
401 if (.not. this%packed) call density_calc_pack(this)
402
403 call accel_create_buffer(buff_weight, accel_mem_read_only, type_float, psib%nst)
404 call accel_write_buffer(buff_weight, psib%nst, weight, async=.true.)
405
406 select case (this%st%d%ispin)
408 if (states_are_real(this%st)) then
409 kernel => kernel_density_real
410 else
411 kernel => kernel_density_complex
412 end if
413
414 call accel_set_kernel_arg(kernel, 0, psib%nst)
415 call accel_set_kernel_arg(kernel, 1, this%gr%np)
416 call accel_set_kernel_arg(kernel, 2, int(this%pnp, int32)*(ispin - 1))
417 call accel_set_kernel_arg(kernel, 3, buff_weight)
418 call accel_set_kernel_arg(kernel, 4, psib%ff_device)
419 call accel_set_kernel_arg(kernel, 5, int(log2(psib%pack_size(1)), int32))
420 call accel_set_kernel_arg(kernel, 6, this%buff_density)
421
422 case (spinors)
423 kernel => kernel_density_spinors
424
425 call accel_set_kernel_arg(kernel, 0, psib%nst)
426 call accel_set_kernel_arg(kernel, 1, this%gr%np)
427 call accel_set_kernel_arg(kernel, 2, int(this%pnp, int32))
428 call accel_set_kernel_arg(kernel, 3, buff_weight)
429 call accel_set_kernel_arg(kernel, 4, psib%ff_device)
430 call accel_set_kernel_arg(kernel, 5, int(log2(psib%pack_size(1)), int32))
431 call accel_set_kernel_arg(kernel, 6, this%buff_density)
432 end select
433
434 ! Compute the grid
435 bsize = accel_kernel_block_size(kernel)
436 call accel_grid_size(this%gr%np, bsize, gsize)
437
438 call accel_kernel_run(kernel, (/gsize/), (/bsize/))
439
440 call accel_free_buffer(buff_weight)
441
442 if(.not. optional_default(async, .false.)) call accel_finish()
443
444 end select
445
446 safe_deallocate_a(weight)
447
448
449 if (this%st%d%ispin /= spinors) then
450 if (states_are_real(this%st)) then
451 ! 1 add (1), 2 mult (1)
452 call profiling_count_operations(psib%nst*this%gr%np*(1 + 2))
453 else
454 ! 1 add (2), 1 complex mult (6), 1 real-complex mult (2)
455 call profiling_count_operations(psib%nst*this%gr%np*(2 + 6 + 2))
456 end if
457 else
458 ! 4 add (2), 3 complex mult (6), 3 real-complex mult (2)
459 call profiling_count_operations(psib%nst*this%gr%np*(4*2 + 3*6 + 3*2))
460 end if
461
462 call profiling_out("CALC_DENSITY")
463
465 end subroutine density_calc_accumulate
466
467 ! ---------------------------------------------------
473 !
474 subroutine density_calc_end(this, allreduce, symmetrize, buff_density)
475 type(density_calc_t), intent(inout) :: this
476 logical, optional, intent(in) :: allreduce
477 logical, optional, intent(in) :: symmetrize
478 type(accel_mem_t), optional, intent(inout) :: buff_density
479
480 integer :: ispin, ip
481 real(real64), allocatable :: tmpdensity(:)
482 logical :: was_packed
483
484 logical:: allreduce_local, symmetrize_local
485
486 push_sub(density_calc_end)
487
488 call profiling_in("CALC_DENSITY_END")
489
490 allreduce_local = (this%st%parallel_in_states .or. this%st%d%kpt%parallel) .and. optional_default(allreduce, .true.)
491 symmetrize_local = this%st%symmetrize_density .and. optional_default(symmetrize, .true.)
492
493 was_packed = this%packed ! Save before modifying
494 if (this%packed) then
495 this%packed = .false.
496
497 safe_allocate(tmpdensity(1:this%gr%np))
498
499 ! the density is in device memory
500 do ispin = 1, this%st%d%nspin
501 call accel_read_buffer(this%buff_density, int(this%gr%np, int64), tmpdensity, offset = (ispin - 1)*this%pnp)
502
503 !$omp parallel do simd
504 do ip = 1, this%gr%np
505 this%density(ip, ispin) = this%density(ip, ispin) + tmpdensity(ip)
506 end do
507 end do
508
509 safe_deallocate_a(tmpdensity)
510
511 ! If buff_density is not needed, free the GPU buffer now
512 if (.not. present(buff_density)) then
513 call accel_free_buffer(this%buff_density)
514 end if
515 end if
516
517 ! reduce over states and k-points
518 if (allreduce_local) then
519 call profiling_in("DENSITY_REDUCE")
520 call comm_allreduce(this%st%st_kpt_mpi_grp, this%density, dim = (/this%gr%np, this%st%d%nspin/))
521 call profiling_out("DENSITY_REDUCE")
522 end if
523
524 if (symmetrize_local) then
525 do ispin = 1, this%st%d%nspin
526 call dgrid_symmetrize_scalar_field(this%gr, this%density(:, ispin))
527 end do
528 end if
529
530 if (present(buff_density) .and. was_packed) then
531 if (allreduce_local .or. symmetrize_local) then
532 do ispin = 1, this%st%d%nspin
533 call accel_write_buffer(this%buff_density, int(this%gr%np, int64), &
534 this%density(1:this%gr%np, ispin), &
535 offset=int((ispin - 1)*this%pnp, int64))
536 end do
537 end if
538
539 ! Free any previously stored buffer before overwriting
540 if (buff_density%allocated) call accel_free_buffer(buff_density)
541
542 call accel_move_buffer(this%buff_density, buff_density)
543 end if
544
545 call profiling_out("CALC_DENSITY_END")
546
547 pop_sub(density_calc_end)
548 end subroutine density_calc_end
549
550
551 ! ---------------------------------------------------------
556 !
557 subroutine density_calc(st, gr, density, istin)
558 type(states_elec_t), intent(inout) :: st
559 type(grid_t), intent(in) :: gr
560 real(real64), contiguous, intent(out) :: density(:, :)
561 integer, optional, intent(in) :: istin
562
563 integer :: ik, ib
564 type(density_calc_t) :: dens_calc
565
566 push_sub(density_calc)
567
568 assert(ubound(density, dim = 1) == gr%np .or. ubound(density, dim = 1) == gr%np_part)
570 call density_calc_init(dens_calc, st, gr, density)
571
572 if (.not. present(istin)) then
573 ! ordinary density accumulate
574 do ik = st%d%kpt%start, st%d%kpt%end
575 do ib = st%group%block_start, st%group%block_end
576 call density_calc_accumulate(dens_calc, st%group%psib(ib, ik), async=.true.)
577 end do
578 end do
579 call accel_finish()
580 else
581 ! state-resolved density
582 do ik = st%d%kpt%start, st%d%kpt%end
583 do ib = st%group%block_start, st%group%block_end
584 if (st%group%psib(ib, ik)%ist(1) <= istin .and. st%group%psib(ib, ik)%ist(st%group%psib(ib, ik)%nst) >= istin) then
585 call density_calc_state(dens_calc, st%group%psib(ib, ik), istin)
586 end if
587 end do
588 end do
589 end if
590
591 call density_calc_end(dens_calc, allreduce = .not. present(istin), buff_density = st%buff_density)
592
593 pop_sub(density_calc)
594 end subroutine density_calc
595
596 ! ---------------------------------------------------------
598 subroutine states_elec_freeze_orbitals(st, namespace, space, gr, mc, kpoints, n, family_is_mgga)
599 type(states_elec_t), intent(inout) :: st
600 type(namespace_t), intent(in) :: namespace
601 class(space_t), intent(in) :: space
602 type(grid_t), intent(in) :: gr
603 type(multicomm_t), intent(in) :: mc
604 type(kpoints_t), intent(in) :: kpoints
605 integer, intent(in) :: n
606 logical, intent(in) :: family_is_mgga
607
608 integer :: ist, ik, ib, nblock
609 integer :: nodeto, nodefr
610 type(states_elec_t) :: staux
611 complex(real64), allocatable :: psi(:, :, :), rec_buffer(:,:)
612 type(wfs_elec_t) :: psib
613 type(density_calc_t) :: dens_calc
614
616
617 if (n >= st%nst) then
618 write(message(1),'(a)') 'Attempting to freeze a number of orbitals which is larger or equal to'
619 write(message(2),'(a)') 'the total number. The program has to stop.'
620 call messages_fatal(2, namespace=namespace)
621 end if
622
623 assert(states_are_complex(st))
624
625 if (.not. allocated(st%frozen_rho)) then
626 safe_allocate(st%frozen_rho(1:gr%np, 1:st%d%nspin))
627 end if
628
629 call density_calc_init(dens_calc, st, gr, st%frozen_rho)
630
631 do ik = st%d%kpt%start, st%d%kpt%end
632 if (n < st%st_start) cycle
633
634 do ib = st%group%block_start, st%group%block_end
635 !We can use the full batch
636 if (states_elec_block_max(st, ib) <= n) then
637
638 call density_calc_accumulate(dens_calc, st%group%psib(ib, ik))
639 if (states_elec_block_max(st, ib) == n) exit
640
641 else !Here we only use a part of this batch
642
643 nblock = n - states_elec_block_min(st, ib) + 1
644
645 safe_allocate(psi(1:gr%np, 1:st%d%dim, 1:nblock))
646
647 do ist = 1, nblock
648 call states_elec_get_state(st, gr, states_elec_block_min(st, ib) + ist - 1, ik, psi(:, :, ist))
649 end do
650
651 call wfs_elec_init(psib, st%d%dim, states_elec_block_min(st, ib), n, psi, ik)
652 call density_calc_accumulate(dens_calc, psib)
653 call psib%end()
654 safe_deallocate_a(psi)
655
656 exit
657
658 end if
659
660 end do
661 end do
662
663 call density_calc_end(dens_calc)
664
665 if (family_is_mgga) then
666 if (.not. allocated(st%frozen_tau)) then
667 safe_allocate(st%frozen_tau(1:gr%np, 1:st%d%nspin))
668 end if
669 if (.not. allocated(st%frozen_gdens)) then
670 safe_allocate(st%frozen_gdens(1:gr%np, 1:space%dim, 1:st%d%nspin))
671 end if
672 if (.not. allocated(st%frozen_ldens)) then
673 safe_allocate(st%frozen_ldens(1:gr%np, 1:st%d%nspin))
674 end if
675
676 call states_elec_calc_quantities(gr, st, kpoints, .true., kinetic_energy_density = st%frozen_tau, &
677 density_gradient = st%frozen_gdens, density_laplacian = st%frozen_ldens, st_end = n)
678 end if
679
680
681 call states_elec_copy(staux, st)
682
683 call states_elec_freeze_redistribute_states(st, namespace, gr, mc, n)
684
685 safe_allocate(psi(1:gr%np, 1:st%d%dim, 1))
686 safe_allocate(rec_buffer(1:gr%np, 1:st%d%dim))
687
688 do ik = st%d%kpt%start, st%d%kpt%end
689 ! loop over all frozen states
690 do ist = 1, st%nst
691 ! new distribution: frozen states
692 nodeto = st%node(ist)
693 ! old distribution: full states
694 nodefr = staux%node(ist+n)
695
696 if (st%mpi_grp%rank == nodeto .and. st%mpi_grp%rank == nodefr) then
697 ! do a simple copy on the same rank, also for serial version
698 call states_elec_get_state(staux, gr, ist+n, ik, psi(:, :, 1))
699 call states_elec_set_state(st, gr, ist, ik, psi(:, :, 1))
700 else
701 ! we can use blocking send/recv calls here because we always have different ranks
702 if (st%mpi_grp%rank == nodefr) then
703 call states_elec_get_state(staux, gr, ist+n, ik, psi(:, :, 1))
704 call st%mpi_grp%send(psi(1, 1, 1), gr%np*st%d%dim, mpi_double_complex, nodeto, ist)
705 end if
706 if (st%mpi_grp%rank == nodeto) then
707 call st%mpi_grp%recv(rec_buffer(1, 1), gr%np*st%d%dim, mpi_double_complex, nodefr, ist)
708 call states_elec_set_state(st, gr, ist, ik, rec_buffer(:, :))
709 end if
710 end if
711 end do
712 end do
713
714 safe_deallocate_a(psi)
715 safe_deallocate_a(rec_buffer)
716
717 do ik = 1, st%nik
718 do ist = 1, st%nst
719 st%occ(ist, ik) = staux%occ(n+ist, ik)
720 st%eigenval(ist, ik) = staux%eigenval(n+ist, ik)
721 end do
722 end do
723
724 call states_elec_end(staux)
726 end subroutine states_elec_freeze_orbitals
727
728 ! ---------------------------------------------------------
729 subroutine states_elec_freeze_redistribute_states(st, namespace, mesh, mc, nn)
730 type(states_elec_t), intent(inout) :: st
731 type(namespace_t), intent(in) :: namespace
732 class(mesh_t), intent(in) :: mesh
733 type(multicomm_t), intent(in) :: mc
734 integer, intent(in) :: nn
735
737
738 st%nst = st%nst - nn
739
741 call states_elec_distribute_nodes(st, namespace, mc)
742 call states_elec_allocate_wfns(st, mesh, type_cmplx, packed=.true.)
743
744 safe_deallocate_a(st%eigenval)
745 safe_allocate(st%eigenval(1:st%nst, 1:st%nik))
746 st%eigenval = huge(st%eigenval)
747
748 safe_deallocate_a(st%occ)
749 safe_allocate(st%occ (1:st%nst, 1:st%nik))
750 st%occ = m_zero
751
752
755
756 ! ---------------------------------------------------------
757 subroutine states_elec_freeze_adjust_qtot(st)
758 type(states_elec_t), intent(inout) :: st
759
760 integer :: ik, ist
761
763
764 ! Change the smearing method by fixing the occupations to
765 ! that of the ground-state such that the unfrozen states inherit
766 ! those values.
767 st%smear%method = smear_fixed_occ
768
769 ! Set total charge
770 st%qtot = m_zero
771 do ik = st%d%kpt%start, st%d%kpt%end
772 do ist = st%st_start, st%st_end
773 st%qtot = st%qtot + st%occ(ist, ik) * st%kweights(ik)
774 end do
775 end do
776
777 if (st%parallel_in_states .or. st%d%kpt%parallel) then
778 call comm_allreduce(st%st_kpt_mpi_grp, st%qtot)
779 end if
780
781
783 end subroutine states_elec_freeze_adjust_qtot
784
785
786 ! ---------------------------------------------------------
795 !
796 subroutine states_elec_total_density(st, mesh, total_rho)
797 type(states_elec_t), intent(in) :: st
798 class(mesh_t), intent(in) :: mesh
799 real(real64), contiguous, intent(out) :: total_rho(:,:)
800
801 integer :: is
802
804
805 call lalg_copy(mesh%np, st%d%nspin, st%rho, total_rho)
806
807 if (allocated(st%rho_core)) then
808 do is = 1, st%d%spin_channels
809 call lalg_axpy(mesh%np, m_one/st%d%spin_channels, st%rho_core, total_rho(:, is))
810 end do
811 end if
812
813 ! Add, if it exists, the frozen density from the inner orbitals.
814 if (allocated(st%frozen_rho)) then
815 call lalg_axpy(mesh%np, st%d%nspin, m_one, st%frozen_rho, total_rho)
816 end if
817
819 end subroutine states_elec_total_density
820
821 ! ---------------------------------------------------------
824 subroutine states_elec_sync_buff_density(st, mesh)
825 type(states_elec_t), intent(inout) :: st
826 class(mesh_t), intent(in) :: mesh
827
828 integer :: is
829 integer(int64) :: pnp
830
832
833 if (accel_is_enabled() .and. st%buff_density%allocated) then
834 pnp = accel_padded_size(mesh%np)
835 do is = 1, st%d%nspin
836 call accel_write_buffer(st%buff_density, int(mesh%np, int64), st%rho(1:mesh%np, is), &
837 offset = (is - 1)*pnp)
838 end do
839 end if
840
842 end subroutine states_elec_sync_buff_density
843
844#include "undef.F90"
845#include "real.F90"
846#include "density_inc.F90"
847
848#include "undef.F90"
849#include "complex.F90"
850#include "density_inc.F90"
851
853
854
855!! Local Variables:
856!! mode: f90
857!! coding: utf-8
858!! End:
constant times a vector plus a vector
Definition: lalg_basic.F90:173
Copies a vector x, to a vector y.
Definition: lalg_basic.F90:188
type(accel_kernel_t), target, save, public kernel_density_real
Definition: accel.F90:272
integer function, public accel_kernel_block_size(kernel)
Definition: accel.F90:1214
subroutine, public accel_free_buffer(this, async)
Definition: accel.F90:1006
subroutine, public accel_finish()
Definition: accel.F90:1124
subroutine, public accel_move_buffer(buffer_from, buffer_to)
Move the buffer memory from the first buffer to the second.
Definition: accel.F90:1049
type(accel_kernel_t), target, save, public kernel_density_spinors
Definition: accel.F90:274
integer, parameter, public accel_mem_read_write
Definition: accel.F90:186
pure logical function, public accel_is_enabled()
Definition: accel.F90:403
type(accel_kernel_t), target, save, public kernel_density_complex
Definition: accel.F90:273
integer, parameter, public accel_mem_read_only
Definition: accel.F90:186
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
This module implements a calculator for the density and defines related functions.
Definition: density.F90:122
subroutine density_calc_state(this, psib, istin)
Calculate contribution to the density from state istin.
Definition: density.F90:249
subroutine, public density_calc_accumulate(this, psib, async)
Accumulate weighted orbital densities for a batch psib.
Definition: density.F90:400
subroutine, public states_elec_freeze_adjust_qtot(st)
Definition: density.F90:853
subroutine, public states_elec_freeze_redistribute_states(st, namespace, mesh, mc, nn)
Definition: density.F90:825
subroutine, public density_calc_init(this, st, gr, density)
initialize the density calculator
Definition: density.F90:191
subroutine density_calc_pack(this, async)
prepare the density calculator for GPU use
Definition: density.F90:228
subroutine, public states_elec_sync_buff_density(st, mesh)
Synchronize the GPU density buffer with the host density strho.
Definition: density.F90:920
subroutine, public ddensity_accumulate_grad(space, mesh, st, psib, grad_psib, grad_rho)
Accumulate within a batch.
Definition: density.F90:1010
subroutine, public zdensity_accumulate_grad(space, mesh, st, psib, grad_psib, grad_rho)
Accumulate within a batch.
Definition: density.F90:1199
subroutine, public states_elec_total_density(st, mesh, total_rho)
This routine calculates the total electronic density.
Definition: density.F90:892
subroutine, public density_calc_end(this, allreduce, symmetrize, buff_density)
Finalize the density calculation.
Definition: density.F90:570
subroutine, public states_elec_freeze_orbitals(st, namespace, space, gr, mc, kpoints, n, family_is_mgga)
Calculate partial density for frozen orbitals.
Definition: density.F90:694
subroutine, public density_calc(st, gr, density, istin)
Computes the density from the orbitals in st.
Definition: density.F90:653
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
integer, parameter, public unpolarized
Parameters...
integer, parameter, public spinors
integer, parameter, public spin_polarized
real(real64), parameter, public m_zero
Definition: global.F90:200
real(real64), parameter, public m_epsilon
Definition: global.F90:216
real(real64), parameter, public m_one
Definition: global.F90:201
This module implements the underlying real-space grid.
Definition: grid.F90:119
subroutine, public dgrid_symmetrize_scalar_field(gr, field, suppress_warning)
Definition: grid.F90:675
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines various routines, operating on mesh functions.
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
integer, parameter, public smear_fixed_occ
Definition: smear.F90:176
pure logical function, public states_are_complex(st)
pure logical function, public states_are_real(st)
This module handles spin dimensions of the states and the k-point distribution.
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
subroutine, public states_elec_distribute_nodes(st, namespace, mc)
Distribute states over the processes for states parallelization.
subroutine, public states_elec_calc_quantities(gr, st, kpoints, nlcc, kinetic_energy_density, paramagnetic_current, density_gradient, density_laplacian, gi_kinetic_energy_density, st_end)
calculated selected quantities
subroutine, public states_elec_end(st)
finalize the states_elec_t object
subroutine, public states_elec_deallocate_wfns(st)
Deallocates the KS wavefunctions defined within a states_elec_t structure.
subroutine, public states_elec_allocate_wfns(st, mesh, wfs_type, skip, packed)
Allocates the KS wavefunctions defined within a states_elec_t structure.
subroutine, public states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval, special)
make a (selective) copy of a states_elec_t object
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
type(type_t), parameter, public type_cmplx
Definition: types.F90:136
type(type_t), parameter, public type_float
Definition: types.F90:135
The calculator object.
Definition: density.F90:172
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
Describes mesh distribution to nodes.
Definition: mesh.F90:187
Stores all communicators and groups.
Definition: multicomm.F90:208
The states_elec_t class contains all electronic wave functions.
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)