38 use,
intrinsic :: iso_fortran_env
78 real(real64),
contiguous,
pointer :: density(:, :)
79 type(states_elec_t),
pointer :: st
80 type(grid_t),
pointer :: gr
81 type(accel_mem_t) :: buff_density
94 type(density_calc_t),
intent(out) :: this
95 type(states_elec_t),
target,
intent(in) :: st
96 type(grid_t),
target,
intent(in) :: gr
97 real(real64),
contiguous,
target,
intent(out) :: density(:, :)
100 logical :: correct_size
107 this%density => density
109 do ispin = 1, st%d%nspin
112 this%density(ip, ispin) =
m_zero
118 this%packed = .false.
120 correct_size = ubound(this%density, dim = 1) == this%gr%np .or. &
121 ubound(this%density, dim = 1) == this%gr%np_part
131 type(density_calc_t),
intent(inout) :: this
132 logical,
optional,
intent(in) :: async
152 type(density_calc_t),
intent(inout) :: this
153 type(wfs_elec_t),
intent(in) :: psib
154 integer,
intent(in) :: istin
156 integer :: ist, ip, ispin, istin_, wgsize
157 complex(real64) :: term, psi1, psi2
158 real(real64),
allocatable :: weight(:)
159 type(accel_mem_t) :: buff_weight
160 type(accel_kernel_t),
pointer :: kernel
165 ispin = this%st%d%get_spin_index(psib%ik)
169 safe_allocate(weight(1:psib%nst))
171 weight(ist) = this%st%kweights(psib%ik)*this%st%occ(psib%ist(ist), psib%ik)
172 if (psib%ist(ist) == istin) istin_ = ist
177 if (abs(weight(istin_)) <=
m_epsilon)
then
187 select case (psib%status())
189 select case (this%st%d%ispin)
193 do ip = 1, this%gr%np
194 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff(ip, 1, ist)**2
198 do ip = 1, this%gr%np
199 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
200 real(conjg(psib%zff(ip, 1, ist))*psib%zff(ip, 1, ist), real64)
205 do ip = 1, this%gr%np
206 psi1 = psib%zff(ip, 1, ist)
207 psi2 = psib%zff(ip, 2, ist)
208 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
209 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
210 term = weight(ist)*psi1*conjg(psi2)
211 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
212 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
218 select case (this%st%d%ispin)
222 do ip = 1, this%gr%np
223 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff_pack(ist, ip)**2
228 do ip = 1, this%gr%np
229 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
230 real(conjg(psib%zff_pack(ist, ip))*psib%zff_pack(ist, ip), real64)
234 assert(mod(psib%nst_linear, 2) == 0)
236 do ip = 1, this%gr%np
237 psi1 = psib%zff_pack(2*ist - 1, ip)
238 psi2 = psib%zff_pack(2*ist, ip)
239 term = weight(ist)*psi1*conjg(psi2)
241 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
242 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
243 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
244 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
254 select case (this%st%d%ispin)
290 safe_deallocate_a(weight)
303 logical,
optional,
intent(in) :: async
305 integer :: ist, ip, ispin, wgsize
306 complex(real64) :: term, psi1, psi2
307 real(real64),
allocatable :: weight(:)
314 ispin = this%st%d%get_spin_index(psib%ik)
316 safe_allocate(weight(1:psib%nst))
318 weight(ist) = this%st%kweights(psib%ik)*this%st%occ(psib%ist(ist), psib%ik)
321 select case (psib%status())
323 select case (this%st%d%ispin)
329 do ip = 1, this%gr%np
330 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff(ip, 1, ist)**2
337 do ip = 1, this%gr%np
338 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
339 real(conjg(psib%zff(ip, 1, ist))*psib%zff(ip, 1, ist), real64)
347 do ip = 1, this%gr%np
348 psi1 = psib%zff(ip, 1, ist)
349 psi2 = psib%zff(ip, 2, ist)
350 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
351 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
352 term = weight(ist)*psi1*conjg(psi2)
353 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
354 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
361 select case (this%st%d%ispin)
365 do ip = 1, this%gr%np
367 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)*psib%dff_pack(ist, ip)**2
372 do ip = 1, this%gr%np
374 this%density(ip, ispin) = this%density(ip, ispin) + weight(ist)* &
375 real(conjg(psib%zff_pack(ist, ip))*psib%zff_pack(ist, ip), real64)
380 assert(mod(psib%nst_linear, 2) == 0)
382 do ip = 1, this%gr%np
384 psi1 = psib%zff_pack(2*ist - 1, ip)
385 psi2 = psib%zff_pack(2*ist, ip)
386 term = weight(ist)*psi1*conjg(psi2)
388 this%density(ip, 1) = this%density(ip, 1) + weight(ist)*real(conjg(psi1)*psi1, real64)
389 this%density(ip, 2) = this%density(ip, 2) + weight(ist)*real(conjg(psi2)*psi2, real64)
390 this%density(ip, 3) = this%density(ip, 3) + real(term, real64)
391 this%density(ip, 4) = this%density(ip, 4) + aimag(term)
402 select case (this%st%d%ispin)
440 safe_deallocate_a(weight)
443 if (this%st%d%ispin /=
spinors)
then
469 logical,
optional,
intent(in) :: allreduce
470 logical,
optional,
intent(in) :: symmetrize
473 real(real64),
allocatable :: tmpdensity(:)
477 if (this%packed)
then
478 safe_allocate(tmpdensity(1:this%gr%np))
481 do ispin = 1, this%st%d%nspin
482 call accel_read_buffer(this%buff_density, int(this%gr%np, int64), tmpdensity, offset = (ispin - 1)*this%pnp)
485 do ip = 1, this%gr%np
486 this%density(ip, ispin) = this%density(ip, ispin) + tmpdensity(ip)
490 this%packed = .false.
492 safe_deallocate_a(tmpdensity)
496 if ((this%st%parallel_in_states .or. this%st%d%kpt%parallel) .and.
optional_default(allreduce, .
true.))
then
498 call comm_allreduce(this%st%st_kpt_mpi_grp, this%density, dim = (/this%gr%np, this%st%d%nspin/))
503 do ispin = 1, this%st%d%nspin
520 type(
grid_t),
intent(in) :: gr
521 real(real64),
contiguous,
intent(out) :: density(:, :)
522 integer,
optional,
intent(in) :: istin
529 assert(ubound(density, dim = 1) == gr%np .or. ubound(density, dim = 1) == gr%np_part)
533 if (.not.
present(istin))
then
535 do ik = st%d%kpt%start, st%d%kpt%end
536 do ib = st%group%block_start, st%group%block_end
543 do ik = st%d%kpt%start, st%d%kpt%end
544 do ib = st%group%block_start, st%group%block_end
545 if (st%group%psib(ib, ik)%ist(1) <= istin .and. st%group%psib(ib, ik)%ist(st%group%psib(ib, ik)%nst) >= istin)
then
564 class(
space_t),
intent(in) :: space
565 type(
grid_t),
intent(in) :: gr
568 integer,
intent(in) :: n
569 logical,
intent(in) :: family_is_mgga
571 integer :: ist, ik, ib, nblock
572 integer :: nodeto, nodefr
574 complex(real64),
allocatable :: psi(:, :, :), rec_buffer(:,:)
580 if (n >= st%nst)
then
581 write(
message(1),
'(a)')
'Attempting to freeze a number of orbitals which is larger or equal to'
582 write(
message(2),
'(a)')
'the total number. The program has to stop.'
588 if (.not.
allocated(st%frozen_rho))
then
589 safe_allocate(st%frozen_rho(1:gr%np, 1:st%d%nspin))
594 do ik = st%d%kpt%start, st%d%kpt%end
595 if (n < st%st_start) cycle
597 do ib = st%group%block_start, st%group%block_end
608 safe_allocate(psi(1:gr%np, 1:st%d%dim, 1:nblock))
617 safe_deallocate_a(psi)
628 if (family_is_mgga)
then
629 if (.not.
allocated(st%frozen_tau))
then
630 safe_allocate(st%frozen_tau(1:gr%np, 1:st%d%nspin))
632 if (.not.
allocated(st%frozen_gdens))
then
633 safe_allocate(st%frozen_gdens(1:gr%np, 1:space%dim, 1:st%d%nspin))
635 if (.not.
allocated(st%frozen_ldens))
then
636 safe_allocate(st%frozen_ldens(1:gr%np, 1:st%d%nspin))
640 density_gradient = st%frozen_gdens, density_laplacian = st%frozen_ldens, st_end = n)
648 safe_allocate(psi(1:gr%np, 1:st%d%dim, 1))
649 safe_allocate(rec_buffer(1:gr%np, 1:st%d%dim))
651 do ik = st%d%kpt%start, st%d%kpt%end
655 nodeto = st%node(ist)
657 nodefr = staux%node(ist+n)
659 if (st%mpi_grp%rank == nodeto .and. st%mpi_grp%rank == nodefr)
then
665 if (st%mpi_grp%rank == nodefr)
then
667 call st%mpi_grp%send(psi(1, 1, 1), gr%np*st%d%dim, mpi_double_complex, nodeto, ist)
669 if (st%mpi_grp%rank == nodeto)
then
670 call st%mpi_grp%recv(rec_buffer(1, 1), gr%np*st%d%dim, mpi_double_complex, nodefr, ist)
677 safe_deallocate_a(psi)
678 safe_deallocate_a(rec_buffer)
682 st%occ(ist, ik) = staux%occ(n+ist, ik)
683 st%eigenval(ist, ik) = staux%eigenval(n+ist, ik)
695 class(
mesh_t),
intent(in) :: mesh
697 integer,
intent(in) :: nn
707 safe_deallocate_a(st%eigenval)
708 safe_allocate(st%eigenval(1:st%nst, 1:st%nik))
709 st%eigenval = huge(st%eigenval)
711 safe_deallocate_a(st%occ)
712 safe_allocate(st%occ (1:st%nst, 1:st%nik))
734 do ik = st%d%kpt%start, st%d%kpt%end
735 do ist = st%st_start, st%st_end
736 st%qtot = st%qtot + st%occ(ist, ik) * st%kweights(ik)
740 if (st%parallel_in_states .or. st%d%kpt%parallel)
then
761 class(
mesh_t),
intent(in) :: mesh
762 real(real64),
contiguous,
intent(out) :: total_rho(:,:)
768 call lalg_copy(mesh%np, st%d%nspin, st%rho, total_rho)
770 if (
allocated(st%rho_core))
then
771 do is = 1, st%d%spin_channels
772 call lalg_axpy(mesh%np,
m_one/st%d%spin_channels, st%rho_core, total_rho(:, is))
777 if (
allocated(st%frozen_rho))
then
778 call lalg_axpy(mesh%np, st%d%nspin,
m_one, st%frozen_rho, total_rho)
786#include "density_inc.F90"
789#include "complex.F90"
790#include "density_inc.F90"
constant times a vector plus a vector
Copies a vector x, to a vector y.
type(accel_kernel_t), target, save, public kernel_density_real
subroutine, public accel_finish()
type(accel_kernel_t), target, save, public kernel_density_spinors
integer, parameter, public accel_mem_read_write
subroutine, public accel_release_buffer(this)
type(accel_kernel_t), target, save, public kernel_density_complex
integer function, public accel_kernel_workgroup_size(kernel)
integer, parameter, public accel_mem_read_only
This module implements batches of mesh functions.
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
This module implements common operations on batches of mesh functions.
This module implements a calculator for the density and defines related functions.
subroutine, public density_calc_end(this, allreduce, symmetrize)
Finalize the density calculation.
subroutine density_calc_state(this, psib, istin)
Calculate contribution to the density from state istin.
subroutine, public density_calc_accumulate(this, psib, async)
Accumulate weighted orbital densities for a batch psib.
subroutine, public states_elec_freeze_adjust_qtot(st)
subroutine, public states_elec_freeze_redistribute_states(st, namespace, mesh, mc, nn)
subroutine, public density_calc_init(this, st, gr, density)
initialize the density calculator
subroutine density_calc_pack(this, async)
prepare the density calculator for GPU use
subroutine, public ddensity_accumulate_grad(space, mesh, st, psib, grad_psib, grad_rho)
Accumulate within a batch.
subroutine, public zdensity_accumulate_grad(space, mesh, st, psib, grad_psib, grad_rho)
Accumulate within a batch.
subroutine, public states_elec_total_density(st, mesh, total_rho)
This routine calculates the total electronic density.
subroutine, public states_elec_freeze_orbitals(st, namespace, space, gr, mc, kpoints, n, family_is_mgga)
Calculate partial density for frozen orbitals.
subroutine, public density_calc(st, gr, density, istin)
Computes the density from the orbitals in st.
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
integer, parameter, public unpolarized
Parameters...
integer, parameter, public spinors
integer, parameter, public spin_polarized
real(real64), parameter, public m_zero
real(real64), parameter, public m_epsilon
real(real64), parameter, public m_one
This module implements the underlying real-space grid.
subroutine, public dgrid_symmetrize_scalar_field(gr, field, suppress_warning)
This module is intended to contain "only mathematical" functions and procedures.
This module defines various routines, operating on mesh functions.
This module defines the meshes, which are used in Octopus.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
This module handles the communicators for the various parallelization strategies.
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
integer, parameter, public smear_fixed_occ
pure logical function, public states_are_complex(st)
pure logical function, public states_are_real(st)
This module handles spin dimensions of the states and the k-point distribution.
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
subroutine, public states_elec_distribute_nodes(st, namespace, mc)
@Brief. Distribute states over the processes for states parallelization
subroutine, public states_elec_calc_quantities(gr, st, kpoints, nlcc, kinetic_energy_density, paramagnetic_current, density_gradient, density_laplacian, gi_kinetic_energy_density, st_end)
calculated selected quantities
subroutine, public states_elec_end(st)
finalize the states_elec_t object
subroutine, public states_elec_deallocate_wfns(st)
Deallocates the KS wavefunctions defined within a states_elec_t structure.
subroutine, public states_elec_allocate_wfns(st, mesh, wfs_type, skip, packed)
Allocates the KS wavefunctions defined within a states_elec_t structure.
subroutine, public states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval, special)
make a (selective) copy of a states_elec_t object
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
type(type_t), parameter, public type_cmplx
type(type_t), parameter, public type_float
Description of the grid, containing information on derivatives, stencil, and symmetries.
Describes mesh distribution to nodes.
Stores all communicators and groups.
The states_elec_t class contains all electronic wave functions.
batches of electronic states