main/doxygen_doc/dm__propagation_8F90_source.html

!! Copyright (C) 2024 - 2025 S. Pal, Z. Nie, U. De Giovannini

!! This program is free software; you can redistribute it and/or modify

!! it under the terms of the GNU General Public License as published by

!! the Free Software Foundation; either version 2, or (at your option)

!! any later version.

!!

!! This program is distributed in the hope that it will be useful,

!! but WITHOUT ANY WARRANTY; without even the implied warranty of

!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

!! GNU General Public License for more details.

!!

!! You should have received a copy of the GNU General Public License

!! along with this program; if not, write to the Free Software

!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

!! 02110-1301, USA.

!!


#include "global.h"


module dm_propagation_oct_m

  use batch_ops_oct_m

  use blas_oct_m

  use debug_oct_m

  use density_oct_m

  use distributed_oct_m

  use eigensolver_oct_m

  use electron_space_oct_m

  use global_oct_m

  use grid_oct_m

  use hamiltonian_elec_oct_m

  use interaction_partner_oct_m

  use ions_oct_m

  use, intrinsic :: iso_fortran_env

  use kpoints_oct_m

  use lalg_adv_oct_m

  use lalg_basic_oct_m

  use mesh_batch_oct_m

  use mesh_function_oct_m

  use messages_oct_m

  use mpi_distribute_oct_m

  use mpi_lib_oct_m

  use mpi_oct_m

  use multicomm_oct_m

  use namespace_oct_m

  use parser_oct_m

  use profiling_oct_m

  use restart_oct_m

  use space_oct_m

  use states_elec_calc_oct_m

  use states_elec_oct_m

  use states_elec_restart_oct_m

  use unit_oct_m

  use unit_system_oct_m

  use v_ks_oct_m

  use varinfo_oct_m


  implicit none


  private

  public ::                             &

    dmp_t,                              &

    dm_propagation_init_run,            &

    dm_propagation_run,                 &

    dm_end_run


  !> Settings and persistent data of the density-matrix (DM) propagation.
  !!
  !! The input-controlled members are filled by dmp_init; the run-time members
  !! (adiabatic_st, occ_gs, restart_dump) are set up in dm_propagation_init_run
  !! and released in dm_end_run.
  type dmp_t

    ! Compared against option__tddmpropagation__master_equation / __collision_integral
    ! in dmp_init and dissipation. It is not assigned anywhere in the visible code;
    ! presumably the caller sets it before calling init -- TODO confirm.
    integer                             :: calculation_mode

    ! Value of TDDMPropagationBasis (adiabatic or groundstate).
    integer                             :: basis

    ! Value of TDDMUnitaryTransformFix.
    logical                             :: unitary_transform

    ! Basis states of the density matrix: copied from the TDKS states and loaded from a
    ! restart in dm_propagation_init_run; re-diagonalised every step for the adiabatic basis.
    type(states_elec_t)                 :: adiabatic_st

    ! Dissipation strategy: -1 = none selected, 1 = uniform decay, 2 = two-times model,
    ! 3 = transition rates from an EPW file.
    integer                             :: strategy

    ! Value of TDDMOrthogonal; forced to .true. when the two-times model is selected.
    logical                             :: othn

    ! Restart handle for dumping; initialised and ended only for the adiabatic basis.
    type(restart_t)                     :: restart_dump

    ! 2-Times Model

    ! (t1, t2) read from the TDDMPropagation_2Times block in input time units.
    real(real64)                        :: tmodel(2)

    ! Copy of adiabatic_st%occ taken in dm_propagation_init_run; allocated (nst, nik)
    ! only for strategy 2.
    real(real64), allocatable           :: occ_gs(:, :)

    ! Uniform Decay

    ! (lifetime, bath temperature) read from the TDDMPropagation_uniform_decay block;
    ! the temperature is read without unit conversion (documented as Kelvin).
    real(real64)                        :: uniform(2)

    ! EPW Data Parsing and Shared Memory Configuration

    ! Value of TDDMPropagation_from_epw; '-' means that no file was given.
    character(len=256)                  :: epw_file

    ! NOTE(review): not referenced in the visible code; presumably the I/O unit of epw_file.
    integer                             :: iunit

    ! istart is EPWBandLowest (must be > 0 for strategy 3).
    ! NOTE(review): iend and wnst are not set in the visible code; presumably the last
    ! damped band and the number of bands in the EPW window -- confirm.
    integer                             :: istart, iend, wnst

    ! NOTE(review): ia, na, astep, num and kmap are not used in the visible code; their
    ! meaning has to be taken from the EPW reader (iopar_open_trans_rate).
    integer                             :: ia

    integer                             :: na(3)

    real(real64)                        :: astep(3)

    integer(int64)                      :: num

    integer, allocatable                :: kmap(:)

    ! NOTE(review): MPI groups presumably used for the node-shared transition-rate array
    ! (see the comment on ave_trans in dm_propagation_init_run) -- confirm.
    type(mpi_grp_t)                     :: intranode_grp, internode_grp

#ifdef HAVE_MPI

    ! NOTE(review): presumably the MPI window backing the shared transition rates -- confirm.
    type(MPI_Win)                       :: window_trans_rate

#endif

  contains

    procedure :: init => dmp_init

    procedure :: update_trans_rate => dm_propagation_update_trans_rate

  end type dmp_t


  ! Module-level pointer to a single-precision array, used together with dmp_t for strategy 3.
  ! Defined separately because the VOLATILE attribute is not allowed for derived-type components.
  ! NOTE(review): dm_propagation_init_run describes it as "replicated on each node, shared
  ! within the node"; it is presumably associated in iopar_open_trans_rate -- confirm.

  real(real32), pointer, volatile :: ave_trans(:)


contains


  !> Parse and validate the input variables that control the density-matrix (DM) propagation.
  !!
  !! Reads the basis choice, the orthogonalisation and unitary-transform switches and at
  !! most one dissipation strategy, which is stored in this%strategy as
  !!   -1 : none selected,
  !!    1 : TDDMPropagation_uniform_decay,
  !!    2 : TDDMPropagation_2Times (forces TDDMOrthogonal = yes),
  !!    3 : TDDMPropagation_from_epw (requires EPWBandLowest > 0).
  !! Inconsistent input aborts the run through messages_fatal.
  !!
  !! NOTE(review): this%calculation_mode is read by the sanity checks below but is not
  !! assigned here; it is assumed to be set by the caller before init is invoked.
  !!
  !! @param[in,out] this       DM-propagation settings to fill
  !! @param[in]     namespace  namespace used for parsing and messages
  !! @param[in]     st         electronic states (parallelisation and spin checks only)
  !! @param[in]     space      space descriptor (only space%dim is read)
  !! @param[in]     hm         Hamiltonian (only hm%kpoints is read)
  subroutine dmp_init(this, namespace, st, space, hm)
    class(dmp_t),             intent(inout) :: this
    type(namespace_t),        intent(in)    :: namespace
    type(states_elec_t),      intent(in)    :: st
    class(space_t),           intent(in)    :: space
    type(hamiltonian_elec_t), intent(in)    :: hm

    type(block_t)       :: blk
    integer             :: ncols, nempty
    real(real64)        :: nempty_percent

    push_sub(dmp_init)

    !%Variable TDDMPropagationBasis
    !%Type integer
    !%Default Adiabatic
    !%Section Time-Dependent
    !%Description
    !% Decides the basis set for the density matrix propagation.
    !%Option Adiabatic 01
    !% Instantaneous eigenstates of the Hamiltonian.
    !%Option Groundstate 02
    !% Eigenstates of the Hamiltonian at t=0.
    !%End
    call parse_variable(namespace, 'TDDMPropagationBasis', option__tddmpropagationbasis__adiabatic, this%basis)
    if (.not. varinfo_valid_option('TDDMPropagationBasis', this%basis)) then
      call messages_input_error(namespace, 'TDDMPropagationBasis')
    endif
    call messages_print_var_option('TDDMPropagationBasis', this%basis, namespace=namespace)

    !%Variable TDDMOrthogonal
    !%Type logical
    !%Default no
    !%Section Time-Dependent
    !%Description
    !% Use a fully orthonormalized basis when constructing and
    !% damping the density matrix.
    !%End
    call parse_variable(namespace, 'TDDMOrthogonal', .false., this%othn)
    call messages_print_var_value('TDDMOrthogonal', this%othn, namespace=namespace)

    !%Variable TDDMUnitaryTransformFix
    !%Type logical
    !%Default yes
    !%Section Time-Dependent
    !%Description
    !% Applies an additional unitary transformation to the damped wavefunctions
    !% to maximize their overlap with the reference wavefunctions after damping.
    !%End
    call parse_variable(namespace, 'TDDMUnitaryTransformFix', .true., this%unitary_transform)
    call messages_print_var_value('TDDMUnitaryTransformFix', this%unitary_transform, namespace=namespace)

    ! No dissipation strategy selected yet; each block below refuses to overwrite a previous choice.
    this%strategy = -1

    !%Variable TDDMPropagation_uniform_decay
    !%Type block
    !%Section Time-Dependent
    !%Description
    !% The intra-k transition rates between any pair of states are taken to be constant.
    !% The population dynamics are therefore determined solely by the state occupations
    !% and the bath temperature, following Fermi’s golden rule.
    !% The first column of the block specifies the characteristic lifetime, and the
    !% second column gives the bath temperature (in Kelvin).
    !% <tt>%TDDMPropagation_uniform_decay
    !% <br>&nbsp;&nbsp; Time | Temp
    !% <br>%</tt>
    !%End
    if (parse_block(namespace, 'TDDMPropagation_uniform_decay', blk) == 0) then
      if (this%strategy /= -1) then
        message(1) = "Multiple dissipation strategies are not allowed."
        call messages_fatal(1, namespace=namespace)
      end if
      this%strategy = 1
      ncols = parse_block_cols(blk, 0)
      if (ncols == 2) then
        call parse_block_float(blk, 0, 0, this%uniform(1), units_inp%time)
        call parse_block_float(blk, 0, 1, this%uniform(2))
        call parse_block_end(blk)

        message(1) = "Info: TDDMPropagation uniform decay approximation:"
        ! The lifetime was read in input units and is stored internally; convert it to the
        ! output units so that the printed number matches the printed unit abbreviation.
        write(message(2),'(a, f0.2, 1x, 2a, F0.2, 1x, 2a)') '      [lifetime, temperature] = [', &
          units_from_atomic(units_out%time, this%uniform(1)), trim(units_abbrev(units_out%time)), ', ', &
          this%uniform(2), trim(units_abbrev(unit_kelvin)), ']'
        call messages_info(2, namespace=namespace)
      else
        message(1) = "Input: TDDMPropagation_uniform_decay block must have 2 columns."
        call messages_fatal(1, namespace=namespace)
      end if
    end if

    !%Variable TDDMPropagation_2Times
    !%Type block
    !%Section Time-Dependent
    !%Description
    !% Two times approximation for the jump operator in the master equation.
    !% S. A. Sato <i>et al.</i>, <i>Phys. Rev. B</i> 99, 214302 (2019).
    !%
    !% <tt>%TDDMPropagation_2Times
    !% <br>&nbsp;&nbsp; t1 | t2
    !% <br>%</tt>
    !%End
    if (parse_block(namespace, 'TDDMPropagation_2Times', blk) == 0) then
      if (this%strategy /= -1) then
        message(1) = "Multiple dissipation strategies are not allowed."
        call messages_fatal(1, namespace=namespace)
      end if
      this%strategy = 2
      ncols = parse_block_cols(blk, 0)
      if (ncols == 2) then
        call parse_block_float(blk, 0, 0, this%tmodel(1), units_inp%time)
        call parse_block_float(blk, 0, 1, this%tmodel(2), units_inp%time)
        call parse_block_end(blk)

        message(1) = "Info: TDDMPropagation 2-times approximation:"
        ! Same unit handling as above: print t1 and t2 in the output time unit.
        write(message(2),'(a, f12.6, a, f12.6, a,a)') &
          '      [t1, t2] = [', units_from_atomic(units_out%time, this%tmodel(1)), ', ', &
          units_from_atomic(units_out%time, this%tmodel(2)), '] ', &
          trim(units_abbrev(units_out%time))
        call messages_info(2, namespace=namespace)
      else
        message(1) = "Input: TDDMPropagation_2Times block must have 2 columns."
        call messages_fatal(1, namespace=namespace)
      end if
      ! for two times model, only orthonormalized basis is allowed
      if (.not. this%othn) then
        this%othn = .true.
        message(1) = "Overriding input: TDDMOrthogonal set to yes for TDDMPropagation_2Times."
        call messages_warning(1, namespace=namespace)
      end if
    end if

    !%Variable TDDMPropagation_from_epw
    !%Type string
    !%Default "-"
    !%Section Time-Dependent
    !%Description
    !% Specifies the transition-rate file generated by EPW. Once loaded, the
    !% simulation includes all intra-k and inter-k scattering processes, with
    !% the electron dynamics governed by Fermi’s Golden Rule. This option
    !% requires EPWBandLowest to be set.
    !%End
    call parse_variable(namespace, 'TDDMPropagation_from_epw', '-', this%epw_file)
    if (trim(this%epw_file) /= '-') then
      ! The EPW strategy is restricted to a single-shift Monkhorst-Pack mesh in 3D.
      if (bitand(hm%kpoints%method, kpoints_monkh_pack) == 0) then
        write(message(1),'(a)') 'Only Monkhorst-Pack k-point meshes are supported in the EPW-DMPropagation strategy.'
        call messages_fatal(1, namespace = namespace)
      end if
      if (hm%kpoints%reduced%nshifts /= 1) then
        write(message(1),'(a)') 'Multiple Monkhorst-Pack shifts are not supported in the EPW-DMPropagation strategy.'
        call messages_fatal(1, namespace = namespace)
      end if
      if (space%dim /= 3) then
        write(message(1),'(a)') 'Only 3D systems are supported in the EPW-DMPropagation strategy.'
        call messages_fatal(1, namespace = namespace)
      end if

      if (this%strategy /= -1) then
        message(1) = "Multiple dissipation strategies are not allowed."
        call messages_fatal(1, namespace=namespace)
      end if
      this%strategy = 3
      write(message(1),'(a, a)') "Info: TDDMPropagation transition rates from file: ", trim(this%epw_file)
      call messages_info(1, namespace=namespace)
    end if

    !%Variable EPWBandLowest
    !%Type integer
    !%Default -1
    !%Section Time-Dependent
    !%Description
    !% The starting DFT band index used for damping.
    !%
    !% Since EPW transition rates are calculated for a subset of DFT bands,
    !% this variable tells the code which global DFT band corresponds
    !% to damping band #1.
    !%
    !% Example: If you damp DFT bands 10 through 15, set EPWBandLowest = 10.
    !%End
    if (this%strategy == 3) then
      ! The default matches the documented sentinel (-1): any non-positive value means "not set".
      call parse_variable(namespace, 'EPWBandLowest', -1, this%istart)
      if (.NOT. (this%istart > 0)) then
        write(message(1), '(a)') 'EPWBandLowest must be specified when TDDMPropagation_from_epw is enabled.'
        call messages_fatal(1, namespace=namespace)
      end if
      call messages_print_var_value('EPWBandLowest', this%istart, namespace=namespace)
    end if

    ! Sanity checks
    if (this%calculation_mode == option__tddmpropagation__collision_integral .and. this%strategy /= 3) then
      message(1) = "Warning: TDDMPropagation_from_epw is required for collision integral."
      message(2) = "         Add TDDMPropagation_from_epw and rerun."
      call messages_fatal(2, namespace=namespace)
    end if

    ! At least one of ExtraStates / ExtraStatesInPercent must request empty states.
    call parse_variable(namespace, 'ExtraStates', 0, nempty)
    call parse_variable(namespace, 'ExtraStatesInPercent', m_zero, nempty_percent)
    if (nempty == 0 .and. nempty_percent < m_epsilon) then
      message(1) = "Warning: TDDMPropagation requires a number of empty states."
      message(2) = "         Add ExtraStates and rerun."
      call messages_fatal(2, namespace=namespace)
    end if

    if (st%parallel_in_states) then
      message(1) = "Warning: TDDMPropagation does not support parallel states."
      message(2) = "         Remove ParallelStates and rerun."
      call messages_fatal(2, namespace=namespace)
    end if

    if (st%d%ispin == spin_polarized .and. this%strategy == 3) then
      call messages_not_implemented('Spin-polarized TDDMPropagation with EPW', namespace=namespace)
    end if

    pop_sub(dmp_init)

  end subroutine dmp_init


  !> Prepare the run-time data of the DM propagation.
  !!
  !! Copies the TDKS states into dmp%adiabatic_st and fills them from a restart: the
  !! ground-state restart when starting from scratch or when the ground-state basis is
  !! used, the DM restart otherwise. Any failure to open the restart or to read the
  !! states is fatal, because the basis would otherwise be undefined.
  !! Afterwards the strategy-specific data are set up (occ_gs for the two-times model,
  !! the transition rates for EPW), and the dump restart is opened for the adiabatic basis.
  !!
  !! @param[in,out] dmp           DM-propagation data (dmp%init must have been called)
  !! @param[in]     namespace     namespace for restart access and messages
  !! @param[in]     space         electron space passed to states_elec_load
  !! @param[in]     gr            real-space grid
  !! @param[in]     ions          ions, forwarded to iopar_open_trans_rate
  !! @param[in]     st            TDKS states used as template for the basis states
  !! @param[in]     hm            Hamiltonian (k-points; forwarded to iopar_open_trans_rate)
  !! @param[in]     mc            multicommunicator for the restart objects
  !! @param[in]     from_scratch  if true, the basis is always read from the ground-state restart
  subroutine dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
    type(dmp_t),              intent(inout)   :: dmp
    type(namespace_t),        intent(in)      :: namespace
    type(electron_space_t),   intent(in)      :: space
    type(grid_t),             intent(in)      :: gr
    type(ions_t),             intent(in)      :: ions
    type(states_elec_t),      intent(in)      :: st
    type(hamiltonian_elec_t), intent(in)      :: hm
    type(multicomm_t),        intent(in)      :: mc
    logical,                  intent(in)      :: from_scratch

    integer               :: ierr
    type(restart_t)       :: restart_load

    push_sub(dm_propagation_init_run)

    ! By default, wavefunctions are copied. In principle, exclude_eigenval should be true.
    ! But we need its allocation
    call states_elec_copy(dmp%adiabatic_st, st, special=.true.)

    if (from_scratch .or. dmp%basis == option__tddmpropagationbasis__groundstate) then
      call restart_load%init(namespace, restart_gs, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
    else
      call restart_load%init(namespace, restart_dm, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
    end if

    if (ierr /= 0) then
      message(1) = 'Unable to read DM-basis wavefunctions.'
      call messages_fatal(1, namespace=namespace)
    end if

    call states_elec_load(restart_load, namespace, space, dmp%adiabatic_st, gr, hm%kpoints, fixed_occ=st%restart_fixed_occ, &
      ierr=ierr, label = ": DM-basis")

    ! The status of the read itself was previously ignored; a failed or incomplete read
    ! would leave basis states undefined, so stop here instead of propagating with them.
    if (ierr /= 0) then
      message(1) = 'Unable to read DM-basis wavefunctions.'
      call messages_fatal(1, namespace=namespace)
    end if

    call restart_load%end()

    ! TDDMPropagation_2Times
    if (dmp%strategy == 2) then
      safe_allocate(dmp%occ_gs(1:st%nst, 1:st%nik))
      dmp%occ_gs = dmp%adiabatic_st%occ
    end if

    ! dmp%ave_trans: replicated on each node, shared within the node
    if (dmp%strategy == 3) call iopar_open_trans_rate(namespace, ions, hm, st%system_grp, dmp)

    ! record only adiabatic states
    if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
      call dmp%restart_dump%init(namespace, restart_dm, restart_type_dump, mc, ierr, mesh=gr)
    end if

    pop_sub(dm_propagation_init_run)
  end subroutine dm_propagation_init_run


  !> Release everything that dm_propagation_init_run set up.
  !!
  !! Closes the dump restart (adiabatic basis only), frees the basis states, closes the
  !! EPW transition-rate data (strategy 3 only) and deallocates occ_gs if allocated.
  !!
  !! @param[in]     system_grp  MPI group forwarded to iopar_close_trans_rate
  !! @param[in,out] dmp         DM-propagation data to tear down
  subroutine dm_end_run(system_grp, dmp)
    type(mpi_grp_t),  intent(in)     :: system_grp
    type(dmp_t),      intent(inout)  :: dmp

    push_sub(dm_end_run)

    ! The dump restart exists only when the adiabatic basis is recorded
    if (dmp%basis == option__tddmpropagationbasis__adiabatic) call dmp%restart_dump%end()

    call states_elec_end(dmp%adiabatic_st)

    ! EPW transition rates are opened only for strategy 3
    if (dmp%strategy == 3) then
      call iopar_close_trans_rate(system_grp, dmp)
    end if

    safe_deallocate_a(dmp%occ_gs)

    pop_sub(dm_end_run)
  end subroutine dm_end_run


  !> Apply one dissipation step to the TDKS states at time iter*dt.
  !!
  !! Sequence, for the k-points local to this process:
  !!  1. update the Hamiltonian and, for the adiabatic basis, re-diagonalise it;
  !!  2. project the TDKS states on the basis, build the residuals outside the basis
  !!     and assemble the density matrix in the (basis + residual) space;
  !!  3. gather the basis occupations over k-points and dissipate the density matrix;
  !!  4. rebuild the TDKS states from the damped density matrix (update_st);
  !!  5. report the populations, then recompute the density and the KS potential.
  !!
  !! @param[in,out] dmp            DM-propagation data
  !! @param[in]     namespace      namespace for messages and solvers
  !! @param[in]     space          electron space
  !! @param[in]     gr             real-space grid
  !! @param[in]     ions           ions, passed to v_ks_calc
  !! @param[in,out] st             TDKS states; wavefunctions, occupations and density are updated
  !! @param[in]     mc             multicommunicator, passed to eigensolver_init
  !! @param[in,out] hm             Hamiltonian, updated before and after the damping
  !! @param[in,out] ks             Kohn-Sham potential object
  !! @param[in]     iter           current time-step index
  !! @param[in]     dt             time step
  !! @param[in]     ext_partners   external interaction partners
  !! @param[in]     update_energy  optional, default .true.; forwarded to v_ks_calc as
  !!                               calc_eigenval and calc_energy
  subroutine dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
    type(dmp_t),              intent(inout)   :: dmp
    type(namespace_t),        intent(in)      :: namespace
    type(electron_space_t),   intent(in)      :: space
    type(grid_t),             intent(in)      :: gr
    type(ions_t),             intent(in)      :: ions
    type(states_elec_t),      intent(inout)   :: st
    type(multicomm_t),        intent(in)      :: mc
    type(hamiltonian_elec_t), intent(inout)   :: hm
    type(v_ks_t),             intent(inout)   :: ks
    integer,                  intent(in)      :: iter
    real(real64),             intent(in)      :: dt
    type(partner_list_t),     intent(in)      :: ext_partners
    logical,         optional,intent(in)      :: update_energy

    real(real64), parameter       :: ZERO = 1.0e-15_real64
    type(eigensolver_t)           :: eigens
    real(real64), allocatable     :: nrm2_tdks(:, :)
    real(real64)                  :: population(3), leak
    integer                       :: ik
    logical                       :: update_energy_
    complex(real64), allocatable  :: rho_mat_k(:, :, :)
    type(states_elec_t)           :: resd_st
    complex(real64), allocatable  :: overlap_ad_ks(:, :, :), overlap_resd_ks(:, :, :)
    integer, allocatable          :: nresd_k(:)

    push_sub_with_profile(dm_propagation_run)

    ! Update the hamiltonian. hm is updated after each td propagation,
    ! but the ions may have moved since then.
    call hm%update(gr, namespace, space, ext_partners, time=iter * dt)
    if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
      call eigensolver_init(eigens, namespace, gr, dmp%adiabatic_st, hm, mc, space)
      eigens%converged = 0
      call eigens%run(namespace, gr, dmp%adiabatic_st, hm, space, ext_partners, iter)
    end if

    ! All work arrays live on the heap and cover only the local k-points. The squared
    ! norms used to be an automatic (stack) array over all st%nik k-points.
    safe_allocate(nrm2_tdks(1:st%nst, st%d%kpt%start:st%d%kpt%end))
    safe_allocate(overlap_ad_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
    safe_allocate(overlap_resd_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
    safe_allocate(rho_mat_k(2*st%nst, 2*st%nst, st%d%kpt%start:st%d%kpt%end))
    safe_allocate(nresd_k(st%d%kpt%start:st%d%kpt%end))

    ! population(1): total, population(2): inside the basis, population(3): after damping
    population = 0.0_real64

    ! Create a copy of the TDKS states to initialise the residuals
    call states_elec_copy(resd_st, st, exclude_eigenval = .true.)

    ! Store all density matrices for potential future communication
    do ik = st%d%kpt%start, st%d%kpt%end
      ! pop = \sum_{i,k} f_{i,k} <\psi_{i,k} | \psi_{i,k}>
      call total_population(ik, st, gr, nrm2_tdks(:, ik), population(1))
      ! S^{\phi,\psi}_{ij} = <\phi_i | \psi_j>. one more conjugation is required for this function
      call zstates_elec_calc_projections(st, dmp%adiabatic_st, namespace, gr, ik, overlap_ad_ks(:, :, ik))
      overlap_ad_ks(:, :, ik) = conjg(overlap_ad_ks(:, :, ik))
      ! population before damping, set occupation as well
      call population_in_adiabatic(ik, dmp%adiabatic_st, st, overlap_ad_ks(:, :, ik), population(2))

      ! residual |d_j> = |\psi_j> - \sum_i S^{\phi,\psi}_{ij} |\phi_i>
      ! S^{d,\psi}_{ij} = <d_i | \psi_j>
      call construct_residuals(gr, namespace, dmp%adiabatic_st, st, ik, dmp%othn, overlap_ad_ks(:, :, ik), &
        nrm2_tdks(:, ik), nresd_k(ik), overlap_resd_ks(:, :, ik), resd_st)

      ! Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
      call construct_density_matrix(nresd_k(ik), ik, st, overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), rho_mat_k(:, :, ik))
    end do

    ! broadcast the instantaneous occupation for Pauli blocking
    call broadcast_occupation(dmp%adiabatic_st%occ, dmp%adiabatic_st%d%kpt, dmp%adiabatic_st%nst, st%parallel_in_states)

    ! dissipate the density matrix
    call dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)

    do ik = st%d%kpt%start, st%d%kpt%end
      ! reconstruct wavefunctions. Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
      call update_st(dmp, ik, gr, namespace, nresd_k(ik), overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), nrm2_tdks(:, ik), &
        resd_st, st, rho_mat_k(:, :, ik), population(3))
    end do

    call states_elec_end(resd_st)

    safe_deallocate_a(nrm2_tdks)
    safe_deallocate_a(overlap_ad_ks)
    safe_deallocate_a(overlap_resd_ks)

    call broadcast_occupation(st%occ, st%d%kpt, st%nst, st%parallel_in_states)

    safe_deallocate_a(rho_mat_k)
    safe_deallocate_a(nresd_k)

    ! Sum the per-process contributions over the k-point group
    call st%d%kpt%mpi_grp%allreduce_inplace(population, 3, mpi_double_precision, mpi_sum)

    write(message(1), '(a,E21.14,a,E21.14,a,E21.14,a)') 'DMPopulation: ', population(1), &
      ' (', population(2), ' in basis ) before damping; ', population(3), ' after damping'
    call messages_info(1)

    ! Report the population lost (or gained) by the damping step
    leak = population(1) - population(3)
    if (abs(leak) > zero) then
      write(message(1), '(a,E15.8,a,I8)') 'Leakage: ', leak, '(after damping) at time step', iter
      call messages_info(1)
    end if
    dmp%adiabatic_st%qtot = population(1)

    ! renew hamiltonian
    call density_calc(st, gr, st%rho)
    update_energy_ = optional_default(update_energy, .true.)
    call v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval = update_energy_, &
      time = abs(iter * dt), calc_energy = update_energy_)

    if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
      call eigensolver_end(eigens)
    end if

    pop_sub_with_profile(dm_propagation_run)

  end subroutine dm_propagation_run


  !> Accumulate the k-weighted population of the states at k-point ik.
  !!
  !! On exit nrm2(i) holds the squared norm of state i at ik (summed over domains when the
  !! grid is domain-parallel) and pop has been increased by
  !! kweights(ik) * sum_i occ(i, ik) * nrm2(i).
  !!
  !! @param[in]     ik    k-point index
  !! @param[in]     st    states whose norms and occupations are used
  !! @param[in]     gr    grid
  !! @param[out]    nrm2  squared norms, size st%nst
  !! @param[in,out] pop   running population; must be non-negative on entry
  subroutine total_population(ik, st, gr, nrm2, pop)
    integer,              intent(in)      :: ik
    type(states_elec_t),  intent(in)      :: st
    type(grid_t),         intent(in)      :: gr
    real(real64),         intent(out)     :: nrm2(:)
    real(real64),         intent(inout)   :: pop

    integer                           :: iblock, first_st, last_st

    push_sub(total_population)

    assert(pop >= 0.0_real64)
    assert(size(nrm2) == st%nst)

    ! Local (non-reduced) norms, one batch of states at a time
    do iblock = st%group%block_start, st%group%block_end
      first_st = states_elec_block_min(st, iblock)
      last_st  = states_elec_block_max(st, iblock)
      call mesh_batch_nrm2(gr, st%group%psib(iblock, ik), nrm2(first_st:last_st), reduce = .false.)
    end do

    ! Square first, so that the domain reduction sums the squared norms
    nrm2(1:st%nst) = nrm2(1:st%nst)**2
    if (gr%parallel_in_domains) call gr%allreduce(nrm2, dim = st%nst)

    pop = pop + st%kweights(ik) * sum(st%occ(1:st%nst, ik)* nrm2(1:st%nst))

    pop_sub(total_population)

  end subroutine total_population


  !> Project the TDKS occupations onto the basis states at k-point ik.
  !!
  !! Sets ad_st%occ(i, ik) = sum_j |overlap(i, j)|^2 * st%occ(j, ik) and adds the
  !! k-weighted sum of these occupations to pop.
  !!
  !! @param[in]     ik       k-point index
  !! @param[in,out] ad_st    basis states; their occupations at ik are overwritten
  !! @param[in]     st       TDKS states providing the occupations
  !! @param[in]     overlap  overlap matrix, shape (ad_st%nst, st%nst)
  !! @param[in,out] pop      running population inside the basis
  subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
    integer,             intent(in)      :: ik
    type(states_elec_t), intent(inout)   :: ad_st
    type(states_elec_t), intent(in)      :: st
    complex(real64),     intent(in)      :: overlap(:, :)
    real(real64),        intent(inout)   :: pop

    integer                       :: jks, nad

    push_sub(population_in_adiabatic)

    assert(ubound(overlap, dim = 1) == ad_st%nst)
    assert(ubound(overlap, dim = 2) == st%nst)

    nad = ad_st%nst

    ! Accumulate one TDKS state at a time; every basis occupation receives its
    ! contributions in the order jks = 1, ..., st%nst
    ad_st%occ(1:nad, ik) = 0.0_real64
    do jks = 1, st%nst
      ad_st%occ(1:nad, ik) = ad_st%occ(1:nad, ik) + &
        (real(overlap(1:nad, jks))**2 + aimag(overlap(1:nad, jks))**2) * st%occ(jks, ik)
    end do

    pop = pop + sum(ad_st%occ(:, ik)) * ad_st%kweights(ik)

    pop_sub(population_in_adiabatic)

  end subroutine population_in_adiabatic


  !> Build the part of the TDKS states that lies outside the basis (the residuals) at
  !! k-point ik, together with the overlaps <d_i | \psi_j> between residuals and TDKS states.
  !!
  !! Two variants, selected by othn:
  !!  - othn = .false.: one residual per TDKS state,
  !!      |d_j> = |\psi_j> - \sum_i S^{\phi,\psi}_{ij} |\phi_i>, hence nresd = st%nst.
  !!  - othn = .true.: residuals are orthogonalised against the basis and against the
  !!      residuals already kept, then normalised; candidates whose norm falls below the
  !!      thresholds are dropped, so nresd <= resd%nst. Unused rows of overlap_resd_ks
  !!      are set to zero.
  !!
  !! @param[in]     gr               grid
  !! @param[in]     namespace        namespace passed to the projection routine
  !! @param[in]     ad_st            basis states
  !! @param[in]     st               TDKS states
  !! @param[in]     ik               k-point index
  !! @param[in]     othn             build orthonormalised residuals
  !! @param[in]     overlap_ad_ks    S^{\phi,\psi}, shape (ad_st%nst, st%nst)
  !! @param[in]     nrm2_tdks        squared norms of the TDKS states at ik
  !! @param[out]    nresd            number of residuals stored in resd
  !! @param[out]    overlap_resd_ks  S^{d,\psi}, shape (resd%nst, st%nst)
  !! @param[in,out] resd             states object receiving the residuals at ik
  subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
    type(grid_t),        intent(in)     :: gr
    type(namespace_t),   intent(in)     :: namespace
    type(states_elec_t), intent(in)     :: ad_st
    type(states_elec_t), intent(in)     :: st
    integer,             intent(in)     :: ik
    logical,             intent(in)     :: othn
    complex(real64),     intent(in)     :: overlap_ad_ks(:, :)
    real(real64),        intent(in)     :: nrm2_tdks(:)
    integer,             intent(out)    :: nresd
    complex(real64),     intent(out)    :: overlap_resd_ks(:, :)
    type(states_elec_t), intent(inout)  :: resd

    ! Thresholds: squared norm expected outside the basis, and norm left after orthogonalisation
    real(real64), parameter           :: tol_norm2 = 1.0e-14_real64
    real(real64), parameter           :: tol_resd = 1.0e-7_real64
    integer                           :: iblk, jks, ist_lo, ist_hi
    real(real64)                      :: norm_d, norm2_d
    complex(real64), allocatable      :: resid(:, :), dots(:)

    push_sub_with_profile(construct_residuals)

    assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
    assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
    assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
    assert(ubound(overlap_resd_ks, dim = 2) == st%nst)

    safe_allocate(resid(1:gr%np, st%d%dim))

    if (othn) then

      ! Orthonormalised residuals: keep only the linearly independent ones
      safe_allocate(dots(1:st%nst))

      nresd = 0
      do jks = 1, resd%nst
        ! Expected squared norm of the residual: nrm2_tdks - \sum_i |S^{\phi,\psi}_{ij}|^2
        norm2_d = nrm2_tdks(jks) - sum(real(overlap_ad_ks(:, jks))**2) - sum(aimag(overlap_ad_ks(:, jks))**2)
        if (.not. (norm2_d > tol_norm2)) cycle

        ! Pre-scale the TDKS state so that the residual comes out approximately normalised
        call states_elec_get_state(st, gr, jks, ik, resid)
        norm_d = sqrt(norm2_d)
        call lalg_scal(gr%np, resd%d%dim, m_one / norm_d, resid)

        ! 1/|d_j| |\psi_j> - 1/|d_j| \sum_i S^{\phi,\psi}_{ij} |\phi_i>
        do iblk = ad_st%group%block_start, ad_st%group%block_end
          ist_lo = states_elec_block_min(ad_st, iblk)
          ist_hi = states_elec_block_max(ad_st, iblk)
          call zbatch_axpy_function(gr%np, -overlap_ad_ks(ist_lo:ist_hi, jks) / norm_d, &
            ad_st%group%psib(iblk, ik), resid)
        end do
        call zstates_elec_orthogonalize_single_batch(ad_st, gr, ad_st%nst, ik, resid)

        ! Remove the components along the residuals kept so far
        if (nresd > 0) call zstates_elec_orthogonalize_single_batch(resd, gr, nresd, ik, resid)

        ! Discard candidates that are numerically contained in the space already spanned
        norm_d = zmf_nrm2(gr, resd%d%dim, resid, reduce=.true.)
        if (.not. (norm_d > tol_resd)) cycle

        call lalg_scal(gr%np, resd%d%dim, m_one / norm_d, resid)
        nresd = nresd + 1
        call states_elec_set_state(resd, gr, nresd, ik, resid)

        ! Row nresd of S^{d,\psi}: < d | \psi_i > for all TDKS states (local part only)
        do iblk = st%group%block_start, st%group%block_end
          ist_lo = states_elec_block_min(st, iblk)
          ist_hi = states_elec_block_max(st, iblk)
          call zmesh_batch_mf_dotp(gr, st%group%psib(iblk, ik), resid, dots(ist_lo:ist_hi), &
            reduce = .false., nst = ist_hi-ist_lo+1)
        end do

        overlap_resd_ks(nresd, 1:st%nst) = conjg(dots(1:st%nst))
      end do

      ! Zero the unused rows, then complete the non-reduced dot products across domains
      overlap_resd_ks(nresd+1:resd%nst, :) = m_z0
      if (gr%parallel_in_domains) call gr%allreduce(overlap_resd_ks)

      safe_deallocate_a(dots)

    else

      ! Plain residuals: one per TDKS state, not orthogonalised among themselves
      do jks = 1, st%nst
        call states_elec_get_state(st, gr, jks, ik, resid)
        ! |\psi_j> - \sum_i S^{\phi,\psi}_{ij} |\phi_i>
        do iblk = ad_st%group%block_start, ad_st%group%block_end
          ist_lo = states_elec_block_min(ad_st, iblk)
          ist_hi = states_elec_block_max(ad_st, iblk)
          call zbatch_axpy_function(gr%np, -overlap_ad_ks(ist_lo:ist_hi, jks), &
            ad_st%group%psib(iblk, ik), resid)
        end do
        call states_elec_set_state(resd, gr, jks, ik, resid)
      end do

      ! Overlap S^{d,\psi} = < d | \psi >; the projection routine returns the conjugate
      nresd = st%nst
      call zstates_elec_calc_projections(st, resd, namespace, gr, ik, overlap_resd_ks)
      overlap_resd_ks = conjg(overlap_resd_ks)

    end if

    safe_deallocate_a(resid)

    pop_sub_with_profile(construct_residuals)

  end subroutine construct_residuals


  !> Assemble the density matrix of k-point ik in the (basis + residual) representation.
  !!
  !! With the overlap columns scaled by sqrt(occ), A = S^{\phi,\psi} sqrt(f) and
  !! R = S^{d,\psi} sqrt(f), the matrix is
  !!
  !!   rho = [ A A^H   A R^H ]
  !!         [ R A^H   R R^H ]
  !!
  !! Only the leading (nst + nresd) rows and columns are filled; the rest is zero.
  !! The upper triangle is computed with BLAS and mirrored into the lower one.
  !!
  !! NOTE(review): sqrt(st%occ) assumes non-negative occupations -- confirm that the
  !! occupations produced after damping cannot become slightly negative.
  !!
  !! @param[in]  nresd            number of residual states in use
  !! @param[in]  ik               k-point index
  !! @param[in]  st               TDKS states (occupations and nst)
  !! @param[in]  overlap_ad_ks    S^{\phi,\psi}, shape (nst, nst)
  !! @param[in]  overlap_resd_ks  S^{d,\psi}, shape (nst, nst); rows 1:nresd are used
  !! @param[out] rho_mat          density matrix, shape (2*nst, 2*nst)
  subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
    integer,                intent(in)     :: nresd
    integer,                intent(in)     :: ik
    type(states_elec_t),    intent(in)     :: st
    complex(real64),        intent(in)     :: overlap_ad_ks(:, :)
    complex(real64),        intent(in)     :: overlap_resd_ks(:, :)
    complex(real64),        intent(out)    :: rho_mat(:, :)

    integer                           :: irow, icol, iks
    real(real64)                      :: amplitude
    complex(real64), allocatable      :: ad_scaled(:, :), resd_scaled(:, :)

    push_sub(construct_density_matrix)

    assert(ubound(overlap_ad_ks, dim = 1) == st%nst)
    assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
    assert(ubound(overlap_resd_ks, dim = 1) == st%nst)
    assert(ubound(overlap_resd_ks, dim = 2) == st%nst)
    assert(ubound(rho_mat, dim = 1) == 2*st%nst)
    assert(ubound(rho_mat, dim = 2) == 2*st%nst)

    safe_allocate(ad_scaled(st%nst, st%nst))
    if (nresd > 0) then
      safe_allocate(resd_scaled(nresd, st%nst))
    end if

    ! Everything outside the active (nst + nresd) block stays zero
    rho_mat = 0.0_real64

    ! Scale column iks of both overlap matrices by sqrt(f_iks)
    do iks = 1, st%nst
      amplitude = sqrt(st%occ(iks, ik))
      ad_scaled(1:st%nst, iks) = overlap_ad_ks(1:st%nst, iks) * amplitude
      if (nresd > 0) then
        resd_scaled(1:nresd, iks) = overlap_resd_ks(1:nresd, iks) * amplitude
      end if
    end do

    ! basis-basis block (upper triangle)
    call blas_herk('U', 'N', st%nst, st%nst, 1.0_real64, ad_scaled(1,1), st%nst, 0.0_real64, rho_mat(1, 1), 2*st%nst)
    if (nresd > 0) then
      ! residual-residual block (upper triangle)
      call blas_herk('U', 'N', nresd, st%nst, 1.0_real64, resd_scaled(1,1), nresd, 0.0_real64, &
        rho_mat(st%nst + 1, st%nst + 1), 2*st%nst)
      ! basis-residual block (top right)
      call blas_gemm('N', 'C', st%nst, nresd, st%nst, m_z1, ad_scaled(1,1), st%nst, resd_scaled(1,1), &
        nresd, m_z0, rho_mat(1, st%nst + 1), 2*st%nst)
    end if

    ! Hermitian completion, part 1: rows belonging to the basis index icol
    !$omp parallel do private(icol, irow)
    do icol = 1, st%nst
      ! lower triangle of the basis-basis block
      do irow = 1, icol - 1
        rho_mat(icol, irow) = conjg(rho_mat(irow, icol))
      end do

      ! residual-basis block (bottom left) from the basis-residual block
      do irow = 1, nresd
        rho_mat(irow + st%nst, icol) = conjg(rho_mat(icol, irow + st%nst))
      end do
    end do
    !$omp end parallel do

    ! Hermitian completion, part 2: lower triangle of the residual-residual block
    !$omp parallel do private(icol, irow)
    do icol = 1, nresd
      do irow = 1, icol - 1
        rho_mat(icol + st%nst, irow + st%nst) = conjg(rho_mat(irow + st%nst, icol + st%nst))
      end do
    end do
    !$omp end parallel do

    safe_deallocate_a(ad_scaled)
    if (nresd > 0) then
      safe_deallocate_a(resd_scaled)
    end if

    pop_sub(construct_density_matrix)
  end subroutine construct_density_matrix


  !> Make the occupations of all k-points available on every process of the k-point group.
  !!
  !! Each process contributes the columns of its local k-points (kpt%start:kpt%end);
  !! an allgatherv over kpt%mpi_grp writes the gathered data back into occ.
  !! Nothing is done when the k-points are not distributed.
  !!
  !! @param[in,out] occ       occupations, shape (nst, kpt%nglobal)
  !! @param[in]     kpt       k-point distribution
  !! @param[in]     nst       number of states
  !! @param[in]     parstate  state parallelisation flag; must be .false.
  subroutine broadcast_occupation(occ, kpt, nst, parstate)
    real(real64),             intent(inout)   :: occ(:, :)
    type(distributed_t),      intent(in)      :: kpt
    integer,                  intent(in)      :: nst
    logical,                  intent(in)      :: parstate

    integer                       :: nsend
    integer, allocatable          :: counts(:), displs(:)
    real(real64), allocatable     :: local_occ(:, :)

    push_sub_with_profile(broadcast_occupation)

    assert(ubound(occ, dim = 1) == nst)
    assert(ubound(occ, dim = 2) == kpt%nglobal)
    assert(.not. parstate)

    if (kpt%parallel) then
      safe_allocate(local_occ(1:nst, kpt%nlocal))
      safe_allocate(counts(1:kpt%mpi_grp%size))
      safe_allocate(displs(1:kpt%mpi_grp%size))

      ! The send and receive buffers must not share an address, so the local
      ! columns are copied out of occ first
      local_occ(1:nst, 1:kpt%nlocal) = occ(:, kpt%start:kpt%end)
      nsend = nst * kpt%nlocal

      ! Every process sends nst values per local k-point
      counts(:) = nst * kpt%num(:)
      call mpi_displacements(counts, displs)

      call kpt%mpi_grp%allgatherv(local_occ, nsend, mpi_double_precision, &
        occ, counts, displs, mpi_double_precision)

      safe_deallocate_a(local_occ)
      safe_deallocate_a(counts)
      safe_deallocate_a(displs)
    end if

    pop_sub_with_profile(broadcast_occupation)

  end subroutine broadcast_occupation


  !> Apply one dissipative step of length dt to the local density matrices.
  !!
  !! The routine only dispatches: depending on dmp%calculation_mode (and, for
  !! the master equation, on dmp%strategy) it calls the matching Lindblad or
  !! collision-integral driver. Any other mode/strategy leaves rho_mat_k
  !! unchanged.
  subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
    type(hamiltonian_elec_t), intent(in)     :: hm
    type(states_elec_t),      intent(in)     :: st
    type(namespace_t),        intent(in)     :: namespace
    integer,                  intent(in)     :: nresd_k(:)          !< residual states per local k-point
    real(real64),             intent(in)     :: dt                  !< time step
    type(dmp_t),              intent(inout)  :: dmp
    complex(real64),          intent(inout)  :: rho_mat_k(:, :, :)  !< (2*nst, 2*nst, local k-point)

    push_sub_with_profile(dissipation)

    assert(ubound(nresd_k, dim = 1) == dmp%adiabatic_st%d%kpt%nlocal)
    assert(ubound(rho_mat_k, dim = 1) == 2*dmp%adiabatic_st%nst)
    assert(ubound(rho_mat_k, dim = 2) == 2*dmp%adiabatic_st%nst)
    assert(ubound(rho_mat_k, dim = 3) == dmp%adiabatic_st%d%kpt%nlocal)

    select case (dmp%calculation_mode)
    case (option__tddmpropagation__master_equation)

      select case (dmp%strategy)
      case (1)
        ! uniform transition rates
        call lindblad_uniform(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
      case (2)
        ! two-time (T1/T2) relaxation model
        call lindblad_2times(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
      case (3)
        ! transition rates read from the EPW file
        call lindblad_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)
      end select

    case (option__tddmpropagation__collision_integral)

      call collision_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)

    end select

    pop_sub_with_profile(dissipation)

  end subroutine dissipation


  !> Advance the density matrices by dt with the uniform-rate Lindblad operator.
  !!
  !! For every local k-point the update is the truncated Taylor series
  !!   rho <- rho + sum_{n=1}^{norder} dt^n / n! * L^n[rho],
  !! where L is evaluated by lindblad_operator_uniform with the rates returned
  !! by transition_rate_uniform.
  subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
    type(dmp_t),         intent(in)     :: dmp
    type(distributed_t), intent(in)     :: kpt
    integer,             intent(in)     :: nresd_k(:)          !< residual states per local k-point
    real(real64),        intent(in)     :: dt                  !< time step
    complex(real64),     intent(inout)  :: rho_mat_k(:, :, :)  !< (2*nst, 2*nst, local k-point)

    integer, parameter            :: norder = 4
    integer                       :: nst, nresd, ik, ik_loc, iorder
    real(real64)                  :: taylor_coeff
    real(real64), allocatable     :: rtrans(:, :)
    complex(real64), allocatable  :: rho_in(:, :), rho_out(:, :), rho_res(:, :), rho_swap(:, :)

    push_sub_with_profile(lindblad_uniform)

    nst = dmp%adiabatic_st%nst

    ! work space: rates, operator input/output and the accumulated increment
    safe_allocate(rtrans(1:nst, 1:nst))
    safe_allocate(rho_in(1:2*nst, 1:2*nst))
    safe_allocate(rho_out(1:2*nst, 1:2*nst))
    safe_allocate(rho_res(1:2*nst, 1:2*nst))

    do ik = kpt%start, kpt%end
      ik_loc = ik - kpt%start + 1
      nresd = nresd_k(ik_loc)

      call transition_rate_uniform(dmp%uniform, dmp%adiabatic_st, ik, rtrans)

      rho_res = 0.0_real64
      rho_in(1:2*nst, 1:2*nst) = rho_mat_k(1:2*nst, 1:2*nst, ik_loc)

      taylor_coeff = m_one
      do iorder = 1, norder
        ! rho_out = L[rho_in] = L^iorder[rho]
        call lindblad_operator_uniform(nst, nresd, rtrans, rho_in, rho_out)

        taylor_coeff = taylor_coeff * dt / iorder
        rho_res = rho_res + taylor_coeff * rho_out

        ! no further application of L is needed after the highest order
        if (iorder == norder) exit

        ! Exchange the buffers without copying: the current output becomes
        ! the next input and the old input is recycled as output storage.
        call move_alloc(rho_in, rho_swap)
        call move_alloc(rho_out, rho_in)
        call move_alloc(rho_swap, rho_out)
      end do

      rho_mat_k(1:2*nst, 1:2*nst, ik_loc) = rho_mat_k(1:2*nst, 1:2*nst, ik_loc) + rho_res
    end do

    safe_deallocate_a(rho_in)
    safe_deallocate_a(rho_out)
    safe_deallocate_a(rtrans)
    safe_deallocate_a(rho_res)

    pop_sub_with_profile(lindblad_uniform)

  end subroutine lindblad_uniform


  !> Build the band-to-band transition rates of the uniform model at k-point ik.
  !!
  !! rtrans(i, j) is the rate from state j to state i. uniform(1) enters as an
  !! inverse rate (rate = 1/uniform(1)) and uniform(2) is converted from Kelvin
  !! to atomic units and used as the energy scale of a Bose-Einstein factor.
  !! Pairs whose energy difference is below small_e are skipped; above large_e
  !! the Bose-Einstein factor is set to zero. Every rate is weighted by the
  !! unoccupied fraction of its final state and is zero when that fraction is
  !! not positive.
  subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
    real(real64),        intent(in)     :: uniform(:)    !< model parameters, see above
    type(states_elec_t), intent(in)     :: ad_st         !< adiabatic states (eigenvalues, occupations)
    integer,             intent(in)     :: ik            !< k-point index
    real(real64),        intent(out)    :: rtrans(:, :)  !< (nst, nst) transition rates

    real(real64), parameter       :: small_e = 0.002_real64/(m_two*p_ry), large_e = 0.5_real64/(m_two*p_ry)
    integer                       :: nst, ilow, ihigh
    real(real64)                  :: rate0, omega, inv_omega, gap, nph
    real(real64)                  :: empty_low, empty_high

    push_sub(transition_rate_uniform)

    nst = ad_st%nst
    assert(ubound(rtrans, dim = 1) == nst)
    assert(ubound(rtrans, dim = 2) == nst)

    rate0 = 1.0_real64 / uniform(1)
    omega = units_to_atomic(unit_kelvin, uniform(2))
    ! guard against a vanishing energy scale
    inv_omega = 1.0_real64 / max(omega, 1.0e-12_real64)

    rtrans = m_zero

    ! loop over pairs ilow < ihigh; only pairs with a positive gap contribute
    do ilow = 1, nst
      empty_low = 1.0_real64 - ad_st%occ(ilow, ik) / ad_st%smear%el_per_state

      do ihigh = ilow + 1, nst
        gap = ad_st%eigenval(ihigh, ik) - ad_st%eigenval(ilow, ik)
        if (gap < small_e) cycle

        ! Bose-Einstein factor, dropped for large gaps
        nph = m_zero
        if (gap < large_e) nph = 1.0_real64/(exp(gap*inv_omega) - 1.0_real64)

        empty_high = 1.0_real64 - ad_st%occ(ihigh, ik) / ad_st%smear%el_per_state

        ! downward transition ihigh -> ilow
        if (empty_low > m_zero) then
          rtrans(ilow, ihigh) = rate0 * empty_low * (nph + 1)
        end if
        ! upward transition ilow -> ihigh
        if (empty_high > m_zero) then
          rtrans(ihigh, ilow) = rate0 * empty_high * nph
        end if
      end do
    end do

    pop_sub(transition_rate_uniform)

  end subroutine transition_rate_uniform


  !> Apply the uniform-rate Lindblad dissipator to a density matrix.
  !!
  !! With rtrans(j, i) the rate from state i to state j, the result is
  !!   l_mat(i, l) = -1/2 * Gamma_i * den_mat(i, l)   (row i,    l = 1..nst+nresd)
  !!   l_mat(l, i) = -1/2 * Gamma_i * den_mat(l, i)   (column i, l = 1..nst+nresd)
  !!   l_mat(j, j) = l_mat(j, j) + rtrans(j, i) * den_mat(i, i)   (j /= i)
  !! where Gamma_i = sum_{j /= i} rtrans(j, i) is the total rate out of i.
  !! The diagonal element (i, i) receives both the row and the column term.
  !!
  !! The outgoing rate is summed once per state before the row/column sweep,
  !! so the cost is O(nst*(nst+nresd) + nst**2) instead of one full sweep per
  !! pair of states. The result is mathematically the same as accumulating
  !! pair by pair; only the floating-point summation order differs.
  subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
    integer,             intent(in)        :: nst            !< number of adiabatic states
    integer,             intent(in)        :: nresd          !< number of residual states
    real(real64),        intent(in)        :: rtrans(:, :)   !< (nst, nst) transition rates
    complex(real64),     intent(in)        :: den_mat(:, :)  !< (2*nst, 2*nst) density matrix
    complex(real64),     intent(out)       :: l_mat(:, :)    !< (2*nst, 2*nst) dissipator applied to den_mat

    integer                     :: ist, jst, lst
    real(real64)                :: half_loss

    push_sub_with_profile(lindblad_operator_uniform)

    assert(ubound(rtrans, dim = 1) == nst)
    assert(ubound(rtrans, dim = 2) == nst)
    assert(ubound(den_mat, dim = 1) == 2*nst)
    assert(ubound(den_mat, dim = 2) == 2*nst)
    assert(ubound(l_mat, dim = 1) == 2*nst)
    assert(ubound(l_mat, dim = 2) == 2*nst)

    l_mat = 0.0_real64

    do ist = 1, nst

      ! total rate out of ist (the self term is excluded)
      half_loss = m_zero
      do jst = 1, nst
        if (jst == ist) cycle
        half_loss = half_loss + rtrans(jst, ist)
      end do
      half_loss = m_half * half_loss

      ! loss: damp row and column ist of the density matrix
      do lst = 1, nst + nresd
        l_mat(ist, lst) = l_mat(ist, lst) - half_loss * den_mat(ist, lst)
        l_mat(lst, ist) = l_mat(lst, ist) - half_loss * den_mat(lst, ist)
      end do

      ! gain: population leaving ist arrives on the diagonal of jst
      do jst = 1, nst
        if (jst == ist) cycle
        l_mat(jst, jst) = l_mat(jst, jst) + rtrans(jst, ist) * den_mat(ist, ist)
      end do

    end do

    pop_sub_with_profile(lindblad_operator_uniform)

  end subroutine lindblad_operator_uniform


  !> Relax the density matrices over dt with a two-time (T1/T2) model.
  !!
  !! - Diagonal elements of the first nst states decay towards dmp%occ_gs with
  !!   the factor exp(-dt/tmodel(1)).
  !! - Diagonal elements of the nresd residual states decay towards zero with
  !!   the same factor.
  !! - Off-diagonal elements (lower triangle) are scaled by exp(-dt/tmodel(2))
  !!   and mirrored to the upper triangle by complex conjugation.
  subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
    type(dmp_t),         intent(in)     :: dmp
    type(distributed_t), intent(in)     :: kpt
    integer,             intent(in)     :: nresd_k(:)          !< residual states per local k-point
    real(real64),        intent(in)     :: dt                  !< time step
    complex(real64),     intent(inout)  :: rho_mat_k(:, :, :)  !< (2*nst, 2*nst, local k-point)

    real(real64)                  :: decay_t1, decay_t2
    integer                       :: nst, ntot, ik_loc, irow, icol

    push_sub_with_profile(lindblad_2times)

    nst = dmp%adiabatic_st%nst

    decay_t1 = exp(-dt / dmp%tmodel(1))
    decay_t2 = exp(-dt / dmp%tmodel(2))

    ! loop over the local k-point index directly
    do ik_loc = 1, kpt%end - kpt%start + 1
      ntot = nst + nresd_k(ik_loc)

      ! populations of the adiabatic states relax towards occ_gs
      do icol = 1, nst
        rho_mat_k(icol, icol, ik_loc) = dmp%occ_gs(icol, ik_loc) &
          + (rho_mat_k(icol, icol, ik_loc) - dmp%occ_gs(icol, ik_loc)) * decay_t1
      end do

      ! populations of the residual states relax towards zero
      do icol = nst + 1, ntot
        rho_mat_k(icol, icol, ik_loc) = rho_mat_k(icol, icol, ik_loc) * decay_t1
      end do

      ! coherences: damp the lower triangle, then restore hermiticity
      do icol = 1, ntot
        do irow = icol + 1, ntot
          rho_mat_k(irow, icol, ik_loc) = rho_mat_k(irow, icol, ik_loc) * decay_t2
          rho_mat_k(icol, irow, ik_loc) = conjg(rho_mat_k(irow, icol, ik_loc))
        end do
      end do
    end do

    pop_sub_with_profile(lindblad_2times)
  end subroutine lindblad_2times


  !> Advance the density matrices by dt with the EPW-based Lindblad operator.
  !!
  !! After refreshing the transition-rate table, the update is the truncated
  !! Taylor series
  !!   rho <- rho + sum_{n=1}^{norder} dt^n / n! * L^n[rho].
  !! The operator at one k-point needs the diagonal of its input at all
  !! k-points. That diagonal (rho_diag) starts from the adiabatic occupations
  !! and, between orders, is rebuilt from the diagonal of the latest output
  !! and gathered over the k-point group.
  subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
    type(dmp_t),              intent(inout)  :: dmp
    type(hamiltonian_elec_t), intent(in)     :: hm
    type(distributed_t),      intent(in)     :: kpt
    type(mpi_grp_t),          intent(in)     :: system_grp
    type(namespace_t),        intent(in)     :: namespace
    integer,                  intent(in)     :: nresd_k(:)          !< residual states per local k-point
    real(real64),             intent(in)     :: dt                  !< time step
    complex(real64),          intent(inout)  :: rho_mat_k(:, :, :)  !< (2*nst, 2*nst, local k-point)

    integer, parameter            :: norder = 4
    integer                       :: nst, iorder, ik, ik_loc, ist
    real(real64)                  :: taylor_coeff
    real(real64), allocatable     :: rho_diag(:, :)
    complex(real64), allocatable  :: rho_in(:, :, :), rho_out(:, :, :), rho_swap(:, :, :)

    push_sub_with_profile(lindblad_from_epw)

    nst = dmp%adiabatic_st%nst

    call dmp%update_trans_rate(hm, system_grp, namespace)

    ! the diagonal fed to the operator starts from the adiabatic occupations
    safe_allocate_source_a(rho_diag, dmp%adiabatic_st%occ)

    ! rho_out = L[rho_in]
    safe_allocate_source(rho_in, rho_mat_k)
    ! the dimension of rho_out should be modified later
    safe_allocate(rho_out(1:2*nst, 1:2*nst, 1:kpt%nlocal))

    taylor_coeff = 1.0_real64
    do iorder = 1, norder

      taylor_coeff = taylor_coeff * dt / iorder

      ! apply L at every local k-point and accumulate this order into rho_mat_k
      do ik = kpt%start, kpt%end
        ik_loc = ik - kpt%start + 1

        call lindblad_operator_epw(dmp, ik, hm%kpoints%nik_skip, nresd_k(ik_loc), rho_diag, &
          rho_in(:, :, ik_loc), rho_out(:, :, ik_loc))

        call lalg_axpy(2*nst, 2*nst, taylor_coeff, rho_out(:, :, ik_loc), rho_mat_k(:, :, ik_loc))
      end do

      ! the highest order needs no preparation for a further application of L
      if (iorder == norder) exit

      ! rebuild the diagonal from the latest output ...
      do ik = kpt%start, kpt%end
        ik_loc = ik - kpt%start + 1
        do ist = 1, nst
          rho_diag(ist, ik) = real(rho_out(ist, ist, ik_loc))
        end do
      end do
      ! ... and make all k-points available on every process
      call broadcast_occupation(rho_diag, kpt, nst, dmp%adiabatic_st%parallel_in_states)

      ! Exchange the buffers without copying: the current output becomes the
      ! next input and the old input is recycled as output storage.
      call move_alloc(rho_in, rho_swap)
      call move_alloc(rho_out, rho_in)
      call move_alloc(rho_swap, rho_out)
    end do

    safe_deallocate_a(rho_in)
    safe_deallocate_a(rho_out)
    safe_deallocate_a(rho_diag)

    pop_sub_with_profile(lindblad_from_epw)
  end subroutine lindblad_from_epw


  !> Advance the density matrices by dt with the EPW-based collision integral.
  !!
  !! Off-diagonal elements are damped by exp(-(gamma_i + gamma_j)/2 * dt),
  !! with gamma taken from lifetime() for bands inside [istart, iend] and zero
  !! elsewhere. Diagonal elements inside the band window follow the analytic
  !! solution of df/dt = -gam_out*f + gam_in over one step; a first-order
  !! expansion is used when gam_out*dt does not exceed gthresh.
  subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
    type(dmp_t),              intent(inout)  :: dmp
    type(hamiltonian_elec_t), intent(in)     :: hm
    type(distributed_t),      intent(in)     :: kpt
    type(mpi_grp_t),          intent(in)     :: system_grp
    type(namespace_t),        intent(in)     :: namespace
    integer,                  intent(in)     :: nresd_k(:)          !< residual states per local k-point
    real(real64),             intent(in)     :: dt                  !< time step
    complex(real64),          intent(inout)  :: rho_mat_k(:, :, :)  !< (2*nst, 2*nst, local k-point)

    real(real64), parameter       :: gthresh = 1.0e-8_real64
    integer                       :: nst, ntot, nik_skip, ik, ik_loc, irow, icol
    real(real64)                  :: gam_pair, gam_in, gam_out, survive
    real(real64), allocatable     :: gam_bnd(:)

    push_sub_with_profile(collision_from_epw)

    nst = dmp%adiabatic_st%nst
    nik_skip = hm%kpoints%nik_skip

    safe_allocate(gam_bnd(2*nst))

    call dmp%update_trans_rate(hm, system_grp, namespace)

    ! --- coherences ---------------------------------------------------------
    do ik = kpt%start, kpt%end
      ik_loc = ik - kpt%start + 1
      ntot = nst + nresd_k(ik_loc)

      ! decay rate of every band; bands outside the window keep a zero rate
      gam_bnd = 0.0_real64
      do icol = dmp%istart, dmp%iend
        call lifetime(dmp, ik, icol, nik_skip, gam_bnd(icol))
      end do

      do icol = 1, ntot
        do irow = icol + 1, ntot
          gam_pair = -(gam_bnd(icol) + gam_bnd(irow)) / 2.0_real64

          ! damp the lower triangle and restore hermiticity
          rho_mat_k(irow, icol, ik_loc) = rho_mat_k(irow, icol, ik_loc) * exp(gam_pair * dt)
          rho_mat_k(icol, irow, ik_loc) = conjg(rho_mat_k(irow, icol, ik_loc))
        end do
      end do
    end do

    ! --- populations: \dot{f} = -gam_out * f + gam_in -------------------------
    do ik = kpt%start, kpt%end
      ik_loc = ik - kpt%start + 1

      do icol = dmp%istart, dmp%iend
        call get_gamma(dmp, ik, icol, nik_skip, gam_in, gam_out)

        if (gam_out * dt > gthresh) then
          ! analytic solution over one step
          survive = exp(-gam_out * dt)
          rho_mat_k(icol, icol, ik_loc) = rho_mat_k(icol, icol, ik_loc) * survive &
            + (gam_in / gam_out) * (1.0_real64 - survive)
        else
          ! gam_in/gam_out is ill-defined for vanishing gam_out: expand to first order
          rho_mat_k(icol, icol, ik_loc) = rho_mat_k(icol, icol, ik_loc) * (1.0_real64 - gam_out * dt) + gam_in * dt
        end if
      end do
    end do

    safe_deallocate_a(gam_bnd)

    pop_sub_with_profile(collision_from_epw)
  end subroutine collision_from_epw


  !> Make sure the loaded transition rates belong to the current vector field.
  !!
  !! The vector-field grid index for the present Hamiltonian is looked up with
  !! get_vector_field_index. The table is re-read with iopar_read_trans_rate
  !! only when that index differs from the cached one (this%ia), which is then
  !! updated.
  !!
  !! The debug/profiling label now matches the routine name; it previously
  !! read "dmp_propagation_update_trans_rate".
  subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
    class(dmp_t),              intent(inout)  :: this
    type(hamiltonian_elec_t),  intent(in)     :: hm
    type(mpi_grp_t),           intent(in)     :: system_grp
    type(namespace_t),         intent(in)     :: namespace

    integer                   :: ia

    push_sub_with_profile(dm_propagation_update_trans_rate)

    ia = get_vector_field_index(this, hm, namespace)

    ! skip the file access when the cached table is already the right one
    if (ia /= this%ia) then
      call iopar_read_trans_rate(ia, system_grp, namespace, this)
      this%ia = ia
    end if

    pop_sub_with_profile(dm_propagation_update_trans_rate)

  end subroutine dm_propagation_update_trans_rate


  !> Open the EPW transition-rate file, read and check its header, and set up
  !! the storage for the rate table.
  !!
  !! Steps, in order:
  !!  1. The root process of system_grp opens dmp%epw_file as an unformatted
  !!     stream and reads the header (iqq, totq, wnst, epw_nk, at, oct_nk,
  !!     oct_s, astep, na). Open or read failures are fatal.
  !!  2. The header fields used later are broadcast from rank 0 of system_grp
  !!     (iqq and totq are read but not broadcast or used here).
  !!  3. dmp%iend is derived from dmp%istart and dmp%wnst.
  !!  4. The k-point mesh and its shift stored in the file are checked against
  !!     hm%kpoints (fatal on mismatch); the lattice vectors are compared with
  !!     ions%latt%rlattice_primitive (warning only).
  !!  5. The EPW k-point map is built, the cached vector-field index is reset
  !!     to -1, and ave_trans is set up with dmp%num elements (as an intranode
  !!     shared-memory window with MPI, a plain allocation otherwise).
  !!
  !! NOTE(review): ave_trans is not declared in this routine; presumably a
  !! module-level pointer - confirm against the module header.
  subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)

    type(namespace_t),        intent(in)    :: namespace

    type(ions_t),             intent(in)    :: ions

    type(hamiltonian_elec_t), intent(in)    :: hm

    type(mpi_grp_t),          intent(in)    :: system_grp

    type(dmp_t),              intent(inout) :: dmp


    integer               :: ierr, iostat, idim, idir, iqq, totq

    integer               :: epw_nk(3), oct_nk(3)

    real(real64)          :: oct_s(3), at(3,3)


    push_sub(iopar_open_trans_rate)


    ! metadata: only the root process touches the file

    if (system_grp%is_root()) then

      open(newunit=dmp%iunit, file=trim(dmp%epw_file), form='unformatted', access='stream', status='old', &

        action='read', iostat=iostat)

      if (iostat /= 0) then

        ! mark the unit as invalid before aborting
        dmp%iunit = -1

        write(message(1), '(a,a)') 'Error opening file: ', trim(dmp%epw_file)

        call messages_fatal(1, namespace=namespace)

      end if

      ! header record; its field order defines the binary layout of the file

      read(dmp%iunit, iostat=ierr) iqq, totq, dmp%wnst, epw_nk(1:3), at, oct_nk(1:3), oct_s(1:3), dmp%astep(1:3), dmp%na(1:3)

      if (ierr /= 0) then

        write(message(1), '(a,a)') 'Error reading header from: ', trim(dmp%epw_file)

        call messages_fatal(1, namespace=namespace)

      end if

      !

    end if


    ! distribute the header fields from rank 0 to all processes of system_grp
    call system_grp%bcast(dmp%wnst, 1, mpi_integer, 0)

    call system_grp%bcast(at, 9, mpi_double_precision, 0)

    call system_grp%bcast(oct_s, 3, mpi_double_precision, 0)

    call system_grp%bcast(dmp%astep, 3, mpi_double_precision, 0)

    call system_grp%bcast(dmp%na, 3, mpi_integer, 0)

    call system_grp%bcast(epw_nk, 3, mpi_integer, 0)

    call system_grp%bcast(oct_nk, 3, mpi_integer, 0)


    ! last band of the damping window: wnst bands starting at istart
    dmp%iend = dmp%istart + dmp%wnst - 1


    ! the mesh stored in the file must be the one used in this calculation
    if (any(oct_nk /= hm%kpoints%nik_axis)) then

      write(message(1), '(a, a)') 'Inconsistent k-point mesh in KPointsGrid and ', trim(dmp%epw_file)

      call messages_fatal(1, namespace=namespace)

    end if

    if (any(abs(oct_s - hm%kpoints%full%shifts(:, 1)) > m_epsilon)) then

      write(message(1), '(a, a)') 'Inconsistent k-point mesh shifts in KPointsGrid and ', trim(dmp%epw_file)

      call messages_fatal(1, namespace=namespace)

    end if

    ! report what was found in the header

    write(message(1), '(3(a,i0), a)') 'Info: Averaged transition rates obtained from a ', epw_nk(1), ' x ', &

      epw_nk(2), ' x ', epw_nk(3), ' EPW k-point mesh'

    call messages_info(1, namespace=namespace)

    write(message(1), '(a, i0, a, i0)') 'Info: Damping band ', dmp%istart, ' - ', dmp%iend

    call messages_info(1, namespace=namespace)

    write(message(1),'(a)') '  EPW Lattice Vectors [1/alat]'

    ! one message line per lattice vector (column idim of at)
    do idim = 1, 3

      write(message(1+idim),'(3f12.6)') (at(idir, idim), idir = 1, 3)

    end do

    call messages_info(4, namespace=namespace)

    ! a lattice mismatch is reported but does not stop the run
    if (any(abs(at - ions%latt%rlattice_primitive) > m_epsilon)) then

      write(message(1),'(a)') 'Lattice settings are not fully consistent with those in EPW'

      call messages_warning(1, namespace=namespace)

    end if


    call build_epw_kmap(namespace, hm%kpoints, dmp)


    ! initialize vector field grid index; -1 means that no table is loaded yet

    dmp%ia = -1


    ! shared memory for transition rates:
    ! (number of mesh k-points)**2 * (number of bands in the window)**2 entries

    dmp%num = int(product(oct_nk), kind=int64)**2 * int(dmp%wnst, kind=int64)**2

#ifdef HAVE_MPI

    ! create shared memory window and fill it only on root

    call create_intranode_communicator(system_grp, dmp%intranode_grp, dmp%internode_grp)

    ! We inline the logic of lmpi_create_shared_memory_window because single precision is not supported.

    call slmpi_create_shared_memory_window(dmp%num, dmp%intranode_grp, dmp%window_trans_rate, ave_trans)

#else

    safe_allocate(ave_trans(1:dmp%num))

#endif


    pop_sub(iopar_open_trans_rate)

  end subroutine iopar_open_trans_rate


  !> Load the transition-rate table of vector-field grid point ia into ave_trans.
  !!
  !! The root process of system_grp reads dmp%num single-precision values from
  !! the already opened stream dmp%iunit; table ia starts right after the
  !! header and the ia-1 preceding tables. With MPI, the data are then passed
  !! to the intranode rank 0 of every node and the shared-memory window is
  !! synchronized. A read failure is fatal.
  subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
    integer,                  intent(in)      :: ia          !< 1-based index of the table to load
    type(mpi_grp_t),          intent(in)      :: system_grp
    type(namespace_t),        intent(in)      :: namespace
    type(dmp_t),              intent(inout)   :: dmp

    integer(int64), parameter   :: header_bytes = 168 ! size of the file header
    integer(int64), parameter   :: epw_bytes = 4      ! size of one single-precision entry
    integer(int64)              :: tables_before, first_byte
    integer                     :: io_status

    push_sub(iopar_read_trans_rate)

    if (system_grp%is_root()) then
      ! stream positions are 1-based, hence the trailing + 1
      tables_before = int(ia - 1, kind=int64)
      first_byte = header_bytes + tables_before * dmp%num * epw_bytes + 1_int64

      read(dmp%iunit, pos=first_byte, iostat=io_status) ave_trans
      if (io_status /= 0) then
        write(message(1), '(a,a)') 'Error reading transition rates from: ', trim(dmp%epw_file)
        call messages_fatal(1, namespace=namespace)
      end if
    end if

#ifdef HAVE_MPI
    ! one process per node (intranode rank 0) receives the table ...
    if (dmp%intranode_grp%rank == 0) then
      call smpi_grp_bcast(dmp%internode_grp, ave_trans(1), dmp%num, mpi_real, 0)
    end if
    ! ... and the window is synchronized within each node
    call lmpi_sync_shared_memory_window(dmp%window_trans_rate, dmp%intranode_grp)
#endif

    pop_sub(iopar_read_trans_rate)
  end subroutine iopar_read_trans_rate


  !> Release everything set up for the EPW transition rates: the file unit
  !! (on the root process of system_grp), the k-point map, and the storage
  !! behind ave_trans (shared-memory window with MPI, plain allocation
  !! otherwise).
  subroutine iopar_close_trans_rate(system_grp, dmp)
    type(mpi_grp_t), intent(in)    :: system_grp
    type(dmp_t),     intent(inout) :: dmp

    push_sub(iopar_close_trans_rate)

    ! only the root process holds an open unit
    if (system_grp%is_root()) close(dmp%iunit)

    safe_deallocate_a(dmp%kmap)

#ifdef HAVE_MPI
    ! the window owns the memory; afterwards the pointer must not be used
    call lmpi_destroy_shared_memory_window(dmp%window_trans_rate)
    nullify(ave_trans)
#else
    safe_deallocate_p(ave_trans)
#endif

    pop_sub(iopar_close_trans_rate)

  end subroutine iopar_close_trans_rate


  !> Build dmp%kmap, the map from each k-point to a flattened mesh index.
  !!
  !! The reduced coordinates of every k-point are converted to integer mesh
  !! coordinates, folded into [0, nik_axis-1] and flattened with the third
  !! axis running fastest (1-based). The first nik - nik_skip k-points must
  !! sit exactly on the mesh (fatal otherwise); the remaining ones are mapped
  !! to the nearest mesh point.
  subroutine build_epw_kmap(namespace, kpoints, dmp)
    type(namespace_t),    intent(in)    :: namespace
    type(kpoints_t),      intent(in)    :: kpoints
    type(dmp_t),          intent(inout) :: dmp

    real(real64), parameter :: tol = 1.0e-10_real64
    integer                 :: nik, nik_mp, ik
    integer                 :: cell(3)
    real(real64)            :: kred(3), grid_pos(3)

    push_sub(build_epw_kmap)

    nik = dmp%adiabatic_st%nik
    nik_mp = nik - kpoints%nik_skip
    safe_allocate(dmp%kmap(nik))

    ! start from an out-of-range value so that the final check is meaningful
    dmp%kmap = nik_mp + 1

    do ik = 1, nik
      ! position of the k-point in units of the mesh spacing
      kred = kpoints%get_point(ik, .false.)
      grid_pos = (kred + 0.5_real64) * real(kpoints%nik_axis, kind=real64) - kpoints%full%shifts(:, 1)

      ! mesh k-points have to coincide with a mesh node
      if (ik <= nik_mp .and. any(abs(grid_pos - nint(grid_pos)) > tol)) then
        write(message(1), '(a)') 'K-point mesh is not compatible with EPW input'
        call messages_fatal(1, namespace=namespace)
      end if

      ! fold into the central cell and flatten, third axis fastest
      cell = modulo(nint(grid_pos), kpoints%nik_axis)
      dmp%kmap(ik) = (cell(1) * kpoints%nik_axis(2) + cell(2)) * kpoints%nik_axis(3) + cell(3) + 1
    end do

    ! sanity check: every k-point was mapped inside the mesh
    assert(all(dmp%kmap <= nik_mp))

    pop_sub(build_epw_kmap)
  end subroutine build_epw_kmap


  !> Return the flattened index of the vector-field grid point closest to the
  !! current uniform vector potential.
  !!
  !! The vector potential (zero when none is allocated) is converted to
  !! reduced coordinates and rounded, per direction, to the nearest multiple
  !! of dmp%astep; directions with a non-positive step use index 0. Indices
  !! outside [-na, na] trigger a warning and are clamped to that range. The
  !! three indices are then shifted to [0, 2*na] and flattened with the third
  !! direction running fastest; the result is 1-based.
  !!
  !! The grid point reported in the info message is evaluated after clamping,
  !! so it always corresponds to the returned index.
  function get_vector_field_index(dmp, hm, namespace) result(aidx)
    type(dmp_t),              intent(in)  :: dmp
    type(hamiltonian_elec_t), intent(in)  :: hm
    type(namespace_t),        intent(in)  :: namespace

    real(real64)           :: ared(3), approx(3)
    integer                :: apoint_idx(3)
    integer                :: aidx, idim

    push_sub(get_vector_field_index)

    ! vector potential in reduced coordinates
    if (allocated(hm%hm_base%uniform_vector_potential)) then
      call kpoints_to_reduced(hm%ions%latt, hm%hm_base%uniform_vector_potential, ared)
    else
      ared = 0.0_real64
    end if

    ! nearest grid index per direction; a non-positive step means no grid there
    apoint_idx = 0
    do idim = 1, 3
      if (dmp%astep(idim) > m_zero) then
        apoint_idx(idim) = nint(ared(idim) / dmp%astep(idim))
      end if
    end do

    ! keep the index inside the tabulated range
    if (any(abs(apoint_idx) > dmp%na)) then
      write(message(1), '(a, 3F8.3)') 'Vector potential exceeds mesh range: ', ared
      call messages_warning(1, namespace=namespace)
      apoint_idx = min(max(apoint_idx, -dmp%na), dmp%na)
    end if

    ! grid point that is actually used (after clamping)
    do idim = 1, 3
      if (dmp%astep(idim) > m_zero) then
        approx(idim) = apoint_idx(idim) * dmp%astep(idim)
      else
        approx(idim) = 0.0_real64
      end if
    end do

    ! Get the flattened 1D index
    ! Step 1: Find the relative offset by subtracting the minimum index (-na).
    !         This shifts the range [-na, na] to an offset [0, 2*na].
    !         e.g., if range is [-2, 2], the offset for 1 is 1 - (-2) = 3.
    ! Step 2: Flatten the 3D offsets into 1D, and finally add 1 for Fortran 1-based indexing.
    aidx = (apoint_idx(1) + dmp%na(1)) * (2 * dmp%na(2) + 1) * (2 * dmp%na(3) + 1) + &
      (apoint_idx(2) + dmp%na(2)) * (2 * dmp%na(3) + 1) + &
      (apoint_idx(3) + dmp%na(3)) + 1

    ! "1x": the X edit descriptor requires an explicit count in standard Fortran
    write(message(1), '(a, 1x, 3F7.3)') 'Approximated vector field damping grid:', approx
    call messages_info(1, namespace=namespace)

    pop_sub(get_vector_field_index)
  end function get_vector_field_index


  !> Look up one transition rate in the flat table ave_trans.
  !!
  !! The k-point indices k and kq are first mapped through dmp%kmap. The table
  !! is addressed as a 4D array of extents [nik_mp, nik_mp, wnst, wnst] in
  !! which kq is the slowest and jbnd the fastest index; band indices are
  !! counted from dmp%istart. The value is converted to real64.
  !!
  !! With Pauli blocking (the default when p_block is absent) the rate is
  !! multiplied by the unoccupied fraction of state (jbnd, kq), floored at 0.
  !!
  !! The flat index is evaluated entirely in 64-bit integers so that large
  !! meshes or band windows cannot overflow default-integer intermediates.
  function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block) result(res)
    type(dmp_t),        intent(in) :: dmp
    integer,            intent(in) :: nik_mp   !< number of mesh k-points
    integer,            intent(in) :: jbnd     !< fastest band index; also the blocked state
    integer,            intent(in) :: ibnd     !< second band index
    integer,            intent(in) :: k        !< k-point index (before mapping)
    integer,            intent(in) :: kq       !< k-point index (before mapping), slowest index
    logical, optional,  intent(in) :: p_block  !< apply Pauli blocking (default .true.)

    real(real64)      :: unocc, res
    integer           :: k_, kq_
    integer(int64)    :: idx, nbnd, nbnd2

    push_sub(get_trans_rate)

    k_ = dmp%kmap(k)
    kq_ = dmp%kmap(kq)

    ! Flatten 4D transition rate index to 1D: (kq, k, ibnd, jbnd) -> idx
    ! Matrix dimensions are [nik_mp, nik_mp, wnst, wnst]
    ! The layout follows the hierarchy: kq is the slowest index, jbnd is the fastest.
    nbnd = int(dmp%wnst, kind=int64)
    nbnd2 = nbnd * nbnd
    idx = int(kq_ - 1, kind=int64) * int(nik_mp, kind=int64) * nbnd2 + &
      int(k_ - 1, kind=int64) * nbnd2 + &
      int(ibnd - dmp%istart, kind=int64) * nbnd + &
      int(jbnd - dmp%istart, kind=int64) + 1_int64

    assert(idx >= 1 .and. idx <= dmp%num)

    res = real(ave_trans(idx), kind=real64)

    ! Pauli blocking
    if (optional_default(p_block, .true.)) then
      unocc = 1.0_real64 - dmp%adiabatic_st%occ(jbnd, kq) / dmp%adiabatic_st%smear%el_per_state
      res = res * max(unocc, 0.0_real64)
    end if

    pop_sub(get_trans_rate)
  end function get_trans_rate


  !> Apply the EPW-based Lindblad dissipator to the density matrix of k-point ik.
  !!
  !! For every band ibnd of the window [istart, iend]:
  !!   loss(ibnd) = sum over mesh k-points ikq and window bands jbnd of the
  !!                Pauli-blocked rate (ibnd, ik) -> (jbnd, ikq);
  !!   gain(ibnd) = sum over ikq and jbnd of the Pauli-blocked rate
  !!                (jbnd, ikq) -> (ibnd, ik) times rho_diag(jbnd, ikq).
  !! Row and column ibnd of den_mat (indices 1..nst+nresd) are multiplied by
  !! -loss/2 and gain is added to the diagonal element (ibnd, ibnd); that
  !! element receives both the row and the column term.
  !!
  !! Loss and gain are accumulated before the row/column sweep, so the sweep
  !! runs once per band instead of once per (ikq, jbnd, ibnd) triple. The
  !! result is mathematically the same as accumulating term by term; only the
  !! floating-point summation order differs.
  subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
    type(dmp_t),         intent(in)        :: dmp
    integer,             intent(in)        :: ik              !< k-point of den_mat
    integer,             intent(in)        :: nik_skip        !< k-points that are not part of the mesh
    integer,             intent(in)        :: nresd           !< number of residual states
    real(real64),        intent(in)        :: rho_diag(:, :)  !< (nst, nik) populations at all k-points
    complex(real64),     intent(in)        :: den_mat(:, :)   !< (2*nst, 2*nst) density matrix
    complex(real64),     intent(out)       :: l_mat(:, :)     !< (2*nst, 2*nst) dissipator applied to den_mat

    integer                     :: lst, nst, nik_mp, ikq, ibnd, jbnd
    real(real64)                :: loss, gain, half_loss

    push_sub(lindblad_operator_epw)

    nst = dmp%adiabatic_st%nst
    nik_mp = dmp%adiabatic_st%nik - nik_skip

    assert(ubound(rho_diag, dim = 1) == nst)
    assert(ubound(rho_diag, dim = 2) == dmp%adiabatic_st%nik)
    assert(ubound(den_mat, dim = 1) == 2*nst)
    assert(ubound(den_mat, dim = 2) == 2*nst)
    assert(ubound(l_mat, dim = 1) == 2*nst)
    assert(ubound(l_mat, dim = 2) == 2*nst)

    l_mat = 0.0_real64

    ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
    ! in both cases, ikq runs over the mp grid.
    do ibnd = dmp%istart, dmp%iend

      ! total out- and in-scattering of (ibnd, ik)
      loss = 0.0_real64
      gain = 0.0_real64
      do ikq = 1, nik_mp
        do jbnd = dmp%istart, dmp%iend
          loss = loss + get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
          gain = gain + get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.) &
            * rho_diag(jbnd, ikq)
        end do
      end do
      half_loss = m_half * loss

      ! loss: damp row and column ibnd of the density matrix
      do lst = 1, nst + nresd
        l_mat(ibnd, lst) = l_mat(ibnd, lst) - half_loss * den_mat(ibnd, lst)
        l_mat(lst, ibnd) = l_mat(lst, ibnd) - half_loss * den_mat(lst, ibnd)
      end do

      ! gain: population scattered into (ibnd, ik)
      l_mat(ibnd, ibnd) = l_mat(ibnd, ibnd) + gain

    end do

    pop_sub(lindblad_operator_epw)

  end subroutine lindblad_operator_epw


  !> Total decay rate gam of band ibnd at k-point ik.
  !!
  !! gam sums, over all mesh k-points ikq and all bands jbnd of the window,
  !!  - the Pauli-blocked rate (ibnd, ik) -> (jbnd, ikq), and
  !!  - the unblocked rate (jbnd, ikq) -> (ibnd, ik) weighted by the occupied
  !!    fraction occ(jbnd, ikq) / el_per_state.
  subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
    type(dmp_t),         intent(in)        :: dmp
    integer,             intent(in)        :: ik        !< k-point index
    integer,             intent(in)        :: ibnd      !< band index, inside [istart, iend]
    integer,             intent(in)        :: nik_skip  !< k-points that are not part of the mesh
    real(real64),        intent(out)       :: gam       !< accumulated rate

    integer                     :: nik_mp, ikq, jbnd
    real(real64)                :: rate_out, rate_in

    push_sub(lifetime)

    assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
    nik_mp = dmp%adiabatic_st%nik - nik_skip

    ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
    ! in both cases, ikq runs over the mp grid.
    gam = 0.0_real64
    do ikq = 1, nik_mp
      do jbnd = dmp%istart, dmp%iend
        ! scattering out of (ibnd, ik), blocked by the occupation of the target
        rate_out = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
        gam = gam + rate_out

        ! scattering into (ibnd, ik), weighted by the occupation of the source
        rate_in = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.false.)
        gam = gam + rate_in * dmp%adiabatic_st%occ(jbnd, ikq) / dmp%adiabatic_st%smear%el_per_state
      end do
    end do

    pop_sub(lifetime)

  end subroutine lifetime


  !> In- and out-scattering rates of band ibnd at k-point ik.
  !!
  !! Both sums run over all mesh k-points ikq and all bands jbnd of the window:
  !!  - gam_out accumulates the Pauli-blocked rate (ibnd, ik) -> (jbnd, ikq);
  !!  - gam_in accumulates the Pauli-blocked rate (jbnd, ikq) -> (ibnd, ik)
  !!    times the occupation occ(jbnd, ikq).
  subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
    type(dmp_t),         intent(in)        :: dmp
    integer,             intent(in)        :: ik        !< k-point index
    integer,             intent(in)        :: ibnd      !< band index, inside [istart, iend]
    integer,             intent(in)        :: nik_skip  !< k-points that are not part of the mesh
    real(real64),        intent(out)       :: gam_in    !< source term
    real(real64),        intent(out)       :: gam_out   !< decay rate

    integer                     :: nik_mp, ikq, jbnd
    real(real64)                :: rate_out, rate_in

    push_sub(get_gamma)

    assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
    nik_mp = dmp%adiabatic_st%nik - nik_skip

    ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
    ! in both cases, ikq runs over the mp grid.
    gam_out = 0.0_real64
    gam_in = 0.0_real64
    do ikq = 1, nik_mp
      do jbnd = dmp%istart, dmp%iend
        ! leaving (ibnd, ik)
        rate_out = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
        gam_out = gam_out + rate_out

        ! arriving at (ibnd, ik) from the occupied part of (jbnd, ikq)
        rate_in = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.)
        gam_in = gam_in + rate_in * dmp%adiabatic_st%occ(jbnd, ikq)
      end do
    end do

    pop_sub(get_gamma)

  end subroutine get_gamma


  !> Diagonalize the density matrix of k-point ik and rebuild the states and
  !! occupations from its eigenvectors and eigenvalues.
  !!
  !! When dmp%othn is set, the leading (nst+nresd) block of rho_mat is
  !! diagonalized with lalg_eigensolve. Otherwise a generalized eigenproblem
  !! is solved with an overlap matrix made of three blocks: the identity
  !! (adiabatic-adiabatic), the conjugated projections between adiabatic and
  !! residual states, and the residual-residual overlap assembled from
  !! overlap_resd_ks and overlap_ad_ks. This branch requires nresd == nst.
  !!
  !! The eigenvalues and rho_mat (as left by the solver) are then handed to
  !! update_wfc_occ_procrustes or update_wfc_occ, depending on
  !! dmp%unitary_transform.
  subroutine update_st(dmp, ik,  gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
    type(dmp_t),                intent(inout)    :: dmp
    integer,                    intent(in)       :: ik
    type(grid_t),               intent(in)       :: gr
    type(namespace_t),          intent(in)       :: namespace
    integer,                    intent(in)       :: nresd
    complex(real64),            intent(in)       :: overlap_ad_ks(:, :)
    complex(real64),            intent(in)       :: overlap_resd_ks(:, :)
    real(real64),               intent(in)       :: nrm2_tdks(:)
    type(states_elec_t),        intent(inout)    :: resd
    type(states_elec_t),        intent(inout)    :: st
    complex(real64),            intent(inout)    :: rho_mat(:, :)
    real(real64),               intent(inout)    :: pop

    integer                               :: nst, ntot, idiag
    real(real64), allocatable             :: eigvals(:)
    complex(real64), allocatable          :: s_mat(:, :), proj_ad_resd(:, :)

    push_sub_with_profile(update_st)

    nst = dmp%adiabatic_st%nst

    assert(ubound(overlap_ad_ks, dim = 1) == nst)
    assert(ubound(overlap_ad_ks, dim = 2) == nst)
    assert(ubound(overlap_resd_ks, dim = 1) == nst)
    assert(ubound(overlap_resd_ks, dim = 2) == nst)
    assert(ubound(rho_mat, dim = 1) == 2*nst)
    assert(ubound(rho_mat, dim = 2) == 2*nst)
    assert(is_hermitian(2*nst, rho_mat))

    ntot = nst + nresd
    safe_allocate(eigvals(1:ntot))

    if (.not. dmp%othn) then
      ! generalized eigenvalue problem
      assert(nresd == nst)

      safe_allocate(s_mat(1:2*nst, 1:2*nst))
      safe_allocate(proj_ad_resd(1:nst, 1:nst))

      ! adiabatic - adiabatic block: identity
      s_mat = m_z0
      do idiag = 1, nst
        s_mat(idiag, idiag) = m_one
      end do

      ! adiabatic - resd block; zstates_elec_calc_projections returns the conjugate
      call zstates_elec_calc_projections(resd, dmp%adiabatic_st, namespace, gr, ik, proj_ad_resd)
      s_mat(1:nst, nst+1:nst+nresd) = conjg(proj_ad_resd(1:nst, 1:nresd))

      ! resd - resd block
      ! _i<d | d>_j = S^{d,\psi}_{ij} - sum_k S^{\phi,\psi}_{kj} S^{d,\phi}_{ik}
      s_mat(nst+1:nst+nresd, nst+1:nst+nresd) = overlap_resd_ks(1:nresd, 1:nresd)
      call blas_gemm('T', 'N', nresd, nresd, nst, -m_z1, proj_ad_resd(1, 1), nst, overlap_ad_ks(1, 1), nst, &
        m_z1, s_mat(nst+1, nst+1), 2*nst)

      ! only the upper triangle of the overlap matrix is needed
      call lalg_geneigensolve(ntot, rho_mat, s_mat, eigvals, preserve_mat=.false.)

      safe_deallocate_a(proj_ad_resd)
      safe_deallocate_a(s_mat)
    else
      ! standard eigenvalue problem
      call lalg_eigensolve(ntot, rho_mat, eigvals)
    end if

    ! rebuild states and occupations from the eigenpairs
    if (dmp%unitary_transform) then
      call update_wfc_occ_procrustes(ik, dmp%adiabatic_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
        nrm2_tdks, eigvals, rho_mat, st, pop)
    else
      call update_wfc_occ(ik, dmp%adiabatic_st, resd, gr, nresd, eigvals, rho_mat, st, pop)
    end if

    safe_deallocate_a(eigvals)

    pop_sub_with_profile(update_st)

  end subroutine update_st


  !> Update states directly from diagonalization (no Procrustes).
  !!
  !! TDKS state ist (1 <= ist <= nst) is rebuilt as a linear combination of the
  !! wavefunctions stored in ad_st and in the first nresd states of resd, with the
  !! coefficients taken from column nst+nresd+1-ist of v_mat. Its occupation is
  !! occ(nst+nresd+1-ist), i.e. the columns are consumed from the last one
  !! downwards because the eigenvalues are stored in ascending order.
  !!
  !! The state loop runs over all nst states so that every state receiving a new
  !! occupation also receives the matching wavefunction, also when nresd < nst.
  subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
    integer,                    intent(in)       :: ik          !< index used to address psib(:, ik), st%occ(:, ik) and st%kweights(ik)
    type(states_elec_t),        intent(in)       :: ad_st       !< first set of basis states (rows 1:nst of v_mat)
    type(states_elec_t),        intent(in)       :: resd        !< second set of basis states (rows nst+1:nst+nresd of v_mat)
    type(grid_t),               intent(in)       :: gr          !< grid; gr%np points are used per state
    integer,                    intent(in)       :: nresd       !< number of states of resd that are used
    real(real64),               intent(in)       :: occ(:)      !< eigenvalues, ascending; entries nresd+1:nst+nresd are used
    complex(real64),            intent(in)       :: v_mat(:, :) !< coefficient matrix, dimensions 2*nst x 2*nst
    type(states_elec_t),        intent(inout)    :: st          !< states and occupations to overwrite
    real(real64),               intent(inout)    :: pop         !< incremented by sum(new occupations) * st%kweights(ik)

    complex(real64), allocatable  :: psi_j(:, :)
    integer                       :: ist, j, ib, i_minst, i_maxst, nst

    push_sub_with_profile(update_wfc_occ)

    nst = ad_st%nst
    assert(ubound(v_mat, dim = 1) == 2*nst)
    assert(ubound(v_mat, dim = 2) == 2*nst)

    safe_allocate(psi_j(1:gr%np, st%d%dim))

    do ist = 1, nst
      ! column of v_mat paired with occ(j) in the occupation assignment below
      j = nst + nresd + 1 - ist

      psi_j = (0.0_real64, 0.0_real64)

      ! contribution of the ad_st wavefunctions, block by block
      do ib = ad_st%group%block_start, ad_st%group%block_end
        i_minst = states_elec_block_min(ad_st, ib)
        i_maxst = states_elec_block_max(ad_st, ib)
        call zbatch_axpy_function(gr%np, v_mat(i_minst:i_maxst, j), &
          ad_st%group%psib(ib, ik), psi_j)
      end do

      ! contribution of the first nresd wavefunctions of resd; blocks (or the part
      ! of a block) beyond nresd are left out
      do ib = resd%group%block_start, resd%group%block_end
        i_minst = states_elec_block_min(resd, ib)
        i_maxst = min(states_elec_block_max(resd, ib), nresd)
        if (i_minst > nresd) cycle
        call zbatch_axpy_function(gr%np, v_mat(i_minst+nst:i_maxst+nst, j), &
          resd%group%psib(ib, ik), psi_j, nst=i_maxst-i_minst+1)
      end do

      ! reset TDKS state
      call states_elec_set_state(st, gr, ist, ik, psi_j)
    end do

    ! eigenvalues in ascending order, so inverse
    st%occ(1:nst, ik) = occ(nst+nresd:nresd+1:-1)
    pop = pop + sum(st%occ(1:nst, ik)) * st%kweights(ik)

    safe_deallocate_a(psi_j)
    pop_sub_with_profile(update_wfc_occ)
  end subroutine update_wfc_occ


  !> Update states using Procrustes transformation to ensure time continuity.
  !!
  !! Outline of what the routine does:
  !!  1. Build the (nst+nresd) x nst matrix v_mat^H * [overlap_ad_ks; overlap_resd_ks],
  !!     then weight row i by sqrt(occ_tilde(i)) and column j by sqrt(st%occ(j, ik)),
  !!     where occupations are floored at a small positive value before the root.
  !!  2. Take the singular value decomposition of that matrix and form the product
  !!     uu(:, 1:nst) * vt of the returned factors.
  !!  3. Scale column i of v_mat in place by sqrt(occ_tilde(i)) and multiply the
  !!     scaled matrix by the product from step 2.
  !!  4. For every state, accumulate the corresponding linear combination of the
  !!     ad_st and resd wavefunctions, set its occupation to the ratio of its dot
  !!     product with itself to nrm2_tdks, rescale it by 1/sqrt(occupation) and
  !!     store it in st.
  subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
    nrm2_tdks, occ_tilde, v_mat, st, pop)
    integer,                    intent(in)       :: ik                    !< index used to address psib(:, ik), st%occ(:, ik) and st%kweights(ik)
    type(states_elec_t),        intent(in)       :: ad_st                 !< first set of basis states
    type(states_elec_t),        intent(in)       :: resd                  !< second set of basis states
    type(grid_t),               intent(in)       :: gr                    !< grid; gr%np points are used per state
    integer,                    intent(in)       :: nresd                 !< number of states of resd that are used
    complex(real64),            intent(in)       :: overlap_ad_ks(:, :)   !< nst x nst overlap matrix
    complex(real64),            intent(in)       :: overlap_resd_ks(:, :) !< resd%nst x nst overlap matrix
    real(real64),               intent(in)       :: nrm2_tdks(:)          !< per-state reference value dividing the new dot product
    real(real64),               intent(in)       :: occ_tilde(:)          !< occupations paired with the columns of v_mat
    complex(real64),            intent(inout)    :: v_mat(:, :)           !< 2*nst x 2*nst matrix; its columns are rescaled in place
    type(states_elec_t),        intent(inout)    :: st                    !< states and occupations to overwrite
    real(real64),               intent(inout)    :: pop                   !< incremented by (sum of the new dot products) * st%kweights(ik)

    ! occupations below this value are replaced by it before taking the square root
    real(real64), parameter       :: occ_floor = 5.0e-15_real64
    complex(real64), allocatable  :: overlap_procrus(:, :), uproj(:, :), utrans(:, :)
    complex(real64), allocatable  :: uu(:, :), vt(:, :)
    complex(real64), allocatable  :: psi(:, :)
    real(real64)                  :: sigma(1:st%nst), sqrt_occ_new(1:st%nst+nresd), sqrt_occ_old(1:st%nst)
    real(real64)                  :: charge_sum, self_dotp, occ_new
    integer                       :: icol, ib, first_st, last_st, nst, nbasis

    push_sub_with_profile(update_wfc_occ_procrustes)

    nst = ad_st%nst
    nbasis = nst + nresd

    assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
    assert(ubound(overlap_ad_ks, dim = 2) == nst)
    assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
    assert(ubound(overlap_resd_ks, dim = 2) == nst)
    assert(ubound(v_mat, dim = 1) == 2*nst)
    assert(ubound(v_mat, dim = 2) == 2*nst)

    ! square roots of the floored occupations: new ones first, then those currently in st
    sqrt_occ_new(1:nbasis) = sqrt(max(occ_tilde(1:nbasis), occ_floor))
    sqrt_occ_old(1:nst) = sqrt(max(st%occ(1:nst, ik), occ_floor))

    safe_allocate(overlap_procrus(1:nst+nresd, 1:nst))

    ! Procrustes overlap, accumulated with two GEMM calls ('C' = conjugate transpose):
    ! first the rows 1:nst of v_mat against overlap_ad_ks (beta = 0) ...
    call blas_gemm('C', 'N', nbasis, nst, nst, m_z1, v_mat(1, 1), 2*nst, overlap_ad_ks(1, 1), nst, &
      m_z0, overlap_procrus(1, 1), nbasis)
    ! ... then the rows nst+1:nst+nresd against overlap_resd_ks, added on top (beta = 1)
    call blas_gemm('C', 'N', nbasis, nst, nresd, m_z1, v_mat(1+nst, 1), 2*nst, overlap_resd_ks(1, 1), nst, &
      m_z1, overlap_procrus(1, 1), nbasis)

    ! weight rows and columns with the occupation square roots, one column at a time
    do icol = 1, nst
      overlap_procrus(1:nbasis, icol) = overlap_procrus(1:nbasis, icol) * sqrt_occ_new(1:nbasis) * sqrt_occ_old(icol)
    end do

    safe_allocate(uproj(1:nst+nresd, 1:nst))
    safe_allocate(uu(1:nst+nresd, 1:nst+nresd))
    safe_allocate(vt(1:nst, 1:nst))

    call lalg_singular_value_decomp(nbasis, nst, overlap_procrus, uu, vt, sigma)
    uproj = matmul(uu(:,1:nst), vt)

    safe_deallocate_a(overlap_procrus)
    safe_deallocate_a(vt)
    safe_deallocate_a(uu)

    safe_allocate(utrans(1:nst+nresd, 1:nst))

    ! scale each column of v_mat by the matching occupation square root (in place) ...
    do icol = 1, nbasis
      call blas_scal(nbasis, cmplx(sqrt_occ_new(icol), 0.0, real64), v_mat(1, icol), 1)
    end do
    ! ... and form utrans = v_mat(1:nst+nresd, 1:nst+nresd) * uproj
    call blas_gemm('N', 'N', nbasis, nst, nbasis, m_z1, v_mat(1, 1), 2*nst, uproj(1, 1), nbasis, &
      m_z0, utrans(1, 1), nbasis)

    safe_deallocate_a(uproj)
    safe_allocate(psi(1:gr%np, st%d%dim))

    ! rebuild every state and its occupation
    charge_sum = 0.0_real64
    do icol = 1, nst
      psi = m_z0

      ! contribution of the ad_st wavefunctions, block by block
      do ib = ad_st%group%block_start, ad_st%group%block_end
        first_st = states_elec_block_min(ad_st, ib)
        last_st = states_elec_block_max(ad_st, ib)
        call zbatch_axpy_function(gr%np, utrans(first_st:last_st, icol), &
          ad_st%group%psib(ib, ik), psi)
      end do

      ! contribution of the first nresd wavefunctions of resd
      do ib = resd%group%block_start, resd%group%block_end
        first_st = states_elec_block_min(resd, ib)
        if (first_st > nresd) cycle
        last_st = min(states_elec_block_max(resd, ib), nresd)
        call zbatch_axpy_function(gr%np, utrans(first_st+nst:last_st+nst, icol), &
          resd%group%psib(ib, ik), psi, nst=last_st-first_st+1)
      end do

      self_dotp = real(zmf_dotp(gr, st%d%dim, psi, psi, reduce = .true.), real64)
      charge_sum = charge_sum + self_dotp

      ! occupation relative to the reference value, then rescale the state accordingly
      occ_new = self_dotp / nrm2_tdks(icol)
      st%occ(icol, ik) = occ_new
      call lalg_scal(gr%np, st%d%dim, 1.0_real64/sqrt(occ_new), psi)
      call states_elec_set_state(st, gr, icol, ik, psi)
    end do

    pop = pop + charge_sum * st%kweights(ik)

    safe_deallocate_a(utrans)
    safe_deallocate_a(psi)

    pop_sub_with_profile(update_wfc_occ_procrustes)

  end subroutine update_wfc_occ_procrustes


  !> Check if a matrix is Hermitian.
  !!
  !! The result is .true. when the largest modulus among the entries of
  !! mat - mat^H does not exceed a fixed absolute threshold of 1.0e-14.
  logical function is_hermitian(n, mat)
    integer,                 intent(in) :: n         !< expected upper bound of both dimensions of mat
    complex(real64),         intent(in) :: mat(:, :) !< matrix to test
    real(real64), parameter             :: threshold = 1.0e-14_real64

    real(real64) :: largest_deviation

    assert(ubound(mat, dim=1) == n)
    assert(ubound(mat, dim=2) == n)

    ! largest entry-wise distance between the matrix and its conjugate transpose
    largest_deviation = maxval(abs(mat - conjg(transpose(mat))))
    is_hermitian = (largest_deviation <= threshold)
  end function is_hermitian


#ifdef HAVE_MPI

  !> Create an MPI shared-memory window for number_of_elements real32 values and
  !! return a Fortran pointer to it.
  !!
  !! Only rank 0 of intranode_grp requests memory; the other ranks request a
  !! zero-sized segment and then query the segment of rank 0, so that array
  !! refers to the memory of rank 0 on every rank. On return a lock_all
  !! access epoch has been started on the window.
  !!
  !! The element count is validated before the size in bytes is computed, so
  !! the check itself cannot overflow.
  subroutine slmpi_create_shared_memory_window(number_of_elements, intranode_grp, window, array)
    use iso_c_binding
    integer(int64),        intent(in)  :: number_of_elements !< number of real32 elements in the window
    type(mpi_grp_t),       intent(in)  :: intranode_grp      !< group whose communicator shares the window
    type(mpi_win),         intent(out) :: window             !< the created window
    real(real32), pointer, intent(out) :: array(:)           !< pointer of size number_of_elements to the shared segment

    ! size in bytes of one element, also used as displacement unit
    integer, parameter :: real32_bytes = 4

    type(c_ptr) :: ptr
    integer(kind=MPI_ADDRESS_KIND) :: window_size
    integer :: disp_unit

    assert(not_in_openmp())

    ! reject invalid counts on every rank, and make sure that the size in bytes
    ! fits into an address-sized integer without evaluating the product
    assert(number_of_elements >= 0)
    assert(number_of_elements <= huge(0_mpi_address_kind) / 4_mpi_address_kind)

    ! allocate only on rank 0 of each node
    if (intranode_grp%rank == 0) then
      window_size = number_of_elements * int(real32_bytes, mpi_address_kind)
    else
      window_size = 0_mpi_address_kind
    end if
    disp_unit = real32_bytes
    call mpi_win_allocate_shared(window_size, disp_unit, mpi_info_null, &
      intranode_grp%comm, ptr, window)
    ! get pointer on all ranks: the other ranks ask for the segment of rank 0
    if (intranode_grp%rank /= 0) then
      call mpi_win_shared_query(window, 0, window_size, disp_unit, ptr)
    end if
    ! get fortran pointer
    call c_f_pointer(ptr, array, [number_of_elements])

    ! start access epoch
    call mpi_win_lock_all(mpi_mode_nocheck, window)

  end subroutine slmpi_create_shared_memory_window


  !> Broadcast cnt real32 values starting at buf, where cnt is a 64-bit count.
  !!
  !! The data is sent in chunks of at most huge(0_int32) elements, followed by
  !! one broadcast for the remaining elements. The remainder broadcast is
  !! skipped when it is empty (cnt equal to zero or to an exact multiple of the
  !! chunk size), because it would otherwise reference the element one past the
  !! end of the buffer. Nothing is sent when the communicator of mpi_grp is
  !! undefined.
  subroutine smpi_grp_bcast(mpi_grp, buf, cnt, sendtype, root)
    use iso_c_binding
    class(mpi_grp_t),     intent(in)    :: mpi_grp  !< group whose communicator is used
    real(real32), target, intent(inout) :: buf      !< first element of the buffer to broadcast
    integer(int64),       intent(in)    :: cnt      !< number of elements in the buffer
    type(mpi_datatype),   intent(in)    :: sendtype !< MPI datatype passed to the broadcasts
    integer,              intent(in)    :: root     !< rank that owns the data

    ! largest element count passed to a single broadcast
    integer(int32), parameter :: max_chunk = huge(0_int32)

    integer :: rounds, iround, remainder
    integer(int64) :: offset
    real(real32), pointer :: bufptr(:)

    assert(not_in_openmp())

    call mpi_debug_in(mpi_grp%comm, c_mpi_bcast)
    if (mpi_grp%comm /= mpi_comm_undefined) then
      ! need to do the broadcast in rounds that fit into int32 integers
      call c_f_pointer(c_loc(buf), bufptr, [cnt])
      rounds = int(cnt/max_chunk, int32)
      do iround = 1, rounds
        offset = int(max_chunk, int64) * (iround - 1) + 1
        call mpi_bcast(bufptr(offset), max_chunk, sendtype, root, mpi_grp%comm)
      end do
      ! broadcast the remainder, if there is one; every rank gets the same cnt,
      ! so all ranks take the same branch
      remainder = int(mod(cnt, int(max_chunk, int64)), int32)
      if (remainder > 0) then
        offset = int(max_chunk, int64) * rounds + 1
        call mpi_bcast(bufptr(offset), remainder, sendtype, root, mpi_grp%comm)
      end if
    end if
    call mpi_debug_out(mpi_grp%comm, c_mpi_bcast)

  end subroutine smpi_grp_bcast

#endif


end module dm_propagation_oct_m

access
int access(const char *__name, int __type) __attribute__((__nothrow__

blas_oct_m::blas_gemm
--------------— gemm ---------------— performs one of the matrix-matrix operations
Definition: blas.F90:370

blas_oct_m::blas_herk
--------------— syrk, herk ---------------— performs one of the symmetric rank k operations
Definition: blas.F90:490

blas_oct_m::blas_scal
--------------— scal ---------------— Scales a vector by a constant.
Definition: blas.F90:150

global_oct_m::bitand
Definition: global.F90:318

global_oct_m::optional_default
Definition: global.F90:299

lalg_adv_oct_m::lalg_eigensolve
Definition: lalg_adv.F90:188

lalg_adv_oct_m::lalg_geneigensolve
Definition: lalg_adv.F90:180

lalg_adv_oct_m::lalg_singular_value_decomp
Definition: lalg_adv.F90:215

lalg_basic_oct_m::lalg_axpy
constant times a vector plus a vector
Definition: lalg_basic.F90:173

lalg_basic_oct_m::lalg_scal
scales a vector by a constant
Definition: lalg_basic.F90:159

mesh_function_oct_m::zmf_dotp
Definition: mesh_function.F90:195

mesh_function_oct_m::zmf_nrm2
Definition: mesh_function.F90:203

messages_oct_m::messages_print_var_option
Definition: messages.F90:199

messages_oct_m::messages_print_var_value
Prints out to iunit a message in the form: ["InputVariable" = value] where "InputVariable" is given b...
Definition: messages.F90:182

mpi_distribute_oct_m::mpi_displacements
Definition: mpi_distribute.F90:125

parser_oct_m::parse_block_cols
Definition: parser.F90:285

parser_oct_m::parse_block_end
Definition: parser.F90:269

parser_oct_m::parse_block_float
Definition: parser.F90:313

parser_oct_m::parse_variable
Definition: parser.F90:253

states_elec_oct_m::states_elec_get_state
Definition: states_elec.F90:341

states_elec_oct_m::states_elec_set_state
Definition: states_elec.F90:346

unit_oct_m::sqrt
Definition: unit.F90:176

unit_oct_m::units_to_atomic
Definition: unit.F90:168

varinfo_oct_m::varinfo_valid_option
Definition: varinfo.F90:134

exp
double exp(double __x) __attribute__((__nothrow__

batch_ops_oct_m
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118

batch_ops_oct_m::zbatch_axpy_function
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:3000

blas_oct_m
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:120

debug_oct_m
Definition: debug.F90:116

density_oct_m
This module implements a calculator for the density and defines related functions.
Definition: density.F90:122

density_oct_m::density_calc
subroutine, public density_calc(st, gr, density, istin)
Computes the density from the orbitals in st.
Definition: density.F90:653

distributed_oct_m
Definition: distributed.F90:116

dm_propagation_oct_m
Definition: dm_propagation.F90:115

dm_propagation_oct_m::lifetime
subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
Calculate the total scattering rate (inverse lifetime) for a given state.
Definition: dm_propagation.F90:1690

dm_propagation_oct_m::iopar_close_trans_rate
subroutine iopar_close_trans_rate(system_grp, dmp)
Finalize transition rate resources.
Definition: dm_propagation.F90:1470

dm_propagation_oct_m::lindblad_from_epw
subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix under EPW-derived Lindblad dissipation.
Definition: dm_propagation.F90:1178

dm_propagation_oct_m::lindblad_uniform
subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix in time under uniform dissipation.
Definition: dm_propagation.F90:978

dm_propagation_oct_m::dm_propagation_init_run
subroutine, public dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
Initialise the adiabatic states prior to running TD propagation.
Definition: dm_propagation.F90:414

dm_propagation_oct_m::lindblad_operator_uniform
subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
Calculate the Lindblad dissipator matrix for uniform decay.
Definition: dm_propagation.F90:1095

dm_propagation_oct_m::get_vector_field_index
integer function get_vector_field_index(dmp, hm, namespace)
Get the flattened 1D index of the current vector potential on the discrete EPW vector field grid.
Definition: dm_propagation.F90:1542

dm_propagation_oct_m::get_trans_rate
real(real64) function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block)
Get transition rate from state (k, ibnd) to (kq, jbnd).
Definition: dm_propagation.F90:1597

dm_propagation_oct_m::lindblad_operator_epw
subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
Calculate the Lindblad dissipator matrix using EPW electron-phonon scattering rates.
Definition: dm_propagation.F90:1640

dm_propagation_oct_m::dm_end_run
subroutine, public dm_end_run(system_grp, dmp)
Definition: dm_propagation.F90:467

dm_propagation_oct_m::is_hermitian
logical function is_hermitian(n, mat)
Check if a matrix is Hermitian.
Definition: dm_propagation.F90:2040

dm_propagation_oct_m::iopar_read_trans_rate
subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
Read in transition rates to the shared memory window and then broadcast via internode communicator.
Definition: dm_propagation.F90:1441

dm_propagation_oct_m::dmp_init
subroutine dmp_init(this, namespace, st, space, hm)
Initialise an instance of density matrix dissipation.
Definition: dm_propagation.F90:199

dm_propagation_oct_m::build_epw_kmap
subroutine build_epw_kmap(namespace, kpoints, dmp)
Map internal k-point indices to the 1D EPW Monkhorst-Pack grid and verify mesh compatibility.
Definition: dm_propagation.F90:1494

dm_propagation_oct_m::collision_from_epw
subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix subject to the electron-phonon collision integral.
Definition: dm_propagation.F90:1267

dm_propagation_oct_m::broadcast_occupation
subroutine broadcast_occupation(occ, kpt, nst, parstate)
Definition: dm_propagation.F90:887

dm_propagation_oct_m::total_population
subroutine total_population(ik, st, gr, nrm2, pop)
Calculate total population.
Definition: dm_propagation.F90:603

dm_propagation_oct_m::update_wfc_occ_procrustes
subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, occ_tilde, v_mat, st, pop)
Update states using Procrustes transformation to ensure time continuity.
Definition: dm_propagation.F90:1930

dm_propagation_oct_m::get_gamma
subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
Calculate in/out scattering rates (Gamma) for a specific state (ik, ibnd).
Definition: dm_propagation.F90:1730

dm_propagation_oct_m::update_wfc_occ
subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
Update states directly from diagonalization (no Procrustes).
Definition: dm_propagation.F90:1865

dm_propagation_oct_m::construct_residuals
subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
Construct the residual basis and its overlap with TDKS wavefunctions.
Definition: dm_propagation.F90:679

dm_propagation_oct_m::dm_propagation_run
subroutine, public dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
Density matrix propagation.
Definition: dm_propagation.F90:487

dm_propagation_oct_m::dm_propagation_update_trans_rate
subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
Read and update the EPW transition matrix only when the vector field index changes.
Definition: dm_propagation.F90:1334

dm_propagation_oct_m::transition_rate_uniform
subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
Calculate state transition rates assuming uniform electron-phonon coupling.
Definition: dm_propagation.F90:1044

dm_propagation_oct_m::construct_density_matrix
subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
Construct the full density matrix in the adiabatic and residual basis.
Definition: dm_propagation.F90:807

dm_propagation_oct_m::lindblad_2times
subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix using the phenomenological two-time (T1/T2) relaxation model.
Definition: dm_propagation.F90:1137

dm_propagation_oct_m::iopar_open_trans_rate
subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)
Read in metadata of transition rates, build intra/inter communicators and shared memory window.
Definition: dm_propagation.F90:1356

dm_propagation_oct_m::dissipation
subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
Evolve the density matrix in time under dissipation.
Definition: dm_propagation.F90:941

dm_propagation_oct_m::update_st
subroutine update_st(dmp, ik, gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
Diagonalize the density matrix to update occupations and wavefunctions.
Definition: dm_propagation.F90:1777

dm_propagation_oct_m::population_in_adiabatic
subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
Calculate number of electrons in the adiabatic basis.
Definition: dm_propagation.F90:635

eigensolver_oct_m
Definition: eigensolver.F90:116

eigensolver_oct_m::eigensolver_init
subroutine, public eigensolver_init(eigens, namespace, gr, st, hm, mc, space, deactivate_oracle)
Definition: eigensolver.F90:229

eigensolver_oct_m::eigensolver_end
subroutine, public eigensolver_end(eigens)
Definition: eigensolver.F90:617

electron_space_oct_m
Definition: electron_space.F90:116

electron_space_oct_m::spin_polarized
integer, parameter, public spin_polarized
Definition: electron_space.F90:141

global_oct_m
Definition: global.F90:116

global_oct_m::m_two
real(real64), parameter, public m_two
Definition: global.F90:202

global_oct_m::m_zero
real(real64), parameter, public m_zero
Definition: global.F90:200

global_oct_m::p_ry
real(real64), parameter, public p_ry
Definition: global.F90:239

global_oct_m::not_in_openmp
logical pure function, public not_in_openmp()
Definition: global.F90:566

global_oct_m::m_z0
complex(real64), parameter, public m_z0
Definition: global.F90:210

global_oct_m::m_epsilon
real(real64), parameter, public m_epsilon
Definition: global.F90:216

global_oct_m::m_z1
complex(real64), parameter, public m_z1
Definition: global.F90:211

global_oct_m::m_half
real(real64), parameter, public m_half
Definition: global.F90:206

global_oct_m::m_one
real(real64), parameter, public m_one
Definition: global.F90:201

grid_oct_m
This module implements the underlying real-space grid.
Definition: grid.F90:119

hamiltonian_elec_oct_m
Definition: hamiltonian_elec.F90:117

interaction_partner_oct_m
This module defines classes and functions for interaction partners.
Definition: interaction_partner.F90:108

ions_oct_m
Definition: ions.F90:117

kpoints_oct_m
Definition: kpoints.F90:116

kpoints_oct_m::kpoints_monkh_pack
integer, parameter, public kpoints_monkh_pack
Definition: kpoints.F90:223

kpoints_oct_m::kpoints_to_reduced
subroutine, public kpoints_to_reduced(latt, kin, kout)
Definition: kpoints.F90:1150

lalg_adv_oct_m
Definition: lalg_adv.F90:116

lalg_basic_oct_m
Definition: lalg_basic.F90:116

mesh_batch_oct_m
This module defines functions over batches of mesh functions.
Definition: mesh_batch.F90:118

mesh_batch_oct_m::mesh_batch_nrm2
subroutine, public mesh_batch_nrm2(mesh, aa, nrm2, reduce)
Calculate the norms (norm2, not the square!) of a batch of mesh functions.
Definition: mesh_batch.F90:178

mesh_batch_oct_m::zmesh_batch_mf_dotp
subroutine, public zmesh_batch_mf_dotp(mesh, aa, psi, dot, reduce, nst)
calculate the dot products between a batch and a vector of mesh functions
Definition: mesh_batch.F90:2347

mesh_function_oct_m
This module defines various routines, operating on mesh functions.
Definition: mesh_function.F90:118

messages_oct_m
Definition: messages.F90:117

messages_oct_m::messages_not_implemented
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1068

messages_oct_m::messages_warning
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:525

messages_oct_m::message
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162

messages_oct_m::messages_fatal
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410

messages_oct_m::messages_input_error
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691

messages_oct_m::messages_info
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594

mpi_distribute_oct_m
Definition: mpi_distribute.F90:115

mpi_lib_oct_m
This module contains some common usage patterns of MPI routines.
Definition: mpi_lib.F90:117

mpi_oct_m
Definition: mpi.F90:116

mpi_oct_m::mpi_comm_undefined
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:138

multicomm_oct_m
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147

namespace_oct_m
Definition: namespace.F90:105

parser_oct_m
Definition: parser.F90:116

parser_oct_m::parse_block
integer function, public parse_block(namespace, name, blk, check_varinfo_)
Definition: parser.F90:623

profiling_oct_m
Definition: profiling.F90:118

restart_oct_m
Definition: restart.F90:119

restart_oct_m::restart_dm
integer, parameter, public restart_dm
Definition: restart.F90:156

restart_oct_m::restart_gs
integer, parameter, public restart_gs
Definition: restart.F90:156

restart_oct_m::restart_type_dump
integer, parameter, public restart_type_dump
Definition: restart.F90:184

restart_oct_m::restart_type_load
integer, parameter, public restart_type_load
Definition: restart.F90:184

space_oct_m
Definition: space.F90:116

states_elec_calc_oct_m
Definition: states_elec_calc.F90:116

states_elec_calc_oct_m::zstates_elec_calc_projections
subroutine, public zstates_elec_calc_projections(st, gs_st, namespace, mesh, ik, proj, gs_nst)
This routine computes the projection between two set of states.
Definition: states_elec_calc.F90:3531

states_elec_calc_oct_m::zstates_elec_orthogonalize_single_batch
subroutine, public zstates_elec_orthogonalize_single_batch(st, mesh, nst, iqn, phi, normalize, mask, overlap, norm, Theta_fi, beta_ij, against_all)
orthogonalize a single wave function against a set of states
Definition: states_elec_calc.F90:2697

states_elec_oct_m
Definition: states_elec.F90:115

states_elec_oct_m::states_elec_block_max
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
Definition: states_elec.F90:2672

states_elec_oct_m::states_elec_end
subroutine, public states_elec_end(st)
finalize the states_elec_t object
Definition: states_elec.F90:1620

states_elec_oct_m::states_elec_copy
subroutine, public states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval, special)
make a (selective) copy of a states_elec_t object
Definition: states_elec.F90:1515

states_elec_oct_m::states_elec_block_min
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
Definition: states_elec.F90:2662

states_elec_restart_oct_m
This module handles reading and writing restart information for the states_elec_t.
Definition: states_elec_restart.F90:118

states_elec_restart_oct_m::states_elec_load
subroutine, public states_elec_load(restart, namespace, space, st, mesh, kpoints, fixed_occ, ierr, iter, lr, lowest_missing, label, verbose, skip)
returns in ierr: <0 => Fatal error, or nothing read =0 => read all wavefunctions >0 => could only rea...
Definition: states_elec_restart.F90:461

unit_oct_m
This module defines the class unit_t which is used by the unit_system_oct_m module.
Definition: unit.F90:134

unit_oct_m::units_abbrev
character(len=20) pure function, public units_abbrev(this)
Definition: unit.F90:225

unit_system_oct_m
This module defines the unit system, used for input and output.
Definition: unit_system.F90:128

unit_system_oct_m::units_out
type(unit_system_t), public units_out
Definition: unit_system.F90:163

unit_system_oct_m::unit_kelvin
type(unit_t), public unit_kelvin
For converting energies into temperatures.
Definition: unit_system.F90:172

unit_system_oct_m::units_inp
type(unit_system_t), public units_inp
the units systems for reading and writing
Definition: unit_system.F90:163

v_ks_oct_m
Definition: v_ks.F90:116

v_ks_oct_m::v_ks_calc
subroutine, public v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval, time, calc_energy, calc_current, force_semilocal)
Definition: v_ks.F90:708

varinfo_oct_m
Definition: varinfo.F90:116

distributed_oct_m::distributed_t
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Definition: distributed.F90:150

dm_propagation_oct_m::dmp_t
Definition: dm_propagation.F90:161

eigensolver_oct_m::eigensolver_t
Definition: eigensolver.F90:171

electron_space_oct_m::electron_space_t
Extension of space that contains the knowledge of the spin dimension.
Definition: electron_space.F90:134

grid_oct_m::grid_t
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171

hamiltonian_elec_oct_m::hamiltonian_elec_t
Definition: hamiltonian_elec.F90:226

interaction_partner_oct_m::partner_list_t
the list of partners
Definition: interaction_partner.F90:183

ions_oct_m::ions_t
Definition: ions.F90:164

kpoints_oct_m::kpoints_t
Definition: kpoints.F90:182

mpi_oct_m::mpi_grp_t
This is defined even when running serial.
Definition: mpi.F90:144

multicomm_oct_m::multicomm_t
Stores all communicators and groups.
Definition: multicomm.F90:208

namespace_oct_m::namespace_t
Definition: namespace.F90:117

restart_oct_m::restart_t
Definition: restart.F90:253

states_elec_oct_m::states_elec_t
The states_elec_t class contains all electronic wave functions.
Definition: states_elec.F90:229

v_ks_oct_m::v_ks_t
Definition: v_ks.F90:223

true
int true(void)
Definition: symmetries_finite.c:3150