Octopus
dm_propagation.F90
Go to the documentation of this file.
1!! Copyright (C) 2024 - 2025 S. Pal, Z. Nie, U. De Giovannini
2!! This program is free software; you can redistribute it and/or modify
3!! it under the terms of the GNU General Public License as published by
4!! the Free Software Foundation; either version 2, or (at your option)
5!! any later version.
6!!
7!! This program is distributed in the hope that it will be useful,
8!! but WITHOUT ANY WARRANTY; without even the implied warranty of
9!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10!! GNU General Public License for more details.
11!!
12!! You should have received a copy of the GNU General Public License
13!! along with this program; if not, write to the Free Software
14!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15!! 02110-1301, USA.
16!!
17
18#include "global.h"
19
22 use blas_oct_m
23 use debug_oct_m
28 use global_oct_m
29 use grid_oct_m
32 use ions_oct_m
33 use, intrinsic :: iso_fortran_env
42 use mpi_oct_m
45 use parser_oct_m
48 use space_oct_m
52 use unit_oct_m
54 use v_ks_oct_m
56
57 implicit none
58
59 private
60 public :: &
61 dmp_t, &
65
66 type dmp_t
67 integer :: calculation_mode
68 integer :: basis
69 logical :: unitary_transform
70 type(states_elec_t) :: adiabatic_st
71 integer :: strategy
72 logical :: othn
73 type(restart_t) :: restart_dump
74 ! 2-Times Model
75 real(real64) :: tmodel(2)
76 real(real64), allocatable :: occ_gs(:, :)
77 ! Uniform Decay
78 real(real64) :: uniform(2)
79 ! EPW Data Parsing and Shared Memory Configuration
80 character(len=256) :: epw_file
81 integer :: iunit
82 integer :: istart, iend, wnst
83 integer :: ia
84 integer :: na(3)
85 real(real64) :: astep(3)
86 integer(int64) :: num
87 integer, allocatable :: kmap(:)
88 type(mpi_grp_t) :: intranode_grp, internode_grp
89#ifdef HAVE_MPI
90 type(MPI_Win) :: window_trans_rate
91#endif
92 contains
93 procedure :: init => dmp_init
94 procedure :: update_trans_rate => dm_propagation_update_trans_rate
95 end type dmp_t
96
97 ! Defined separately because the VOLATILE attribute is not allowed for derived-type components.
98 real(real32), pointer, volatile :: ave_trans(:)
99
100contains
101
103 subroutine dmp_init(this, namespace, st, space, hm)
104 class(dmp_t), intent(inout) :: this
105 type(namespace_t), intent(in) :: namespace
106 type(states_elec_t), intent(in) :: st
107 class(space_t), intent(in) :: space
108 type(hamiltonian_elec_t), intent(in) :: hm
109
110 type(block_t) :: blk
111 integer :: ncols, nempty
112 real(real64) :: nempty_percent
113
114 push_sub(dmp_init)
116 !%Variable TDDMPropagationBasis
117 !%Type integer
118 !%Default Adiabatic
119 !%Section Time-Dependent
120 !%Description
121 !% Decides the basis set for the density matrix propagation.
122 !%Option Adiabatic 01
123 !% Instantaneous eigenstates of the Hamiltonian.
124 !%Option Groundstate 02
125 !% Eigenstates of the Hamiltonian at t=0.
126 !%End
127 call parse_variable(namespace, 'TDDMPropagationBasis', option__tddmpropagationbasis__adiabatic, this%basis)
128 if (.not. varinfo_valid_option('TDDMPropagationBasis', this%basis)) then
129 call messages_input_error(namespace, 'TDDMPropagationBasis')
130 endif
131 call messages_print_var_option('TDDMPropagationBasis', this%basis, namespace=namespace)
132
133 !%Variable TDDMOrthogonal
134 !%Type logical
135 !%Default no
136 !%Section Time-Dependent
137 !%Description
138 !% Use a fully orthonormalized basis when constructing and
139 !% damping the density matrix.
140 !%End
141 call parse_variable(namespace, 'TDDMOrthogonal', .false., this%othn)
142 call messages_print_var_value('TDDMOrthogonal', this%othn, namespace=namespace)
143
144 !%Variable TDDMUnitaryTransformFix
145 !%Type logical
146 !%Default yes
147 !%Section Time-Dependent
148 !%Description
149 !% Applies an additional unitary transformation to the damped wavefunctions
150 !% to maximize their overlap with the reference wavefunctions after damping.
151 !%End
152 call parse_variable(namespace, 'TDDMUnitaryTransformFix', .true., this%unitary_transform)
153 call messages_print_var_value('TDDMUnitaryTransformFix', this%unitary_transform, namespace=namespace)
154
155 this%strategy = -1
156
157 !%Variable TDDMPropagation_uniform_decay
158 !%Type block
159 !%Section Time-Dependent
160 !%Description
161 !% The intra-k transition rates between any pair of states are taken to be constant.
162 !% The population dynamics are therefore determined solely by the state occupations
163 !% and the bath temperature, following Fermi’s golden rule.
164 !% The first column of the block specifies the characteristic lifetime, and the
165 !% second column gives the bath temperature (in Kelvin).
166 !% <tt>%TDDMPropagation_uniform_decay
167 !% <br>&nbsp;&nbsp; Time | Temp
168 !% <br>%</tt>
169 !%End
170 if (parse_block(namespace, 'TDDMPropagation_uniform_decay', blk) == 0) then
171 if (this%strategy /= -1) then
172 message(1) = "Multiple dissipation strategies are not allowed."
173 call messages_fatal(1, namespace=namespace)
174 end if
175 this%strategy = 1
176 ncols = parse_block_cols(blk, 0)
177 if (ncols == 2) then
178 call parse_block_float(blk, 0, 0, this%uniform(1), units_inp%time)
179 call parse_block_float(blk, 0, 1, this%uniform(2))
182 message(1) = "Info: TDDMPropagation uniform decay approximation:"
183 write(message(2),'(a, f0.2, 1x, 2a, F0.2, 1x, 2a)') ' [lifetime, temperature] = [', &
184 this%uniform(1), trim(units_abbrev(units_out%time)), ', ', this%uniform(2), trim(units_abbrev(unit_kelvin)), ']'
185 call messages_info(2, namespace=namespace)
186 else
187 message(1) = "Input: TDDMPropagation_uniform_decay block must have 2 columns."
188 call messages_fatal(1, namespace=namespace)
189 end if
190 end if
191
192 !%Variable TDDMPropagation_2Times
193 !%Type block
194 !%Section Time-Dependent
195 !%Description
196 !% Two times approximation for the jump operator in the master equation.
197 !% S. A. Sato <i>et al.</i>, <i>Phys. Rev. B</i> 99, 214302 (2019).
198 !%
199 !% <tt>%TDDMPropagation_2Times
200 !% <br>&nbsp;&nbsp; t1 | t2
201 !% <br>%</tt>
202 !%End
203 if (parse_block(namespace, 'TDDMPropagation_2Times', blk) == 0) then
204 if (this%strategy /= -1) then
205 message(1) = "Multiple dissipation strategies are not allowed."
206 call messages_fatal(1, namespace=namespace)
207 end if
208 this%strategy = 2
209 ncols = parse_block_cols(blk, 0)
210 if (ncols == 2) then
211 call parse_block_float(blk, 0, 0, this%tmodel(1), units_inp%time)
212 call parse_block_float(blk, 0, 1, this%tmodel(2), units_inp%time)
213 call parse_block_end(blk)
214
215 message(1) = "Info: TDDMPropagation 2-times approximation:"
216 write(message(2),'(a, f12.6, a, f12.6, a,a)') &
217 ' [t1, t2] = [', this%tmodel(1) , ', ', this%tmodel(2), '] ', &
218 trim(units_abbrev(units_out%time))
219 call messages_info(2, namespace=namespace)
220 else
221 message(1) = "Input: TDDMPropagation_2Times block must have 2 columns."
222 call messages_fatal(1, namespace=namespace)
223 end if
224 ! for two times model, only orthonormalized basis is allowed
225 if (.not. this%othn) then
226 this%othn = .true.
227 message(1) = "Overriding input: TDDMOrthogonal set to yes for TDDMPropagation_2Times."
228 call messages_warning(1, namespace=namespace)
229 end if
230 end if
231
232 !%Variable TDDMPropagation_from_epw
233 !%Type string
234 !%Default "-"
235 !%Section Time-Dependent
236 !%Description
237 !% Specifies the transition-rate file generated by EPW. Once loaded, the
238 !% simulation includes all intra-k and inter-k scattering processes, with
239 !% the electron dynamics governed by Fermi’s Golden Rule. This option
240 !% requires EPWBandLowest to be set.
241 !%End
242 call parse_variable(namespace, 'TDDMPropagation_from_epw', '-', this%epw_file)
243 if (trim(this%epw_file) /= '-') then
244 if (bitand(hm%kpoints%method, kpoints_monkh_pack) == 0) then
245 write(message(1),'(a)') 'Only Monkhorst-Pack k-point meshes are supported in the EPW-DMPropagation strategy.'
246 call messages_fatal(1, namespace = namespace)
247 end if
248 if (hm%kpoints%reduced%nshifts /= 1) then
249 write(message(1),'(a)') 'Multiple Monkhorst-Pack shifts are not supported in the EPW-DMPropagation strategy.'
250 call messages_fatal(1, namespace = namespace)
251 end if
252 if (space%dim /= 3) then
253 write(message(1),'(a)') 'Only 3D systems are supported in the EPW-DMPropagation strategy.'
254 call messages_fatal(1, namespace = namespace)
255 end if
256
257 if (this%strategy /= -1) then
258 message(1) = "Multiple dissipation strategies are not allowed."
259 call messages_fatal(1, namespace=namespace)
260 end if
261 this%strategy = 3
262 write(message(1),'(a, a)') "Info: TDDMPropagation transition rates from file: ", trim(this%epw_file)
263 call messages_info(1, namespace=namespace)
264 end if
265
266 !%Variable EPWBandLowest
267 !%Type integer
268 !%Default -1
269 !%Section Time-Dependent
270 !%Description
271 !% The starting DFT band index used for damping.
272 !%
273 !% Since EPW transition rates are calculated for a subset of DFT bands,
274 !% this variable tells the code which global DFT band corresponds
275 !% to damping band #1.
276 !%
277 !% Example: If you damp DFT bands 10 through 15, set EPWBandLowest = 10.
278 !%End
279 if (this%strategy == 3) then
280 call parse_variable(namespace, 'EPWBandLowest', 0, this%istart)
281 if (.NOT. (this%istart > 0)) then
282 write(message(1), '(a)') 'EPWBandLowest must be specified when TDDMPropagation_from_epw is enabled.'
283 call messages_fatal(1, namespace=namespace)
284 end if
285 call messages_print_var_value('EPWBandLowest', this%istart, namespace=namespace)
286 end if
287
288 ! Sanity checks
289 if (this%calculation_mode == option__tddmpropagation__collision_integral .and. this%strategy /= 3) then
290 message(1) = "Warning: TDDMPropagation_from_epw is required for collision integral."
291 message(2) = " Add TDDMPropagation_from_epw and rerun."
292 call messages_fatal(2, namespace=namespace)
293 end if
294
295 call parse_variable(namespace, 'ExtraStates', 0, nempty)
296 call parse_variable(namespace, 'ExtraStatesInPercent', m_zero, nempty_percent)
297 if (nempty == 0 .and. nempty_percent < m_epsilon) then
298 message(1) = "Warning: TDDMPropagation requires a number of empty states."
299 message(2) = " Add ExtraStates and rerun."
300 call messages_fatal(2, namespace=namespace)
301 end if
302
303 if (st%parallel_in_states) then
304 message(1) = "Warning: TDDMPropagation does not support parallel states."
305 message(2) = " Remove ParallelStates and rerun."
306 call messages_fatal(2, namespace=namespace)
307 end if
308
309 if (st%d%ispin == spin_polarized .and. this%strategy == 3) then
310 call messages_not_implemented('Spin-polarized TDDMPropagation with EPW', namespace=namespace)
311 end if
312
313 pop_sub(dmp_init)
314
315 end subroutine dmp_init
316
318 subroutine dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
319 type(dmp_t), intent(inout) :: dmp
320 type(namespace_t), intent(in) :: namespace
321 type(electron_space_t), intent(in) :: space
322 type(grid_t), intent(in) :: gr
323 type(ions_t), intent(in) :: ions
324 type(states_elec_t), intent(in) :: st
325 type(hamiltonian_elec_t), intent(in) :: hm
326 type(multicomm_t), intent(in) :: mc
327 logical, intent(in) :: from_scratch
328
329 integer :: ierr
330 type(restart_t) :: restart_load
331
333
334 ! By default, wavefunctions are copied. In principle, exclude_eigenval should be true.
335 ! But we need its allocation
336 call states_elec_copy(dmp%adiabatic_st, st, special=.true.)
337
338 if (from_scratch .or. dmp%basis == option__tddmpropagationbasis__groundstate) then
339 call restart_load%init(namespace, restart_gs, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
340 else
341 call restart_load%init(namespace, restart_dm, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
342 end if
343
344 if (ierr == 0) then
345 call states_elec_load(restart_load, namespace, space, dmp%adiabatic_st, gr, hm%kpoints, fixed_occ=st%restart_fixed_occ, &
346 ierr=ierr, label = ": DM-basis")
347 else
348 message(1) = 'Unable to read DM-basis wavefunctions.'
349 call messages_fatal(1, namespace=namespace)
350 end if
351
352 call restart_load%end()
353
354 ! TDDMPropagation_2Times
355 if (dmp%strategy == 2) then
356 safe_allocate(dmp%occ_gs(1:st%nst, 1:st%nik))
357 dmp%occ_gs = dmp%adiabatic_st%occ
358 end if
359
360 ! dmp%ave_trans: replicated on each node, shared within the node
361 if (dmp%strategy == 3) call iopar_open_trans_rate(namespace, ions, hm, st%system_grp, dmp)
362
363 ! record only adiabatic states
364 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
365 call dmp%restart_dump%init(namespace, restart_dm, restart_type_dump, mc, ierr, mesh=gr)
366 end if
367
369 end subroutine dm_propagation_init_run
370
371 subroutine dm_end_run(system_grp, dmp)
372 type(mpi_grp_t), intent(in) :: system_grp
373 type(dmp_t), intent(inout) :: dmp
374
375 push_sub(dm_end_run)
376
377 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
378 call dmp%restart_dump%end()
379 end if
380
381 call states_elec_end(dmp%adiabatic_st)
382
383 if (dmp%strategy == 3) call iopar_close_trans_rate(system_grp, dmp)
384
385 safe_deallocate_a(dmp%occ_gs)
386
387 pop_sub(dm_end_run)
388 end subroutine dm_end_run
389
391 subroutine dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
392 type(dmp_t), intent(inout) :: dmp
393 type(namespace_t), intent(in) :: namespace
394 type(electron_space_t), intent(in) :: space
395 type(grid_t), intent(in) :: gr
396 type(ions_t), intent(in) :: ions
397 type(states_elec_t), intent(inout) :: st
398 type(multicomm_t), intent(in) :: mc
399 type(hamiltonian_elec_t), intent(inout) :: hm
400 type(v_ks_t), intent(inout) :: ks
401 integer, intent(in) :: iter
402 real(real64), intent(in) :: dt
403 type(partner_list_t), intent(in) :: ext_partners
404 logical, optional,intent(in) :: update_energy
405
406 real(real64), parameter :: ZERO = 1.0e-15_real64
407 type(eigensolver_t) :: eigens
408 real(real64) :: nrm2_tdks(st%nst, st%nik), population(3), leak
409 integer :: ik
410 logical :: update_energy_
411 complex(real64), allocatable :: rho_mat_k(:, :, :)
412 type(states_elec_t) :: resd_st
413 complex(real64), allocatable :: overlap_ad_ks(:, :, :), overlap_resd_ks(:, :, :)
414 integer, allocatable :: nresd_k(:)
415 !*
416 push_sub_with_profile(dm_propagation_run)
417
418 ! Update the hamiltonian. hm is updated after each td propagation.
419 ! But ions may move afterthen
420 call hm%update(gr, namespace, space, ext_partners, time=iter * dt)
421 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
422 call eigensolver_init(eigens, namespace, gr, dmp%adiabatic_st, hm, mc, space)
423 eigens%converged = 0
424 call eigens%run(namespace, gr, dmp%adiabatic_st, hm, space, ext_partners, iter)
425 end if
426
427 safe_allocate(overlap_ad_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
428 safe_allocate(overlap_resd_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
429 safe_allocate(rho_mat_k(2*st%nst, 2*st%nst, st%d%kpt%start:st%d%kpt%end))
430 safe_allocate(nresd_k(st%d%kpt%start:st%d%kpt%end))
431
432 population = 0.0_real64
433
434 ! Create a copy of the TDKS states to initialise the residuals
435 call states_elec_copy(resd_st, st, exclude_eigenval = .true.)
436
437 ! Store all density matrices for potential future communication
438 do ik = st%d%kpt%start, st%d%kpt%end
439 ! pop = \sum_{i,k} f_{i,k} <\psi_{i,k} | \psi_{i,k}>
440 call total_population(ik, st, gr, nrm2_tdks(:, ik), population(1))
441 ! S^{\phi,\psi}_{ij} = <\phi_i | \psi_j>. one more conjugation is required for this function
442 call zstates_elec_calc_projections(st, dmp%adiabatic_st, namespace, gr, ik, overlap_ad_ks(:, :, ik))
443 overlap_ad_ks(:, :, ik) = conjg(overlap_ad_ks(:, :, ik))
444 ! population before damping, set occupation as well
445 call population_in_adiabatic(ik, dmp%adiabatic_st, st, overlap_ad_ks(:, :, ik), population(2))
446
447 ! residual |d_j> = |\psi_j> - \sum_i S^{\phi,\psi}_{ij} |\phi_i>
448 ! S^{d,\psi}_{ij} = <d_i | \psi_j>
449 call construct_residuals(gr, namespace, dmp%adiabatic_st, st, ik, dmp%othn, overlap_ad_ks(:, :, ik), &
450 nrm2_tdks(:, ik), nresd_k(ik), overlap_resd_ks(:, :, ik), resd_st)
451
452 ! Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
453 call construct_density_matrix(nresd_k(ik), ik, st, overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), rho_mat_k(:, :, ik))
454 end do
455
456 ! broadcast the instanteous occupation for Pauli blocking
457 call broadcast_occupation(dmp%adiabatic_st%occ, dmp%adiabatic_st%d%kpt, dmp%adiabatic_st%nst, st%parallel_in_states)
458
459 ! dissipate the density matrix
460 call dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
461
462 do ik = st%d%kpt%start, st%d%kpt%end
463 ! reconstruct wavefunctions. Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
464 call update_st(dmp, ik, gr, namespace, nresd_k(ik), overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), nrm2_tdks(:, ik), &
465 resd_st, st, rho_mat_k(:, :, ik), population(3))
466 end do
467
468 call states_elec_end(resd_st)
469
470 safe_deallocate_a(overlap_ad_ks)
471 safe_deallocate_a(overlap_resd_ks)
472
473 call broadcast_occupation(st%occ, st%d%kpt, st%nst, st%parallel_in_states)
474
475 safe_deallocate_a(rho_mat_k)
476 safe_deallocate_a(nresd_k)
477
478 call st%d%kpt%mpi_grp%allreduce_inplace(population, 3, mpi_double_precision, mpi_sum)
479
480 write(message(1), '(a,E21.14,a,E21.14,a,E21.14,a)') 'DMPopulation: ', population(1), &
481 ' (', population(2), ' in basis ) before damping; ', population(3), ' after damping'
482 call messages_info(1)
483
484 leak = population(1) - population(3)
485 if (abs(leak) > zero) then
486 write(message(1), '(a,E15.8,a,I8)') 'Leakage: ', leak, '(after damping) at time step', iter
487 call messages_info(1)
488 end if
489 dmp%adiabatic_st%qtot = population(1)
490
491 ! renew hamiltonian
492 call density_calc(st, gr, st%rho)
493 update_energy_ = optional_default(update_energy, .true.)
494 call v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval = update_energy_, &
495 time = abs(iter * dt), calc_energy = update_energy_)
496
497 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
498 call eigensolver_end(eigens)
499 end if
500
501 pop_sub_with_profile(dm_propagation_run)
502
503 end subroutine dm_propagation_run
504
507 subroutine total_population(ik, st, gr, nrm2, pop)
508 integer, intent(in) :: ik
509 type(states_elec_t), intent(in) :: st
510 type(grid_t), intent(in) :: gr
511 real(real64), intent(out) :: nrm2(:)
512 real(real64), intent(inout) :: pop
513
514 integer :: ib, i_minst, i_maxst
515
516 push_sub(total_population)
517
518 assert(pop >= 0.0_real64)
519 assert(size(nrm2) == st%nst)
520
521 do ib = st%group%block_start, st%group%block_end
522 i_minst = states_elec_block_min(st, ib)
523 i_maxst = states_elec_block_max(st, ib)
524 call mesh_batch_nrm2(gr, st%group%psib(ib, ik), nrm2(i_minst:i_maxst), reduce = .false.)
525 end do
526 nrm2(1:st%nst) = nrm2(1:st%nst)**2
527 if (gr%parallel_in_domains) then
528 call gr%allreduce(nrm2, dim = st%nst)
529 end if
530
531 pop = pop + sum(st%occ(1:st%nst, ik)* nrm2(1:st%nst)) * st%kweights(ik)
532
533 pop_sub(total_population)
534
535 end subroutine total_population
536
539 subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
540 integer, intent(in) :: ik
541 type(states_elec_t), intent(inout) :: ad_st
542 type(states_elec_t), intent(in) :: st
543 complex(real64), intent(in) :: overlap(:, :)
544 real(real64), intent(inout) :: pop
545
546 integer :: ist, jst
547
549
550 assert(ubound(overlap, dim = 1) == ad_st%nst)
551 assert(ubound(overlap, dim = 2) == st%nst)
552
553 ! Assign ad_st occupation and store in dm_proj_basis (also used for hopping rate calculation)
554 ad_st%occ(1:ad_st%nst, ik) = 0.0_real64
555 do jst = 1, st%nst
556 do ist = 1, ad_st%nst
557 ad_st%occ(ist, ik) = ad_st%occ(ist, ik) + &
558 (real(overlap(ist, jst))**2 + aimag(overlap(ist, jst))**2) * st%occ(jst, ik)
559 end do
560 end do
561
562 pop = pop + sum(ad_st%occ(:, ik)) * ad_st%kweights(ik)
563
565
566 end subroutine population_in_adiabatic
567
583 subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
584 type(grid_t), intent(in) :: gr
585 type(namespace_t), intent(in) :: namespace
586 type(states_elec_t), intent(in) :: ad_st
587 type(states_elec_t), intent(in) :: st
588 integer, intent(in) :: ik
589 logical, intent(in) :: othn
590 complex(real64), intent(in) :: overlap_ad_ks(:, :)
591 real(real64), intent(in) :: nrm2_tdks(:)
592 integer, intent(out) :: nresd
593 complex(real64), intent(out) :: overlap_resd_ks(:, :)
594 type(states_elec_t), intent(inout) :: resd
595
596 integer :: ib, j, i_minst, i_maxst
597 complex(real64), allocatable :: d_j(:, :), ss(:)
598 real(real64), parameter :: small_rho = 1.0e-14_real64
599 real(real64), parameter :: small_resd = 1.0e-7_real64
600 real(real64) :: nrm_dj, nrm2_dj
601
602 push_sub_with_profile(construct_residuals)
603
604 assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
605 assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
606 assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
607 assert(ubound(overlap_resd_ks, dim = 2) == st%nst)
608
609 safe_allocate(d_j(1:gr%np, st%d%dim))
610
611 if (.not. othn) then
612
613 ! Iterate over TDKS batches, construct the residuals
614 do j = 1, st%nst
615 call states_elec_get_state(st, gr, j, ik, d_j)
616 ! Compute |\psi_j > - sum_i S^{\phi,\psi}_{ij} |\phi_i >
617 do ib = ad_st%group%block_start, ad_st%group%block_end
618 i_minst = states_elec_block_min(ad_st, ib)
619 i_maxst = states_elec_block_max(ad_st, ib)
620 call zbatch_axpy_function(gr%np, -overlap_ad_ks(i_minst:i_maxst, j), &
621 ad_st%group%psib(ib, ik), d_j)
622 enddo
623 ! reset the residuals
624 call states_elec_set_state(resd, gr, j, ik, d_j)
625 enddo
626
627 ! Overlap S^{d,\psi} = < d | \psi>
628 nresd = st%nst
629 call zstates_elec_calc_projections(st, resd, namespace, gr, ik, overlap_resd_ks)
630 overlap_resd_ks = conjg(overlap_resd_ks)
631
632 else
633
634 ! Construct the orthonormalized residuals
635 safe_allocate(ss(1:st%nst))
636
637 nresd = 0
638 ! Iterate over TDKS states, construct the linearly-independent terms as residuals
639 do j = 1, resd%nst
640 ! nrm2_dj = nrm2_tdks - \sum_i |S^{\phi,\psi}_{ij}|^2
641 nrm2_dj = nrm2_tdks(j) - sum(real(overlap_ad_ks(:, j))**2) - sum(aimag(overlap_ad_ks(:, j))**2)
642 if (nrm2_dj > small_rho) then
643 ! scale the TDKS wavefunction to make the residual terms approximately normalized
644 call states_elec_get_state(st, gr, j, ik, d_j)
645 nrm_dj = sqrt(nrm2_dj)
646 call lalg_scal(gr%np, resd%d%dim, m_one / nrm_dj, d_j)
647
648 ! substract the overlap with ad states
649 ! Compute 1/|d_j| |\psi_j > - 1/|d_j| sum_i S^{\phi,\psi}_{ij} |\phi_i >
650 do ib = ad_st%group%block_start, ad_st%group%block_end
651 i_minst = states_elec_block_min(ad_st, ib)
652 i_maxst = states_elec_block_max(ad_st, ib)
653 call zbatch_axpy_function(gr%np, -overlap_ad_ks(i_minst:i_maxst, j) / nrm_dj, &
654 ad_st%group%psib(ib, ik), d_j)
655 enddo
656 call zstates_elec_orthogonalize_single_batch(ad_st, gr, ad_st%nst, ik, d_j)
657 ! substract the overlap with previous residuals
658 if (nresd > 0) then
659 call zstates_elec_orthogonalize_single_batch(resd, gr, nresd, ik, d_j)
660 end if
661 ! screen the small terms
662 nrm_dj = zmf_nrm2(gr, resd%d%dim, d_j, reduce=.true.)
663 if (nrm_dj > small_resd) then
664 call lalg_scal(gr%np, resd%d%dim, m_one / nrm_dj, d_j)
665 nresd = nresd + 1
666 call states_elec_set_state(resd, gr, nresd, ik, d_j)
667 else
668 cycle
669 end if
670 ! Overlap matrix S^{d,\psi}_{ji} = < d_j | \psi_i >
671 do ib = st%group%block_start, st%group%block_end
672 i_minst = states_elec_block_min(st, ib)
673 i_maxst = states_elec_block_max(st, ib)
674 call zmesh_batch_mf_dotp(gr, st%group%psib(ib, ik), d_j, ss(i_minst:i_maxst), reduce = .false., nst = i_maxst-i_minst+1)
675 end do
676
677 overlap_resd_ks(nresd, 1:st%nst) = conjg(ss(1:st%nst))
678 end if
679 enddo
680
681 overlap_resd_ks(nresd+1:resd%nst, :) = m_z0
682 if (gr%parallel_in_domains) then
683 call gr%allreduce(overlap_resd_ks)
684 end if
685
686 safe_deallocate_a(ss)
687
688 end if
689
690 safe_deallocate_a(d_j)
691
692 pop_sub_with_profile(construct_residuals)
693
694 end subroutine construct_residuals
695
711 subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
712 integer, intent(in) :: nresd
713 integer, intent(in) :: ik
714 type(states_elec_t), intent(in) :: st
715 complex(real64), intent(in) :: overlap_ad_ks(:, :)
716 complex(real64), intent(in) :: overlap_resd_ks(:, :)
717 complex(real64), intent(out) :: rho_mat(:, :)
718
719 integer :: ist, jst, lst
720 real(real64) :: sqrt_f
721 complex(real64), allocatable :: s_ad_scaled(:, :), s_resd_scaled(:, :)
722
724
725 assert(ubound(overlap_ad_ks, dim = 1) == st%nst)
726 assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
727 assert(ubound(overlap_resd_ks, dim = 1) == st%nst)
728 assert(ubound(overlap_resd_ks, dim = 2) == st%nst)
729 assert(ubound(rho_mat, dim = 1) == 2*st%nst)
730 assert(ubound(rho_mat, dim = 2) == 2*st%nst)
731
732 safe_allocate(s_ad_scaled(st%nst, st%nst))
733 if (nresd > 0) then
734 safe_allocate(s_resd_scaled(nresd, st%nst))
735 end if
736
737 ! set those beyond (nst+nresd) elements to zero
738 rho_mat = 0.0_real64
739
740 ! scale S matrix
741 do lst = 1, st%nst
742 sqrt_f = sqrt(st%occ(lst, ik))
743 s_ad_scaled(1:st%nst, lst) = overlap_ad_ks(1:st%nst, lst) * sqrt_f
744 if (nresd > 0) then
745 s_resd_scaled(1:nresd, lst) = overlap_resd_ks(1:nresd, lst) * sqrt_f
746 end if
747 end do
748
749 ! ad-ad
750 call blas_herk('U', 'N', st%nst, st%nst, 1.0_real64, s_ad_scaled(1,1), st%nst, 0.0_real64, rho_mat(1, 1), 2*st%nst)
751 if (nresd > 0) then
752 ! resd-resd
753 call blas_herk('U', 'N', nresd, st%nst, 1.0_real64, s_resd_scaled(1,1), nresd, 0.0_real64, &
754 rho_mat(st%nst + 1, st%nst + 1), 2*st%nst)
755 ! ad-resd
756 call blas_gemm('N', 'C', st%nst, nresd, st%nst, m_z1, s_ad_scaled(1,1), st%nst, s_resd_scaled(1,1), &
757 nresd, m_z0, rho_mat(1, st%nst + 1), 2*st%nst)
758 end if
759
760 !$omp parallel do private(jst, ist)
761 do jst = 1, st%nst
762 ! mirror ad-ad lower triangle
763 do ist = 1, jst - 1
764 rho_mat(jst, ist) = conjg(rho_mat(ist, jst))
765 end do
766
767 ! mirror ad-resd to resd-ad (bottom-left block)
768 do ist = 1, nresd
769 rho_mat(ist + st%nst, jst) = conjg(rho_mat(jst, ist + st%nst))
770 end do
771 end do
772 !$omp end parallel do
773
774 !$omp parallel do private(jst, ist)
775 do jst = 1, nresd
776 ! mirror resd-resd lower triangle
777 do ist = 1, jst - 1
778 rho_mat(jst + st%nst, ist + st%nst) = conjg(rho_mat(ist + st%nst, jst + st%nst))
779 end do
780 end do
781 !$omp end parallel do
782
783 safe_deallocate_a(s_ad_scaled)
784 if (nresd > 0) then
785 safe_deallocate_a(s_resd_scaled)
786 end if
787
789 end subroutine construct_density_matrix
790
791 subroutine broadcast_occupation(occ, kpt, nst, parstate)
792 real(real64), intent(inout) :: occ(:, :)
793 type(distributed_t), intent(in) :: kpt
794 integer, intent(in) :: nst
795 logical, intent(in) :: parstate
796
797 integer :: incount
798 integer, allocatable :: rdispls(:), recvcnts(:)
799 real(real64), allocatable :: sendbuffer(:, :)
800
801 push_sub_with_profile(broadcast_occupation)
802
803 assert(ubound(occ, dim = 1) == nst)
804 assert(ubound(occ, dim = 2) == kpt%nglobal)
805 assert(.not. parstate)
807 if (kpt%parallel) then
808 safe_allocate(recvcnts(1:kpt%mpi_grp%size))
809 safe_allocate(rdispls(1:kpt%mpi_grp%size))
810 safe_allocate(sendbuffer(1:nst, kpt%nlocal))
811
812 incount = nst * kpt%nlocal
813 recvcnts(:) = nst * kpt%num(:)
814 sendbuffer(1:nst, 1:kpt%nlocal) = occ(:, kpt%start:kpt%end)
815
816 call mpi_displacements(recvcnts, rdispls)
817 ! send buffer and receive buffer can not be the same address
818 call kpt%mpi_grp%allgatherv(sendbuffer, incount, mpi_double_precision, &
819 occ, recvcnts, rdispls, mpi_double_precision)
820
821 safe_deallocate_a(sendbuffer)
822 safe_deallocate_a(recvcnts)
823 safe_deallocate_a(rdispls)
824 end if
825
826 pop_sub_with_profile(broadcast_occupation)
827
828 end subroutine broadcast_occupation
829
845 subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
846 type(hamiltonian_elec_t), intent(in) :: hm
847 type(states_elec_t), intent(in) :: st
848 type(namespace_t), intent(in) :: namespace
849 integer, intent(in) :: nresd_k(:)
850 real(real64), intent(in) :: dt
851 type(dmp_t), intent(inout) :: dmp
852 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
853
854 push_sub_with_profile(dissipation)
855
856 assert(ubound(nresd_k, dim = 1) == dmp%adiabatic_st%d%kpt%nlocal)
857 assert(ubound(rho_mat_k, dim = 1) == 2*dmp%adiabatic_st%nst)
858 assert(ubound(rho_mat_k, dim = 2) == 2*dmp%adiabatic_st%nst)
859 assert(ubound(rho_mat_k, dim = 3) == dmp%adiabatic_st%d%kpt%nlocal)
860
861 if (dmp%calculation_mode == option__tddmpropagation__master_equation) then
862 select case (dmp%strategy)
863 case (1)
864 call lindblad_uniform(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
865 case (2)
866 call lindblad_2times(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
867 case (3)
868 call lindblad_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)
869 end select
870 else if (dmp%calculation_mode == option__tddmpropagation__collision_integral) then
871 call collision_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)
872 end if
873
874 pop_sub_with_profile(dissipation)
875
876 end subroutine dissipation
877
882 subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
883 type(dmp_t), intent(in) :: dmp
884 type(distributed_t), intent(in) :: kpt
885 integer, intent(in) :: nresd_k(:)
886 real(real64), intent(in) :: dt
887 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
888
889 real(real64) :: coeff
890 real(real64), allocatable :: rtrans(:, :)
891 complex(real64), allocatable :: rho_in(:, :), rho_out(:, :), rho_res(:, :), rho_tmp(:, :)
892 integer :: iorder, nst, ik, ik_, nresd
893 integer, parameter :: norder = 4
894
895 push_sub_with_profile(lindblad_uniform)
896
897 nst = dmp%adiabatic_st%nst
898
899 ! lindblad operate on the density matrix
900 safe_allocate(rtrans(1:nst, 1:nst))
901 safe_allocate(rho_in(1:2*nst, 1:2*nst))
902 safe_allocate(rho_out(1:2*nst, 1:2*nst))
903 safe_allocate(rho_res(1:2*nst, 1:2*nst))
904
905 do ik = kpt%start, kpt%end
906 ik_ = ik - kpt%start + 1
907 nresd = nresd_k(ik_)
908
909 call transition_rate_uniform(dmp%uniform, dmp%adiabatic_st, ik, rtrans)
910
911 rho_res = 0.0_real64
912 rho_in(1:2*nst, 1:2*nst) = rho_mat_k(1:2*nst, 1:2*nst, ik_)
913
914 coeff = m_one
915 do iorder = 1, norder-1
916
917 call lindblad_operator_uniform(nst, nresd, rtrans, rho_in, rho_out)
918
919 coeff = coeff * dt / iorder
920 rho_res = rho_res + coeff * rho_out
921 ! swap the pointer between rho_in and rho_out, so new rho_in is current rho_out,
922 ! and new rho_out is the prior input that will get overwritten
923 call move_alloc(rho_in, rho_tmp)
924 call move_alloc(rho_out, rho_in)
925 call move_alloc(rho_tmp, rho_out)
926 end do
927 ! iorder == norder
928 call lindblad_operator_uniform(nst, nresd, rtrans, rho_in, rho_out)
929 coeff = coeff * dt / norder
930 rho_res = rho_res + coeff * rho_out
931
932 rho_mat_k(1:2*nst, 1:2*nst, ik_) = rho_mat_k(1:2*nst, 1:2*nst, ik_) + rho_res
933 end do
934
935 safe_deallocate_a(rho_in)
936 safe_deallocate_a(rho_out)
937 safe_deallocate_a(rtrans)
938 safe_deallocate_a(rho_res)
939
940 pop_sub_with_profile(lindblad_uniform)
941
942 end subroutine lindblad_uniform
943
948 subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
949 real(real64), intent(in) :: uniform(:)
950 type(states_elec_t), intent(in) :: ad_st
951 integer, intent(in) :: ik
952 real(real64), intent(out) :: rtrans(:, :)
953
954 integer :: ist, jst, nst
955 real(real64) :: rate_character, omega, inv_omega, nph, delta_e
956 real(real64), parameter :: small_e = 0.002_real64/(m_two*p_ry), large_e = 0.5_real64/(m_two*p_ry)
957 real(real64) :: unocc_ist, unocc_jst
958
960
961 nst = ad_st%nst
962 assert(ubound(rtrans, dim = 1) == nst)
963 assert(ubound(rtrans, dim = 2) == nst)
964
965 rate_character = 1 / uniform(1)
966 omega = units_to_atomic(unit_kelvin, uniform(2))
967 inv_omega = 1.0_real64 / max(omega, 1.0e-12_real64)
968
969 rtrans = m_zero
970 ! rtrans(ist, jst) is the transition rate from jst to ist
971 do ist = 1, nst
972 unocc_ist = 1.0_real64 - ad_st%occ(ist, ik) / ad_st%smear%el_per_state
973
974 do jst = ist + 1, nst
975 delta_e = ad_st%eigenval(jst, ik) - ad_st%eigenval(ist, ik)
976 if (delta_e < small_e) cycle
978 if (delta_e < large_e) then
979 nph = 1.0_real64/(exp(delta_e*inv_omega) - 1.0_real64)
980 else
981 nph = m_zero
982 end if
983 unocc_jst = 1.0_real64 - ad_st%occ(jst, ik) / ad_st%smear%el_per_state
984
985 rtrans(ist, jst) = merge(rate_character * unocc_ist * (nph + 1), m_zero, unocc_ist > m_zero)
986 rtrans(jst, ist) = merge(rate_character * unocc_jst * nph, m_zero, unocc_jst > m_zero)
987 end do
988 end do
989
991
992 end subroutine transition_rate_uniform
993
999 subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
1000 integer, intent(in) :: nst
1001 integer, intent(in) :: nresd
1002 real(real64), intent(in) :: rtrans(:, :)
1003 complex(real64), intent(in) :: den_mat(:, :)
1004 complex(real64), intent(out) :: l_mat(:, :)
1005
1006 integer :: ist, jst, lst
1007
1008 push_sub_with_profile(lindblad_operator_uniform)
1009
1010 assert(ubound(rtrans, dim = 1) == nst)
1011 assert(ubound(rtrans, dim = 2) == nst)
1012 assert(ubound(den_mat, dim = 1) == 2*nst)
1013 assert(ubound(den_mat, dim = 2) == 2*nst)
1014 assert(ubound(l_mat, dim = 1) == 2*nst)
1015 assert(ubound(l_mat, dim = 2) == 2*nst)
1016
1017 l_mat = 0.0_real64
1018
1019 do ist = 1, nst
1020 do jst = 1, nst
1021 if (ist == jst) cycle
1022 do lst = 1, nst + nresd
1023 l_mat(ist, lst) = l_mat(ist, lst) - m_half * rtrans(jst, ist) * den_mat(ist, lst)
1024 l_mat(lst, ist) = l_mat(lst, ist) - m_half * rtrans(jst, ist) * den_mat(lst, ist)
1025 end do
1026 l_mat(jst, jst) = l_mat(jst, jst) + rtrans(jst, ist) * den_mat(ist, ist)
1027 end do
1028 end do
1029
1030 pop_sub_with_profile(lindblad_operator_uniform)
1031
1032 end subroutine lindblad_operator_uniform
1033
1041 subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
1042 type(dmp_t), intent(in) :: dmp
1043 type(distributed_t), intent(in) :: kpt
1044 integer, intent(in) :: nresd_k(:)
1045 real(real64), intent(in) :: dt
1046 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1047
1048 real(real64) :: decay_T1, decay_T2
1049 integer :: ist, jst, nst, ik, ik_, nresd
1050
1051 push_sub_with_profile(lindblad_2times)
1052
1053 nst = dmp%adiabatic_st%nst
1054
1055 decay_t1 = exp(-dt / dmp%tmodel(1))
1056 decay_t2 = exp(-dt / dmp%tmodel(2))
1057
1058 do ik = kpt%start, kpt%end
1059 ik_ = ik - kpt%start + 1
1060 nresd = nresd_k(ik_)
1061
1062 do ist = 1, nst
1063 rho_mat_k(ist, ist, ik_) = dmp%occ_gs(ist, ik_) + (rho_mat_k(ist, ist, ik_) - dmp%occ_gs(ist, ik_)) * decay_t1
1064 end do
1065 do ist = nst + 1, nst + nresd
1066 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * decay_t1
1067 end do
1068 do ist = 1, nst + nresd
1069 do jst = ist + 1, nst + nresd
1070 rho_mat_k(jst, ist, ik_) = rho_mat_k(jst, ist, ik_) * decay_t2
1071 rho_mat_k(ist, jst, ik_) = conjg(rho_mat_k(jst, ist, ik_))
1072 end do
1073 end do
1074 end do
1075
1076 pop_sub_with_profile(lindblad_2times)
1077 end subroutine lindblad_2times
1078
1082 subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
1083 type(dmp_t), intent(inout) :: dmp
1084 type(hamiltonian_elec_t), intent(in) :: hm
1085 type(distributed_t), intent(in) :: kpt
1086 type(mpi_grp_t), intent(in) :: system_grp
1087 type(namespace_t), intent(in) :: namespace
1088 integer, intent(in) :: nresd_k(:)
1089 real(real64), intent(in) :: dt
1090 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1091
1092 real(real64) :: coeff
1093 real(real64), allocatable :: rho_diag(:, :)
1094 complex(real64), allocatable :: rho_in(:, :, :), rho_out(:, :, :), rho_tmp(:, :, :)
1095 integer, parameter :: norder = 4
1096 integer :: ist, iorder, ik, nst, ik_, nik
1097
1098 push_sub_with_profile(lindblad_from_epw)
1099
1100 nst = dmp%adiabatic_st%nst
1101 nik = dmp%adiabatic_st%nik
1102
1103 call dmp%update_trans_rate(hm, system_grp, namespace)
1104
1105 ! the diagonal elements of L\rho is initialized to occupation
1106 safe_allocate_source_a(rho_diag, dmp%adiabatic_st%occ)
1107
1108 ! L_D[rho_in] = rho_out
1109 safe_allocate_source(rho_in, rho_mat_k)
1110 safe_allocate(rho_out(1:2*nst, 1:2*nst, 1:kpt%nlocal))
1111
1112 ! dimensition of rho_out should be modified later
1113 coeff = 1.0_real64
1114 do iorder = 1, norder - 1
1115
1116 coeff = coeff * dt / iorder
1117
1118 do ik = kpt%start, kpt%end
1119 ik_ = ik - kpt%start + 1
1120
1121 call lindblad_operator_epw(dmp, ik, hm%kpoints%nik_skip, nresd_k(ik_), rho_diag, rho_in(:, :, ik_), rho_out(:, :, ik_))
1122
1123 call lalg_axpy(2*nst, 2*nst, coeff, rho_out(:, :, ik_), rho_mat_k(:, :, ik_))
1124 end do
1125
1126 ! update rho_diag independently
1127 do ik = kpt%start, kpt%end
1128 ik_ = ik - kpt%start + 1
1129 do ist = 1, nst
1130 rho_diag(ist, ik) = real(rho_out(ist, ist, ik_))
1131 end do
1132 end do
1133 ! broadcast occupation for all processes
1134 call broadcast_occupation(rho_diag, kpt, nst, dmp%adiabatic_st%parallel_in_states)
1135
1136 ! swap the pointer between rho_in and rho_out, so new rho_in is current rho_out,
1137 ! and new rho_out is the prior input that will get overwritten
1138 call move_alloc(rho_in, rho_tmp)
1139 call move_alloc(rho_out, rho_in)
1140 call move_alloc(rho_tmp, rho_out)
1141 end do
1142
1143 ! iorder == norder
1144 coeff = coeff * dt / norder
1145
1146 do ik = kpt%start, kpt%end
1147 ik_ = ik - kpt%start + 1
1148
1149 call lindblad_operator_epw(dmp, ik, hm%kpoints%nik_skip, nresd_k(ik_), rho_diag, rho_in(:, :, ik_), rho_out(:, :, ik_))
1150
1151 call lalg_axpy(2*nst, 2*nst, coeff, rho_out(:, :, ik_), rho_mat_k(:, :, ik_))
1152 end do
1153
1154
1155 safe_deallocate_a(rho_in)
1156 safe_deallocate_a(rho_out)
1157 safe_deallocate_a(rho_diag)
1158
1159 pop_sub_with_profile(lindblad_from_epw)
1160 end subroutine lindblad_from_epw
1161
1171 subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
1172 type(dmp_t), intent(inout) :: dmp
1173 type(hamiltonian_elec_t), intent(in) :: hm
1174 type(distributed_t), intent(in) :: kpt
1175 type(mpi_grp_t), intent(in) :: system_grp
1176 type(namespace_t), intent(in) :: namespace
1177 integer, intent(in) :: nresd_k(:)
1178 real(real64), intent(in) :: dt
1179 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1180
1181 real(real64) :: gam, gam_in, gam_out
1182 real(real64), allocatable :: gam_bnd(:)
1183 real(real64), parameter :: gthresh = 1.0e-8_real64
1184 integer :: ist, jst, ik, nst, nresd, ik_, nik_skip
1185
1186 push_sub_with_profile(collision_from_epw)
1187
1188 nst = dmp%adiabatic_st%nst
1189 nik_skip = hm%kpoints%nik_skip
1190
1191 safe_allocate(gam_bnd(2*nst))
1192
1193 call dmp%update_trans_rate(hm, system_grp, namespace)
1194
1195 ! off-diagonals
1196 do ik = kpt%start, kpt%end
1197 ik_ = ik - kpt%start + 1
1198 nresd = nresd_k(ik_)
1199
1200 ! pre-calculate gamma
1201 gam_bnd = 0.0_real64
1202 do ist = dmp%istart, dmp%iend
1203 call lifetime(dmp, ik, ist, nik_skip, gam_bnd(ist))
1204 end do
1205
1206 do ist = 1, nst + nresd
1207 do jst = ist + 1, nst + nresd
1208 gam = -(gam_bnd(ist) + gam_bnd(jst)) / 2.0_real64
1209
1210 ! damping
1211 rho_mat_k(jst, ist, ik_) = rho_mat_k(jst, ist, ik_) * exp(gam * dt)
1212 rho_mat_k(ist, jst, ik_) = conjg(rho_mat_k(jst, ist, ik_))
1213 end do
1214 end do
1215 end do
1216
1217 ! diagonals, \dot{f} = -gam_out * f + gam_in, gives analytical solution
1218 do ik = kpt%start, kpt%end
1219 ik_ = ik - kpt%start + 1
1220 do ist = dmp%istart, dmp%iend
1221 call get_gamma(dmp, ik, ist, nik_skip, gam_in, gam_out)
1222 if (gam_out * dt > gthresh) then
1223 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * exp(-gam_out * dt) &
1224 + (gam_in / gam_out) * (1.0_real64 - exp(-gam_out * dt))
1225 else
1226 ! when denominator divergent
1227 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * (1.0_real64 - gam_out * dt) + gam_in * dt
1228 end if
1229 end do
1230 end do
1231
1232 safe_deallocate_a(gam_bnd)
1233
1234 pop_sub_with_profile(collision_from_epw)
1235 end subroutine collision_from_epw
1236
1238 subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
1239 class(dmp_t), intent(inout) :: this
1240 type(hamiltonian_elec_t), intent(in) :: hm
1241 type(mpi_grp_t), intent(in) :: system_grp
1242 type(namespace_t), intent(in) :: namespace
1243
1244 integer :: ia
1245
1246 push_sub_with_profile(dmp_propagation_update_trans_rate)
1247
1248 ia = get_vector_field_index(this, hm, namespace)
1249
1250 if (ia /= this%ia) then
1251 call iopar_read_trans_rate(ia, system_grp, namespace, this)
1252 this%ia = ia
1253 end if
1254
1255 pop_sub_with_profile(dmp_propagation_update_trans_rate)
1256
1258
1260 subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)
1261 type(namespace_t), intent(in) :: namespace
1262 type(ions_t), intent(in) :: ions
1263 type(hamiltonian_elec_t), intent(in) :: hm
1264 type(mpi_grp_t), intent(in) :: system_grp
1265 type(dmp_t), intent(inout) :: dmp
1267 integer :: ierr, iostat, idim, idir, iqq, totq
1268 integer :: epw_nk(3), oct_nk(3)
1269 real(real64) :: oct_s(3), at(3,3)
1270
1271 push_sub(iopar_open_trans_rate)
1272
1273 ! metadata
1274 if (system_grp%is_root()) then
1275 open(newunit=dmp%iunit, file=trim(dmp%epw_file), form='unformatted', access='stream', status='old', &
1276 action='read', iostat=iostat)
1277 if (iostat /= 0) then
1278 dmp%iunit = -1
1279 write(message(1), '(a,a)') 'Error opening file: ', trim(dmp%epw_file)
1280 call messages_fatal(1, namespace=namespace)
1281 end if
1282 !
1283 read(dmp%iunit, iostat=ierr) iqq, totq, dmp%wnst, epw_nk(1:3), at, oct_nk(1:3), oct_s(1:3), dmp%astep(1:3), dmp%na(1:3)
1284 if (ierr /= 0) then
1285 write(message(1), '(a,a)') 'Error reading header from: ', trim(dmp%epw_file)
1286 call messages_fatal(1, namespace=namespace)
1287 end if
1288 !
1289 end if
1290
1291 call system_grp%bcast(dmp%wnst, 1, mpi_integer, 0)
1292 call system_grp%bcast(at, 9, mpi_double_precision, 0)
1293 call system_grp%bcast(oct_s, 3, mpi_double_precision, 0)
1294 call system_grp%bcast(dmp%astep, 3, mpi_double_precision, 0)
1295 call system_grp%bcast(dmp%na, 3, mpi_integer, 0)
1296 call system_grp%bcast(epw_nk, 3, mpi_integer, 0)
1297 call system_grp%bcast(oct_nk, 3, mpi_integer, 0)
1298
1299 dmp%iend = dmp%istart + dmp%wnst - 1
1300
1301 if (any(oct_nk /= hm%kpoints%nik_axis)) then
1302 write(message(1), '(a, a)') 'Inconsistent k-point mesh in KPointsGrid and ', trim(dmp%epw_file)
1303 call messages_fatal(1, namespace=namespace)
1304 end if
1305 if (any(abs(oct_s - hm%kpoints%full%shifts(:, 1)) > m_epsilon)) then
1306 write(message(1), '(a, a)') 'Inconsistent k-point mesh shifts in KPointsGrid and ', trim(dmp%epw_file)
1307 call messages_fatal(1, namespace=namespace)
1308 end if
1309 !
1310 write(message(1), '(3(a,i0), a)') 'Info: Averaged transition rates obtained from a ', epw_nk(1), ' x ', &
1311 epw_nk(2), ' x ', epw_nk(3), ' EPW k-point mesh'
1312 call messages_info(1, namespace=namespace)
1313 write(message(1), '(a, i0, a, i0)') 'Info: Damping band ', dmp%istart, ' - ', dmp%iend
1314 call messages_info(1, namespace=namespace)
1315 write(message(1),'(a)') ' EPW Lattice Vectors [1/alat]'
1316 do idim = 1, 3
1317 write(message(1+idim),'(3f12.6)') (at(idir, idim), idir = 1, 3)
1318 end do
1319 call messages_info(4, namespace=namespace)
1320 if (any(abs(at - ions%latt%rlattice_primitive) > m_epsilon)) then
1321 write(message(1),'(a)') 'Lattice settings are not fully consistent with those in EPW'
1322 call messages_warning(1, namespace=namespace)
1323 end if
1324
1325 call build_epw_kmap(namespace, hm%kpoints, dmp)
1326
1327 ! initialize vector field grid index
1328 dmp%ia = -1
1329
1330 ! shared memory for transition rates
1331 dmp%num = int(product(oct_nk), kind=int64)**2 * int(dmp%wnst, kind=int64)**2
1332#ifdef HAVE_MPI
1333 ! create shared memory window and fill it only on root
1334 call create_intranode_communicator(system_grp, dmp%intranode_grp, dmp%internode_grp)
1335 ! We inline the logic of lmpi_create_shared_memory_window because single precision is not supported.
1336 call slmpi_create_shared_memory_window(dmp%num, dmp%intranode_grp, dmp%window_trans_rate, ave_trans)
1337#else
1338 safe_allocate(ave_trans(1:dmp%num))
1339#endif
1340
1341 pop_sub(iopar_open_trans_rate)
1342 end subroutine iopar_open_trans_rate
1343
1345 subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
1346 integer, intent(in) :: ia
1347 type(mpi_grp_t), intent(in) :: system_grp
1348 type(namespace_t), intent(in) :: namespace
1349 type(dmp_t), intent(inout) :: dmp
1350
1351 integer(int64), parameter :: header_bytes = 168 ! head info in binary file
1352 integer(int64), parameter :: epw_bytes = 4 ! single precision
1353 integer(int64) :: offset
1354 integer :: ierr
1356 push_sub(iopar_read_trans_rate)
1357
1358 if (system_grp%is_root()) then
1359 ! single precision
1360 offset = header_bytes + int(ia - 1, kind=int64) * dmp%num * epw_bytes + 1_int64
1361 read(dmp%iunit, pos=offset, iostat=ierr) ave_trans
1362 if (ierr /= 0) then
1363 write(message(1), '(a,a)') 'Error reading transition rates from: ', trim(dmp%epw_file)
1364 call messages_fatal(1, namespace=namespace)
1365 end if
1366 end if
1367
1368#ifdef HAVE_MPI
1369 ! now broadcast the global arrays to local rank 0 on each node
1370 if (dmp%intranode_grp%rank == 0) then
1371 call smpi_grp_bcast(dmp%internode_grp, ave_trans(1), dmp%num, mpi_real, 0)
1372 end if
1373 call lmpi_sync_shared_memory_window(dmp%window_trans_rate, dmp%intranode_grp)
1374#endif
1375
1376 pop_sub(iopar_read_trans_rate)
1377 end subroutine iopar_read_trans_rate
1378
1382 subroutine iopar_close_trans_rate(system_grp, dmp)
1383 type(mpi_grp_t), intent(in) :: system_grp
1384 type(dmp_t), intent(inout) :: dmp
1385
1386 push_sub(iopar_close_trans_rate)
1387
1388 safe_deallocate_a(dmp%kmap)
1389
1390 if (system_grp%is_root()) then
1391 close(dmp%iunit)
1392 end if
1393
1394#ifdef HAVE_MPI
1395 call lmpi_destroy_shared_memory_window(dmp%window_trans_rate)
1396 nullify(ave_trans)
1397#else
1398 safe_deallocate_p(ave_trans)
1399#endif
1400
1401 pop_sub(iopar_close_trans_rate)
1402
1403 end subroutine iopar_close_trans_rate
1404
1406 subroutine build_epw_kmap(namespace, kpoints, dmp)
1407 type(namespace_t), intent(in) :: namespace
1408 type(kpoints_t), intent(in) :: kpoints
1409 type(dmp_t), intent(inout) :: dmp
1410
1411 integer :: ik, idx, nik, nik_mp
1412 integer :: kidx_(3)
1413 real(real64) :: kred(3), kidx(3)
1414 real(real64), parameter :: tol = 1.0d-10
1415
1416 push_sub(build_epw_kmap)
1417
1418 nik = dmp%adiabatic_st%nik
1419 nik_mp = nik - kpoints%nik_skip
1420 safe_allocate(dmp%kmap(nik))
1421
1422 ! mp k-points
1423 dmp%kmap = nik_mp + 1
1424
1425 do ik = 1, nik
1426 ! Map reduced coordinate into an integer MP-grid coordinate
1427 kred = kpoints%get_point(ik, .false.)
1428 kidx = (kred + 0.5_real64) * real(kpoints%nik_axis, kind=real64) - kpoints%full%shifts(:, 1)
1429
1430 ! for MP k-points
1431 if (ik <= nik_mp) then
1432 if (any(abs(kidx - nint(kidx)) > tol)) then
1433 write(message(1), '(a)') 'K-point mesh is not compatible with EPW input'
1434 call messages_fatal(1, namespace=namespace)
1435 end if
1436 end if
1437
1438 ! Fold into central cell in EPW
1439 kidx_ = modulo(nint(kidx), kpoints%nik_axis)
1440 ! Flatten into a single, contiguous index
1441 idx = kidx_(1) * kpoints%nik_axis(2) * kpoints%nik_axis(3) + kidx_(2) * kpoints%nik_axis(3) + &
1442 kidx_(3) + 1
1443
1444 dmp%kmap(ik) = idx
1445 end do
1446
1447 ! sanity check
1448 assert(all(dmp%kmap <= nik_mp))
1449
1450 pop_sub(build_epw_kmap)
1451 end subroutine build_epw_kmap
1452
1454 function get_vector_field_index(dmp, hm, namespace) result(aidx)
1455 type(dmp_t), intent(in) :: dmp
1456 type(hamiltonian_elec_t), intent(in) :: hm
1457 type(namespace_t), intent(in) :: namespace
1458
1459 real(real64) :: ared(3), approx(3)
1460 integer :: apoint_idx(3)
1461 integer :: aidx, idim
1462
1463 push_sub(get_vector_field_index)
1464
1465 ! Find the nearest discrete vector field grid point on the EPW vector field mesh
1466 if (allocated(hm%hm_base%uniform_vector_potential)) then
1467 call kpoints_to_reduced(hm%ions%latt, hm%hm_base%uniform_vector_potential, ared)
1468 else
1469 ared = 0.0_real64
1470 end if
1471 do idim = 1, 3
1472 if (dmp%astep(idim) > m_zero) then
1473 apoint_idx(idim) = nint(ared(idim) / dmp%astep(idim))
1474 approx(idim) = apoint_idx(idim) * dmp%astep(idim)
1475 else
1476 apoint_idx(idim) = 0
1477 approx(idim) = 0.0_real64
1478 end if
1479 end do
1480 if (any(abs(apoint_idx) > dmp%na)) then
1481 write(message(1), '(a, 3F8.3)') 'Vector potential exceeds mesh range: ', ared
1482 call messages_warning(1, namespace=namespace)
1483 where (apoint_idx < -dmp%na)
1484 apoint_idx = -dmp%na
1485 end where
1486 where (apoint_idx > dmp%na)
1487 apoint_idx = dmp%na
1488 end where
1489 end if
1490 ! Get the flattened 1D index
1491 ! Step 1: Find the relative offset by subtracting the minimum index (-na).
1492 ! This shifts the range [-na, na] to an offset [0, 2*na].
1493 ! e.g., if range is [-2, 2], the offset for 1 is 1 - (-2) = 3.
1494 ! Step 2: Flatten the 3D offsets into 1D, and finally add 1 for Fortran 1-based indexing.
1495 aidx = (apoint_idx(1) + dmp%na(1)) * (2 * dmp%na(2) + 1) * (2 * dmp%na(3) + 1) + &
1496 (apoint_idx(2) + dmp%na(2)) * (2 * dmp%na(3) + 1) + &
1497 (apoint_idx(3) + dmp%na(3)) + 1
1498
1499 write(message(1), '(a, x, 3F7.3)') 'Approximated vector field damping grid:', approx
1500 call messages_info(1, namespace=namespace)
1501
1502 pop_sub(get_vector_field_index)
1503 end function get_vector_field_index
1504
1509 function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block) result(res)
1510 type(dmp_t), intent(in) :: dmp
1511 integer, intent(in) :: nik_mp
1512 integer, intent(in) :: jbnd
1513 integer, intent(in) :: ibnd
1514 integer, intent(in) :: k
1515 integer, intent(in) :: kq
1516 logical, optional, intent(in) :: p_block
1517
1518 real(real64) :: unocc, res
1519 integer :: k_, kq_
1520 integer(int64) :: idx
1521
1522 push_sub(get_trans_rate)
1523
1524 k_ = dmp%kmap(k)
1525 kq_ = dmp%kmap(kq)
1526 ! Flatten 4D transition rate index to 1D: (kq, k, ibnd, jbnd) -> idx
1527 ! Matrix dimensions are [nik_mp, nik_mp, wnst, wnst]
1528 ! The layout follows the hierarchy: kq is the slowest index, jbnd is the fastest.
1529 idx = int(kq_-1, kind=int64) * int(nik_mp, kind=int64) * int(dmp%wnst, kind=int64)**2 + &
1530 int((k_-1) * dmp%wnst**2, kind=int64) + &
1531 int((ibnd-dmp%istart)*dmp%wnst + (jbnd-dmp%istart) + 1, kind=int64)
1532
1533 assert(idx >= 1 .and. idx <= dmp%num)
1534
1535 res = real(ave_trans(idx), kind=real64)
1536 ! Pauli blocking
1537 if (optional_default(p_block, .true.)) then
1538 unocc = 1.0_real64 - dmp%adiabatic_st%occ(jbnd, kq) / dmp%adiabatic_st%smear%el_per_state
1539 res = res * max(unocc, 0.0_real64)
1540 end if
1542 pop_sub(get_trans_rate)
1543 end function get_trans_rate
1544
1552 subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
1553 type(dmp_t), intent(in) :: dmp
1554 integer, intent(in) :: ik
1555 integer, intent(in) :: nik_skip
1556 integer, intent(in) :: nresd
1557 real(real64), intent(in) :: rho_diag(:, :)
1558 complex(real64), intent(in) :: den_mat(:, :)
1559 complex(real64), intent(out) :: l_mat(:, :)
1560
1561 integer :: lst, nst, nik_mp, ikq, ibnd, jbnd
1562 real(real64) :: tr
1563 push_sub(lindblad_operator_epw)
1564
1565 nst = dmp%adiabatic_st%nst
1566 nik_mp = dmp%adiabatic_st%nik - nik_skip
1567
1568 assert(ubound(rho_diag, dim = 1) == nst)
1569 assert(ubound(rho_diag, dim = 2) == dmp%adiabatic_st%nik)
1570 assert(ubound(den_mat, dim = 1) == 2*nst)
1571 assert(ubound(den_mat, dim = 2) == 2*nst)
1572 assert(ubound(l_mat, dim = 1) == 2*nst)
1573 assert(ubound(l_mat, dim = 2) == 2*nst)
1574
1575 l_mat = 0.0_real64
1576
1577 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1578 ! in both case, ikq is the mp grid.
1579 do ikq = 1, nik_mp
1580 do jbnd = dmp%istart, dmp%iend
1581 do ibnd = dmp%istart, dmp%iend
1582
1583 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1584 do lst = 1, nst + nresd
1585 l_mat(ibnd, lst) = l_mat(ibnd, lst) - m_half * tr * den_mat(ibnd, lst)
1586 l_mat(lst, ibnd) = l_mat(lst, ibnd) - m_half * tr * den_mat(lst, ibnd)
1587 end do
1588 l_mat(ibnd, ibnd) = l_mat(ibnd, ibnd) + get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.) &
1589 * rho_diag(jbnd, ikq)
1590 end do
1591 end do
1592 end do
1593
1594 pop_sub(lindblad_operator_epw)
1595
1597
1602 subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
1603 type(dmp_t), intent(in) :: dmp
1604 integer, intent(in) :: ik
1605 integer, intent(in) :: ibnd
1606 integer, intent(in) :: nik_skip
1607 real(real64), intent(out) :: gam
1608
1609 integer :: nik_mp, ikq, jbnd
1610 real(real64) :: tr
1611 push_sub(lifetime)
1612
1613 assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
1614 nik_mp = dmp%adiabatic_st%nik - nik_skip
1615
1616 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1617 ! in both case, ikq is the mp grid.
1618 gam = 0.0_real64
1619 do ikq = 1, nik_mp
1620 do jbnd = dmp%istart, dmp%iend
1621 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1622 gam = gam + tr
1623 tr = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.false.)
1624 gam = gam + tr * dmp%adiabatic_st%occ(jbnd, ikq) / dmp%adiabatic_st%smear%el_per_state
1625 end do
1626 end do
1627
1628 pop_sub(lifetime)
1629
1630 end subroutine lifetime
1631
1642 subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
1643 type(dmp_t), intent(in) :: dmp
1644 integer, intent(in) :: ik
1645 integer, intent(in) :: ibnd
1646 integer, intent(in) :: nik_skip
1647 real(real64), intent(out) :: gam_in
1648 real(real64), intent(out) :: gam_out
1649
1650 integer :: nik_mp, ikq, jbnd
1651 real(real64) :: tr
1652 push_sub(get_gamma)
1653
1654 assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
1655 nik_mp = dmp%adiabatic_st%nik - nik_skip
1656
1657 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1658 ! in both case, ikq is the mp grid.
1659 gam_in = 0.0_real64
1660 gam_out = 0.0_real64
1661 do ikq = 1, nik_mp
1662 do jbnd = dmp%istart, dmp%iend
1663 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1664 gam_out = gam_out + tr
1665 tr = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.)
1666 gam_in = gam_in + tr * dmp%adiabatic_st%occ(jbnd, ikq)
1667 end do
1668 end do
1669
1670 pop_sub(get_gamma)
1671
1672 end subroutine get_gamma
1673
1689 subroutine update_st(dmp, ik, gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
1690 type(dmp_t), intent(inout) :: dmp
1691 integer, intent(in) :: ik
1692 type(grid_t), intent(in) :: gr
1693 type(namespace_t), intent(in) :: namespace
1694 integer, intent(in) :: nresd
1695 complex(real64), intent(in) :: overlap_ad_ks(:, :)
1696 complex(real64), intent(in) :: overlap_resd_ks(:, :)
1697 real(real64), intent(in) :: nrm2_tdks(:)
1698 type(states_elec_t), intent(inout) :: resd
1699 type(states_elec_t), intent(inout) :: st
1700 complex(real64), intent(inout) :: rho_mat(:, :)
1701 real(real64), intent(inout) :: pop
1702
1703 real(real64), allocatable :: occ(:)
1704 complex(real64), allocatable :: overlap(:, :), overlap_ad_resd(:, :)
1705 integer :: ist, jst, nst
1706
1707 push_sub_with_profile(update_st)
1708
1709 nst = dmp%adiabatic_st%nst
1710
1711 assert(ubound(overlap_ad_ks, dim = 1) == nst)
1712 assert(ubound(overlap_ad_ks, dim = 2) == nst)
1713 assert(ubound(overlap_resd_ks, dim = 1) == nst)
1714 assert(ubound(overlap_resd_ks, dim = 2) == nst)
1715 assert(ubound(rho_mat, dim = 1) == 2*nst)
1716 assert(ubound(rho_mat, dim = 2) == 2*nst)
1717 assert(is_hermitian(2*nst, rho_mat))
1718
1719 safe_allocate(occ(1:nst+nresd))
1720
1721 if (dmp%othn) then
1722 call lalg_eigensolve(nst+nresd, rho_mat, occ)
1723 else
1724 assert(nresd == nst)
1725
1726 ! generalized eigenvalue problem
1727 safe_allocate(overlap(1:2*nst, 1:2*nst))
1728 safe_allocate(overlap_ad_resd(1:nst, 1:nst))
1730 ! ad-ad block
1731 overlap = m_z0
1732 do ist = 1, nst
1733 overlap(ist, ist) = m_one
1734 end do
1735
1736 ! adiabatic - resd, zstates_elec_calc_projections returns the conjugate
1737 call zstates_elec_calc_projections(resd, dmp%adiabatic_st, namespace, gr, ik, overlap_ad_resd)
1738 do jst = 1, nresd
1739 do ist = 1, nst
1740 overlap(ist, jst + nst) = conjg(overlap_ad_resd(ist, jst))
1741 enddo
1742 enddo
1743
1744 ! resd - resd
1745 ! _i<d | d>_j = S^{d,\psi}_{ij} - sum_k S^{\phi,\psi}_{kj} S^{d,\phi}_{ik}
1746 overlap(nst+1:nst+nresd, nst+1:nst+nresd) = overlap_resd_ks(1:nresd, 1:nresd)
1747 call blas_gemm('T', 'N', nresd, nresd, nst, -m_z1, overlap_ad_resd(1, 1), nst, overlap_ad_ks(1, 1), nst, &
1748 m_z1, overlap(nst+1, nst+1), 2*nst)
1749
1750 ! only the upper triangle of overlap is needed
1751 call lalg_geneigensolve(nst+nresd, rho_mat, overlap, occ, preserve_mat=.false.)
1752
1753 safe_deallocate_a(overlap_ad_resd)
1754 safe_deallocate_a(overlap)
1755
1756 end if
1757
1758 if (dmp%unitary_transform) then
1759 call update_wfc_occ_procrustes(ik, dmp%adiabatic_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
1760 nrm2_tdks, occ, rho_mat, st, pop)
1761 else
1762 call update_wfc_occ(ik, dmp%adiabatic_st, resd, gr, nresd, occ, rho_mat, st, pop)
1763 end if
1764
1765 safe_deallocate_a(occ)
1766
1767 pop_sub_with_profile(update_st)
1768
1769 end subroutine update_st
1770
1771
1777 subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
1778 integer, intent(in) :: ik
1779 type(states_elec_t), intent(in) :: ad_st
1780 type(states_elec_t), intent(in) :: resd
1781 type(grid_t), intent(in) :: gr
1782 integer, intent(in) :: nresd
1783 real(real64), intent(in) :: occ(:)
1784 complex(real64), intent(in) :: v_mat(:, :)
1785 type(states_elec_t), intent(inout) :: st
1786 real(real64), intent(inout) :: pop
1787
1788 complex(real64), allocatable :: psi_j(:, :)
1789 integer :: j, ib, i_minst, i_maxst, nst
1790
1791 push_sub_with_profile(update_wfc_occ)
1792
1793 nst = ad_st%nst
1794 assert(ubound(v_mat, dim = 1) == 2*nst)
1795 assert(ubound(v_mat, dim = 2) == 2*nst)
1796
1797 safe_allocate(psi_j(1:gr%np, st%d%dim))
1798
1799 do j = nst+nresd, nst+1, -1
1800 psi_j = (0.0_real64, 0.0_real64)
1801 do ib = ad_st%group%block_start, ad_st%group%block_end
1802 i_minst = states_elec_block_min(ad_st, ib)
1803 i_maxst = states_elec_block_max(ad_st, ib)
1804 call zbatch_axpy_function(gr%np, v_mat(i_minst:i_maxst, j), &
1805 ad_st%group%psib(ib, ik), psi_j)
1806 enddo
1807 do ib = resd%group%block_start, resd%group%block_end
1808 i_minst = states_elec_block_min(resd, ib)
1809 i_maxst = min(states_elec_block_max(resd, ib), nresd)
1810 if (i_minst > nresd) cycle
1811 call zbatch_axpy_function(gr%np, v_mat(i_minst+nst:i_maxst+nst, j), &
1812 resd%group%psib(ib, ik), psi_j, nst=i_maxst-i_minst+1)
1813 enddo
1814 ! reset TDKS state
1815 call states_elec_set_state(st, gr, nst+nresd+1-j, ik, psi_j)
1816 end do
1817
1818 ! eigenvalues in ascending order, so inverse
1819 st%occ(1:nst, ik) = occ(nst+nresd:nresd+1:-1)
1820 pop = pop + sum(st%occ(1:nst, ik)) * st%kweights(ik)
1821
1822 safe_deallocate_a(psi_j)
1823 pop_sub_with_profile(update_wfc_occ)
1824 end subroutine update_wfc_occ
1825
1826
1841 subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
1842 nrm2_tdks, occ_tilde, v_mat, st, pop)
1843 integer, intent(in) :: ik
1844 type(states_elec_t), intent(in) :: ad_st
1845 type(states_elec_t), intent(in) :: resd
1846 type(grid_t), intent(in) :: gr
1847 integer, intent(in) :: nresd
1848 complex(real64), intent(in) :: overlap_ad_ks(:, :)
1849 complex(real64), intent(in) :: overlap_resd_ks(:, :)
1850 real(real64), intent(in) :: nrm2_tdks(:)
1851 real(real64), intent(in) :: occ_tilde(:)
1852 complex(real64), intent(inout) :: v_mat(:, :)
1853 type(states_elec_t), intent(inout) :: st
1854 real(real64), intent(inout) :: pop
1855
1856 integer :: ist, jst, ib, i_minst, i_maxst, nst
1857 complex(real64), allocatable :: overlap_procrus(:, :), uproj(:, :), utrans(:, :)
1858 complex(real64), allocatable :: uu(:, :), vt(:, :)
1859 complex(real64), allocatable :: psi(:, :)
1860 real(real64), parameter :: small_occ = 5.0e-15_real64 ! for those zero occupations, assume them a small quantity
1861 real(real64) :: sg_values(1:st%nst), rocc_tilde(1:st%nst+nresd), rocc(1:st%nst)
1862 real(real64) :: qtot_transform, nrm2, occ
1863
1864 push_sub_with_profile(update_wfc_occ_procrustes)
1865
1866 nst = ad_st%nst
1867 assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
1868 assert(ubound(overlap_ad_ks, dim = 2) == nst)
1869 assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
1870 assert(ubound(overlap_resd_ks, dim = 2) == nst)
1871 assert(ubound(v_mat, dim = 1) == 2*nst)
1872 assert(ubound(v_mat, dim = 2) == 2*nst)
1873
1874 ! set rocc for both TDKS and new wfcs. For those zero occupations, set them a small quantity
1875 rocc_tilde(1:nst+nresd) = sqrt(max(occ_tilde(1:nst+nresd), small_occ))
1876 rocc(1:nst) = sqrt(max(st%occ(1:nst, ik), small_occ))
1877
1878 safe_allocate(overlap_procrus(1:nst+nresd, 1:nst))
1879
1880 ! Procrustes Overlap: S \propto (C_{top})^\dagger S_{ad\_ks} + (C_{bot})^\dagger S_{resd\_ks}
1881 ! Computed via in-place BLAS GEMM accumulation ('C' for conjugate transpose),
1882 ! avoiding the temporary memory allocation and copy overhead of native matmul.
1883 call blas_gemm('C', 'N', nst+nresd, nst, nst, m_z1, v_mat(1, 1), 2*nst, overlap_ad_ks(1, 1), nst, &
1884 m_z0, overlap_procrus(1, 1), nst+nresd)
1885 call blas_gemm('C', 'N', nst+nresd, nst, nresd, m_z1, v_mat(1+nst, 1), 2*nst, overlap_resd_ks(1, 1), nst, &
1886 m_z1, overlap_procrus(1, 1), nst+nresd)
1887 do jst = 1, nst
1888 do ist = 1, nst+nresd
1889 overlap_procrus(ist, jst) = overlap_procrus(ist, jst) * rocc_tilde(ist) * rocc(jst)
1890 end do
1891 end do
1892
1893 safe_allocate(uproj(1:nst+nresd, 1:nst))
1894 safe_allocate(uu(1:nst+nresd, 1:nst+nresd))
1895 safe_allocate(vt(1:nst, 1:nst))
1896
1897 call lalg_singular_value_decomp(nst+nresd, nst, overlap_procrus, uu, vt, sg_values)
1898 uproj = matmul(uu(:,1:nst), vt)
1899
1900 safe_deallocate_a(overlap_procrus)
1901 safe_deallocate_a(vt)
1902 safe_deallocate_a(uu)
1903
1904 safe_allocate(utrans(1:nst+nresd, 1:nst))
1905
1906 ! update transformation matrix
1907 do ist = 1, nst+nresd
1908 call blas_scal(nst+nresd, cmplx(rocc_tilde(ist), 0.0, real64), v_mat(1, ist), 1)
1909 end do
1910 ! utrans = matmul(v_mat(1:nst+nresd,1:nst+nresd), uproj)
1911 call blas_gemm('N', 'N', nst+nresd, nst, nst+nresd, m_z1, v_mat(1, 1), 2*nst, uproj(1, 1), nst+nresd, &
1912 m_z0, utrans(1, 1), nst+nresd)
1913
1914 safe_deallocate_a(uproj)
1915 safe_allocate(psi(1:gr%np, st%d%dim))
1916
1917 ! update state and occupations
1918 qtot_transform = 0.0_real64
1919 do jst = 1, nst
1920 psi = (0.0_real64, 0.0_real64)
1921 do ib = ad_st%group%block_start, ad_st%group%block_end
1922 i_minst = states_elec_block_min(ad_st, ib)
1923 i_maxst = states_elec_block_max(ad_st, ib)
1924 call zbatch_axpy_function(gr%np, utrans(i_minst:i_maxst, jst), &
1925 ad_st%group%psib(ib, ik), psi)
1926 end do
1927 do ib = resd%group%block_start, resd%group%block_end
1928 i_minst = states_elec_block_min(resd, ib)
1929 i_maxst = min(states_elec_block_max(resd, ib), nresd)
1930 if (i_minst > nresd) cycle
1931 call zbatch_axpy_function(gr%np, utrans(i_minst+nst:i_maxst+nst, jst), &
1932 resd%group%psib(ib, ik), psi, nst=i_maxst-i_minst+1)
1933 end do
1934 nrm2 = real(zmf_dotp(gr, st%d%dim, psi, psi, reduce = .true.), real64)
1935 qtot_transform = qtot_transform + nrm2
1936 occ = nrm2 / nrm2_tdks(jst)
1937 st%occ(jst, ik) = occ
1938 call lalg_scal(gr%np, st%d%dim, 1.0_real64/sqrt(occ), psi)
1939 call states_elec_set_state(st, gr, jst, ik, psi)
1940 end do
1941
1942 pop = pop + qtot_transform * st%kweights(ik)
1943
1944 safe_deallocate_a(utrans)
1945 safe_deallocate_a(psi)
1946
1947 pop_sub_with_profile(update_wfc_occ_procrustes)
1948
1949 end subroutine update_wfc_occ_procrustes
1950
1952 logical function is_hermitian(n, mat)
1953 integer, intent(in) :: n
1954 complex(real64), intent(in) :: mat(:, :)
1955 real(real64), parameter :: tol = 1.0e-14_real64
1956
1957 assert(ubound(mat, dim=1) == n)
1958 assert(ubound(mat, dim=2) == n)
1959
1960 is_hermitian = maxval(abs(mat - transpose(conjg(mat)))) <= tol
1961 end function is_hermitian
1962
1963#ifdef HAVE_MPI
1964 subroutine slmpi_create_shared_memory_window(number_of_elements, intranode_grp, window, array)
1965 use iso_c_binding
1966 integer(int64), intent(in) :: number_of_elements
1967 type(mpi_grp_t), intent(in) :: intranode_grp
1968 type(mpi_win), intent(out) :: window
1969 real(real32), pointer, intent(out) :: array(:)
1970
1971 type(c_ptr) :: ptr
1972 integer(kind=MPI_ADDRESS_KIND) :: window_size
1973 integer :: disp_unit
1974
1975 assert(not_in_openmp())
1976
1977 ! allocate only on rank 0 of each node
1978 if (intranode_grp%rank == 0) then
1979 window_size = number_of_elements * 4_mpi_address_kind
1980 else
1981 window_size = 0_mpi_address_kind
1982 end if
1983 assert(number_of_elements * 4 < huge(0_mpi_address_kind))
1984 assert(window_size >= 0)
1985 disp_unit = 4
1986 call mpi_win_allocate_shared(window_size, disp_unit, mpi_info_null, &
1987 intranode_grp%comm, ptr, window)
1988 ! get pointer on all ranks
1989 if (intranode_grp%rank /= 0) then
1990 call mpi_win_shared_query(window, 0, window_size, disp_unit, ptr)
1991 end if
1992 ! get fortran pointer
1993 call c_f_pointer(ptr, array, [number_of_elements])
1994
1995 ! start access epoch
1996 call mpi_win_lock_all(mpi_mode_nocheck, window)
1997
1998 end subroutine slmpi_create_shared_memory_window
1999
2000 subroutine smpi_grp_bcast(mpi_grp, buf, cnt, sendtype, root)
2001 use iso_c_binding
2002 class(mpi_grp_t), intent(in) :: mpi_grp
2003 real(real32), target, intent(inout) :: buf
2004 integer(int64), intent(in) :: cnt
2005 type(mpi_datatype), intent(in) :: sendtype
2006 integer, intent(in) :: root
2007
2008 integer :: rounds, iround, size
2009 integer(int64) :: offset
2010 real(real32), pointer :: bufptr(:)
2011
2012 assert(not_in_openmp())
2013
2014 call mpi_debug_in(mpi_grp%comm, c_mpi_bcast)
2015 if (mpi_grp%comm /= mpi_comm_undefined) then
2016 ! need to do the broadcast in rounds that fit into int32 integers
2017 call c_f_pointer(c_loc(buf), bufptr, [cnt])
2018 rounds = int(cnt/huge(0_int32), int32)
2019 do iround = 1, rounds
2020 offset = int(huge(0_int32), int64) * (iround - 1) + 1
2021 call mpi_bcast(bufptr(offset), huge(0_int32), sendtype, root, mpi_grp%comm)
2022 end do
2023 ! broadcast the remainder
2024 offset = int(huge(0_int32), int64) * rounds + 1
2025 size = int(mod(cnt, int(huge(0_int32),int64)), int32)
2026 call mpi_bcast(bufptr(offset), size, sendtype, root, mpi_grp%comm)
2027 end if
2028 call mpi_debug_out(mpi_grp%comm, c_mpi_bcast)
2029
2030 end subroutine smpi_grp_bcast
2031#endif
2032
2033end module dm_propagation_oct_m
int access(const char *__name, int __type) __attribute__((__nothrow__
--------------— gemm ---------------— performs one of the matrix-matrix operations
Definition: blas.F90:370
--------------— syrk, herk ---------------— performs one of the symmetric rank k operations
Definition: blas.F90:490
--------------— scal ---------------— Scales a vector by a constant.
Definition: blas.F90:150
constant times a vector plus a vector
Definition: lalg_basic.F90:173
scales a vector by a constant
Definition: lalg_basic.F90:159
Prints out to iunit a message in the form: ["InputVariable" = value] where "InputVariable" is given b...
Definition: messages.F90:182
double exp(double __x) __attribute__((__nothrow__
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:2995
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:120
This module implements a calculator for the density and defines related functions.
Definition: density.F90:122
subroutine, public density_calc(st, gr, density, istin)
Computes the density from the orbitals in st.
Definition: density.F90:653
subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
Calculate the total scattering rate (inverse lifetime) for a given state.
subroutine iopar_close_trans_rate(system_grp, dmp)
Finalize transition rate resources.
subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix under EPW-derived Lindblad dissipation.
subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix in time under uniform dissipation.
subroutine, public dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
Initialise the adiabatic states prior to running TD propagation.
subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
Calculate the Lindblad dissipator matrix for uniform decay.
integer function get_vector_field_index(dmp, hm, namespace)
Get the flattened 1D index of the current vector potential on the discrete EPW vector field grid.
real(real64) function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block)
Get transition rate from state (k, ibnd) to (kq, jbnd).
subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
Calculate the Lindblad dissipator matrix using EPW electron-phonon scattering rates.
subroutine, public dm_end_run(system_grp, dmp)
logical function is_hermitian(n, mat)
Check if a matrix is Hermitian.
subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
Read in transition rates to the shared memory window and then broadcast via internode communicator.
subroutine dmp_init(this, namespace, st, space, hm)
Initialise an instance of density matrix dissipation.
subroutine build_epw_kmap(namespace, kpoints, dmp)
Map internal k-point indices to the 1D EPW Monkhorst-Pack grid and verify mesh compatibility.
subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix subject to the electron-phonon collision integral.
subroutine broadcast_occupation(occ, kpt, nst, parstate)
subroutine total_population(ik, st, gr, nrm2, pop)
Calculate total population.
subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, occ_tilde, v_mat, st, pop)
Update states using Procrustes transformation to ensure time continuity.
subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
Calculate in/out scattering rates (Gamma) for a specific state (ik, ibnd).
subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
Update states directly from diagonalization (no Procrustes).
subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
Construct the residual basis and its overlap with TDKS wavefunctions.
subroutine, public dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
Density matrix propagation.
subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
Read and update the EPW transition matrix only when the vector field index changes.
subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
Calculate state transition rates assuming uniform electron-phonon coupling.
subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
Construct the full density matrix in the adiabatic and residual basis.
subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix using the phenomenological two-time (T1/T2) relaxation model.
subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)
Read in metadata of transition rates, build intra/inter communicators and shared memory window.
subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
Evolve the density matrix in time under dissipation.
subroutine update_st(dmp, ik, gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
Diagonalize the density matrix to update occupations and wavefunctions.
subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
Calculate number of electrons in the adiabatic basis.
subroutine, public eigensolver_init(eigens, namespace, gr, st, hm, mc, space, deactivate_oracle)
subroutine, public eigensolver_end(eigens)
integer, parameter, public spin_polarized
real(real64), parameter, public m_two
Definition: global.F90:202
real(real64), parameter, public m_zero
Definition: global.F90:200
real(real64), parameter, public p_ry
Definition: global.F90:239
logical pure function, public not_in_openmp()
Definition: global.F90:566
complex(real64), parameter, public m_z0
Definition: global.F90:210
real(real64), parameter, public m_epsilon
Definition: global.F90:216
complex(real64), parameter, public m_z1
Definition: global.F90:211
real(real64), parameter, public m_half
Definition: global.F90:206
real(real64), parameter, public m_one
Definition: global.F90:201
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module defines classes and functions for interaction partners.
integer, parameter, public kpoints_monkh_pack
Definition: kpoints.F90:223
subroutine, public kpoints_to_reduced(latt, kin, kout)
Definition: kpoints.F90:1150
This module defines functions over batches of mesh functions.
Definition: mesh_batch.F90:118
subroutine, public mesh_batch_nrm2(mesh, aa, nrm2, reduce)
Calculate the norms (norm2, not the square!) of a batch of mesh functions.
Definition: mesh_batch.F90:179
subroutine, public zmesh_batch_mf_dotp(mesh, aa, psi, dot, reduce, nst)
calculate the dot products between a batch and a vector of mesh functions
This module defines various routines, operating on mesh functions.
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1068
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:525
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
This module contains some common usage patterns of MPI routines.
Definition: mpi_lib.F90:117
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:138
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
integer function, public parse_block(namespace, name, blk, check_varinfo_)
Definition: parser.F90:623
integer, parameter, public restart_dm
Definition: restart.F90:156
integer, parameter, public restart_gs
Definition: restart.F90:156
integer, parameter, public restart_type_dump
Definition: restart.F90:184
integer, parameter, public restart_type_load
Definition: restart.F90:184
subroutine, public zstates_elec_calc_projections(st, gs_st, namespace, mesh, ik, proj, gs_nst)
This routine computes the projection between two set of states.
subroutine, public zstates_elec_orthogonalize_single_batch(st, mesh, nst, iqn, phi, normalize, mask, overlap, norm, Theta_fi, beta_ij, against_all)
orthogonalize a single wave function against a set of states
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
subroutine, public states_elec_end(st)
finalize the states_elec_t object
subroutine, public states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval, special)
make a (selective) copy of a states_elec_t object
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
This module handles reading and writing restart information for the states_elec_t.
subroutine, public states_elec_load(restart, namespace, space, st, mesh, kpoints, fixed_occ, ierr, iter, lr, lowest_missing, label, verbose, skip)
returns in ierr: <0 => Fatal error, or nothing read =0 => read all wavefunctions >0 => could only rea...
brief This module defines the class unit_t which is used by the unit_systems_oct_m module.
Definition: unit.F90:134
character(len=20) pure function, public units_abbrev(this)
Definition: unit.F90:225
This module defines the unit system, used for input and output.
type(unit_system_t), public units_out
type(unit_t), public unit_kelvin
For converting energies into temperatures.
type(unit_system_t), public units_inp
the units systems for reading and writing
subroutine, public v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval, time, calc_energy, calc_current, force_semilocal)
Definition: v_ks.F90:755
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Extension of space that contains the knowledge of the spin dimension.
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
This is defined even when running serial.
Definition: mpi.F90:144
Stores all communicators and groups.
Definition: multicomm.F90:208
The states_elec_t class contains all electronic wave functions.
int true(void)