Octopus
isdf.F90
Go to the documentation of this file.
1!! Copyright (C) 2024 - 2025 A. Buccheri
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17
18#include "global.h"
19
21module isdf_oct_m
22 use, intrinsic :: iso_fortran_env, only: real64
26 use comm_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use io_oct_m
33 use math_oct_m
34 use mesh_oct_m
37 use mpi_oct_m, only: mpi_world
40 use space_oct_m
43
44 implicit none
45 private
46 public :: &
49
50 ! TODO(Alex) Issue #1195 Extend ISDF to spin-polarised systems
52 integer, private, parameter :: ik = 1
53
54contains
55
57 subroutine interpolative_separable_density_fitting_vectors(namespace, mesh, st, centroids, isdf_vectors)
58 type(namespace_t), intent(in ) :: namespace
59 class(mesh_t), intent(in ) :: mesh
60 type(states_elec_t), intent(in ) :: st
61 type(centroids_t), intent(in ) :: centroids
62
63 real(real64), allocatable, intent(out) :: isdf_vectors(:, :)
64
65 character(len=2) :: np_char
66 integer :: nocc, n_int_g
67 real(real64), allocatable :: psi_mu(:, :)
68 ! Shape depends on BATCH choice.
69 real(real64), allocatable :: p_r_mu(:, :)
70 ! defined for all grid points and interpolation points.
71 real(real64), allocatable :: zct(:, :)
72 real(real64), allocatable :: p_mu_nu(:, :)
73 ! with both variables defined at interpolation points.
74 real(real64), allocatable :: cct(:, :)
75 ! Gets overwritten with its inverse.
76
78
79 ! TODO(Alex) Issue #1195 Extend ISDF to spin-polarised systems
80 if (st%d%nspin > 1) then
81 call messages_not_implemented("ISDF for SPIN_POLARIZED and SPINOR calculations", namespace)
82 endif
83
84 ! TODO(Alex) Issue #1196 Template ISDF handle both real and complex states
85 if (.not. states_are_real(st)) then
86 call messages_not_implemented("ISDF handling of complex states", namespace)
87 endif
88
89 ! TODO(Alex) Issue #1276 Implement ISDF on GPU
90 if (accel_is_enabled()) then
91 call messages_not_implemented("ISDF not supported on GPU", namespace)
92 end if
93
94 ! For debug file naming - assumes testing done with up 99 MPI processes
95 write(np_char, '(I2)') mpi_world%size
96
97 ! Total number of interpolation points
98 n_int_g = centroids%npoints_global()
99
100 ! Max number of states used in ISDF expansion
101 nocc = highest_occupied_index(st, ik)
102
103 if (st%st_start <= nocc .and. nocc <= st%st_end) then
104 write(message(1),'(a, 1x, I3, 1x, a, 1x, I3)') "ISDF: Computing ISDF vectors up to state", &
105 & nocc, " on process", st%mpi_grp%rank
106 call messages_info(1, namespace=namespace, debug_only=.true.)
107 endif
108
109 ! psi_mu allocated within the routine as shape varies w.r.t. PACKED or UNPACKED
110 call dphi_at_interpolation_points(mesh, st, centroids, nocc, psi_mu)
111 if (debug%info) call output_psi_mu_for_all_states(namespace, st, nocc, psi_mu)
112
113 safe_allocate(p_r_mu(1:mesh%np, 1:n_int_g))
115 if (debug%info) call output_matrix(namespace, "p_r_mu_np"//trim(adjustl(np_char))//".txt", p_r_mu)
116
117 safe_allocate(zct(1:mesh%np, 1:n_int_g))
118 call pair_product_coefficient_matrix(p_r_mu, zct)
119 if (debug%info) call output_matrix(namespace, "zct_np"//trim(adjustl(np_char))//".txt", zct)
120 safe_deallocate_a(p_r_mu)
121
122 safe_allocate(p_mu_nu(1:n_int_g, 1:n_int_g))
123 ! Contract over the state index, ist: P_mu_nu = [psi_ist_mu]^T @ psi_ist_nu
124 call lalg_gemm(psi_mu, psi_mu, p_mu_nu, transa='T')
125 ! States may be distributed so all elements of p_mu_nu only contain a partial contribution from {ist}
126 call comm_allreduce(st%mpi_grp, p_mu_nu)
127 if (debug%info) call output_matrix(namespace, "p_mu_nu_np"//trim(adjustl(np_char))//".txt", p_mu_nu)
128 safe_deallocate_a(psi_mu)
129
130 safe_allocate(cct(1:n_int_g, 1:n_int_g))
131 call coefficient_product_matrix(p_mu_nu, cct)
132 if (debug%info) call output_matrix(namespace, "cct_np"//trim(adjustl(np_char))//".txt", cct)
133 if (debug%info) then
134 assert(is_symmetric(cct))
135 endif
136 safe_deallocate_a(p_mu_nu)
137
138 ! [CC^T]^{-1}, mutating cct in-place
139 ! NOTE, CC^T is extremely ill-conditioned once a critical number of interpolation points
140 ! are used. Using the pseudo-inverse (SVD) circumvents this problem
141 write(message(1),'(a)') "ISDF: Inverting [CC^T]"
142 call messages_info(1, namespace=namespace, debug_only=.true.)
143 call lalg_svd_inverse(n_int_g, n_int_g, cct)
144 call symmetrize_matrix(n_int_g, cct)
146 ! zeta = [ZC^T][CC^T]^-1
147 safe_allocate(isdf_vectors(1:mesh%np, 1:n_int_g))
148 call lalg_gemm(mesh%np, n_int_g, n_int_g, 1.0_real64, zct, cct, 0.0_real64, isdf_vectors)
149
150 ! ISDF vectors are distributed on the mesh, so do not output in that case
151 if (debug%info .and. .not. mesh%parallel_in_domains) then
152 call output_matrix(namespace, "isdf_np"//trim(adjustl(np_char))//".txt", isdf_vectors)
153 endif
154
155 safe_deallocate_a(zct)
156 safe_deallocate_a(cct)
157
159
161
162
173 subroutine pair_product_coefficient_matrix(p_phi, zct, p_psi)
174 real(real64), target, contiguous, intent(in ) :: p_phi(:, :)
175 ! \f$P^{\varphi}(\mathbf{r}, \mathbf{r}_\mu)\f$
176 real(real64), target, optional, contiguous, intent(in ) :: p_psi(:, :)
177 ! \f$P^{\psi}(\mathbf{r}, \mathbf{r}_\mu)\f$
178
179 real(real64), contiguous, intent(out) :: zct(:, :)
180
181 integer :: np, n_int_g, ip, i_mu
182 real(real64), pointer, contiguous :: p_2(:, :)
183
184 push_sub_with_profile(pair_product_coefficient_matrix)
185
186 write(message(1),'(a)') "ISDF: Constructing Z C^T"
187 call messages_info(1, debug_only=.true.)
188
189 if (present(p_psi)) then
190 p_2 => p_psi
191 else
192 p_2 => p_phi
193 endif
194
195 ! Quasi-density matrices require the same shape for element-wise multiplication
196 assert(all(shape(p_phi) == shape(p_2)))
197 ! zct should be allocated, and its shape should be consistent with the quasi-density matrices
198 assert(all(shape(p_phi) == shape(zct)))
199
200 np = size(p_phi, 1)
201 n_int_g = size(p_phi, 2)
202
203 ! Construct ZC^T
204 !$omp parallel
205 do i_mu = 1, n_int_g
206 !$omp do simd
207 do ip = 1, np
208 zct(ip, i_mu) = p_phi(ip, i_mu) * p_2(ip, i_mu)
209 enddo
210 !$omp end do simd nowait
211 enddo
212 !$omp end parallel
213 nullify(p_2)
214
215 pop_sub_with_profile(pair_product_coefficient_matrix)
216
218
219
231 subroutine coefficient_product_matrix(p_phi, cct, p_psi)
232 real(real64), target, contiguous, intent(in ) :: p_phi(:, :)
233 ! \f$P^{\varphi}(\mathbf{r}_\mu, \mathbf{r}_\nu)\f$
234 real(real64), target, contiguous, optional, intent(in ) :: p_psi(:, :)
235 ! \f$P^{\psi}(\mathbf{r}_\mu, \mathbf{r}_\nu)\f$
236
237 real(real64), contiguous, intent(out) :: cct(:, :)
238 ! Array should be allocated by the caller
239
240 integer :: n_int_g, i_mu, i_nu
241 real(real64), contiguous, pointer :: p_2(:, :)
242
243 push_sub_with_profile(coefficient_product_matrix)
244
245 write(message(1),'(a)') "ISDF: Construct CC^T"
246 call messages_info(1, debug_only=.true.)
247
248 if (present(p_psi)) then
249 p_2 => p_psi
250 else
251 p_2 => p_phi
252 endif
253
254 ! Quasi-density matrices require the same shape for element-wise multiplication
255 assert(all(shape(p_phi) == shape(p_2)))
256 ! cct should be allocated, and its shape should be consistent with the quasi-density matrices
257 assert(all(shape(p_phi) == shape(cct)))
258 n_int_g = size(p_phi, 1)
259
260 ! Construct CC^T
261 !$omp parallel do collapse(2) default(shared)
262 do i_nu = 1, n_int_g
263 do i_mu = 1, n_int_g
264 cct(i_mu, i_nu) = p_phi(i_mu, i_nu) * p_2(i_mu, i_nu)
265 enddo
266 enddo
267 !$omp end parallel do
268 nullify(p_2)
269
270 pop_sub_with_profile(coefficient_product_matrix)
271
272 end subroutine coefficient_product_matrix
273
274
280 subroutine interpolation_vector_gram_matrix(mesh, isdf_vectors, gram_matrix)
281 class(mesh_t), intent(in) :: mesh
282 real(real64), contiguous, intent(in ) :: isdf_vectors(:, :)
283 real(real64), contiguous, intent(out) :: gram_matrix(:, :)
284
285 integer :: n_int, i, j
286
288
289 assert(mesh%np == size(isdf_vectors, 1))
290 n_int = size(isdf_vectors, 2)
291 assert(all(shape(gram_matrix) == [n_int, n_int]))
292
293 ! It would be more efficient to use DGEMM, but dmf_dotp ensures the correct volume element
294
295 ! Diagonal elements
296 do i = 1, n_int
297 gram_matrix(i, i) = dmf_dotp(mesh, isdf_vectors(:, i), isdf_vectors(:, i), reduce=.false.)
298 enddo
299
300 ! Upper triangle elements
301 do j = 2, n_int
302 do i = 1, j - 1
303 gram_matrix(i, j) = dmf_dotp(mesh, isdf_vectors(:, i), isdf_vectors(:, j), reduce=.false.)
304 ! Lower triangle from symmetry
305 gram_matrix(j, i) = gram_matrix(i, j)
306 enddo
307 enddo
308
309 call mesh%allreduce(gram_matrix)
310
312
314
315
316 ! -------------------------------------------
317 ! Helper routines
318 ! -------------------------------------------
319
325 integer function local_number_of_states(st, max_state)
326 type(states_elec_t), intent(in) :: st
327 integer, intent(in) :: max_state
328
329 integer :: minst, maxst
330
331 push_sub(local_number_of_states)
332
333 minst = states_elec_block_min(st, st%group%block_start)
334 maxst = min(states_elec_block_max(st, st%group%block_end), max_state)
335
336 ! Handles when max_state is part of a block with index < (block_start of current process)
337 ! resulting in no states on this process being used in the ISDF expansion
338 local_number_of_states = max(maxst - minst + 1, 0)
339
341
342 end function local_number_of_states
343
344
349 subroutine gather_psi_mu_over_states(st, psi_mu, psi_global)
350 type(states_elec_t), intent(in ) :: st
351 real(real64), contiguous, intent(in ) :: psi_mu(:, :)
352 real(real64), contiguous, intent(out) :: psi_global(:, :)
353
354 integer :: max_state, n_int_g, ic, ist, st_end, is_local
355
357
358 if (st%group%psib(st%group%block_start, ik)%status() /= batch_packed) then
359 message(1) = "Developer Error: Trying to output psi_mu when not BATCH_PACKED"
360 call messages_fatal(1)
361 endif
362
363 ! Total number of interpolation points
364 n_int_g = size(psi_global, 2)
365 assert(size(psi_mu, 2) == size(psi_global, 2))
366
367 ! Total number of states used in ISDF
368 max_state = size(psi_global, 1)
369
370 ! All elements must be zeroed, such that allreduce does not
371 ! sum contributions from uninitialised elements
372 do ic = 1, n_int_g
373 do ist = 1, max_state
374 psi_global(ist, ic) = 0.0_real64
375 enddo
376 enddo
377
378 ! Ensure states does not iterate beyond the max state used in
379 ! ISDF expansion
380 st_end = min(st%st_end, max_state)
381
382 ! Sanity check on number of local states held by psi_mu
383 if (size(psi_mu, 1) > 0) then
384 assert(st_end - st%st_start + 1 == size(psi_mu, 1))
385 endif
386
387 ! Fill a section of psi_global using psi_mu
388 do ic = 1, n_int_g
389 do ist = st%st_start, st_end
390 is_local = ist - st%st_start + 1
391 psi_global(ist, ic) = psi_mu(is_local, ic)
392 enddo
393 enddo
394
395 call comm_allreduce(st%mpi_grp, psi_global)
396
398
399 end subroutine gather_psi_mu_over_states
400
401
402 ! -------------------------------------------
403 ! IO routines for testing and debugging
404 ! -------------------------------------------
405
408 subroutine output_psi_mu_for_all_states(namespace, st, max_state, psi_mu)
409 type(namespace_t), intent(in) :: namespace
410 type(states_elec_t), intent(in) :: st
411 integer, intent(in) :: max_state
412 real(real64), contiguous, intent(in) :: psi_mu(:, :)
413
414 real(real64), allocatable :: psi_global(:, :)
415 integer :: n_int_g, ic, ist, unit
416 character(len=2) :: np_char
417
419
420 if (st%group%psib(st%group%block_start, ik)%status() /= batch_packed) then
421 message(1) = "Trying to output psi_mu when not BATCH_PACKED"
422 call messages_fatal(1, namespace=namespace)
423 endif
424
425 write(message(1),'(a)') "ISDF: Writing psi_mu (all states/DD)"
426 call messages_info(1)
427
428 n_int_g = size(psi_mu, 2)
429 safe_allocate(psi_global(1:max_state, 1:n_int_g))
430
431 call gather_psi_mu_over_states(st, psi_mu, psi_global)
432
433 unit = io_open("global_psi_mu_np"//trim(adjustl(np_char))//".txt", namespace, action='write')
434
435 write(np_char, '(I2)') st%mpi_grp%size
436 do ic = 1, n_int_g
437 do ist = 1, max_state
438 write(unit, *) psi_global(ist, ic)
439 enddo
440 enddo
441
442 call io_close(unit)
443
444 safe_deallocate_a(psi_global)
445
447
448 end subroutine output_psi_mu_for_all_states
449
450#include "real.F90"
451#include "isdf_inc.F90"
452#include "undef.F90"
453
454end module isdf_oct_m
455
456!! Local Variables:
457!! mode: f90
458!! coding: utf-8
459!! End:
Matrix-matrix multiplication plus matrix.
Definition: lalg_basic.F90:227
pure logical function, public accel_is_enabled()
Definition: accel.F90:427
This module implements batches of mesh functions.
Definition: batch.F90:133
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:282
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:282
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:282
type(debug_t), save, public debug
Definition: debug.F90:156
Definition: io.F90:114
subroutine, public io_close(iunit, grp)
Definition: io.F90:418
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
Definition: io.F90:352
Interoperable Separable Density Fitting (ISDF) molecular implementation.
Definition: isdf.F90:114
integer function local_number_of_states(st, max_state)
Number of states contributing to the expansion, local to current process.
Definition: isdf.F90:419
subroutine gather_psi_mu_over_states(st, psi_mu, psi_global)
Gather state-distributed psi from multiple processes.
Definition: isdf.F90:443
subroutine dquasi_density_matrix_at_mesh_centroid_points(st, max_state, psi_mu, p_r_mu)
Compute the quasi-density matrix where one spatial coordinate is defined at grid points and the is de...
Definition: isdf.F90:686
subroutine output_psi_mu_for_all_states(namespace, st, max_state, psi_mu)
Output the gathered psi_mu for all states, such that the matrix is the same irregardless of state par...
Definition: isdf.F90:502
subroutine, public interpolative_separable_density_fitting_vectors(namespace, mesh, st, centroids, isdf_vectors)
Top-level routine for computing ISDF vectors.
Definition: isdf.F90:151
subroutine dphi_at_interpolation_points(mesh, st, centroids, max_state, psi_mu)
Construct a 2D array of states, defined only at a specific subset of grid points.
Definition: isdf.F90:601
subroutine, public interpolation_vector_gram_matrix(mesh, isdf_vectors, gram_matrix)
Compute the Gram matrix for the ISDF interpolation vectors.
Definition: isdf.F90:374
subroutine coefficient_product_matrix(p_phi, cct, p_psi)
Construct the coefficient product matrix .
Definition: isdf.F90:325
subroutine pair_product_coefficient_matrix(p_phi, zct, p_psi)
Construct the matrix-matrix product .
Definition: isdf.F90:267
subroutine, public output_matrix(namespace, fname, matrix)
Helper routine to output a 2D matrix.
Definition: isdf_utils.F90:134
integer function, public highest_occupied_index(st, ik_index)
Return the index of highest occupied Kohn-Sham state for k-point ik.
Definition: isdf_utils.F90:171
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:115
logical function, public is_symmetric(a, tol)
Check if a 2D array is symmetric.
Definition: math.F90:1475
This module defines various routines, operating on mesh functions.
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1113
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:414
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:616
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:270
pure logical function, public states_are_real(st)
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
Describes mesh distribution to nodes.
Definition: mesh.F90:186
The states_elec_t class contains all electronic wave functions.
int true(void)