main/doxygen_doc/isdf_8F90_source.html

!! Copyright (C) 2024 - 2025 A. Buccheri

!!

!! This program is free software; you can redistribute it and/or modify

!! it under the terms of the GNU General Public License as published by

!! the Free Software Foundation; either version 2, or (at your option)

!! any later version.

!!

!! This program is distributed in the hope that it will be useful,

!! but WITHOUT ANY WARRANTY; without even the implied warranty of

!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

!! GNU General Public License for more details.

!!

!! You should have received a copy of the GNU General Public License

!! along with this program; if not, write to the Free Software

!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

!! 02110-1301, USA.


#include "global.h"


module isdf_oct_m

  use, intrinsic :: iso_fortran_env, only: real64

  use accel_oct_m,         only: accel_is_enabled

  use batch_oct_m,         only: batch_not_packed, batch_packed, batch_device_packed

  use centroids_oct_m

  use comm_oct_m

  use debug_oct_m

  use global_oct_m

  use io_oct_m

  use isdf_utils_oct_m

  use lalg_basic_oct_m

  use lalg_adv_oct_m

  use math_oct_m

  use mesh_oct_m

  use mesh_function_oct_m, only: dmf_dotp

  use messages_oct_m

  use mpi_oct_m,           only: mpi_world

  use namespace_oct_m

  use profiling_oct_m

  use space_oct_m

  use states_abst_oct_m,   only: states_are_real

  use states_elec_oct_m


  implicit none

  private

  public :: &

    interpolative_separable_density_fitting_vectors, &

    interpolation_vector_gram_matrix


  ! TODO(Alex) Issue #1195 Extend ISDF to spin-polarised systems
  ! Until then a single, fixed index is used wherever the routines of this module need one:
  ! it is passed as the k-point index to highest_occupied_index and as the second index of
  ! st%group%psib when querying the batch status.

  integer, private, parameter   :: ik = 1


contains


  !> @brief Top-level routine for computing the ISDF interpolation vectors.
  !!
  !! The routine performs the following steps, in this order:
  !!  1. Call messages_not_implemented if st%d%nspin > 1, if the states are not real,
  !!     or if accel_is_enabled() is true.
  !!  2. Evaluate the states at the interpolation points (psi_mu).
  !!  3. Build the quasi-density matrix p_r_mu and, from it, Z C^T (zct). As no second
  !!     matrix is passed, zct is the element-wise product of p_r_mu with itself.
  !!  4. Build P_mu_nu = psi_mu^T psi_mu, reduce it over st%mpi_grp, and form C C^T (cct)
  !!     as its element-wise product with itself.
  !!  5. Replace cct by its SVD-based inverse and symmetrise it.
  !!  6. Return isdf_vectors = zct * cct.
  !!
  !! Work arrays are released as soon as the next quantity has been built from them.
  !! When debug%info is set, intermediate matrices are written to text files whose names
  !! carry the size of mpi_world.
  !!
  !! @param[in]  namespace     Namespace used for messages and debug output files.
  !! @param[in]  mesh          Mesh; mesh%np sets the first dimension of the grid-resolved arrays.
  !! @param[in]  st            Electronic states.
  !! @param[in]  centroids     Provides the global number of interpolation points.
  !! @param[out] isdf_vectors  Allocated here with shape (mesh%np, n_int_g).
  subroutine interpolative_separable_density_fitting_vectors(namespace, mesh, st, centroids, isdf_vectors)

    type(namespace_t),          intent(in ) :: namespace

    class(mesh_t),              intent(in ) :: mesh

    type(states_elec_t),        intent(in ) :: st

    type(centroids_t),          intent(in ) :: centroids


    real(real64), allocatable,  intent(out) :: isdf_vectors(:, :)


    ! Holds the process count as text, used in the names of the debug output files
    character(len=2)           :: np_char

    ! nocc: highest occupied state index for ik. n_int_g: global number of interpolation points
    integer                    :: nocc, n_int_g

    ! States evaluated at the interpolation points; allocated by dphi_at_interpolation_points.
    ! Shape depends on BATCH choice.
    real(real64),  allocatable :: psi_mu(:, :)

    ! Quasi-density matrix, shape (mesh%np, n_int_g):
    ! defined for all grid points and interpolation points.
    real(real64),  allocatable :: p_r_mu(:, :)

    ! Z C^T, shape (mesh%np, n_int_g)
    real(real64),  allocatable :: zct(:, :)

    ! Quasi-density matrix, shape (n_int_g, n_int_g),
    ! with both variables defined at interpolation points.
    real(real64),  allocatable :: p_mu_nu(:, :)

    ! C C^T, shape (n_int_g, n_int_g).
    ! Gets overwritten with its inverse.
    real(real64),  allocatable :: cct(:, :)


    push_sub_with_profile(interpolative_separable_density_fitting_vectors)


    ! TODO(Alex) Issue #1195 Extend ISDF to spin-polarised systems

    if (st%d%nspin > 1) then

      call messages_not_implemented("ISDF for SPIN_POLARIZED and SPINOR calculations", namespace)

    endif


    ! TODO(Alex) Issue #1196 Template ISDF handle both real and complex states

    if (.not. states_are_real(st)) then

      call messages_not_implemented("ISDF handling of complex states", namespace)

    endif


    ! TODO(Alex) Issue #1276 Implement ISDF on GPU

    if (accel_is_enabled()) then

      call messages_not_implemented("ISDF not supported on GPU", namespace)

    end if


    ! For debug file naming - assumes testing is done with up to 99 MPI processes,
    ! as np_char and the I2 edit descriptor only hold two digits

    write(np_char, '(I2)') mpi_world%size


    ! Total number of interpolation points

    n_int_g = centroids%npoints_global()


    ! Max number of states used in ISDF expansion

    nocc = highest_occupied_index(st, ik)


    ! Only the process whose state range [st_start, st_end] contains nocc reports it

    if (st%st_start <= nocc .and. nocc <= st%st_end) then

      write(message(1),'(a, 1x, I3, 1x, a, 1x, I3)') "ISDF: Computing ISDF vectors up to state", &

      & nocc, " on process", st%mpi_grp%rank

      call messages_info(1, namespace=namespace, debug_only=.true.)

    endif


    ! psi_mu allocated within the routine as shape varies w.r.t. PACKED or UNPACKED

    call dphi_at_interpolation_points(mesh, st, centroids, nocc, psi_mu)

    if (debug%info) call output_psi_mu_for_all_states(namespace, st, nocc, psi_mu)


    ! Quasi-density matrix with one index on the mesh and one on the interpolation points

    safe_allocate(p_r_mu(1:mesh%np, 1:n_int_g))

    call dquasi_density_matrix_at_mesh_centroid_points(st, nocc, psi_mu, p_r_mu)

    if (debug%info) call output_matrix(namespace, "p_r_mu_np"//trim(adjustl(np_char))//".txt", p_r_mu)


    ! Z C^T: p_psi is not passed, so this is the element-wise product of p_r_mu with itself

    safe_allocate(zct(1:mesh%np, 1:n_int_g))

    call pair_product_coefficient_matrix(p_r_mu, zct)

    if (debug%info) call output_matrix(namespace, "zct_np"//trim(adjustl(np_char))//".txt", zct)

    ! p_r_mu is only needed to build zct

    safe_deallocate_a(p_r_mu)


    safe_allocate(p_mu_nu(1:n_int_g, 1:n_int_g))

    ! Contract over the state index, ist: P_mu_nu = [psi_ist_mu]^T @ psi_ist_nu

    call lalg_gemm(psi_mu, psi_mu, p_mu_nu, transa='T')

    ! States may be distributed so all elements of p_mu_nu only contain a partial contribution from {ist}

    call comm_allreduce(st%mpi_grp, p_mu_nu)

    if (debug%info) call output_matrix(namespace, "p_mu_nu_np"//trim(adjustl(np_char))//".txt", p_mu_nu)

    ! psi_mu is not used beyond this point

    safe_deallocate_a(psi_mu)


    ! C C^T: p_psi is not passed, so this is the element-wise product of p_mu_nu with itself

    safe_allocate(cct(1:n_int_g, 1:n_int_g))

    call coefficient_product_matrix(p_mu_nu, cct)

    if (debug%info) call output_matrix(namespace, "cct_np"//trim(adjustl(np_char))//".txt", cct)

    ! The symmetry check is only performed in debug mode

    if (debug%info) then

      assert(is_symmetric(cct))

    endif

    safe_deallocate_a(p_mu_nu)


    ! [CC^T]^{-1}, mutating cct in-place

    ! NOTE, CC^T is extremely ill-conditioned once a critical number of interpolation points

    ! are used. Using the pseudo-inverse (SVD) circumvents this problem

    write(message(1),'(a)') "ISDF: Inverting [CC^T]"

    call messages_info(1, namespace=namespace, debug_only=.true.)

    call lalg_svd_inverse(n_int_g, n_int_g, cct)

    ! NOTE(review): presumably restores exact symmetry lost to round-off in the inversion - confirm

    call symmetrize_matrix(n_int_g, cct)


    ! zeta = [ZC^T][CC^T]^-1

    safe_allocate(isdf_vectors(1:mesh%np, 1:n_int_g))

    call lalg_gemm(mesh%np, n_int_g, n_int_g, 1.0_real64, zct, cct, 0.0_real64, isdf_vectors)


    ! ISDF vectors are distributed on the mesh, so do not output in that case

    if (debug%info .and. .not. mesh%parallel_in_domains) then

      call output_matrix(namespace, "isdf_np"//trim(adjustl(np_char))//".txt", isdf_vectors)

    endif


    safe_deallocate_a(zct)

    safe_deallocate_a(cct)


    pop_sub_with_profile(interpolative_separable_density_fitting_vectors)


  end subroutine interpolative_separable_density_fitting_vectors


  !> @brief Construct the matrix-matrix product \f$ZC^T\f$ as the element-wise product
  !!        of two quasi-density matrices.
  !!
  !! Each element is zct(ip, i_mu) = p_phi(ip, i_mu) * p_psi(ip, i_mu).
  !! When p_psi is absent, p_phi supplies both factors.
  !!
  !! @param[in]  p_phi  Quasi-density matrix \f$P^{\varphi}(\mathbf{r}, \mathbf{r}_\mu)\f$.
  !! @param[out] zct    Result. Must be supplied with the same shape as p_phi.
  !! @param[in]  p_psi  Optional quasi-density matrix \f$P^{\psi}(\mathbf{r}, \mathbf{r}_\mu)\f$,
  !!                    of the same shape as p_phi.
  subroutine pair_product_coefficient_matrix(p_phi, zct, p_psi)
    real(real64), target,           contiguous, intent(in ) :: p_phi(:, :)
    real(real64), target, optional, contiguous, intent(in ) :: p_psi(:, :)
    real(real64),                   contiguous, intent(out) :: zct(:, :)

    integer :: n_rows, n_cols, irow, icol
    real(real64), pointer, contiguous :: second_factor(:, :)

    push_sub_with_profile(pair_product_coefficient_matrix)

    message(1) = "ISDF: Constructing Z C^T"
    call messages_info(1, debug_only=.true.)

    ! Default to squaring p_phi; switch to p_psi only when the caller supplied it
    second_factor => p_phi
    if (present(p_psi)) second_factor => p_psi

    n_rows = size(p_phi, 1)
    n_cols = size(p_phi, 2)

    ! Both factors and the result must agree in shape for an element-wise product
    assert(all(shape(p_phi) == shape(second_factor)))
    assert(all(shape(p_phi) == shape(zct)))

    ! Every thread walks all columns; the rows of each column are shared out between
    ! the threads. Columns are independent, hence no barrier is needed between them.
    !$omp parallel
    do icol = 1, n_cols
      !$omp do simd
      do irow = 1, n_rows
        zct(irow, icol) = p_phi(irow, icol) * second_factor(irow, icol)
      enddo
      !$omp end do simd nowait
    enddo
    !$omp end parallel

    nullify(second_factor)

    pop_sub_with_profile(pair_product_coefficient_matrix)

  end subroutine pair_product_coefficient_matrix


  !> @brief Construct the coefficient product matrix \f$CC^T\f$ as the element-wise product
  !!        of two quasi-density matrices defined at the interpolation points.
  !!
  !! Each element is cct(i_mu, i_nu) = p_phi(i_mu, i_nu) * p_psi(i_mu, i_nu).
  !! When p_psi is absent, p_phi supplies both factors.
  !!
  !! @param[in]  p_phi  Quasi-density matrix \f$P^{\varphi}(\mathbf{r}_\mu, \mathbf{r}_\nu)\f$.
  !! @param[out] cct    Result. Array should be allocated by the caller, with the shape of p_phi.
  !! @param[in]  p_psi  Optional quasi-density matrix \f$P^{\psi}(\mathbf{r}_\mu, \mathbf{r}_\nu)\f$,
  !!                    of the same shape as p_phi.
  subroutine coefficient_product_matrix(p_phi, cct, p_psi)
    real(real64), target, contiguous,           intent(in ) :: p_phi(:, :)
    real(real64), target, contiguous, optional, intent(in ) :: p_psi(:, :)
    real(real64),         contiguous,           intent(out) :: cct(:, :)

    integer :: n_points, row, col
    real(real64), contiguous, pointer :: other(:, :)

    push_sub_with_profile(coefficient_product_matrix)

    message(1) = "ISDF: Construct CC^T"
    call messages_info(1, debug_only=.true.)

    ! Second factor of the product: p_psi when given, otherwise p_phi itself
    other => p_phi
    if (present(p_psi)) other => p_psi

    ! Both factors and the result must agree in shape for an element-wise product
    assert(all(shape(p_phi) == shape(other)))
    assert(all(shape(p_phi) == shape(cct)))

    ! The extent of the first dimension is used as the bound of both loops
    n_points = size(p_phi, 1)

    !$omp parallel do collapse(2) default(shared)
    do col = 1, n_points
      do row = 1, n_points
        cct(row, col) = p_phi(row, col) * other(row, col)
      enddo
    enddo
    !$omp end parallel do

    nullify(other)

    pop_sub_with_profile(coefficient_product_matrix)

  end subroutine coefficient_product_matrix


  !> @brief Compute the Gram matrix for the ISDF interpolation vectors.
  !!
  !! gram_matrix(i, j) is the mesh dot product (dmf_dotp) of columns i and j of
  !! isdf_vectors. Only the diagonal and the upper triangle are evaluated; the lower
  !! triangle is filled by symmetry. The dot products are taken with reduce=.false. and
  !! the whole matrix is reduced once at the end with mesh%allreduce.
  !!
  !! @param[in]  mesh          Mesh; mesh%np must equal the first extent of isdf_vectors.
  !! @param[in]  isdf_vectors  Interpolation vectors, one per column.
  !! @param[out] gram_matrix   Square matrix whose extent is the number of columns of isdf_vectors.
  subroutine interpolation_vector_gram_matrix(mesh, isdf_vectors, gram_matrix)
    class(mesh_t), intent(in)  :: mesh
    real(real64),  contiguous, intent(in ) :: isdf_vectors(:, :)
    real(real64),  contiguous, intent(out) :: gram_matrix(:, :)

    integer :: n_vectors, irow, icol

    push_sub(interpolation_vector_gram_matrix)

    n_vectors = size(isdf_vectors, 2)

    assert(mesh%np ==  size(isdf_vectors, 1))
    assert(all(shape(gram_matrix) == [n_vectors, n_vectors]))

    ! It would be more efficient to use DGEMM, but dmf_dotp ensures the correct volume element

    ! Sweep column by column: strictly-upper elements (mirrored into the lower triangle),
    ! then the diagonal element of that column
    do icol = 1, n_vectors
      do irow = 1, icol - 1
        gram_matrix(irow, icol) = dmf_dotp(mesh, isdf_vectors(:, irow), isdf_vectors(:, icol), reduce=.false.)
        gram_matrix(icol, irow) = gram_matrix(irow, icol)
      enddo
      gram_matrix(icol, icol) = dmf_dotp(mesh, isdf_vectors(:, icol), isdf_vectors(:, icol), reduce=.false.)
    enddo

    ! Single reduction of the full matrix, instead of one per dot product
    call mesh%allreduce(gram_matrix)

    pop_sub(interpolation_vector_gram_matrix)

  end subroutine interpolation_vector_gram_matrix


  ! -------------------------------------------

  ! Helper routines

  ! -------------------------------------------


  !> @brief Number of states contributing to the expansion, local to current process.
  !!
  !! The local range starts at the first state of block st%group%block_start and ends at
  !! the last state of block st%group%block_end, capped at max_state.
  !!
  !! @param[in] st         Electronic states, supplying the local block range.
  !! @param[in] max_state  Highest state index used in the expansion.
  !! @return    Count of states in the capped local range; zero when the range is empty.
  integer function local_number_of_states(st, max_state)
    type(states_elec_t), intent(in) :: st
    integer,             intent(in) :: max_state

    integer :: first_local, last_local, n_local

    push_sub(local_number_of_states)

    first_local = states_elec_block_min(st, st%group%block_start)

    ! Do not count states beyond max_state
    last_local = states_elec_block_max(st, st%group%block_end)
    if (last_local > max_state) last_local = max_state

    ! If max_state belongs to a block that precedes block_start of this process, the
    ! difference is negative: no local state is used in the ISDF expansion
    n_local = last_local - first_local + 1
    if (n_local < 0) n_local = 0

    local_number_of_states = n_local

    pop_sub(local_number_of_states)

  end function local_number_of_states


  !> @brief Gather state-distributed psi from multiple processes.
  !!
  !! Each process copies its local rows of psi_mu into the rows [st%st_start, st_end] of
  !! a zero-initialised psi_global, where st_end is st%st_end capped at the first extent
  !! of psi_global. The result is then reduced over st%mpi_grp.
  !!
  !! Raises a fatal error if the first local batch is not BATCH_PACKED.
  !!
  !! @param[in]  st          Electronic states, supplying the local state range and MPI group.
  !! @param[in]  psi_mu      Local states at the interpolation points, indexed (local state, point).
  !! @param[out] psi_global  Indexed (state, point); its extents define the number of states
  !!                         and interpolation points considered.
  subroutine gather_psi_mu_over_states(st, psi_mu, psi_global)
    type(states_elec_t),       intent(in ) :: st
    real(real64), contiguous,  intent(in ) :: psi_mu(:, :)
    real(real64), contiguous,  intent(out) :: psi_global(:, :)

    integer :: n_states_g, n_points, last_state, n_local

    push_sub(gather_psi_mu_over_states)

    if (st%group%psib(st%group%block_start, ik)%status() /= batch_packed) then
      message(1) = "Developer Error: Trying to output psi_mu when not BATCH_PACKED"
      call messages_fatal(1)
    endif

    ! Total number of states used in ISDF, and total number of interpolation points
    n_states_g = size(psi_global, 1)
    n_points = size(psi_global, 2)
    assert(size(psi_mu, 2) == size(psi_global, 2))

    ! Rows not owned by this process must be zero, so that the reduction does not
    ! pick up contributions from uninitialised elements
    psi_global(:, :) = 0.0_real64

    ! Last local state that takes part in the ISDF expansion, and the local count
    last_state = min(st%st_end, n_states_g)
    n_local = last_state - st%st_start + 1

    ! Sanity check on number of local states held by psi_mu
    if (size(psi_mu, 1) > 0) then
      assert(n_local == size(psi_mu, 1))
    endif

    ! Copy the local rows into their global position; nothing to do for an empty range
    if (n_local > 0) then
      psi_global(st%st_start:last_state, 1:n_points) = psi_mu(1:n_local, 1:n_points)
    endif

    call comm_allreduce(st%mpi_grp, psi_global)

    pop_sub(gather_psi_mu_over_states)

  end subroutine gather_psi_mu_over_states


  ! -------------------------------------------

  ! IO routines for testing and debugging

  ! -------------------------------------------


  !> @brief Output the gathered psi_mu for all states, such that the written matrix does
  !!        not depend on how the states are distributed over processes.
  !!
  !! psi_mu is gathered over st%mpi_grp into a (max_state, n_int_g) array, which is
  !! written one value per line, with the state index running fastest, to
  !! "global_psi_mu_np<N>.txt", where <N> is the size of st%mpi_grp.
  !!
  !! Raises a fatal error if the first local batch is not BATCH_PACKED.
  !!
  !! @param[in] namespace  Namespace used for the fatal message and for opening the file.
  !! @param[in] st         Electronic states, supplying the MPI group and batch status.
  !! @param[in] max_state  Number of states (rows) to gather and write.
  !! @param[in] psi_mu     Local states at the interpolation points, indexed (local state, point).
  subroutine output_psi_mu_for_all_states(namespace, st, max_state, psi_mu)
    type(namespace_t),         intent(in) :: namespace
    type(states_elec_t),       intent(in) :: st
    integer,                   intent(in) :: max_state
    real(real64), contiguous,  intent(in) :: psi_mu(:, :)

    real(real64),       allocatable :: psi_global(:, :)
    integer                         :: n_int_g, ic, ist, unit
    character(len=2)                :: np_char

    push_sub(output_psi_mu_for_all_states)

    if (st%group%psib(st%group%block_start, ik)%status() /= batch_packed) then
      message(1) = "Trying to output psi_mu when not BATCH_PACKED"
      call messages_fatal(1, namespace=namespace)
    endif

    write(message(1),'(a)') "ISDF: Writing psi_mu (all states/DD)"
    call messages_info(1)

    n_int_g = size(psi_mu, 2)
    safe_allocate(psi_global(1:max_state, 1:n_int_g))

    call gather_psi_mu_over_states(st, psi_mu, psi_global)

    ! np_char must be defined BEFORE it is used to build the file name; previously the
    ! file was opened first, so the name was built from an undefined character variable.
    ! The two-character field only holds group sizes up to 99.
    write(np_char, '(I2)') st%mpi_grp%size

    unit = io_open("global_psi_mu_np"//trim(adjustl(np_char))//".txt", namespace, action='write')

    ! One value per record, state index running fastest
    do ic = 1, n_int_g
      do ist = 1, max_state
        write(unit, *) psi_global(ist, ic)
      enddo
    enddo

    call io_close(unit)

    safe_deallocate_a(psi_global)

    pop_sub(output_psi_mu_for_all_states)

  end subroutine output_psi_mu_for_all_states


#include "real.F90"

#include "isdf_inc.F90"

#include "undef.F90"


end module isdf_oct_m


!! Local Variables:

!! mode: f90

!! coding: utf-8

!! End:

comm_oct_m::comm_allreduce
Definition: comm.F90:129

lalg_adv_oct_m::lalg_svd_inverse
Definition: lalg_adv.F90:214

lalg_basic_oct_m::lalg_gemm
Matrix-matrix multiplication plus matrix.
Definition: lalg_basic.F90:227

math_oct_m::symmetrize_matrix
Definition: math.F90:192

mesh_function_oct_m::dmf_dotp
Definition: mesh_function.F90:185

accel_oct_m
Definition: accel.F90:114

accel_oct_m::accel_is_enabled
pure logical function, public accel_is_enabled()
Definition: accel.F90:427

batch_oct_m
This module implements batches of mesh functions.
Definition: batch.F90:133

batch_oct_m::batch_not_packed
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:282

batch_oct_m::batch_device_packed
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:282

batch_oct_m::batch_packed
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:282

centroids_oct_m
Definition: centroids.F90:113

comm_oct_m
Definition: comm.F90:114

debug_oct_m
Definition: debug.F90:114

debug_oct_m::debug
type(debug_t), save, public debug
Definition: debug.F90:156

global_oct_m
Definition: global.F90:114

io_oct_m
Definition: io.F90:114

io_oct_m::io_close
subroutine, public io_close(iunit, grp)
Definition: io.F90:418

io_oct_m::io_open
integer function, public io_open(file, namespace, action, status, form, position, die, recl, grp)
Definition: io.F90:352

isdf_oct_m
Interpolative Separable Density Fitting (ISDF) molecular implementation.
Definition: isdf.F90:114

isdf_oct_m::local_number_of_states
integer function local_number_of_states(st, max_state)
Number of states contributing to the expansion, local to current process.
Definition: isdf.F90:419

isdf_oct_m::gather_psi_mu_over_states
subroutine gather_psi_mu_over_states(st, psi_mu, psi_global)
Gather state-distributed psi from multiple processes.
Definition: isdf.F90:443

isdf_oct_m::dquasi_density_matrix_at_mesh_centroid_points
subroutine dquasi_density_matrix_at_mesh_centroid_points(st, max_state, psi_mu, p_r_mu)
Compute the quasi-density matrix where one spatial coordinate is defined at grid points and the other is defined at interpolation points.
Definition: isdf.F90:686

isdf_oct_m::output_psi_mu_for_all_states
subroutine output_psi_mu_for_all_states(namespace, st, max_state, psi_mu)
Output the gathered psi_mu for all states, such that the matrix is the same regardless of state par...
Definition: isdf.F90:502

isdf_oct_m::interpolative_separable_density_fitting_vectors
subroutine, public interpolative_separable_density_fitting_vectors(namespace, mesh, st, centroids, isdf_vectors)
Top-level routine for computing ISDF vectors.
Definition: isdf.F90:151

isdf_oct_m::dphi_at_interpolation_points
subroutine dphi_at_interpolation_points(mesh, st, centroids, max_state, psi_mu)
Construct a 2D array of states, defined only at a specific subset of grid points.
Definition: isdf.F90:601

isdf_oct_m::interpolation_vector_gram_matrix
subroutine, public interpolation_vector_gram_matrix(mesh, isdf_vectors, gram_matrix)
Compute the Gram matrix for the ISDF interpolation vectors.
Definition: isdf.F90:374

isdf_oct_m::coefficient_product_matrix
subroutine coefficient_product_matrix(p_phi, cct, p_psi)
Construct the coefficient product matrix .
Definition: isdf.F90:325

isdf_oct_m::pair_product_coefficient_matrix
subroutine pair_product_coefficient_matrix(p_phi, zct, p_psi)
Construct the matrix-matrix product \f$ZC^{T}\f$.
Definition: isdf.F90:267

isdf_utils_oct_m
Definition: isdf_utils.F90:113

isdf_utils_oct_m::output_matrix
subroutine, public output_matrix(namespace, fname, matrix)
Helper routine to output a 2D matrix.
Definition: isdf_utils.F90:134

isdf_utils_oct_m::highest_occupied_index
integer function, public highest_occupied_index(st, ik_index)
Return the index of highest occupied Kohn-Sham state for k-point ik.
Definition: isdf_utils.F90:171

lalg_adv_oct_m
Definition: lalg_adv.F90:114

lalg_basic_oct_m
Definition: lalg_basic.F90:114

math_oct_m
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:115

math_oct_m::is_symmetric
logical function, public is_symmetric(a, tol)
Check if a 2D array is symmetric.
Definition: math.F90:1475

mesh_function_oct_m
This module defines various routines, operating on mesh functions.
Definition: mesh_function.F90:116

mesh_oct_m
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118

messages_oct_m
Definition: messages.F90:115

messages_oct_m::messages_not_implemented
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1113

messages_oct_m::message
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160

messages_oct_m::messages_fatal
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:414

messages_oct_m::messages_info
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:616

mpi_oct_m
Definition: mpi.F90:114

mpi_oct_m::mpi_world
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:270

namespace_oct_m
Definition: namespace.F90:103

profiling_oct_m
Definition: profiling.F90:116

space_oct_m
Definition: space.F90:114

states_abst_oct_m
Definition: states_abst.F90:113

states_abst_oct_m::states_are_real
pure logical function, public states_are_real(st)
Definition: states_abst.F90:208

states_elec_oct_m
Definition: states_elec.F90:113

states_elec_oct_m::states_elec_block_max
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
Definition: states_elec.F90:2571

states_elec_oct_m::states_elec_block_min
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
Definition: states_elec.F90:2561

mesh_oct_m::mesh_t
Describes mesh distribution to nodes.
Definition: mesh.F90:186

namespace_oct_m::namespace_t
Definition: namespace.F90:115

states_elec_oct_m::states_elec_t
The states_elec_t class contains all electronic wave functions.
Definition: states_elec.F90:226

true
int true(void)
Definition: symmetries_finite.c:3153