16.4/doxygen_doc/nl__operator_8F90_source.html

!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch

!!

!! This program is free software; you can redistribute it and/or modify

!! it under the terms of the GNU General Public License as published by

!! the Free Software Foundation; either version 2, or (at your option)

!! any later version.

!!

!! This program is distributed in the hope that it will be useful,

!! but WITHOUT ANY WARRANTY; without even the implied warranty of

!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

!! GNU General Public License for more details.

!!

!! You should have received a copy of the GNU General Public License

!! along with this program; if not, write to the Free Software

!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

!! 02110-1301, USA.

!!


#include "global.h"


module nl_operator_oct_m

  use accel_oct_m

  use batch_oct_m

  use boundaries_oct_m

  use debug_oct_m

  use global_oct_m

  use index_oct_m

  use iso_c_binding

  use math_oct_m

  use mesh_oct_m

  use messages_oct_m

  use mpi_oct_m

  use multicomm_oct_m

  use namespace_oct_m

  use operate_f_oct_m

  use par_vec_oct_m

  use parser_oct_m

  use profiling_oct_m

  use space_oct_m

  use stencil_oct_m

  use types_oct_m

  use varinfo_oct_m


  implicit none


  private

  public ::                     &

    nl_operator_t,              &

    nl_operator_index_t,        &

    nl_operator_global_init,    &

    nl_operator_global_end,     &

    nl_operator_init,           &

    nl_operator_copy,           &

    nl_operator_build,          &

    dnl_operator_operate,       &

    znl_operator_operate,       &

    dnl_operator_operate_batch, &

    znl_operator_operate_batch, &

    dnl_operator_operate_diag,  &

    znl_operator_operate_diag,  &

    nl_operator_end,            &

    nl_operator_adjoint,        &

    nl_operator_get_index,      &

    nl_operator_output_weights, &

    nl_operator_np_zero_bc,     &

    nl_operator_compact_boundaries, &

    nl_operator_allocate_gpu_buffers, &

    nl_operator_update_gpu_buffers


  ! Compressed table of stencil offsets for a subset of the index ranges of an
  ! operator. nl_operator_build fills one such table for the "inner" ranges and
  ! one for the "outer" ranges when the mesh is parallel in domains.
  type nl_operator_index_t

    private

    ! Number of index ranges held in this table
    integer              :: nri = 0

    ! Start of each range (copied from rimap_inv(ir)); the points of range ir
    ! are visited as imin(ir) + 1, ..., imax(ir)
    integer, allocatable :: imin(:)

    ! End of each range (copied from rimap_inv(ir + 1))
    integer, allocatable :: imax(:)

    ! Stencil offsets of each range, dimensioned (stencil size, nri)
    integer, allocatable :: ri(:, :)

  end type nl_operator_index_t


  ! A non-local (stencil based) operator on a mesh: the stencil, its weights,
  ! the compressed index tables built by nl_operator_build and, when an
  ! accelerator is in use, the corresponding device buffers.
  type nl_operator_t

    private

    ! Stencil of the operator (relative integer coordinates of the neighbours)
    type(stencil_t),   public :: stencil

    ! Mesh the operator was built for; set by nl_operator_build / nl_operator_copy
    type(mesh_t), pointer     :: mesh => null()

    ! Only allocated when mesh%use_curvilinear; every entry is set to stencil%size
    integer,      allocatable :: nn(:)

    ! Number of points passed to nl_operator_build
    integer,           public :: np = 0

    ! When running in parallel mode, the next three arrays are unique on each node.
    ! NOTE(review): only one array (w) directly follows this comment - confirm what "three" refers to.
    ! Weights: (stencil%size, 1) if const_w, otherwise (stencil%size, np)
    real(real64),   allocatable, public :: w(:,:)


    ! .true. if a single set of weights is shared by all points
    logical,          public :: const_w = .true.


    ! Device copies of w(:, 1) and of -m_half*w(:, 1); only used when const_w
    type(accel_mem_t), public :: buff_weights

    type(accel_mem_t), public :: buff_half_weights


    ! Name of the operator, used in debug output
    character(len=40) :: label


    ! Number of index ranges, i.e. runs of consecutive points that share the same stencil offsets
    integer, public :: nri = 0

    ! ri(is, ir): offset of stencil point is for range ir; the neighbour of point ip is ip + ri(is, rimap(ip))
    integer, allocatable, public :: ri(:,:)

    ! rimap(ip): index of the range that point ip belongs to
    integer, allocatable, public :: rimap(:)

    ! Range ir covers the points rimap_inv(ir) + 1, ..., rimap_inv(ir + 1); dimensioned nri + 1
    integer, allocatable, public :: rimap_inv(:)


    ! Number of inner / outer points counted for the 'map' accelerator kernel
    integer                   :: ninner = 0

    integer                   :: nouter = 0


    ! Index tables of the ranges whose stencil stays within the first np points (inner)
    ! and of the remaining ranges (outer); filled when the mesh is parallel in domains
    type(nl_operator_index_t) :: inner

    type(nl_operator_index_t) :: outer


    ! Accelerator kernel and device buffers created in nl_operator_build
    type(accel_kernel_t) :: kernel

    type(accel_mem_t) :: buff_imin

    type(accel_mem_t) :: buff_imax

    type(accel_mem_t) :: buff_ri

    type(accel_mem_t) :: buff_map

    type(accel_mem_t) :: buff_all

    type(accel_mem_t) :: buff_inner

    type(accel_mem_t) :: buff_outer

    ! NOTE(review): the three buffers below are only released (op_nomap branch of
    ! nl_operator_end); they are not created anywhere in the visible source.
    type(accel_mem_t) :: buff_stencil

    type(accel_mem_t) :: buff_ip_to_xyz

    type(accel_mem_t) :: buff_xyz_to_ip


    ! For multigrid solvers
    type(nl_operator_t), public, pointer :: coarser => null()


  end type nl_operator_t


  ! Values of the input variables OperateDouble / OperateComplex
  ! (0 = 'fortran', 1 = 'optimized'), plus the bounds of that range.
  integer, parameter :: &

    OP_FORTRAN = 0,  &

    op_vec     = 1,  &

    op_min     = op_fortran, &

    op_max     = op_vec


  ! Values of the input variable OperateAccel (1 = 'invmap', 2 = 'map').
  ! op_nomap is not offered as an input option; it is only tested in nl_operator_end.
  integer, parameter ::     &

    OP_INVMAP    = 1,       &

    op_map       = 2,       &

    op_nomap     = 3


  ! Selectors exported to callers; presumably they choose which part of the
  ! points an operator is applied to - TODO confirm against the operate routines.
  integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2


  ! Value of the input variable NLOperatorCompactBoundaries, set in nl_operator_global_init
  logical :: compact_boundaries


  ! Explicit interface for the external function op_is_available, which is not
  ! implemented in this file. Presumably it reports whether the implementation
  ! opid exists for the given data type - TODO confirm against its definition.
  interface

    integer function op_is_available(opid, type)

      implicit none

      integer, intent(in) :: opid, type

    end function op_is_available

  end interface


  ! Implementation selected through OperateDouble (set in nl_operator_global_init; -1 until then)
  integer :: dfunction_global = -1

  ! Implementation selected through OperateComplex (set in nl_operator_global_init; -1 until then)
  integer :: zfunction_global = -1

  ! Implementation selected through OperateAccel; only assigned when an accelerator is enabled
  integer :: function_opencl


contains


  ! ---------------------------------------------------------

  ! Read the module-wide input options that control how non-local operators
  ! are applied.
  !
  ! Sets dfunction_global (OperateDouble), zfunction_global (OperateComplex),
  ! function_opencl (OperateAccel, parsed only when an accelerator is enabled)
  ! and compact_boundaries (NLOperatorCompactBoundaries). Every integer option
  ! is checked against its documented values; an input error is raised for
  ! anything else.
  !
  !   namespace - namespace the input variables are read from
  !
  ! The "!%" comment blocks are the input-variable documentation and are kept verbatim.
  subroutine nl_operator_global_init(namespace)
    type(namespace_t),         intent(in)    :: namespace

    integer :: default

    push_sub(nl_operator_global_init)

    !%Variable OperateDouble
    !%Type integer
    !%Section Execution::Optimization
    !%Default optimized
    !%Description
    !% This variable selects the subroutine used to apply non-local
    !% operators over the grid for real functions.
    !%Option fortran 0
    !% The standard Fortran function.
    !%Option optimized 1
    !% This version is optimized using vector primitives (if available).
    !%End

    !%Variable OperateComplex
    !%Type integer
    !%Section Execution::Optimization
    !%Default optimized
    !%Description
    !% This variable selects the subroutine used to apply non-local
    !% operators over the grid for complex functions.
    !%Option fortran 0
    !% The standard Fortran function.
    !%Option optimized 1
    !% This version is optimized using vector primitives (if available).
    !%End

    ! Both the real and the complex variant default to the vectorised implementation
    default = op_vec

    call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
    if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')

    call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
    if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')

    if (accel_is_enabled()) then

      !%Variable OperateAccel
      !%Type integer
      !%Default map
      !%Section Execution::Optimization
      !%Description
      !% This variable selects the subroutine used to apply non-local
      !% operators over the grid when an accelerator device is used.
      !%Option invmap 1
      !% The standard implementation ported to OpenCL.
      !%Option map 2
      !% A different version, more suitable for GPUs.
      !%End
      call parse_variable(namespace, 'OperateAccel',  op_map, function_opencl)

      ! Reject values that are not documented options, as is done for the two
      ! variables above: nl_operator_build only knows how to build a kernel
      ! for invmap and map.
      if (.not. varinfo_valid_option('OperateAccel', function_opencl)) then
        call messages_input_error(namespace, 'OperateAccel')
      end if

      call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')

    end if

    !%Variable NLOperatorCompactBoundaries
    !%Type logical
    !%Default no
    !%Section Execution::Optimization
    !%Description
    !% (Experimental) When set to yes, for finite systems Octopus will
    !% map boundary points for finite-differences operators to a few
    !% memory locations. This increases performance, however it is
    !% experimental and has not been thoroughly tested.
    !%End

    call parse_variable(namespace, 'NLOperatorCompactBoundaries', .false., compact_boundaries)

    if (compact_boundaries) then
      call messages_experimental('NLOperatorCompactBoundaries')
    end if

    pop_sub(nl_operator_global_init)
  end subroutine nl_operator_global_init


  ! ---------------------------------------------------------


  ! Counterpart of nl_operator_global_init. The module holds no global
  ! resources, so the body consists of the push_sub/pop_sub pair only.
  subroutine nl_operator_global_end()

    push_sub(nl_operator_global_end)
    pop_sub(nl_operator_global_end)

  end subroutine nl_operator_global_end


  ! ---------------------------------------------------------

  ! Name an operator. Only op%label is modified; everything else is set up
  ! later by nl_operator_build (or nl_operator_copy).
  !
  !   op    - operator to initialise
  !   label - name stored in op%label (a len=40 component)
  subroutine nl_operator_init(op, label)
    type(nl_operator_t), intent(inout) :: op
    character(len=*),    intent(in)    :: label

    push_sub(nl_operator_init)

    op%label = label

    pop_sub(nl_operator_init)

  end subroutine nl_operator_init


  ! ---------------------------------------------------------

  ! Make opo a copy of opi.
  !
  ! opo is first released with nl_operator_end and re-labelled; afterwards the
  ! stencil, the scalar settings, the weights and all index tables of opi are
  ! duplicated. The mesh is shared: opo%mesh points to the same mesh as opi%mesh.
  !
  !   opo - destination operator (any previous content is released)
  !   opi - source operator; its index table ri must be allocated
  subroutine nl_operator_copy(opo, opi)
    type(nl_operator_t),         intent(inout) :: opo
    type(nl_operator_t), target, intent(in)    :: opi

    push_sub(nl_operator_copy)

    ! We cannot currently copy the GPU kernel for the nl_operator
    assert(.not. accel_is_enabled())

    ! Start from a clean destination that carries the label of the source
    call nl_operator_end(opo)
    call nl_operator_init(opo, opi%label)

    call stencil_copy(opi%stencil, opo%stencil)

    ! Scalar settings and the (shared) mesh
    opo%mesh    => opi%mesh
    opo%np      =  opi%np
    opo%const_w =  opi%const_w
    opo%nri     =  opi%nri

    ! Sizes and weights
    safe_allocate_source_a(opo%nn, opi%nn)
    safe_allocate_source_a(opo%w, opi%w)

    ! Index tables of the full operator
    assert(allocated(opi%ri))

    safe_allocate_source_a(opo%ri, opi%ri)
    safe_allocate_source_a(opo%rimap, opi%rimap)
    safe_allocate_source_a(opo%rimap_inv, opi%rimap_inv)

    ! Inner/outer tables are duplicated only for domain-parallel meshes
    if (opi%mesh%parallel_in_domains) then
      opo%inner%nri = opi%inner%nri
      opo%outer%nri = opi%outer%nri

      safe_allocate_source_a(opo%inner%imin, opi%inner%imin)
      safe_allocate_source_a(opo%inner%imax, opi%inner%imax)
      safe_allocate_source_a(opo%inner%ri,   opi%inner%ri)

      safe_allocate_source_a(opo%outer%imin, opi%outer%imin)
      safe_allocate_source_a(opo%outer%imax, opi%outer%imax)
      safe_allocate_source_a(opo%outer%ri,   opi%outer%ri)
    end if

    ! We create the corresponding GPU buffers: the weights and minus one half of the weights
    if (accel_is_enabled() .and. opo%const_w) then
      call accel_create_buffer(opo%buff_weights, accel_mem_read_only, type_float, opo%stencil%size)
      call accel_write_buffer(opo%buff_weights, opo%stencil%size, opo%w(:, 1))

      call accel_create_buffer(opo%buff_half_weights, accel_mem_read_only, type_float, opo%stencil%size)
      call accel_write_buffer(opo%buff_half_weights, opo%stencil%size, -m_half*opo%w(:, 1))
    end if

    pop_sub(nl_operator_copy)

  end subroutine nl_operator_copy


  ! ---------------------------------------------------------

  ! Build the index tables (and, on accelerators, the kernel and device
  ! buffers) of a non-local operator.
  !
  ! The weights op%w are allocated here and set to zero; they are not computed
  ! by this routine. Consecutive points that share the same stencil offsets are
  ! grouped into index ranges: op%ri holds the offsets of each range, op%rimap
  ! the range of each point and op%rimap_inv the boundaries of the ranges.
  !
  !   space      - supplies the dimension (space%dim) and the periodicity query
  !   mesh       - mesh the operator acts on; op keeps a pointer to it
  !   op         - operator to build
  !   np         - number of points the operator is applied to (asserted > 0)
  !   const_w    - .true. if one set of weights is shared by all points (default .false.)
  !   regenerate - .true. to release the inner/outer tables of a previous build
  !                before allocating them again (default .false.)
  subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
    class(space_t),       intent(in)    :: space
    type(mesh_t), target, intent(in)    :: mesh
    type(nl_operator_t),  intent(inout) :: op
    integer,              intent(in)    :: np
    logical, optional,    intent(in)    :: const_w
    logical, optional,    intent(in)    :: regenerate

    integer :: ii, jj, p1(space%dim), time, current
    integer, allocatable :: st1(:), st2(:), st1r(:)
    integer :: ir, maxp, iinner, iouter
    logical :: change, force_change
    character(len=200) :: flags
    integer, allocatable :: inner_points(:), outer_points(:), all_points(:)

    push_sub(nl_operator_build)

    op%const_w = optional_default(const_w, .false.)
    if (mesh%parallel_in_domains .and. .not. op%const_w) then
      call messages_experimental('Domain parallelization with curvilinear coordinates')
    end if

    assert(np > 0)

    ! store values in structure
    op%np       = np
    op%mesh     => mesh

    ! allocate weights op%w
    if (op%const_w) then
      safe_allocate(op%w(1:op%stencil%size, 1))
      message(1) = 'Debug: nl_operator_build: working with constant weights.'
      call messages_info(1, debug_only=.true.)
    else
      safe_allocate(op%w(1:op%stencil%size, 1:op%np))
      message(1) = 'Debug: nl_operator_build: working with non-constant weights.'
      call messages_info(1, debug_only=.true.)
    end if

    ! set initially to zero
    op%w = m_zero

    ! Build lookup table
    ! st1: offsets of the current point, st2: offsets of the current range,
    ! st1r: st1 with boundary entries replaced by those of st2 where allowed
    safe_allocate(st1(1:op%stencil%size))
    safe_allocate(st1r(1:op%stencil%size))
    safe_allocate(st2(1:op%stencil%size))

    ! Two passes over the points: the first one counts the ranges (op%nri),
    ! the second one stores them. 'current' is reset when the tables are
    ! allocated at the end of the first pass.
    op%nri = 0
    do time = 1, 2
      st2 = 0
      do ii = 1, np
        p1 = 0
        call mesh_local_index_to_coords(mesh, ii, p1)

        do jj = 1, op%stencil%size
          ! Get local index of p1 plus current stencil point.
          st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))

          ! if boundary conditions are zero, we can remap boundary
          ! points to reduce memory accesses. We cannot do this for the
          ! first point, since it is used to build the weights, so it
          ! has to have the positions right
          if (ii > 1 .and. compact_boundaries .and. mesh_compact_boundaries(mesh) .and. .not. space%is_periodic()) then
            st1(jj) = min(st1(jj), mesh%np + 1)
          end if
          assert(st1(jj) > 0)
        end do

        ! from absolute neighbour indices to offsets relative to the point
        st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii

        change = any(st1 /= st2)

        !the next is to detect when we move from a point that does not
        !have boundary points as neighbours to one that has
        force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)

        if (change .and. compact_boundaries .and. mesh_compact_boundaries(mesh) .and. .not. space%is_periodic()) then
          !try to repair it by changing the boundary points
          do jj = 1, op%stencil%size
            if (st1(jj) + ii > mesh%np .and. st2(jj) + ii - 1 > mesh%np .and. st2(jj) + ii <= mesh%np_part) then
              st1r(jj) = st2(jj)
            else
              st1r(jj) = st1(jj)
            end if
          end do

          change = any(st1r /= st2)

          if (.not. change) st1(:) = st1r(:)
        end if

        ! if the stencil changes
        if (change .or. force_change) then
          !store it
          st2(:) = st1(:)

          !first time, just count
          if (time == 1) op%nri = op%nri + 1

          !second time, store
          if (time == 2) then
            current = current + 1
            op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
          end if
        end if

        if (time == 2) op%rimap(ii) = current

      end do

      !after counting, allocate
      if (time == 1) then
        safe_deallocate_a(op%ri)
        safe_deallocate_a(op%rimap)
        safe_deallocate_a(op%rimap_inv)

        safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
        safe_allocate(op%rimap(1:op%np))
        safe_allocate(op%rimap_inv(1:op%nri + 1))
        op%ri        = 0
        op%rimap     = 0
        op%rimap_inv = 0
        current      = 0

        ! the sizes
        if (mesh%use_curvilinear) then
          safe_allocate(op%nn(1:op%nri))
          ! for the moment all the sizes are the same
          op%nn = op%stencil%size
        end if
      end if

    end do

    !the inverse mapping
    ! points are visited in increasing order, so entry ir + 1 ends up holding
    ! the last point of range ir
    op%rimap_inv(1) = 0
    do jj = 1, op%np
      op%rimap_inv(op%rimap(jj) + 1) = jj
    end do
    op%rimap_inv(op%nri + 1) = op%np

    safe_deallocate_a(st1)
    safe_deallocate_a(st1r)
    safe_deallocate_a(st2)

    if (op%mesh%parallel_in_domains) then
      !now build the arrays required to apply the nl_operator by parts

      !count points
      ! a range is "inner" if, for its last point, no stencil neighbour lies beyond np
      op%inner%nri = 0
      op%outer%nri = 0
      do ir = 1, op%nri
        maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
        if (maxp <= np) then
          !inner point
          op%inner%nri = op%inner%nri + 1
          assert(op%inner%nri <= op%nri)
        else
          !outer point
          op%outer%nri = op%outer%nri + 1
          assert(op%outer%nri <= op%nri)
        end if
      end do

      assert(op%inner%nri + op%outer%nri == op%nri)

      if (optional_default(regenerate, .false.)) then
        safe_deallocate_a(op%inner%imin)
        safe_deallocate_a(op%inner%imax)
        safe_deallocate_a(op%inner%ri)
        safe_deallocate_a(op%outer%imin)
        safe_deallocate_a(op%outer%imax)
        safe_deallocate_a(op%outer%ri)
      end if
      safe_allocate(op%inner%imin(1:op%inner%nri + 1))
      safe_allocate(op%inner%imax(1:op%inner%nri))
      safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))

      safe_allocate(op%outer%imin(1:op%outer%nri + 1))
      safe_allocate(op%outer%imax(1:op%outer%nri))
      safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))

      !now populate the arrays
      iinner = 0
      iouter = 0
      do ir = 1, op%nri
        maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
        if (maxp <= np) then
          !inner point
          iinner = iinner + 1
          op%inner%imin(iinner) = op%rimap_inv(ir)
          op%inner%imax(iinner) = op%rimap_inv(ir + 1)
          op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
        else
          !outer point
          iouter = iouter + 1
          op%outer%imin(iouter) = op%rimap_inv(ir)
          op%outer%imax(iouter) = op%rimap_inv(ir + 1)
          op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
        end if
      end do

      !verify that all points in the inner operator are actually inner
      do ir = 1, op%inner%nri
        do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
          assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
        end do
      end do

    end if

    if (accel_is_enabled() .and. op%const_w) then

      ! compile-time flags of the kernel: the stencil size and, for
      ! domain-parallel meshes, the indirect addressing of the points
      write(flags, '(i5)') op%stencil%size
      flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))

      if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)

      select case (function_opencl)
      case (op_invmap)
        call accel_kernel_build(op%kernel, 'operate.cl', 'operate', flags)
      case (op_map)
        call accel_kernel_build(op%kernel, 'operate.cl', 'operate_map', flags)
      end select

      ! conversion to i8 needed to avoid integer overflow
      call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
      call accel_write_buffer(op%buff_ri, int(op%nri, int64)*op%stencil%size, op%ri)

      select case (function_opencl)
      case (op_invmap)
        ! range boundaries: imin = rimap_inv(1:nri), imax = rimap_inv(2:nri + 1)
        call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
        call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
        call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
        call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))

      case (op_map)

        ! per point: zero-based position of its offsets inside buff_ri
        call accel_create_buffer(op%buff_map, accel_mem_read_only, type_integer, pad(op%mesh%np, accel_max_workgroup_size()))
        call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)

        if (op%mesh%parallel_in_domains) then

          safe_allocate(inner_points(1:op%mesh%np))
          safe_allocate(outer_points(1:op%mesh%np))
          safe_allocate(all_points(1:op%mesh%np))

          op%ninner = 0
          op%nouter = 0

          ! zero-based lists of all, inner and outer points
          do ii = 1, op%mesh%np
            all_points(ii) = ii - 1
            maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
            if (maxp <= op%mesh%np) then
              op%ninner = op%ninner + 1
              inner_points(op%ninner) = ii - 1
            else
              op%nouter = op%nouter + 1
              outer_points(op%nouter) = ii - 1
            end if
          end do

          call accel_create_buffer(op%buff_all, accel_mem_read_only, type_integer, pad(op%mesh%np, accel_max_workgroup_size()))
          call accel_write_buffer(op%buff_all, op%mesh%np, all_points)

          call accel_create_buffer(op%buff_inner, accel_mem_read_only, type_integer, pad(op%ninner, accel_max_workgroup_size()))
          call accel_write_buffer(op%buff_inner, op%ninner, inner_points)

          call accel_create_buffer(op%buff_outer, accel_mem_read_only, type_integer, pad(op%nouter, accel_max_workgroup_size()))
          call accel_write_buffer(op%buff_outer, op%nouter, outer_points)

          safe_deallocate_a(inner_points)
          safe_deallocate_a(outer_points)
          safe_deallocate_a(all_points)

        end if
      end select
    end if

    pop_sub(nl_operator_build)

  end subroutine nl_operator_build


  ! ---------------------------------------------------------

  ! Write the weights of an operator as debug information.
  !
  ! The header names the operator and lists the mesh spacing per direction;
  ! it is followed by one line per stencil point with its number, its integer
  ! coordinates and the weight this%w(point, 1). A line format exists for one,
  ! two and three dimensions.
  !
  !   this - operator whose weights are written
  subroutine nl_operator_output_weights(this)
    type(nl_operator_t), intent(inout)  :: this

    integer :: ipoint, ixyz, ndim

    push_sub(nl_operator_output_weights)

    ndim = this%mesh%box%dim

    ! header: operator name and grid spacing
    write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
    write(message(2), '(a)')  '            Spacing:'
    do ixyz = 1, ndim
      write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(ixyz)
    end do
    call messages_info(2, debug_only=.true.)

    ! one line per stencil point
    do ipoint = 1, this%stencil%size
      if (ndim == 1) then
        write(message(1), '(a,i3,1i4,f25.10)') '      ', ipoint, this%stencil%points(1:1, ipoint), this%w(ipoint, 1)
      else if (ndim == 2) then
        write(message(1), '(a,i3,2i4,f25.10)') '      ', ipoint, this%stencil%points(1:2, ipoint), this%w(ipoint, 1)
      else if (ndim == 3) then
        write(message(1), '(a,i3,3i4,f25.10)') '      ', ipoint, this%stencil%points(1:3, ipoint), this%w(ipoint, 1)
      end if
      call messages_info(1, debug_only=.true.)
    end do

    pop_sub(nl_operator_output_weights)

  end subroutine nl_operator_output_weights


  ! ---------------------------------------------------------

  ! Build in opt the self-adjoint or skew-adjoint counterpart of op.
  !
  ! opt starts as a copy of op. For non-constant weights every weight is
  ! replaced by
  !
  !   opt%w(jp, ip) = 1/2 w(jp, ip) + factor * 1/2 * vol(index)/vol(ip) * w(lp, index)
  !
  ! where index is the neighbour reached from ip through stencil point jp and
  ! lp is the stencil point of index that leads back to ip; weights without
  ! such a partner are left at zero. For constant weights the weights of op
  ! are taken over unchanged.
  !
  !   op           - operator to (anti)symmetrise
  !   opt          - result
  !   mesh         - mesh of the operator; supplies the volume elements
  !   self_adjoint - .true.: factor = +1 (self-adjoint), .false.: factor = -1 (skew-adjoint)
  subroutine nl_operator_adjoint(op, opt, mesh, self_adjoint)
    type(nl_operator_t), target, intent(in)  :: op
    type(nl_operator_t), target, intent(out) :: opt
    type(mesh_t),        target, intent(in)  :: mesh
    logical,                     intent(in)  :: self_adjoint

    integer :: ip, jp, kp, lp, index, ii, p1(1:mesh%box%dim)
    real(real64), pointer, contiguous   :: vol_pp(:), weights(:, :), tmp(:)
    real(real64) :: factor

    push_sub(nl_operator_adjoint)

    ! Local pointers start with an undefined association status. Give them a
    ! defined one: with domain parallelisation and constant weights 'weights'
    ! is never allocated, yet it is handed to safe_deallocate_p at the end.
    nullify(vol_pp, weights, tmp)

    if (self_adjoint) then
      ! make operator self-adjoint
      factor = m_one
    else
      ! make operator skew-adjoint
      factor = -m_one
    end if

    call nl_operator_copy(opt, op)

    if (mesh%parallel_in_domains) then
      ! get ghost point values
      safe_allocate(vol_pp(1:mesh%np_part))
      vol_pp(1:mesh%np) = mesh%vol_pp(1:mesh%np)
      call dpar_vec_ghost_update(mesh%pv, vol_pp)

      if (.not.op%const_w) then
        ! the weights of the ghost points are needed too: update them one stencil point at a time
        safe_allocate(weights(1:op%stencil%size, 1:mesh%np_part))
        safe_allocate(tmp(1:mesh%np_part))
        do ii = 1, op%stencil%size
          tmp(1:mesh%np) = op%w(ii, 1:mesh%np)
          call dpar_vec_ghost_update(mesh%pv, tmp)
          weights(ii, 1:mesh%np_part) = tmp(1:mesh%np_part)
        end do
        safe_deallocate_p(tmp)
      end if
    else
      ! serial case: no copies, point straight at the data
      vol_pp => mesh%vol_pp
      weights => op%w
    end if

    if (.not.op%const_w) then
      opt%w = m_zero
      do ip = 1, mesh%np
        do jp = 1, op%stencil%size
          index = nl_operator_get_index(op, jp, ip)
          if (index <= mesh%np) then
            ! point is in the mesh
            do lp = 1, op%stencil%size
              kp = nl_operator_get_index(op, lp, index)
              if (kp == ip) then
                opt%w(jp, ip) = m_half*weights(jp, ip) + factor*m_half*(vol_pp(index)/vol_pp(ip))*weights(lp, index)
              end if
            end do
          else if (index <= mesh%np + mesh%pv%np_ghost) then
            ! point is in the ghost layer
            ! the index tables only cover the first np points, so the way
            ! back to ip is searched through the mesh coordinates
            call mesh_local_index_to_coords(mesh, index, p1)
            do lp = 1, op%stencil%size
              kp = mesh_local_index_from_coords(mesh, p1(1:mesh%box%dim) + &
                op%stencil%points(1:mesh%box%dim, lp))
              if (kp == ip) then
                opt%w(jp, ip) = m_half*weights(jp, ip) + factor*m_half*(vol_pp(index)/vol_pp(ip))*weights(lp, index)
              end if
            end do
          end if
        end do
      end do
    else
      opt%w(1:op%stencil%size, 1) = op%w(1:op%stencil%size, 1)
    end if

    call nl_operator_update_gpu_buffers(opt)

    ! only the domain-parallel branch owns the memory behind the pointers
    if (mesh%parallel_in_domains) then
      safe_deallocate_p(vol_pp)
      safe_deallocate_p(weights)
    end if

    pop_sub(nl_operator_adjoint)
  end subroutine nl_operator_adjoint


  ! ---------------------------------------------------------

  ! Release everything an operator owns: the device buffers (only when an
  ! accelerator is enabled and the weights are constant), the weights, all
  ! index tables and the stencil.
  !
  !   op - operator to release
  subroutine nl_operator_end(op)
    type(nl_operator_t), intent(inout) :: op

    push_sub(nl_operator_end)

    if (accel_is_enabled() .and. op%const_w) then

      call accel_release_buffer(op%buff_ri)

      ! which further buffers are released depends on the selected accelerator implementation
      if (function_opencl == op_invmap) then
        call accel_release_buffer(op%buff_imin)
        call accel_release_buffer(op%buff_imax)

      else if (function_opencl == op_map) then
        call accel_release_buffer(op%buff_map)
        if (op%mesh%parallel_in_domains) then
          call accel_release_buffer(op%buff_all)
          call accel_release_buffer(op%buff_inner)
          call accel_release_buffer(op%buff_outer)
        end if

      else if (function_opencl == op_nomap) then
        call accel_release_buffer(op%buff_map)
        call accel_release_buffer(op%buff_stencil)
        call accel_release_buffer(op%buff_xyz_to_ip)
        call accel_release_buffer(op%buff_ip_to_xyz)
      end if

      call accel_release_buffer(op%buff_weights)
      call accel_release_buffer(op%buff_half_weights)
    end if

    ! weights and sizes
    safe_deallocate_a(op%w)
    safe_deallocate_a(op%nn)

    ! index tables of the full operator
    safe_deallocate_a(op%ri)
    safe_deallocate_a(op%rimap)
    safe_deallocate_a(op%rimap_inv)

    ! index tables of the inner and outer parts
    safe_deallocate_a(op%inner%imin)
    safe_deallocate_a(op%inner%imax)
    safe_deallocate_a(op%inner%ri)
    safe_deallocate_a(op%outer%imin)
    safe_deallocate_a(op%outer%imax)
    safe_deallocate_a(op%outer%ri)

    call stencil_end(op%stencil)

    pop_sub(nl_operator_end)

  end subroutine nl_operator_end


  ! ---------------------------------------------------------

  ! Index of the point that stencil entry 'is' reaches from point 'ip':
  ! the offset stored for the index range of ip, added to ip itself.
  !
  !   op  - operator whose tables are consulted
  !   is  - stencil point
  !   ip  - point the stencil is centred on
  !   res - ip + op%ri(is, op%rimap(ip))
  integer pure function nl_operator_get_index(op, is, ip) result(res)
    type(nl_operator_t), intent(in)   :: op
    integer,             intent(in)   :: is
    integer,             intent(in)   :: ip

    integer :: irange

    irange = op%rimap(ip)
    res = op%ri(is, irange) + ip

  end function nl_operator_get_index


  ! ---------------------------------------------------------


  ! Create the two device buffers for the weights (buff_weights and
  ! buff_half_weights), each with stencil%size entries. Nothing is written to
  ! them here, and nothing happens unless an accelerator is enabled and the
  ! weights are constant.
  !
  !   op - operator whose buffers are created
  subroutine nl_operator_allocate_gpu_buffers(op)
    type(nl_operator_t), intent(inout)   :: op

    logical :: on_device

    push_sub(nl_operator_allocate_gpu_buffers)

    on_device = accel_is_enabled() .and. op%const_w

    if (on_device) then
      call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
      call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
    end if

    pop_sub(nl_operator_allocate_gpu_buffers)

  end subroutine nl_operator_allocate_gpu_buffers


  ! ---------------------------------------------------------


  ! Upload the current weights to the device: buff_weights receives w(:, 1)
  ! and buff_half_weights receives -m_half*w(:, 1). Nothing happens unless an
  ! accelerator is enabled and the weights are constant.
  !
  !   op - operator whose device weights are refreshed
  subroutine nl_operator_update_gpu_buffers(op)
    type(nl_operator_t), intent(inout)   :: op

    logical :: on_device

    push_sub(nl_operator_update_gpu_buffers)

    on_device = accel_is_enabled() .and. op%const_w

    if (on_device) then
      call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
      call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
    end if

    pop_sub(nl_operator_update_gpu_buffers)

  end subroutine nl_operator_update_gpu_buffers


  ! ---------------------------------------------------------


  ! Largest point index the operator can reach: for every index range, the
  ! end of the range plus its largest stencil offset; the maximum over all
  ! ranges is returned (0 if there are no ranges).
  !
  !   op    - operator whose tables are scanned
  !   np_bc - highest index found
  integer pure function nl_operator_np_zero_bc(op) result(np_bc)
    type(nl_operator_t), intent(in)   :: op

    integer :: ir

    np_bc = 0
    do ir = 1, op%nri
      np_bc = max(np_bc, op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir)))
    end do

  end function nl_operator_np_zero_bc


  ! ------------------------------------------------------


  ! Accessor for the module flag compact_boundaries, i.e. the value read from
  ! the input variable NLOperatorCompactBoundaries.
  logical pure function nl_operator_compact_boundaries() result(is_compact)

    is_compact = compact_boundaries

  end function nl_operator_compact_boundaries


#include "undef.F90"

#include "real.F90"

#include "nl_operator_inc.F90"


#include "undef.F90"

#include "complex.F90"

#include "nl_operator_inc.F90"


end module nl_operator_oct_m


!! Local Variables:

!! mode: f90

!! coding: utf-8

!! End:

accel_oct_m::accel_create_buffer
Definition: accel.F90:311

accel_oct_m::accel_write_buffer
Definition: accel.F90:323

global_oct_m::optional_default
Definition: global.F90:270

math_oct_m::pad
Definition: math.F90:200

nl_operator_oct_m::op_is_available
Definition: nl_operator.F90:239

parser_oct_m::parse_variable
Definition: parser.F90:262

varinfo_oct_m::varinfo_valid_option
Definition: varinfo.F90:132

accel_oct_m
Definition: accel.F90:114

accel_oct_m::accel_release_buffer
subroutine, public accel_release_buffer(this)
Definition: accel.F90:1248

accel_oct_m::accel_kernel_build
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:2053

accel_oct_m::accel_is_enabled
pure logical function, public accel_is_enabled()
Definition: accel.F90:401

accel_oct_m::accel_mem_read_only
integer, parameter, public accel_mem_read_only
Definition: accel.F90:183

accel_oct_m::accel_max_workgroup_size
integer pure function, public accel_max_workgroup_size()
Definition: accel.F90:1474

batch_oct_m
This module implements batches of mesh functions.
Definition: batch.F90:133

boundaries_oct_m
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:122

boundaries_oct_m::dpar_vec_ghost_update
subroutine, public dpar_vec_ghost_update(pv, v_local)
Updates ghost points of every node.
Definition: boundaries.F90:1433

debug_oct_m
Definition: debug.F90:114

global_oct_m
Definition: global.F90:114

global_oct_m::m_zero
real(real64), parameter, public m_zero
Definition: global.F90:188

global_oct_m::m_half
real(real64), parameter, public m_half
Definition: global.F90:194

global_oct_m::m_one
real(real64), parameter, public m_one
Definition: global.F90:189

index_oct_m
This module implements the index, used for the mesh points.
Definition: index.F90:122

math_oct_m
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:115

math_oct_m::weights
subroutine, public weights(N, M, cc, side)
Compute the weights for finite-difference calculations:
Definition: math.F90:526

mesh_oct_m
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118

mesh_oct_m::mesh_local_index_from_coords
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:939

mesh_oct_m::mesh_local_index_to_coords
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:951

mesh_oct_m::mesh_compact_boundaries
logical pure function, public mesh_compact_boundaries(mesh)
Definition: mesh.F90:826

messages_oct_m
Definition: messages.F90:115

messages_oct_m::messages_obsolete_variable
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:1046

messages_oct_m::message
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:161

messages_oct_m::messages_input_error
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:714

messages_oct_m::messages_experimental
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1086

messages_oct_m::messages_info
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:617

mpi_oct_m
Definition: mpi.F90:114

multicomm_oct_m
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:145

namespace_oct_m
Definition: namespace.F90:103

nl_operator_oct_m
This module defines non-local operators.
Definition: nl_operator.F90:116

nl_operator_oct_m::dnl_operator_operate_diag
subroutine, public dnl_operator_operate_diag(op, fo)
Definition: nl_operator.F90:1484

nl_operator_oct_m::nl_operator_init
subroutine, public nl_operator_init(op, label)
initialize an instance of a non-local operator by setting the label
Definition: nl_operator.F90:349

nl_operator_oct_m::op_map
integer, parameter op_map
Definition: nl_operator.F90:229

nl_operator_oct_m::op_max
integer, parameter op_max
Definition: nl_operator.F90:223

nl_operator_oct_m::op_vec
integer, parameter op_vec
Definition: nl_operator.F90:223

nl_operator_oct_m::dnl_operator_operate_batch
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
Definition: nl_operator.F90:1008

nl_operator_oct_m::dnl_operator_operate
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
Definition: nl_operator.F90:1459

nl_operator_oct_m::nl_operator_update_gpu_buffers
subroutine, public nl_operator_update_gpu_buffers(op)
Definition: nl_operator.F90:902

nl_operator_oct_m::nl_operator_global_init
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
Definition: nl_operator.F90:255

nl_operator_oct_m::nl_operator_copy
subroutine, public nl_operator_copy(opo, opi)
Definition: nl_operator.F90:362

nl_operator_oct_m::nl_operator_output_weights
subroutine, public nl_operator_output_weights(this)
Definition: nl_operator.F90:706

nl_operator_oct_m::nl_operator_end
subroutine, public nl_operator_end(op)
Definition: nl_operator.F90:822

nl_operator_oct_m::op_inner
integer, parameter, public op_inner
Definition: nl_operator.F90:234

nl_operator_oct_m::znl_operator_operate
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
Definition: nl_operator.F90:2027

nl_operator_oct_m::nl_operator_compact_boundaries
logical pure function, public nl_operator_compact_boundaries()
Definition: nl_operator.F90:933

nl_operator_oct_m::nl_operator_global_end
subroutine, public nl_operator_global_end()
Definition: nl_operator.F90:339

nl_operator_oct_m::op_outer
integer, parameter, public op_outer
Definition: nl_operator.F90:234

nl_operator_oct_m::nl_operator_build
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Definition: nl_operator.F90:417

nl_operator_oct_m::compact_boundaries
logical compact_boundaries
Definition: nl_operator.F90:236

nl_operator_oct_m::znl_operator_operate_batch
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
Definition: nl_operator.F90:1576

nl_operator_oct_m::op_nomap
integer, parameter op_nomap
Definition: nl_operator.F90:229

nl_operator_oct_m::nl_operator_adjoint
subroutine, public nl_operator_adjoint(op, opt, mesh, self_adjoint)
opt has to be initialised and built.
Definition: nl_operator.F90:738

nl_operator_oct_m::nl_operator_np_zero_bc
integer pure function, public nl_operator_np_zero_bc(op)
Definition: nl_operator.F90:918

nl_operator_oct_m::znl_operator_operate_diag
subroutine, public znl_operator_operate_diag(op, fo)
Definition: nl_operator.F90:2052

nl_operator_oct_m::nl_operator_get_index
integer pure function, public nl_operator_get_index(op, is, ip)
Definition: nl_operator.F90:875

nl_operator_oct_m::nl_operator_allocate_gpu_buffers
subroutine, public nl_operator_allocate_gpu_buffers(op)
Definition: nl_operator.F90:885

nl_operator_oct_m::op_min
integer, parameter op_min
Definition: nl_operator.F90:223

operate_f_oct_m
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:117

par_vec_oct_m
Some general things and nomenclature:
Definition: par_vec.F90:171

parser_oct_m
Definition: parser.F90:114

profiling_oct_m
Definition: profiling.F90:116

space_oct_m
Definition: space.F90:114

stencil_oct_m
This module defines stencils used in Octopus.
Definition: stencil.F90:135

stencil_oct_m::stencil_end
subroutine, public stencil_end(this)
Definition: stencil.F90:214

stencil_oct_m::stencil_copy
subroutine, public stencil_copy(input, output)
Definition: stencil.F90:196

types_oct_m
Definition: types.F90:114

types_oct_m::type_integer
type(type_t), parameter, public type_integer
Definition: types.F90:135

types_oct_m::type_float
type(type_t), parameter, public type_float
Definition: types.F90:133

varinfo_oct_m
Definition: varinfo.F90:114

mesh_oct_m::mesh_t
Describes mesh distribution to nodes.
Definition: mesh.F90:186

nl_operator_oct_m::nl_operator_index_t
index type for non-local operators
Definition: nl_operator.F90:169

nl_operator_oct_m::nl_operator_t
data type for non local operators
Definition: nl_operator.F90:178

space_oct_m::space_t
Definition: space.F90:130

true
int true(void)
Definition: symmetries_finite.c:3153