79 integer,
allocatable :: imin(:)
80 integer,
allocatable :: imax(:)
81 integer,
allocatable :: ri(:, :)
87 type(stencil_t),
public :: stencil
88 type(mesh_t),
pointer :: mesh => null()
89 integer,
allocatable :: nn(:)
90 integer,
public :: np = 0
92 real(real64),
allocatable,
public :: w(:,:)
94 logical,
public :: const_w = .
true.
96 type(accel_mem_t),
public :: buff_weights
97 type(accel_mem_t),
public :: buff_half_weights
99 character(len=40) :: label
102 integer,
public :: nri = 0
103 integer,
allocatable,
public :: ri(:,:)
104 integer,
allocatable,
public :: rimap(:)
105 integer,
allocatable,
public :: rimap_inv(:)
107 integer :: ninner = 0
108 integer :: nouter = 0
110 type(nl_operator_index_t) :: inner
111 type(nl_operator_index_t) :: outer
113 type(accel_kernel_t) :: kernel
114 type(accel_mem_t) :: buff_imin
115 type(accel_mem_t) :: buff_imax
116 type(accel_mem_t) :: buff_ri
117 type(accel_mem_t) :: buff_map
118 type(accel_mem_t) :: buff_all
119 type(accel_mem_t) :: buff_inner
120 type(accel_mem_t) :: buff_outer
121 type(accel_mem_t) :: buff_stencil
122 type(accel_mem_t) :: buff_ip_to_xyz
123 type(accel_mem_t) :: buff_xyz_to_ip
126 type(nl_operator_t),
public,
pointer :: coarser => null()
130 integer,
parameter :: &
136 integer,
parameter :: &
143 logical :: compact_boundaries
148 integer,
intent(in) :: opid, type
152 integer :: dfunction_global = -1
153 integer :: zfunction_global = -1
154 integer :: function_opencl
162 type(namespace_t),
intent(in) :: namespace
199 call parse_variable(namespace,
'OperateComplex', default, zfunction_global)
234 call parse_variable(namespace,
'NLOperatorCompactBoundaries', .false., compact_boundaries)
236 if (compact_boundaries)
then
257 character(len=*),
intent(in) :: label
285 safe_allocate_source_a(opo%nn, opi%nn)
286 safe_allocate_source_a(opo%w, opi%w)
288 opo%const_w = opi%const_w
291 assert(
allocated(opi%ri))
293 safe_allocate_source_a(opo%ri, opi%ri)
294 safe_allocate_source_a(opo%rimap, opi%rimap)
295 safe_allocate_source_a(opo%rimap_inv, opi%rimap_inv)
297 if (opi%mesh%parallel_in_domains)
then
298 opo%inner%nri = opi%inner%nri
299 safe_allocate_source_a(opo%inner%imin, opi%inner%imin)
300 safe_allocate_source_a(opo%inner%imax, opi%inner%imax)
301 safe_allocate_source_a(opo%inner%ri, opi%inner%ri)
303 opo%outer%nri = opi%outer%nri
304 safe_allocate_source_a(opo%outer%imin, opi%outer%imin)
305 safe_allocate_source_a(opo%outer%imax, opi%outer%imax)
306 safe_allocate_source_a(opo%outer%ri, opi%outer%ri)
324 class(
space_t),
intent(in) :: space
325 type(
mesh_t),
target,
intent(in) :: mesh
327 integer,
intent(in) :: np
328 logical,
optional,
intent(in) :: const_w
329 logical,
optional,
intent(in) :: regenerate
331 integer :: ii, jj, p1(space%dim), time, current
332 integer,
allocatable :: st1(:), st2(:), st1r(:)
334 integer :: ir, maxp, iinner, iouter
335 logical :: change, force_change
336 character(len=200) :: flags
337 integer,
allocatable :: inner_points(:), outer_points(:), all_points(:)
341 if (mesh%parallel_in_domains .and. .not. const_w)
then
351 if (
present(const_w)) op%const_w = const_w
355 safe_allocate(op%w(1:op%stencil%size, 1))
356 message(1) =
'Debug: nl_operator_build: working with constant weights.'
359 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
360 message(1) =
'Debug: nl_operator_build: working with non-constant weights.'
368 safe_allocate(st1(1:op%stencil%size))
369 safe_allocate(st1r(1:op%stencil%size))
370 safe_allocate(st2(1:op%stencil%size))
379 do jj = 1, op%stencil%size
388 st1(jj) = min(st1(jj), mesh%np + 1)
393 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
395 change = any(st1 /= st2)
399 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
403 do jj = 1, op%stencil%size
404 if (st1(jj) + ii > mesh%np .and. st2(jj) + ii - 1 > mesh%np .and. st2(jj) + ii <= mesh%np_part)
then
411 change = any(st1r /= st2)
413 if (.not. change) st1 = st1r
417 if (change .or. force_change)
then
422 if (time == 1) op%nri = op%nri + 1
426 current = current + 1
427 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
431 if (time == 2) op%rimap(ii) = current
437 safe_deallocate_a(op%ri)
438 safe_deallocate_a(op%rimap)
439 safe_deallocate_a(op%rimap_inv)
441 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
442 safe_allocate(op%rimap(1:op%np))
443 safe_allocate(op%rimap_inv(1:op%nri + 1))
450 if (mesh%use_curvilinear)
then
451 safe_allocate(op%nn(1:op%nri))
453 op%nn = op%stencil%size
462 op%rimap_inv(op%rimap(jj) + 1) = jj
464 op%rimap_inv(op%nri + 1) = op%np
466 safe_deallocate_a(st1)
467 safe_deallocate_a(st1r)
468 safe_deallocate_a(st2)
471 nn = op%rimap_inv(jj + 1) - op%rimap_inv(jj)
474 if (op%mesh%parallel_in_domains)
then
481 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
484 op%inner%nri = op%inner%nri + 1
485 assert(op%inner%nri <= op%nri)
488 op%outer%nri = op%outer%nri + 1
489 assert(op%outer%nri <= op%nri)
493 assert(op%inner%nri + op%outer%nri == op%nri)
496 safe_deallocate_a(op%inner%imin)
497 safe_deallocate_a(op%inner%imax)
498 safe_deallocate_a(op%inner%ri)
499 safe_deallocate_a(op%outer%imin)
500 safe_deallocate_a(op%outer%imax)
501 safe_deallocate_a(op%outer%ri)
503 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
504 safe_allocate(op%inner%imax(1:op%inner%nri))
505 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
507 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
508 safe_allocate(op%outer%imax(1:op%outer%nri))
509 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
515 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
519 op%inner%imin(iinner) = op%rimap_inv(ir)
520 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
521 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
525 op%outer%imin(iouter) = op%rimap_inv(ir)
526 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
527 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
532 do ir = 1, op%inner%nri
533 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
534 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
542 write(flags,
'(i5)') op%stencil%size
543 flags=
'-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
545 if (op%mesh%parallel_in_domains) flags =
'-DINDIRECT '//trim(flags)
547 select case (function_opencl)
558 select case (function_opencl)
570 if (op%mesh%parallel_in_domains)
then
572 safe_allocate(inner_points(1:op%mesh%np))
573 safe_allocate(outer_points(1:op%mesh%np))
574 safe_allocate(all_points(1:op%mesh%np))
579 do ii = 1, op%mesh%np
580 all_points(ii) = ii - 1
581 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
582 if (maxp <= op%mesh%np)
then
583 op%ninner = op%ninner + 1
584 inner_points(op%ninner) = ii - 1
586 op%nouter = op%nouter + 1
587 outer_points(op%nouter) = ii - 1
600 safe_deallocate_a(inner_points)
601 safe_deallocate_a(outer_points)
602 safe_deallocate_a(all_points)
616 integer :: istencil, idir
620 write(
message(1),
'(3a)')
'Debug info: Finite difference weights for ', trim(this%label),
'.'
621 write(
message(2),
'(a)')
' Spacing:'
622 do idir = 1, this%mesh%box%dim
623 write(
message(2),
'(a,f16.8)') trim(
message(2)), this%mesh%spacing(idir)
627 do istencil = 1, this%stencil%size
628 select case(this%mesh%box%dim)
630 write(
message(1),
'(a,i3,1i4,f25.10)')
' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
632 write(
message(1),
'(a,i3,2i4,f25.10)')
' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
634 write(
message(1),
'(a,i3,3i4,f25.10)')
' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
648 type(
mesh_t),
target,
intent(in) :: mesh
649 logical,
intent(in) :: self_adjoint
651 integer :: ip, jp, kp, lp, index, ii, p1(1:mesh%box%dim)
652 real(real64),
pointer,
contiguous :: vol_pp(:),
weights(:, :), tmp(:)
653 real(real64) :: factor
657 if (self_adjoint)
then
667 if (mesh%parallel_in_domains)
then
669 safe_allocate(vol_pp(1:mesh%np_part))
670 vol_pp(1:mesh%np) = mesh%vol_pp(1:mesh%np)
673 if (.not.op%const_w)
then
674 safe_allocate(
weights(1:op%stencil%size, 1:mesh%np_part))
675 safe_allocate(tmp(1:mesh%np_part))
676 do ii = 1, op%stencil%size
677 tmp(1:mesh%np) = op%w(ii, 1:mesh%np)
679 weights(ii, 1:mesh%np_part) = tmp(1:mesh%np_part)
681 safe_deallocate_p(tmp)
684 vol_pp => mesh%vol_pp
688 if (.not.op%const_w)
then
691 do jp = 1, op%stencil%size
693 if (index <= mesh%np)
then
695 do lp = 1, op%stencil%size
701 else if (index <= mesh%np + mesh%pv%np_ghost)
then
704 do lp = 1, op%stencil%size
706 op%stencil%points(1:mesh%box%dim, lp))
715 opt%w(1:op%stencil%size, 1) = op%w(1:op%stencil%size, 1)
720 if (mesh%parallel_in_domains)
then
721 safe_deallocate_p(vol_pp)
737 select case (function_opencl)
744 if (op%mesh%parallel_in_domains)
then
761 safe_deallocate_a(op%inner%imin)
762 safe_deallocate_a(op%inner%imax)
763 safe_deallocate_a(op%inner%ri)
764 safe_deallocate_a(op%outer%imin)
765 safe_deallocate_a(op%outer%imax)
766 safe_deallocate_a(op%outer%ri)
768 safe_deallocate_a(op%w)
770 safe_deallocate_a(op%ri)
771 safe_deallocate_a(op%rimap)
772 safe_deallocate_a(op%rimap_inv)
773 safe_deallocate_a(op%nn)
784 integer,
intent(in) :: is
785 integer,
intent(in) :: ip
787 res = ip + op%ri(is, op%rimap(ip))
798 if (accel_is_enabled() .and. op%const_w)
then
799 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
800 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
815 if (accel_is_enabled() .and. op%const_w)
then
816 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
817 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
832 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
833 np_bc = max(np_bc, ii)
848#include "nl_operator_inc.F90"
851#include "complex.F90"
852#include "nl_operator_inc.F90"
subroutine, public accel_release_buffer(this)
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
pure logical function, public accel_is_enabled()
integer, parameter, public accel_mem_read_only
integer pure function, public accel_max_workgroup_size()
This module implements batches of mesh functions.
Module implementing boundary conditions in Octopus.
subroutine, public dpar_vec_ghost_update(pv, v_local)
Updates ghost points of every node.
real(real64), parameter, public m_zero
real(real64), parameter, public m_half
real(real64), parameter, public m_one
This module implements the index, used for the mesh points.
This module is intended to contain "only mathematical" functions and procedures.
subroutine, public weights(N, M, cc, side)
Compute the weights for finite-difference calculations:
This module defines the meshes, which are used in Octopus.
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
logical pure function, public mesh_compact_boundaries(mesh)
subroutine, public messages_obsolete_variable(namespace, name, rep)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_experimental(name, namespace)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
This module handles the communicators for the various parallelization strategies.
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
subroutine, public nl_operator_init(op, label)
initialize an instance of a non-local operator by setting the label
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_copy(opo, opi)
subroutine, public nl_operator_output_weights(this)
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
logical pure function, public nl_operator_compact_boundaries()
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
logical compact_boundaries
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
subroutine, public nl_operator_adjoint(op, opt, mesh, self_adjoint)
opt has to be initialised and built.
integer pure function, public nl_operator_np_zero_bc(op)
subroutine, public znl_operator_operate_diag(op, fo)
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Some general things and nomenclature:
This module defines stencils used in Octopus.
subroutine, public stencil_end(this)
subroutine, public stencil_copy(input, output)
type(type_t), public type_float
type(type_t), public type_integer
Describes mesh distribution to nodes.
index type for non-local operators
data type for non local operators