78 integer,
allocatable :: imin(:)
79 integer,
allocatable :: imax(:)
80 integer,
allocatable :: ri(:, :)
86 type(stencil_t),
public :: stencil
87 type(mesh_t),
pointer :: mesh => null()
88 integer,
allocatable :: nn(:)
89 integer,
public :: np = 0
91 real(real64),
allocatable,
public :: w(:,:)
93 logical,
public :: const_w = .
true.
95 type(accel_mem_t),
public :: buff_weights
96 type(accel_mem_t),
public :: buff_half_weights
98 character(len=40) :: label
101 integer,
public :: nri = 0
102 integer,
allocatable,
public :: ri(:,:)
103 integer,
allocatable,
public :: rimap(:)
104 integer,
allocatable,
public :: rimap_inv(:)
106 integer :: ninner = 0
107 integer :: nouter = 0
109 type(nl_operator_index_t) :: inner
110 type(nl_operator_index_t) :: outer
112 type(accel_kernel_t) :: kernel
113 type(accel_mem_t) :: buff_imin
114 type(accel_mem_t) :: buff_imax
115 type(accel_mem_t) :: buff_ri
116 type(accel_mem_t) :: buff_map
117 type(accel_mem_t) :: buff_all
118 type(accel_mem_t) :: buff_inner
119 type(accel_mem_t) :: buff_outer
120 type(accel_mem_t) :: buff_stencil
121 type(accel_mem_t) :: buff_ip_to_xyz
122 type(accel_mem_t) :: buff_xyz_to_ip
125 type(nl_operator_t),
public,
pointer :: coarser => null()
129 integer,
parameter :: &
135 integer,
parameter :: &
142 logical :: compact_boundaries
147 integer,
intent(in) :: opid, type
151 integer :: dfunction_global = -1
152 integer :: zfunction_global = -1
153 integer :: function_opencl
161 type(namespace_t),
intent(in) :: namespace
198 call parse_variable(namespace,
'OperateComplex', default, zfunction_global)
233 call parse_variable(namespace,
'NLOperatorCompactBoundaries', .false., compact_boundaries)
235 if (compact_boundaries)
then
256 character(len=*),
intent(in) :: label
284 safe_allocate_source_a(opo%nn, opi%nn)
285 safe_allocate_source_a(opo%w, opi%w)
287 opo%const_w = opi%const_w
290 assert(
allocated(opi%ri))
292 safe_allocate_source_a(opo%ri, opi%ri)
293 safe_allocate_source_a(opo%rimap, opi%rimap)
294 safe_allocate_source_a(opo%rimap_inv, opi%rimap_inv)
296 if (opi%mesh%parallel_in_domains)
then
297 opo%inner%nri = opi%inner%nri
298 safe_allocate_source_a(opo%inner%imin, opi%inner%imin)
299 safe_allocate_source_a(opo%inner%imax, opi%inner%imax)
300 safe_allocate_source_a(opo%inner%ri, opi%inner%ri)
302 opo%outer%nri = opi%outer%nri
303 safe_allocate_source_a(opo%outer%imin, opi%outer%imin)
304 safe_allocate_source_a(opo%outer%imax, opi%outer%imax)
305 safe_allocate_source_a(opo%outer%ri, opi%outer%ri)
323 class(
space_t),
intent(in) :: space
324 type(
mesh_t),
target,
intent(in) :: mesh
326 integer,
intent(in) :: np
327 logical,
optional,
intent(in) :: const_w
328 logical,
optional,
intent(in) :: regenerate
330 integer :: ii, jj, p1(space%dim), time, current
331 integer,
allocatable :: st1(:), st2(:), st1r(:)
333 integer :: ir, maxp, iinner, iouter
334 logical :: change, force_change
335 character(len=200) :: flags
336 integer,
allocatable :: inner_points(:), outer_points(:), all_points(:)
340 if (mesh%parallel_in_domains .and. .not. const_w)
then
350 if (
present(const_w)) op%const_w = const_w
354 safe_allocate(op%w(1:op%stencil%size, 1))
355 message(1) =
'Debug: nl_operator_build: working with constant weights.'
358 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
359 message(1) =
'Debug: nl_operator_build: working with non-constant weights.'
367 safe_allocate(st1(1:op%stencil%size))
368 safe_allocate(st1r(1:op%stencil%size))
369 safe_allocate(st2(1:op%stencil%size))
378 do jj = 1, op%stencil%size
387 st1(jj) = min(st1(jj), mesh%np + 1)
392 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
394 change = any(st1 /= st2)
398 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
402 do jj = 1, op%stencil%size
403 if (st1(jj) + ii > mesh%np .and. st2(jj) + ii - 1 > mesh%np .and. st2(jj) + ii <= mesh%np_part)
then
410 change = any(st1r /= st2)
412 if (.not. change) st1(:) = st1r(:)
416 if (change .or. force_change)
then
421 if (time == 1) op%nri = op%nri + 1
425 current = current + 1
426 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
430 if (time == 2) op%rimap(ii) = current
436 safe_deallocate_a(op%ri)
437 safe_deallocate_a(op%rimap)
438 safe_deallocate_a(op%rimap_inv)
440 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
441 safe_allocate(op%rimap(1:op%np))
442 safe_allocate(op%rimap_inv(1:op%nri + 1))
449 if (mesh%use_curvilinear)
then
450 safe_allocate(op%nn(1:op%nri))
452 op%nn = op%stencil%size
461 op%rimap_inv(op%rimap(jj) + 1) = jj
463 op%rimap_inv(op%nri + 1) = op%np
465 safe_deallocate_a(st1)
466 safe_deallocate_a(st1r)
467 safe_deallocate_a(st2)
470 nn = op%rimap_inv(jj + 1) - op%rimap_inv(jj)
473 if (op%mesh%parallel_in_domains)
then
480 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
483 op%inner%nri = op%inner%nri + 1
484 assert(op%inner%nri <= op%nri)
487 op%outer%nri = op%outer%nri + 1
488 assert(op%outer%nri <= op%nri)
492 assert(op%inner%nri + op%outer%nri == op%nri)
495 safe_deallocate_a(op%inner%imin)
496 safe_deallocate_a(op%inner%imax)
497 safe_deallocate_a(op%inner%ri)
498 safe_deallocate_a(op%outer%imin)
499 safe_deallocate_a(op%outer%imax)
500 safe_deallocate_a(op%outer%ri)
502 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
503 safe_allocate(op%inner%imax(1:op%inner%nri))
504 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
506 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
507 safe_allocate(op%outer%imax(1:op%outer%nri))
508 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
514 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
518 op%inner%imin(iinner) = op%rimap_inv(ir)
519 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
520 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
524 op%outer%imin(iouter) = op%rimap_inv(ir)
525 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
526 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
531 do ir = 1, op%inner%nri
532 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
533 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
541 write(flags,
'(i5)') op%stencil%size
542 flags=
'-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
544 if (op%mesh%parallel_in_domains) flags =
'-DINDIRECT '//trim(flags)
546 select case (function_opencl)
557 select case (function_opencl)
569 if (op%mesh%parallel_in_domains)
then
571 safe_allocate(inner_points(1:op%mesh%np))
572 safe_allocate(outer_points(1:op%mesh%np))
573 safe_allocate(all_points(1:op%mesh%np))
578 do ii = 1, op%mesh%np
579 all_points(ii) = ii - 1
580 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
581 if (maxp <= op%mesh%np)
then
582 op%ninner = op%ninner + 1
583 inner_points(op%ninner) = ii - 1
585 op%nouter = op%nouter + 1
586 outer_points(op%nouter) = ii - 1
599 safe_deallocate_a(inner_points)
600 safe_deallocate_a(outer_points)
601 safe_deallocate_a(all_points)
615 integer :: istencil, idir
619 write(
message(1),
'(3a)')
'Debug info: Finite difference weights for ', trim(this%label),
'.'
620 write(
message(2),
'(a)')
' Spacing:'
621 do idir = 1, this%mesh%box%dim
622 write(
message(2),
'(a,f16.8)') trim(
message(2)), this%mesh%spacing(idir)
626 do istencil = 1, this%stencil%size
627 select case(this%mesh%box%dim)
629 write(
message(1),
'(a,i3,1i4,f25.10)')
' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
631 write(
message(1),
'(a,i3,2i4,f25.10)')
' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
633 write(
message(1),
'(a,i3,3i4,f25.10)')
' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
651 select case (function_opencl)
658 if (op%mesh%parallel_in_domains)
then
675 safe_deallocate_a(op%inner%imin)
676 safe_deallocate_a(op%inner%imax)
677 safe_deallocate_a(op%inner%ri)
678 safe_deallocate_a(op%outer%imin)
679 safe_deallocate_a(op%outer%imax)
680 safe_deallocate_a(op%outer%ri)
682 safe_deallocate_a(op%w)
684 safe_deallocate_a(op%ri)
685 safe_deallocate_a(op%rimap)
686 safe_deallocate_a(op%rimap_inv)
687 safe_deallocate_a(op%nn)
698 integer,
intent(in) :: is
699 integer,
intent(in) :: ip
701 res = ip + op%ri(is, op%rimap(ip))
712 if (accel_is_enabled() .and. op%const_w)
then
713 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
714 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
729 if (accel_is_enabled() .and. op%const_w)
then
730 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
731 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
746 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
747 np_bc = max(np_bc, ii)
762#include "nl_operator_inc.F90"
765#include "complex.F90"
766#include "nl_operator_inc.F90"
subroutine, public accel_release_buffer(this)
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
pure logical function, public accel_is_enabled()
integer, parameter, public accel_mem_read_only
integer pure function, public accel_max_workgroup_size()
This module implements batches of mesh functions.
Module implementing boundary conditions in Octopus.
real(real64), parameter, public m_zero
real(real64), parameter, public m_half
This module implements the index, used for the mesh points.
This module is intended to contain "only mathematical" functions and procedures.
This module defines the meshes, which are used in Octopus.
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
logical pure function, public mesh_compact_boundaries(mesh)
subroutine, public messages_obsolete_variable(namespace, name, rep)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_experimental(name, namespace)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
This module handles the communicators for the various parallelization strategies.
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
subroutine, public nl_operator_init(op, label)
initialize an instance of a non-local operator by setting the label
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_copy(opo, opi)
subroutine, public nl_operator_output_weights(this)
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
logical pure function, public nl_operator_compact_boundaries()
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
logical compact_boundaries
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
subroutine, public znl_operator_operate_diag(op, fo)
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Some general things and nomenclature:
This module defines stencils used in Octopus.
subroutine, public stencil_end(this)
subroutine, public stencil_copy(input, output)
type(type_t), public type_float
type(type_t), public type_integer
Describes mesh distribution to nodes.
index type for non-local operators
data type for non local operators