79 integer,
allocatable :: imin(:)
80 integer,
allocatable :: imax(:)
81 integer,
allocatable :: ri(:, :)
82 integer,
allocatable :: ri_pos(:,:,:)
83 integer,
allocatable :: ri_neg(:,:,:)
86 integer,
public,
parameter :: &
94 type(stencil_t),
public :: stencil
95 type(mesh_t),
pointer :: mesh => null()
96 integer,
allocatable :: nn(:)
97 integer,
public :: np = 0
99 real(real64),
allocatable,
public :: w(:,:)
101 logical,
public :: const_w = .
true.
103 type(accel_mem_t),
public :: buff_weights
104 type(accel_mem_t),
public :: buff_half_weights
106 integer :: symmetry = op_general
108 character(len=40) :: label
111 integer,
public :: nri = 0
112 integer,
allocatable,
public :: ri(:,:)
113 integer,
allocatable,
public :: rimap(:)
114 integer,
allocatable,
public :: rimap_inv(:)
117 integer :: npairs = 0
118 real(real64),
allocatable :: wpair(:)
119 real(real64) :: wcenter
120 integer,
allocatable :: ri_pos(:,:,:)
121 integer,
allocatable :: ri_neg(:,:,:)
122 integer :: max_allocated_ri_pair = 0
124 integer :: ninner = 0
125 integer :: nouter = 0
127 type(nl_operator_index_t) :: inner
128 type(nl_operator_index_t) :: outer
130 type(accel_kernel_t) :: kernel
131 type(accel_mem_t) :: buff_imin
132 type(accel_mem_t) :: buff_imax
133 type(accel_mem_t) :: buff_ri
134 type(accel_mem_t) :: buff_map
135 type(accel_mem_t) :: buff_all
136 type(accel_mem_t) :: buff_inner
137 type(accel_mem_t) :: buff_outer
138 type(accel_mem_t) :: buff_stencil
139 type(accel_mem_t) :: buff_ip_to_xyz
140 type(accel_mem_t) :: buff_xyz_to_ip
143 type(nl_operator_t),
public,
pointer :: coarser => null()
147 integer,
parameter :: &
153 integer,
parameter :: &
163 integer,
intent(in) :: opid, type
167 integer :: dfunction_global = -1
168 integer :: zfunction_global = -1
169 integer :: function_accel
170 logical :: use_symmetries = .false.
267 character(len=*),
intent(in) :: label
268 integer,
optional,
intent(in) :: symm
273 op%symmetry = op_general
274 if (use_symmetries)
then
292 class(
space_t),
intent(in) :: space
293 type(
mesh_t),
target,
intent(in) :: mesh
295 integer,
intent(in) :: np
296 logical,
optional,
intent(in) :: const_w
297 logical,
optional,
intent(in) :: regenerate
299 integer :: ii, jj, p1(space%dim), time, current
300 integer,
allocatable :: st1(:), st2(:), st1r(:)
301 integer :: ir, maxp, iinner, iouter
302 logical :: change, force_change
303 character(len=200) :: flags
304 integer,
allocatable :: inner_points(:), outer_points(:), all_points(:)
308 if (mesh%parallel_in_domains .and. .not. const_w)
then
323 safe_allocate(op%w(1:op%stencil%size, 1))
324 message(1) =
'Debug: nl_operator_build: working with constant weights.'
327 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
328 message(1) =
'Debug: nl_operator_build: working with non-constant weights.'
336 safe_allocate(st1(1:op%stencil%size))
337 safe_allocate(st1r(1:op%stencil%size))
338 safe_allocate(st2(1:op%stencil%size))
347 do jj = 1, op%stencil%size
354 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
356 change = any(st1 /= st2)
360 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
363 if (change .or. force_change)
then
368 if (time == 1) op%nri = op%nri + 1
372 current = current + 1
373 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
377 if (time == 2) op%rimap(ii) = current
383 safe_deallocate_a(op%ri)
384 safe_deallocate_a(op%rimap)
385 safe_deallocate_a(op%rimap_inv)
387 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
388 safe_allocate(op%rimap(1:op%np))
389 safe_allocate(op%rimap_inv(1:op%nri + 1))
396 if (mesh%use_curvilinear)
then
397 safe_allocate(op%nn(1:op%nri))
399 op%nn = op%stencil%size
408 op%rimap_inv(op%rimap(jj) + 1) = jj
410 op%rimap_inv(op%nri + 1) = op%np
412 safe_deallocate_a(st1)
413 safe_deallocate_a(st1r)
414 safe_deallocate_a(st2)
416 if (op%mesh%parallel_in_domains)
then
423 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
426 op%inner%nri = op%inner%nri + 1
427 assert(op%inner%nri <= op%nri)
430 op%outer%nri = op%outer%nri + 1
431 assert(op%outer%nri <= op%nri)
435 assert(op%inner%nri + op%outer%nri == op%nri)
438 safe_deallocate_a(op%inner%imin)
439 safe_deallocate_a(op%inner%imax)
440 safe_deallocate_a(op%inner%ri)
441 safe_deallocate_a(op%outer%imin)
442 safe_deallocate_a(op%outer%imax)
443 safe_deallocate_a(op%outer%ri)
445 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
446 safe_allocate(op%inner%imax(1:op%inner%nri))
447 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
449 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
450 safe_allocate(op%outer%imax(1:op%outer%nri))
451 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
457 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
461 op%inner%imin(iinner) = op%rimap_inv(ir)
462 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
463 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
467 op%outer%imin(iouter) = op%rimap_inv(ir)
468 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
469 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
474 do ir = 1, op%inner%nri
475 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
476 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
484 write(flags,
'(i5)') op%stencil%size
485 flags=
'-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
487 if (op%mesh%parallel_in_domains) flags =
'-DINDIRECT '//trim(flags)
489 select case (function_accel)
500 select case (function_accel)
512 if (op%mesh%parallel_in_domains)
then
514 safe_allocate(inner_points(1:op%mesh%np))
515 safe_allocate(outer_points(1:op%mesh%np))
516 safe_allocate(all_points(1:op%mesh%np))
521 do ii = 1, op%mesh%np
522 all_points(ii) = ii - 1
523 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
524 if (maxp <= op%mesh%np)
then
525 op%ninner = op%ninner + 1
526 inner_points(op%ninner) = ii - 1
528 op%nouter = op%nouter + 1
529 outer_points(op%nouter) = ii - 1
542 safe_deallocate_a(inner_points)
543 safe_deallocate_a(outer_points)
544 safe_deallocate_a(all_points)
558 integer :: istencil, idir
562 write(
message(1),
'(3a)')
'Debug info: Finite difference weights for ', trim(this%label),
'.'
563 write(
message(2),
'(a)')
' Spacing:'
564 do idir = 1, this%mesh%box%dim
565 write(
message(2),
'(a,f16.8)') trim(
message(2)), this%mesh%spacing(idir)
569 do istencil = 1, this%stencil%size
570 select case(this%mesh%box%dim)
572 write(
message(1),
'(a,i3,1i4,f25.10)')
' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
574 write(
message(1),
'(a,i3,2i4,f25.10)')
' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
576 write(
message(1),
'(a,i3,3i4,f25.10)')
' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
595 safe_deallocate_a(op%inner%imin)
596 safe_deallocate_a(op%inner%imax)
597 safe_deallocate_a(op%inner%ri)
598 safe_deallocate_a(op%outer%imin)
599 safe_deallocate_a(op%outer%imax)
600 safe_deallocate_a(op%outer%ri)
602 safe_deallocate_a(op%w)
604 safe_deallocate_a(op%ri)
605 safe_deallocate_a(op%rimap)
606 safe_deallocate_a(op%rimap_inv)
607 safe_deallocate_a(op%nn)
609 safe_deallocate_a(op%wpair)
610 safe_deallocate_a(op%ri_pos)
611 safe_deallocate_a(op%ri_neg)
613 safe_deallocate_a(op%inner%ri_pos)
614 safe_deallocate_a(op%inner%ri_neg)
615 safe_deallocate_a(op%outer%ri_pos)
616 safe_deallocate_a(op%outer%ri_neg)
629 select case (function_accel)
636 if (op%mesh%parallel_in_domains)
then
658 integer,
intent(in) :: is
659 integer,
intent(in) :: ip
661 res = ip + op%ri(is, op%rimap(ip))
672 if (accel_is_enabled() .and. op%const_w)
then
673 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
674 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
689 if (accel_is_enabled() .and. op%const_w)
then
690 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
691 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
706 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
707 np_bc = max(np_bc, ii)
717 type(space_t),
intent(in) :: space
718 class(mesh_t),
intent(in) :: mesh
721 real(real64),
parameter :: tol = 1.0e-14_real64
722 real(real64) :: max_weight, new_w(op%stencil%size)
723 integer :: new_points(space%dim, op%stencil%size)
725 if (.not. op%const_w)
return
729 max_weight = maxval(abs(op%w(:, 1)))
731 do ip = 1, op%stencil%size
732 if (abs(op%w(ip, 1)) > tol * max_weight)
then
734 new_w(size) = op%w(ip, 1)
735 new_points(:,size) = op%stencil%points(:, ip)
740 op%stencil%size =
size
741 safe_deallocate_a(op%stencil%points)
742 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
743 op%stencil%points(:, :) = new_points(:, 1:size)
744 safe_deallocate_a(op%w)
747 op%w(1:
size, 1) = new_w(1:size)
750 call stencil_init_center(op%stencil)
756 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
757 integer,
intent(in) :: size
758 integer,
intent(in) :: ldf
759 integer,
intent(in) :: offsets(:, :)
760 real(real64),
intent(in) :: wre(:)
761 integer,
intent(in) :: ri(:, :)
762 integer,
intent(in) :: nri
763 integer,
intent(out) :: npairs
764 real(real64),
intent(inout) :: wpair(:)
765 integer,
intent(inout) :: pair_pos(:,:), pair_neg(:,:)
766 real(real64),
intent(out) :: wcenter
768 logical,
allocatable :: used(:)
769 integer :: i, j, ndim, s
771 integer,
allocatable :: idx(:)
773 real(real64),
parameter :: tol = 1.0e-12_real64
777 assert(mod(
size,2) == 1)
779 safe_allocate(used(1:size))
783 ndim = ubound(offsets, dim=1)
785 safe_allocate(idx(1:size))
786 call robust_sort_by_abs(wre, offsets, idx)
791 if (all(offsets(:, idx(i))==0))
then
792 wcenter = wre(idx(i))
802 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
803 if (.not. same) cycle
806 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
810 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
811 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
813 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
821 assert(npairs == size/2)
823 safe_deallocate_a(idx)
830 integer,
intent(in) :: size
831 integer,
intent(in) :: ldf
832 integer,
intent(in) :: offsets(:, :)
833 real(real64),
intent(in) :: wre(:)
834 integer,
intent(in) :: ri(:, :)
835 integer,
intent(in) :: nri
836 integer,
intent(out) :: npairs
837 real(real64),
intent(inout) :: wpair(:)
838 integer,
intent(inout) :: pair_pos(:,:), pair_neg(:,:)
840 logical,
allocatable :: used(:)
841 integer :: i, j, ndim, s
843 integer,
allocatable :: idx(:)
845 real(real64),
parameter :: tol = 1.0e-12_real64
849 assert(mod(
size,2) == 0)
851 safe_allocate(used(1:size))
855 ndim = ubound(offsets, dim=1)
857 safe_allocate(idx(1:size))
858 call robust_sort_by_abs(wre, offsets, idx)
869 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
870 if (.not. same) cycle
873 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
877 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
878 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
880 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
888 assert(npairs == size/2)
890 safe_deallocate_a(idx)
897 integer,
optional,
intent(in) :: max_size
899 integer :: ldf, start, end, ipair
906 assert(conf%target_states_block_size > 0)
908 if(
present(max_size))
then
909 start = op%max_allocated_ri_pair + 1
911 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair,
end)
912 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair,
end)
913 if (op%mesh%parallel_in_domains)
then
914 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair,
end)
915 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair,
end)
916 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair,
end)
917 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair,
end)
921 safe_allocate(op%wpair(1:op%stencil%size/2))
923 end = log2(conf%target_states_block_size)+1
925 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:
end))
926 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:
end))
927 if (op%mesh%parallel_in_domains)
then
928 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:
end))
929 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:
end))
930 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:
end))
931 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:
end))
934 op%max_allocated_ri_pair =
end
936 do ldf = start-1,
end-1
937 select case(op%symmetry)
939 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
940 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
941 if (op%mesh%parallel_in_domains)
then
942 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
943 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
944 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
945 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
949 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
950 if (op%mesh%parallel_in_domains)
then
951 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
952 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
953 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
954 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
959 if (.not.
present(max_size))
then
960 write(message(1),
'(3a)')
'Debug info: Sorted weights for ', trim(op%label),
'.'
961 call messages_info(1, debug_only=.
true.)
963 do ipair = 1, op%npairs
964 write(message(1),
'(a,i3,f25.10,2(1x,i4))')
' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
965 call messages_info(1, debug_only=.
true.)
974 integer,
allocatable,
intent(inout) :: ri(:,:,:)
975 integer,
intent(in) :: stencil_size, nri
976 integer,
intent(in) :: old_size, new_size
978 integer,
allocatable :: tmp(:,:,:)
980 safe_allocate_source_a(tmp, ri)
981 safe_deallocate_a(ri)
982 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
983 ri(:,:,1:old_size) = tmp
984 safe_deallocate_a(tmp)
989#include "nl_operator_inc.F90"
992#include "complex.F90"
993#include "nl_operator_inc.F90"
subroutine, public accel_free_buffer(this, async)
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
pure logical function, public accel_is_enabled()
integer, parameter, public accel_mem_read_only
integer pure function, public accel_max_block_size()
This module implements batches of mesh functions.
Module implementing boundary conditions in Octopus.
real(real64), parameter, public m_zero
This module implements the index, used for the mesh points.
This module is intended to contain "only mathematical" functions and procedures.
This module defines the meshes, which are used in Octopus.
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
subroutine, public messages_not_implemented(feature, namespace)
subroutine, public messages_obsolete_variable(namespace, name, rep)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_experimental(name, namespace)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
This module handles the communicators for the various parallelization strategies.
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Some general things and nomenclature:
This module is intended to contain "only mathematical" functions and procedures.
This module defines stencils used in Octopus.
subroutine, public stencil_end(this)
type(type_t), public type_integer
Describes mesh distribution to nodes.
index type for non-local operators
data type for non local operators