79 integer,
allocatable :: imin(:)
80 integer,
allocatable :: imax(:)
81 integer,
allocatable :: ri(:, :)
82 integer,
allocatable :: ri_pos(:,:,:)
83 integer,
allocatable :: ri_neg(:,:,:)
86 integer,
public,
parameter :: &
94 type(stencil_t),
public :: stencil
95 type(mesh_t),
pointer :: mesh => null()
96 integer,
allocatable :: nn(:)
97 integer,
public :: np = 0
99 real(real64),
allocatable,
public :: w(:,:)
101 logical,
public :: const_w = .
true.
103 type(accel_mem_t),
public :: buff_weights
104 type(accel_mem_t),
public :: buff_half_weights
106 integer :: symmetry = op_general
108 character(len=40) :: label
111 integer,
public :: nri = 0
112 integer,
allocatable,
public :: ri(:,:)
113 integer,
allocatable,
public :: rimap(:)
114 integer,
allocatable,
public :: rimap_inv(:)
117 integer :: npairs = 0
118 real(real64),
allocatable :: wpair(:)
119 real(real64) :: wcenter
120 integer,
allocatable :: ri_pos(:,:,:)
121 integer,
allocatable :: ri_neg(:,:,:)
122 integer :: max_allocated_ri_pair = 0
124 integer :: ninner = 0
125 integer :: nouter = 0
127 type(nl_operator_index_t) :: inner
128 type(nl_operator_index_t) :: outer
130 type(accel_kernel_t) :: kernel
131 type(accel_mem_t) :: buff_imin
132 type(accel_mem_t) :: buff_imax
133 type(accel_mem_t) :: buff_ri
134 type(accel_mem_t) :: buff_map
135 type(accel_mem_t) :: buff_all
136 type(accel_mem_t) :: buff_inner
137 type(accel_mem_t) :: buff_outer
138 type(accel_mem_t) :: buff_stencil
139 type(accel_mem_t) :: buff_ip_to_xyz
140 type(accel_mem_t) :: buff_xyz_to_ip
143 type(nl_operator_t),
public,
pointer :: coarser => null()
147 integer,
parameter :: &
153 integer,
parameter :: &
163 integer,
intent(in) :: opid, type
167 integer :: dfunction_global = -1
168 integer :: zfunction_global = -1
169 integer :: function_accel
170 logical :: use_symmetries = .false.
267 character(len=*),
intent(in) :: label
268 integer,
optional,
intent(in) :: symm
273 op%symmetry = op_general
274 if (use_symmetries)
then
292 class(
space_t),
intent(in) :: space
293 type(
mesh_t),
target,
intent(in) :: mesh
295 integer,
intent(in) :: np
296 logical,
optional,
intent(in) :: const_w
297 logical,
optional,
intent(in) :: regenerate
299 integer :: ii, jj, p1(space%dim), time, current
300 integer,
allocatable :: st1(:), st2(:), st1r(:)
301 integer :: ir, maxp, iinner, iouter
302 logical :: change, force_change
303 character(len=200) :: flags
304 integer,
allocatable :: inner_points(:), outer_points(:), all_points(:)
308 if (mesh%parallel_in_domains .and. .not. const_w)
then
323 safe_allocate(op%w(1:op%stencil%size, 1))
324 message(1) =
'Debug: nl_operator_build: working with constant weights.'
327 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
328 message(1) =
'Debug: nl_operator_build: working with non-constant weights.'
336 safe_allocate(st1(1:op%stencil%size))
337 safe_allocate(st1r(1:op%stencil%size))
338 safe_allocate(st2(1:op%stencil%size))
347 do jj = 1, op%stencil%size
354 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
356 change = any(st1 /= st2)
360 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
363 if (change .or. force_change)
then
368 if (time == 1) op%nri = op%nri + 1
372 current = current + 1
373 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
377 if (time == 2) op%rimap(ii) = current
383 safe_deallocate_a(op%ri)
384 safe_deallocate_a(op%rimap)
385 safe_deallocate_a(op%rimap_inv)
387 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
388 safe_allocate(op%rimap(1:op%np))
389 safe_allocate(op%rimap_inv(1:op%nri + 1))
396 if (mesh%use_curvilinear)
then
397 safe_allocate(op%nn(1:op%nri))
399 op%nn = op%stencil%size
408 op%rimap_inv(op%rimap(jj) + 1) = jj
410 op%rimap_inv(op%nri + 1) = op%np
412 safe_deallocate_a(st1)
413 safe_deallocate_a(st1r)
414 safe_deallocate_a(st2)
416 if (op%mesh%parallel_in_domains)
then
423 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
426 op%inner%nri = op%inner%nri + 1
427 assert(op%inner%nri <= op%nri)
430 op%outer%nri = op%outer%nri + 1
431 assert(op%outer%nri <= op%nri)
435 assert(op%inner%nri + op%outer%nri == op%nri)
438 safe_deallocate_a(op%inner%imin)
439 safe_deallocate_a(op%inner%imax)
440 safe_deallocate_a(op%inner%ri)
441 safe_deallocate_a(op%outer%imin)
442 safe_deallocate_a(op%outer%imax)
443 safe_deallocate_a(op%outer%ri)
445 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
446 safe_allocate(op%inner%imax(1:op%inner%nri))
447 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
449 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
450 safe_allocate(op%outer%imax(1:op%outer%nri))
451 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
457 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
461 op%inner%imin(iinner) = op%rimap_inv(ir)
462 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
463 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
467 op%outer%imin(iouter) = op%rimap_inv(ir)
468 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
469 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
474 do ir = 1, op%inner%nri
475 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
476 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
484 write(flags,
'(i5)') op%stencil%size
485 flags=
'-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
487 if (op%mesh%parallel_in_domains) flags =
'-DINDIRECT '//trim(flags)
489 select case (function_accel)
500 select case (function_accel)
512 if (op%mesh%parallel_in_domains)
then
514 safe_allocate(inner_points(1:op%mesh%np))
515 safe_allocate(outer_points(1:op%mesh%np))
516 safe_allocate(all_points(1:op%mesh%np))
521 do ii = 1, op%mesh%np
522 all_points(ii) = ii - 1
523 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
524 if (maxp <= op%mesh%np)
then
525 op%ninner = op%ninner + 1
526 inner_points(op%ninner) = ii - 1
528 op%nouter = op%nouter + 1
529 outer_points(op%nouter) = ii - 1
542 safe_deallocate_a(inner_points)
543 safe_deallocate_a(outer_points)
544 safe_deallocate_a(all_points)
558 integer :: istencil, idir
562 write(
message(1),
'(3a)')
'Debug info: Finite difference weights for ', trim(this%label),
'.'
563 write(
message(2),
'(a)')
' Spacing:'
564 do idir = 1, this%mesh%box%dim
565 write(
message(2),
'(a,f16.8)') trim(
message(2)), this%mesh%spacing(idir)
569 do istencil = 1, this%stencil%size
570 select case(this%mesh%box%dim)
572 write(
message(1),
'(a,i3,1i4,f25.10)')
' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
574 write(
message(1),
'(a,i3,2i4,f25.10)')
' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
576 write(
message(1),
'(a,i3,3i4,f25.10)')
' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
595 safe_deallocate_a(op%inner%imin)
596 safe_deallocate_a(op%inner%imax)
597 safe_deallocate_a(op%inner%ri)
598 safe_deallocate_a(op%outer%imin)
599 safe_deallocate_a(op%outer%imax)
600 safe_deallocate_a(op%outer%ri)
602 safe_deallocate_a(op%w)
604 safe_deallocate_a(op%ri)
605 safe_deallocate_a(op%rimap)
606 safe_deallocate_a(op%rimap_inv)
607 safe_deallocate_a(op%nn)
609 safe_deallocate_a(op%wpair)
610 safe_deallocate_a(op%ri_pos)
611 safe_deallocate_a(op%ri_neg)
613 safe_deallocate_a(op%inner%ri_pos)
614 safe_deallocate_a(op%inner%ri_neg)
615 safe_deallocate_a(op%outer%ri_pos)
616 safe_deallocate_a(op%outer%ri_neg)
629 select case (function_accel)
636 if (op%mesh%parallel_in_domains)
then
658 integer,
intent(in) :: is
659 integer,
intent(in) :: ip
661 res = ip + op%ri(is, op%rimap(ip))
672 if (accel_is_enabled() .and. op%const_w)
then
673 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
674 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
689 if (accel_is_enabled() .and. op%const_w)
then
690 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
691 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
706 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
707 np_bc = max(np_bc, ii)
717 type(space_t),
intent(in) :: space
718 class(mesh_t),
intent(in) :: mesh
721 real(real64),
parameter :: tol = 1.0e-14_real64
722 real(real64) :: max_weight, new_w(op%stencil%size)
723 integer :: new_points(space%dim, op%stencil%size)
725 if (.not. op%const_w)
return
729 max_weight = maxval(abs(op%w(:, 1)))
731 do ip = 1, op%stencil%size
732 if (abs(op%w(ip, 1)) > tol * max_weight)
then
734 new_w(size) = op%w(ip, 1)
735 new_points(:,size) = op%stencil%points(:, ip)
740 op%stencil%size =
size
741 safe_deallocate_a(op%stencil%points)
742 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
743 op%stencil%points(:, :) = new_points(:, 1:size)
744 safe_deallocate_a(op%w)
747 op%w(1:
size, 1) = new_w(1:size)
753 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
754 integer,
intent(in) :: size
755 integer,
intent(in) :: ldf
756 integer,
intent(in) :: offsets(:, :)
757 real(real64),
intent(in) :: wre(:)
758 integer,
intent(in) :: ri(:, :)
759 integer,
intent(in) :: nri
760 integer,
intent(out) :: npairs
761 real(real64),
intent(inout) :: wpair(:)
762 integer,
intent(inout) :: pair_pos(:,:), pair_neg(:,:)
763 real(real64),
intent(out) :: wcenter
765 logical,
allocatable :: used(:)
766 integer :: i, j, ndim, s
768 integer,
allocatable :: idx(:)
770 real(real64),
parameter :: tol = 1.0e-12_real64
774 assert(mod(
size,2) == 1)
776 safe_allocate(used(1:size))
780 ndim = ubound(offsets, dim=1)
782 safe_allocate(idx(1:size))
783 call robust_sort_by_abs(wre, offsets, idx)
788 if (all(offsets(:, idx(i))==0))
then
789 wcenter = wre(idx(i))
799 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
800 if (.not. same) cycle
803 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
807 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
808 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
810 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
818 assert(npairs == size/2)
820 safe_deallocate_a(idx)
827 integer,
intent(in) :: size
828 integer,
intent(in) :: ldf
829 integer,
intent(in) :: offsets(:, :)
830 real(real64),
intent(in) :: wre(:)
831 integer,
intent(in) :: ri(:, :)
832 integer,
intent(in) :: nri
833 integer,
intent(out) :: npairs
834 real(real64),
intent(inout) :: wpair(:)
835 integer,
intent(inout) :: pair_pos(:,:), pair_neg(:,:)
837 logical,
allocatable :: used(:)
838 integer :: i, j, ndim, s
840 integer,
allocatable :: idx(:)
842 real(real64),
parameter :: tol = 1.0e-12_real64
846 assert(mod(
size,2) == 0)
848 safe_allocate(used(1:size))
852 ndim = ubound(offsets, dim=1)
854 safe_allocate(idx(1:size))
855 call robust_sort_by_abs(wre, offsets, idx)
866 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
867 if (.not. same) cycle
870 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
874 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
875 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
877 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
885 assert(npairs == size/2)
887 safe_deallocate_a(idx)
894 integer,
optional,
intent(in) :: max_size
896 integer :: ldf, start, end, ipair
903 assert(conf%target_states_block_size > 0)
905 if(
present(max_size))
then
906 start = op%max_allocated_ri_pair + 1
908 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair,
end)
909 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair,
end)
910 if (op%mesh%parallel_in_domains)
then
911 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair,
end)
912 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair,
end)
913 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair,
end)
914 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair,
end)
918 safe_allocate(op%wpair(1:op%stencil%size/2))
920 end = log2(conf%target_states_block_size)+1
922 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:
end))
923 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:
end))
924 if (op%mesh%parallel_in_domains)
then
925 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:
end))
926 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:
end))
927 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:
end))
928 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:
end))
931 op%max_allocated_ri_pair =
end
933 do ldf = start-1,
end-1
934 select case(op%symmetry)
936 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
937 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
938 if (op%mesh%parallel_in_domains)
then
939 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
940 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
941 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
942 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
946 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
947 if (op%mesh%parallel_in_domains)
then
948 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
949 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
950 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
951 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
956 if (.not.
present(max_size))
then
957 write(message(1),
'(3a)')
'Debug info: Sorted weights for ', trim(op%label),
'.'
958 call messages_info(1, debug_only=.
true.)
960 do ipair = 1, op%npairs
961 write(message(1),
'(a,i3,f25.10,2(1x,i4))')
' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
962 call messages_info(1, debug_only=.
true.)
971 integer,
allocatable,
intent(inout) :: ri(:,:,:)
972 integer,
intent(in) :: stencil_size, nri
973 integer,
intent(in) :: old_size, new_size
975 integer,
allocatable :: tmp(:,:,:)
977 safe_allocate_source_a(tmp, ri)
978 safe_deallocate_a(ri)
979 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
980 ri(:,:,1:old_size) = tmp
981 safe_deallocate_a(tmp)
986#include "nl_operator_inc.F90"
989#include "complex.F90"
990#include "nl_operator_inc.F90"
subroutine, public accel_free_buffer(this, async)
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
pure logical function, public accel_is_enabled()
integer, parameter, public accel_mem_read_only
integer pure function, public accel_max_block_size()
This module implements batches of mesh functions.
Module implementing boundary conditions in Octopus.
real(real64), parameter, public m_zero
This module implements the index, used for the mesh points.
This module is intended to contain "only mathematical" functions and procedures.
This module defines the meshes, which are used in Octopus.
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
subroutine, public messages_not_implemented(feature, namespace)
subroutine, public messages_obsolete_variable(namespace, name, rep)
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_input_error(namespace, var, details, row, column)
subroutine, public messages_experimental(name, namespace)
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
This module handles the communicators for the various parallelization strategies.
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Some general things and nomenclature:
This module is intended to contain "only mathematical" functions and procedures.
This module defines stencils used in Octopus.
subroutine, public stencil_end(this)
type(type_t), public type_integer
Describes mesh distribution to nodes.
index type for non-local operators
data type for non local operators