Octopus
nl_operator.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
24 use accel_oct_m
25 use batch_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use index_oct_m
30 use iso_c_binding
31 use math_oct_m
32 use mesh_oct_m
34 use mpi_oct_m
39 use parser_oct_m
41 use space_oct_m
42 use sort_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50 public :: &
71
77 private
78 integer :: nri = 0
79 integer, allocatable :: imin(:)
80 integer, allocatable :: imax(:)
81 integer, allocatable :: ri(:, :)
82 integer, allocatable :: ri_pos(:,:,:)
83 integer, allocatable :: ri_neg(:,:,:)
84 end type nl_operator_index_t
85
86 integer, public, parameter :: &
87 OP_GENERAL = 1, &
88 op_symmetric = 2, &
90
92 type nl_operator_t
93 private
94 type(stencil_t), public :: stencil
95 type(mesh_t), pointer :: mesh => null()
96 integer, allocatable :: nn(:)
97 integer, public :: np = 0
98 ! When running in parallel mode, the next three arrays are unique on each node.
99 real(real64), allocatable, public :: w(:,:)
100
101 logical, public :: const_w = .true.
102
103 type(accel_mem_t), public :: buff_weights
104 type(accel_mem_t), public :: buff_half_weights
105
106 integer :: symmetry = op_general
107
108 character(len=40) :: label
109
111 integer, public :: nri = 0
112 integer, allocatable, public :: ri(:,:)
113 integer, allocatable, public :: rimap(:)
114 integer, allocatable, public :: rimap_inv(:)
115
117 integer :: npairs = 0
118 real(real64),allocatable :: wpair(:)
119 real(real64) :: wcenter
120 integer, allocatable :: ri_pos(:,:,:)
121 integer, allocatable :: ri_neg(:,:,:)
122 integer :: max_allocated_ri_pair = 0
123
124 integer :: ninner = 0
125 integer :: nouter = 0
126
127 type(nl_operator_index_t) :: inner
128 type(nl_operator_index_t) :: outer
129
130 type(accel_kernel_t) :: kernel
131 type(accel_mem_t) :: buff_imin
132 type(accel_mem_t) :: buff_imax
133 type(accel_mem_t) :: buff_ri
134 type(accel_mem_t) :: buff_map
135 type(accel_mem_t) :: buff_all
136 type(accel_mem_t) :: buff_inner
137 type(accel_mem_t) :: buff_outer
138 type(accel_mem_t) :: buff_stencil
139 type(accel_mem_t) :: buff_ip_to_xyz
140 type(accel_mem_t) :: buff_xyz_to_ip
141
142 ! For multigrid solvers
143 type(nl_operator_t), public, pointer :: coarser => null()
144
145 end type nl_operator_t
146
147 integer, parameter :: &
148 OP_FORTRAN = 0, &
149 op_vec = 1, &
150 op_min = op_fortran, &
151 op_max = op_vec
152
153 integer, parameter :: &
154 OP_INVMAP = 1, &
155 op_map = 2, &
156 op_nomap = 3
157
158 integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2
159
160 interface
161 integer function op_is_available(opid, type)
162 implicit none
163 integer, intent(in) :: opid, type
164 end function op_is_available
165 end interface
166
167 integer :: dfunction_global = -1
168 integer :: zfunction_global = -1
169 integer :: function_accel
170 logical :: use_symmetries = .false.
172contains
174 ! ---------------------------------------------------------
177 subroutine nl_operator_global_init(namespace)
178 type(namespace_t), intent(in) :: namespace
179
180 integer :: default
183
184 !%Variable OperateDouble
185 !%Type integer
186 !%Section Execution::Optimization
187 !%Default optimized
188 !%Description
189 !% This variable selects the subroutine used to apply non-local
190 !% operators over the grid for real functions.
191 !%Option fortran 0
192 !% The standard Fortran function.
193 !%Option optimized 1
194 !% This version is optimized using vector primitives (if available).
195 !%End
197 !%Variable OperateComplex
198 !%Type integer
199 !%Section Execution::Optimization
200 !%Default optimized
201 !%Description
202 !% This variable selects the subroutine used to apply non-local
203 !% operators over the grid for complex functions.
204 !%Option fortran 0
205 !% The standard Fortran function.
206 !%Option optimized 1
207 !% This version is optimized using vector primitives (if available).
208 !%End
210 default = op_vec
211
212 call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
213 if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')
215 call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
216 if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')
218 !%Variable OperateUseSymmetries
219 !%Type logical
220 !%Section Execution::Optimization
221 !%Default no
222 !%Description
223 !% This variable selects if the operators are built using symmetries or not.
224 !%End
225 call parse_variable(namespace, 'OperateUseSymmetries', .false., use_symmetries)
226 if (accel_is_enabled() .and. use_symmetries) then
227 call messages_not_implemented("OperateUseSymmetries=yes with GPUs")
228 end if
232 !%Variable OperateAccel
233 !%Type integer
234 !%Default map
235 !%Section Execution::Optimization
236 !%Description
237 !% This variable selects the subroutine used to apply non-local
238 !% operators over the grid when an accelerator device is used.
239 !%Option invmap 1
240 !% The standard implementation ported to GPUs.
241 !%Option map 2
242 !% A different version, more suitable for GPUs.
243 !%End
244 call parse_variable(namespace, 'OperateAccel', op_map, function_accel)
245
246 call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')
247
248 end if
249
251 end subroutine nl_operator_global_init
252
253 ! ---------------------------------------------------------
254
255 subroutine nl_operator_global_end()
257
259 end subroutine nl_operator_global_end
260
261 ! ---------------------------------------------------------
265 subroutine nl_operator_init(op, label, symm)
266 type(nl_operator_t), intent(inout) :: op
267 character(len=*), intent(in) :: label
268 integer, optional, intent(in) :: symm
269
270 push_sub(nl_operator_init)
271
272 op%label = label
273 op%symmetry = op_general
274 if (use_symmetries) then
275 op%symmetry = optional_default(symm, op_general)
276 end if
277
278 pop_sub(nl_operator_init)
279 end subroutine nl_operator_init
280
281 ! ---------------------------------------------------------
291 subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
292 class(space_t), intent(in) :: space
293 type(mesh_t), target, intent(in) :: mesh
294 type(nl_operator_t), intent(inout) :: op
295 integer, intent(in) :: np
296 logical, optional, intent(in) :: const_w
297 logical, optional, intent(in) :: regenerate
298
299 integer :: ii, jj, p1(space%dim), time, current
300 integer, allocatable :: st1(:), st2(:), st1r(:)
301 integer :: ir, maxp, iinner, iouter
302 logical :: change, force_change
303 character(len=200) :: flags
304 integer, allocatable :: inner_points(:), outer_points(:), all_points(:)
305
306 push_sub(nl_operator_build)
307
308 if (mesh%parallel_in_domains .and. .not. const_w) then
309 call messages_experimental('Domain parallelization with curvilinear coordinates')
310 end if
311
312 assert(np > 0)
313
314 ! store values in structure
315 op%np = np
316 op%mesh => mesh
317 if (.not. optional_default(regenerate, .false.)) then
318 op%const_w = optional_default(const_w, .false.)
319 end if
320
321 ! allocate weights op%w
322 if (op%const_w) then
323 safe_allocate(op%w(1:op%stencil%size, 1))
324 message(1) = 'Debug: nl_operator_build: working with constant weights.'
325 call messages_info(1, debug_only=.true.)
326 else
327 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
328 message(1) = 'Debug: nl_operator_build: working with non-constant weights.'
329 call messages_info(1, debug_only=.true.)
330 end if
331
332 ! set initially to zero
333 op%w = m_zero
334
335 ! Build lookup table
336 safe_allocate(st1(1:op%stencil%size))
337 safe_allocate(st1r(1:op%stencil%size))
338 safe_allocate(st2(1:op%stencil%size))
339
340 op%nri = 0
341 do time = 1, 2
342 st2 = 0
343 do ii = 1, np
344 p1 = 0
345 call mesh_local_index_to_coords(mesh, ii, p1)
346
347 do jj = 1, op%stencil%size
348 ! Get local index of p1 plus current stencil point.
349 st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))
351 assert(st1(jj) > 0)
352 end do
353
354 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
355
356 change = any(st1 /= st2)
357
358 !the next is to detect when we move from a point that does not
359 !have boundary points as neighbours to one that has
360 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
361
362 ! if the stencil changes
363 if (change .or. force_change) then
364 !store it
365 st2(:) = st1(:)
366
367 !first time, just count
368 if (time == 1) op%nri = op%nri + 1
369
370 !second time, store
371 if (time == 2) then
372 current = current + 1
373 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
374 end if
375 end if
376
377 if (time == 2) op%rimap(ii) = current
378
379 end do
380
381 !after counting, allocate
382 if (time == 1) then
383 safe_deallocate_a(op%ri)
384 safe_deallocate_a(op%rimap)
385 safe_deallocate_a(op%rimap_inv)
387 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
388 safe_allocate(op%rimap(1:op%np))
389 safe_allocate(op%rimap_inv(1:op%nri + 1))
390 op%ri = 0
391 op%rimap = 0
392 op%rimap_inv = 0
393 current = 0
394
395 ! the sizes
396 if (mesh%use_curvilinear) then
397 safe_allocate(op%nn(1:op%nri))
398 ! for the moment all the sizes are the same
399 op%nn = op%stencil%size
400 end if
401 end if
402
403 end do
404
405 !the inverse mapping
406 op%rimap_inv(1) = 0
407 do jj = 1, op%np
408 op%rimap_inv(op%rimap(jj) + 1) = jj
409 end do
410 op%rimap_inv(op%nri + 1) = op%np
411
412 safe_deallocate_a(st1)
413 safe_deallocate_a(st1r)
414 safe_deallocate_a(st2)
415
416 if (op%mesh%parallel_in_domains) then
417 !now build the arrays required to apply the nl_operator by parts
418
419 !count points
420 op%inner%nri = 0
421 op%outer%nri = 0
422 do ir = 1, op%nri
423 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
424 if (maxp <= np) then
425 !inner point
426 op%inner%nri = op%inner%nri + 1
427 assert(op%inner%nri <= op%nri)
428 else
429 !outer point
430 op%outer%nri = op%outer%nri + 1
431 assert(op%outer%nri <= op%nri)
432 end if
433 end do
434
435 assert(op%inner%nri + op%outer%nri == op%nri)
436
437 if (optional_default(regenerate, .false.)) then
438 safe_deallocate_a(op%inner%imin)
439 safe_deallocate_a(op%inner%imax)
440 safe_deallocate_a(op%inner%ri)
441 safe_deallocate_a(op%outer%imin)
442 safe_deallocate_a(op%outer%imax)
443 safe_deallocate_a(op%outer%ri)
444 end if
445 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
446 safe_allocate(op%inner%imax(1:op%inner%nri))
447 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
448
449 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
450 safe_allocate(op%outer%imax(1:op%outer%nri))
451 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
452
453 !now populate the arrays
454 iinner = 0
455 iouter = 0
456 do ir = 1, op%nri
457 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
458 if (maxp <= np) then
459 !inner point
460 iinner = iinner + 1
461 op%inner%imin(iinner) = op%rimap_inv(ir)
462 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
463 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
464 else
465 !outer point
466 iouter = iouter + 1
467 op%outer%imin(iouter) = op%rimap_inv(ir)
468 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
469 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
470 end if
471 end do
472
473 !verify that all points in the inner operator are actually inner
474 do ir = 1, op%inner%nri
475 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
476 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
477 end do
478 end do
479
480 end if
481
482 if (accel_is_enabled() .and. op%const_w) then
483
484 write(flags, '(i5)') op%stencil%size
485 flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
486
487 if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)
488
489 select case (function_accel)
490 case (op_invmap)
491 call accel_kernel_build(op%kernel, 'operate.cu', 'operate', flags)
492 case (op_map)
493 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map', flags)
494 end select
495
496 ! conversion to i8 needed to avoid integer overflow
497 call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
498 call accel_write_buffer(op%buff_ri, op%stencil%size, op%nri, op%ri)
499
500 select case (function_accel)
501 case (op_invmap)
502 call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
503 call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
504 call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
505 call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))
506
507 case (op_map)
508
510 call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)
511
512 if (op%mesh%parallel_in_domains) then
513
514 safe_allocate(inner_points(1:op%mesh%np))
515 safe_allocate(outer_points(1:op%mesh%np))
516 safe_allocate(all_points(1:op%mesh%np))
517
518 op%ninner = 0
519 op%nouter = 0
520
521 do ii = 1, op%mesh%np
522 all_points(ii) = ii - 1
523 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
524 if (maxp <= op%mesh%np) then
525 op%ninner = op%ninner + 1
526 inner_points(op%ninner) = ii - 1
527 else
528 op%nouter = op%nouter + 1
529 outer_points(op%nouter) = ii - 1
530 end if
531 end do
532
534 call accel_write_buffer(op%buff_all, op%mesh%np, all_points)
535
537 call accel_write_buffer(op%buff_inner, op%ninner, inner_points)
538
540 call accel_write_buffer(op%buff_outer, op%nouter, outer_points)
541
542 safe_deallocate_a(inner_points)
543 safe_deallocate_a(outer_points)
544 safe_deallocate_a(all_points)
545
546 end if
547 end select
548 end if
549
550 pop_sub(nl_operator_build)
551
552 end subroutine nl_operator_build
553
554 ! ---------------------------------------------------------
555 subroutine nl_operator_output_weights(this)
556 type(nl_operator_t), intent(inout) :: this
557
558 integer :: istencil, idir
559
561
562 write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
563 write(message(2), '(a)') ' Spacing:'
564 do idir = 1, this%mesh%box%dim
565 write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(idir)
566 end do
567 call messages_info(2, debug_only=.true.)
568
569 do istencil = 1, this%stencil%size
570 select case(this%mesh%box%dim)
571 case(1)
572 write(message(1), '(a,i3,1i4,f25.10)') ' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
573 case(2)
574 write(message(1), '(a,i3,2i4,f25.10)') ' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
575 case(3)
576 write(message(1), '(a,i3,3i4,f25.10)') ' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
577 end select
578 call messages_info(1, debug_only=.true.)
579 end do
580
582
583 end subroutine nl_operator_output_weights
584
585 ! ---------------------------------------------------------
586 subroutine nl_operator_end(op)
587 type(nl_operator_t), intent(inout) :: op
588
589 push_sub(nl_operator_end)
590
591 if (accel_is_enabled() .and. op%const_w) then
593 end if
594
595 safe_deallocate_a(op%inner%imin)
596 safe_deallocate_a(op%inner%imax)
597 safe_deallocate_a(op%inner%ri)
598 safe_deallocate_a(op%outer%imin)
599 safe_deallocate_a(op%outer%imax)
600 safe_deallocate_a(op%outer%ri)
601
602 safe_deallocate_a(op%w)
603
604 safe_deallocate_a(op%ri)
605 safe_deallocate_a(op%rimap)
606 safe_deallocate_a(op%rimap_inv)
607 safe_deallocate_a(op%nn)
608
609 safe_deallocate_a(op%wpair)
610 safe_deallocate_a(op%ri_pos)
611 safe_deallocate_a(op%ri_neg)
612
613 safe_deallocate_a(op%inner%ri_pos)
614 safe_deallocate_a(op%inner%ri_neg)
615 safe_deallocate_a(op%outer%ri_pos)
616 safe_deallocate_a(op%outer%ri_neg)
617 call stencil_end(op%stencil)
618
619 pop_sub(nl_operator_end)
620 end subroutine nl_operator_end
621
622
623 subroutine nl_operator_clear_gpu_buffers(op)
624 type(nl_operator_t), intent(inout) :: op
625
627
628 call accel_free_buffer(op%buff_ri)
629 select case (function_accel)
630 case (op_invmap)
631 call accel_free_buffer(op%buff_imin)
632 call accel_free_buffer(op%buff_imax)
633
634 case (op_map)
635 call accel_free_buffer(op%buff_map)
636 if (op%mesh%parallel_in_domains) then
637 call accel_free_buffer(op%buff_all)
638 call accel_free_buffer(op%buff_inner)
639 call accel_free_buffer(op%buff_outer)
640 end if
641
642 case (op_nomap)
643 call accel_free_buffer(op%buff_map)
644 call accel_free_buffer(op%buff_stencil)
645 call accel_free_buffer(op%buff_xyz_to_ip)
646 call accel_free_buffer(op%buff_ip_to_xyz)
647 end select
648
649 call accel_free_buffer(op%buff_weights)
650 call accel_free_buffer(op%buff_half_weights)
651
653 end subroutine nl_operator_clear_gpu_buffers
654
655 ! ---------------------------------------------------------
656 integer pure function nl_operator_get_index(op, is, ip) result(res)
657 type(nl_operator_t), intent(in) :: op
658 integer, intent(in) :: is
659 integer, intent(in) :: ip
660
661 res = ip + op%ri(is, op%rimap(ip))
662 end function nl_operator_get_index
663
664 ! ---------------------------------------------------------
665
667 type(nl_operator_t), intent(inout) :: op
668
670
671 ! Update the GPU weights
672 if (accel_is_enabled() .and. op%const_w) then
673 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
674 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
675 end if
676
679
680
681 ! ---------------------------------------------------------
682
683 subroutine nl_operator_update_gpu_buffers(op)
684 type(nl_operator_t), intent(inout) :: op
685
687
688 ! Update the GPU weights
689 if (accel_is_enabled() .and. op%const_w) then
690 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
691 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
692 end if
693
695 end subroutine nl_operator_update_gpu_buffers
696
697 ! ---------------------------------------------------------
698
699 integer pure function nl_operator_np_zero_bc(op) result(np_bc)
700 type(nl_operator_t), intent(in) :: op
701
702 integer :: jj, ii
703
704 np_bc = 0
705 do jj = 1, op%nri
706 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
707 np_bc = max(np_bc, ii)
708 end do
709
710 end function nl_operator_np_zero_bc
711
712
713 ! ---------------------------------------------------------
715 subroutine nl_operator_remove_zero_weight_points(op, space, mesh)
716 type(nl_operator_t), intent(inout) :: op
717 type(space_t), intent(in) :: space
718 class(mesh_t), intent(in) :: mesh
719
720 integer :: ip, size
721 real(real64), parameter :: tol = 1.0e-14_real64
722 real(real64) :: max_weight, new_w(op%stencil%size)
723 integer :: new_points(space%dim, op%stencil%size)
724
725 if (.not. op%const_w) return
726
728
729 max_weight = maxval(abs(op%w(:, 1)))
730 size = 0
731 do ip = 1, op%stencil%size
732 if (abs(op%w(ip, 1)) > tol * max_weight) then
733 size = size +1
734 new_w(size) = op%w(ip, 1)
735 new_points(:,size) = op%stencil%points(:, ip)
736 end if
737 end do
738
739 ! We regenerate the stencil without the zero-weight points
740 op%stencil%size = size
741 safe_deallocate_a(op%stencil%points)
742 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
743 op%stencil%points(:, :) = new_points(:, 1:size)
744 safe_deallocate_a(op%w)
746 call nl_operator_build(space, mesh, op, mesh%np, const_w=op%const_w, regenerate=.true.)
747 op%w(1:size, 1) = new_w(1:size)
748
753 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
754 integer, intent(in) :: size
755 integer, intent(in) :: ldf
756 integer, intent(in) :: offsets(:, :)
757 real(real64), intent(in) :: wre(:)
758 integer, intent(in) :: ri(:, :)
759 integer, intent(in) :: nri
760 integer, intent(out) :: npairs
761 real(real64), intent(inout) :: wpair(:)
762 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
763 real(real64), intent(out) :: wcenter
764
765 logical, allocatable :: used(:)
766 integer :: i, j, ndim, s
767 logical :: same
768 integer, allocatable :: idx(:)
769
770 real(real64), parameter :: tol = 1.0e-12_real64
771
772 push_sub(group_by_pairs_sym)
773
774 assert(mod(size,2) == 1)
775
776 safe_allocate(used(1:size))
777 used = .false.
778 npairs = 0
779
780 ndim = ubound(offsets, dim=1)
781
782 safe_allocate(idx(1:size))
783 call robust_sort_by_abs(wre, offsets, idx)
784
785 do i = 1, size
786 if (used(i)) cycle
787
788 if (all(offsets(:, idx(i))==0)) then
789 wcenter = wre(idx(i))
790 used(i) = .true.
791 cycle
792 end if
793
794 ! Try to find symmetric partner j
795 do j = i+1, size
796 if (used(j)) cycle
797
798 ! Weight equality
799 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
800 if (.not. same) cycle
801
802 ! Offsets equal and opposite
803 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
804
805 npairs = npairs + 1
806 do s = 1, nri
807 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
808 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
809 end do
810 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
811
812 used(i) = .true.
813 used(j) = .true.
814 exit
815 end do
816 end do
817
818 assert(npairs == size/2)
819
820 safe_deallocate_a(idx)
821
822 pop_sub(group_by_pairs_sym)
823 end subroutine group_by_pairs_sym
824
826 subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
827 integer, intent(in) :: size
828 integer, intent(in) :: ldf
829 integer, intent(in) :: offsets(:, :)
830 real(real64), intent(in) :: wre(:)
831 integer, intent(in) :: ri(:, :)
832 integer, intent(in) :: nri
833 integer, intent(out) :: npairs
834 real(real64), intent(inout) :: wpair(:)
835 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
836
837 logical, allocatable :: used(:)
838 integer :: i, j, ndim, s
839 logical :: same
840 integer, allocatable :: idx(:)
841
842 real(real64), parameter :: tol = 1.0e-12_real64
843
844 push_sub(group_by_pairs_antisym)
845
846 assert(mod(size,2) == 0)
847
848 safe_allocate(used(1:size))
849 used = .false.
850 npairs = 0
851
852 ndim = ubound(offsets, dim=1)
853
854 safe_allocate(idx(1:size))
855 call robust_sort_by_abs(wre, offsets, idx)
856
857 ! Max pairs = n/2
858 do i = 1, size
859 if (used(i)) cycle
860
861 ! Try to find symmetric partner j
862 do j = i+1, size
863 if (used(j)) cycle
864
865 ! Weight equality
866 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
867 if (.not. same) cycle
868
869 ! Offsets equal and opposite
870 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
871
872 npairs = npairs + 1
873 do s = 1, nri
874 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
875 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
876 end do
877 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
878
879 used(i) = .true.
880 used(j) = .true.
881 exit
882 end do
883 end do
884
885 assert(npairs == size/2)
886
887 safe_deallocate_a(idx)
889 end subroutine group_by_pairs_antisym
890
892 subroutine nl_operator_build_symmetric_weights(op, max_size)
893 type(nl_operator_t), intent(inout) :: op
894 integer, optional, intent(in) :: max_size
895
896 integer :: ldf, start, end, ipair
897
898 if (op%symmetry == op_general) return
899
901
902 assert(op%const_w)
903 assert(conf%target_states_block_size > 0)
904
905 if(present(max_size)) then
906 start = op%max_allocated_ri_pair + 1
907 end = max_size
908 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
909 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
910 if (op%mesh%parallel_in_domains) then
911 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
912 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
913 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
914 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
915 end if
916 else
917 ! Max pairs = n/2
918 safe_allocate(op%wpair(1:op%stencil%size/2))
919 start = 1
920 end = log2(conf%target_states_block_size)+1
922 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:end))
923 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:end))
924 if (op%mesh%parallel_in_domains) then
925 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
926 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
927 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
928 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
929 end if
930 end if
931 op%max_allocated_ri_pair = end
932
933 do ldf = start-1, end-1
934 select case(op%symmetry)
935 case(op_symmetric)
936 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
937 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
938 if (op%mesh%parallel_in_domains) then
939 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
940 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
941 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
942 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
943 end if
944 case(op_antisymmetric)
945 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
946 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
947 if (op%mesh%parallel_in_domains) then
948 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
949 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
950 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
951 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
952 end if
953 end select
954 end do
955
956 if (.not. present(max_size)) then
957 write(message(1), '(3a)') 'Debug info: Sorted weights for ', trim(op%label), '.'
958 call messages_info(1, debug_only=.true.)
959
960 do ipair = 1, op%npairs
961 write(message(1), '(a,i3,f25.10,2(1x,i4))') ' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
962 call messages_info(1, debug_only=.true.)
963 end do
964 end if
965
968
970 subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
971 integer, allocatable, intent(inout) :: ri(:,:,:)
972 integer, intent(in) :: stencil_size, nri
973 integer, intent(in) :: old_size, new_size
974
975 integer, allocatable :: tmp(:,:,:)
976
977 safe_allocate_source_a(tmp, ri)
978 safe_deallocate_a(ri)
979 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
980 ri(:,:,1:old_size) = tmp
981 safe_deallocate_a(tmp)
982 end subroutine reallocate_array
983
984#include "undef.F90"
985#include "real.F90"
986#include "nl_operator_inc.F90"
988#include "undef.F90"
989#include "complex.F90"
990#include "nl_operator_inc.F90"
991
992end module nl_operator_oct_m
993
994!! Local Variables:
995!! mode: f90
996!! coding: utf-8
997!! End:
subroutine, public accel_free_buffer(this, async)
Definition: accel.F90:1005
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:1371
pure logical function, public accel_is_enabled()
Definition: accel.F90:402
integer, parameter, public accel_mem_read_only
Definition: accel.F90:185
integer pure function, public accel_max_block_size()
Definition: accel.F90:1182
This module implements batches of mesh functions.
Definition: batch.F90:135
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
real(real64), parameter, public m_zero
Definition: global.F90:191
This module implements the index, used for the mesh points.
Definition: index.F90:124
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:938
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:950
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1067
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:999
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:161
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:690
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1039
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:593
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:119
Some general things and nomenclature:
Definition: par_vec.F90:173
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:119
This module defines stencils used in Octopus.
Definition: stencil.F90:137
subroutine, public stencil_end(this)
Definition: stencil.F90:216
type(type_t), public type_integer
Definition: types.F90:137
Describes mesh distribution to nodes.
Definition: mesh.F90:187
index type for non-local operators
data type for non local operators
int true(void)