Octopus
nl_operator.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
24 use accel_oct_m
25 use batch_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use index_oct_m
30 use iso_c_binding
31 use math_oct_m
32 use mesh_oct_m
34 use mpi_oct_m
39 use parser_oct_m
41 use space_oct_m
42 use sort_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50 public :: &
71
77 private
78 integer :: nri = 0
79 integer, allocatable :: imin(:)
80 integer, allocatable :: imax(:)
81 integer, allocatable :: ri(:, :)
82 integer, allocatable :: ri_pos(:,:,:)
83 integer, allocatable :: ri_neg(:,:,:)
84 end type nl_operator_index_t
85
86 integer, public, parameter :: &
87 OP_GENERAL = 1, &
88 op_symmetric = 2, &
90
92 type nl_operator_t
93 private
94 type(stencil_t), public :: stencil
95 type(mesh_t), pointer :: mesh => null()
96 integer, allocatable :: nn(:)
97 integer, public :: np = 0
98 ! When running in parallel mode, the next three arrays are unique on each node.
99 real(real64), allocatable, public :: w(:,:)
100
101 logical, public :: const_w = .true.
102
103 type(accel_mem_t), public :: buff_weights
104 type(accel_mem_t), public :: buff_half_weights
105
106 integer :: symmetry = op_general
107
108 character(len=40) :: label
109
111 integer, public :: nri = 0
112 integer, allocatable, public :: ri(:,:)
113 integer, allocatable, public :: rimap(:)
114 integer, allocatable, public :: rimap_inv(:)
115
117 integer :: npairs = 0
118 real(real64),allocatable :: wpair(:)
119 real(real64) :: wcenter
120 integer, allocatable :: ri_pos(:,:,:)
121 integer, allocatable :: ri_neg(:,:,:)
122 integer :: max_allocated_ri_pair = 0
123
124 integer :: ninner = 0
125 integer :: nouter = 0
126
127 type(nl_operator_index_t) :: inner
128 type(nl_operator_index_t) :: outer
129
130 type(accel_kernel_t) :: kernel
131 type(accel_mem_t) :: buff_imin
132 type(accel_mem_t) :: buff_imax
133 type(accel_mem_t) :: buff_ri
134 type(accel_mem_t) :: buff_map
135 type(accel_mem_t) :: buff_all
136 type(accel_mem_t) :: buff_inner
137 type(accel_mem_t) :: buff_outer
138 type(accel_mem_t) :: buff_stencil
139 type(accel_mem_t) :: buff_ip_to_xyz
140 type(accel_mem_t) :: buff_xyz_to_ip
141
143 type(accel_mem_t), public :: buff_wpair
144 type(accel_mem_t), public :: buff_half_wpair
145 type(accel_mem_t), public :: buff_ri_pos
146 type(accel_mem_t), public :: buff_ri_neg
147 type(accel_mem_t), public :: buff_map_sym
148 integer :: max_allocated_ri_pair_gpu = 0
149
150 ! For multigrid solvers
151 type(nl_operator_t), public, pointer :: coarser => null()
152
153 end type nl_operator_t
154
155 integer, parameter :: &
156 OP_FORTRAN = 0, &
157 op_vec = 1, &
158 op_min = op_fortran, &
159 op_max = op_vec
160
161 integer, parameter :: &
162 OP_INVMAP = 1, &
163 op_map = 2, &
164 op_nomap = 3
165
166 integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2
167
168 interface
169 integer function op_is_available(opid, type)
170 implicit none
171 integer, intent(in) :: opid, type
172 end function op_is_available
173 end interface
175 integer :: dfunction_global = -1
176 integer :: zfunction_global = -1
177 integer :: function_accel
178 logical :: use_symmetries = .false.
179
180contains
182 ! ---------------------------------------------------------
185 subroutine nl_operator_global_init(namespace)
186 type(namespace_t), intent(in) :: namespace
188 integer :: default
192 !%Variable OperateDouble
193 !%Type integer
194 !%Section Execution::Optimization
195 !%Default optimized
196 !%Description
197 !% This variable selects the subroutine used to apply non-local
198 !% operators over the grid for real functions.
199 !%Option fortran 0
200 !% The standard Fortran function.
201 !%Option optimized 1
202 !% This version is optimized using vector primitives (if available).
203 !%End
204
205 !%Variable OperateComplex
206 !%Type integer
207 !%Section Execution::Optimization
208 !%Default optimized
209 !%Description
210 !% This variable selects the subroutine used to apply non-local
211 !% operators over the grid for complex functions.
212 !%Option fortran 0
213 !% The standard Fortran function.
214 !%Option optimized 1
215 !% This version is optimized using vector primitives (if available).
216 !%End
218 default = op_vec
220 call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
221 if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')
223 call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
224 if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')
226 !%Variable OperateUseSymmetries
227 !%Type logical
228 !%Section Execution::Optimization
229 !%Default no
230 !%Description
231 !% This variable selects if the operators are built using symmetries or not.
232 !%End
233 call parse_variable(namespace, 'OperateUseSymmetries', .false., use_symmetries)
236
237 !%Variable OperateAccel
238 !%Type integer
239 !%Default map
240 !%Section Execution::Optimization
241 !%Description
242 !% This variable selects the subroutine used to apply non-local
243 !% operators over the grid when an accelerator device is used.
244 !%Option invmap 1
245 !% The standard implementation ported to GPUs.
246 !%Option map 2
247 !% A different version, more suitable for GPUs.
248 !%End
249 call parse_variable(namespace, 'OperateAccel', op_map, function_accel)
251 call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')
252
253 end if
254
257
258 ! ---------------------------------------------------------
259
260 subroutine nl_operator_global_end()
262
265
266 ! ---------------------------------------------------------
270 subroutine nl_operator_init(op, label, symm)
271 type(nl_operator_t), intent(inout) :: op
272 character(len=*), intent(in) :: label
273 integer, optional, intent(in) :: symm
274
275 push_sub(nl_operator_init)
276
277 op%label = label
278 op%symmetry = op_general
279 if (use_symmetries) then
280 op%symmetry = optional_default(symm, op_general)
281 end if
282
283 pop_sub(nl_operator_init)
284 end subroutine nl_operator_init
285
286 ! ---------------------------------------------------------
296 subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
297 class(space_t), intent(in) :: space
298 type(mesh_t), target, intent(in) :: mesh
299 type(nl_operator_t), intent(inout) :: op
300 integer, intent(in) :: np
301 logical, optional, intent(in) :: const_w
302 logical, optional, intent(in) :: regenerate
303
304 integer :: ii, jj, p1(space%dim), time, current
305 integer, allocatable :: st1(:), st2(:), st1r(:)
306 integer :: ir, maxp, iinner, iouter
307 logical :: change, force_change
308 character(len=200) :: flags
309 integer, allocatable :: inner_points(:), outer_points(:), all_points(:)
310
311 push_sub(nl_operator_build)
312
313 if (mesh%parallel_in_domains .and. .not. const_w) then
314 call messages_experimental('Domain parallelization with curvilinear coordinates')
315 end if
316
317 assert(np > 0)
318
319 ! store values in structure
320 op%np = np
321 op%mesh => mesh
322 if (.not. optional_default(regenerate, .false.)) then
323 op%const_w = optional_default(const_w, .false.)
324 end if
325
326 ! allocate weights op%w
327 if (op%const_w) then
328 safe_allocate(op%w(1:op%stencil%size, 1))
329 message(1) = 'Debug: nl_operator_build: working with constant weights.'
330 call messages_info(1, debug_only=.true.)
331 else
332 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
333 message(1) = 'Debug: nl_operator_build: working with non-constant weights.'
334 call messages_info(1, debug_only=.true.)
335 end if
336
337 ! set initially to zero
338 op%w = m_zero
339
340 ! Build lookup table
341 safe_allocate(st1(1:op%stencil%size))
342 safe_allocate(st1r(1:op%stencil%size))
343 safe_allocate(st2(1:op%stencil%size))
344
345 op%nri = 0
346 do time = 1, 2
347 st2 = 0
348 do ii = 1, np
349 p1 = 0
350 call mesh_local_index_to_coords(mesh, ii, p1)
351
352 do jj = 1, op%stencil%size
353 ! Get local index of p1 plus current stencil point.
354 st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))
356 assert(st1(jj) > 0)
357 end do
358
359 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
360
361 change = any(st1 /= st2)
362
363 !the next is to detect when we move from a point that does not
364 !have boundary points as neighbours to one that has
365 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
366
367 ! if the stencil changes
368 if (change .or. force_change) then
369 !store it
370 st2(:) = st1(:)
371
372 !first time, just count
373 if (time == 1) op%nri = op%nri + 1
374
375 !second time, store
376 if (time == 2) then
377 current = current + 1
378 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
379 end if
380 end if
381
382 if (time == 2) op%rimap(ii) = current
383
384 end do
385
386 !after counting, allocate
387 if (time == 1) then
388 safe_deallocate_a(op%ri)
389 safe_deallocate_a(op%rimap)
390 safe_deallocate_a(op%rimap_inv)
392 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
393 safe_allocate(op%rimap(1:op%np))
394 safe_allocate(op%rimap_inv(1:op%nri + 1))
395 op%ri = 0
396 op%rimap = 0
397 op%rimap_inv = 0
398 current = 0
399
400 ! the sizes
401 if (mesh%use_curvilinear) then
402 safe_allocate(op%nn(1:op%nri))
403 ! for the moment all the sizes are the same
404 op%nn = op%stencil%size
405 end if
406 end if
407
408 end do
409
410 !the inverse mapping
411 op%rimap_inv(1) = 0
412 do jj = 1, op%np
413 op%rimap_inv(op%rimap(jj) + 1) = jj
414 end do
415 op%rimap_inv(op%nri + 1) = op%np
416
417 safe_deallocate_a(st1)
418 safe_deallocate_a(st1r)
419 safe_deallocate_a(st2)
420
421 if (op%mesh%parallel_in_domains) then
422 !now build the arrays required to apply the nl_operator by parts
423
424 !count points
425 op%inner%nri = 0
426 op%outer%nri = 0
427 do ir = 1, op%nri
428 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
429 if (maxp <= np) then
430 !inner point
431 op%inner%nri = op%inner%nri + 1
432 assert(op%inner%nri <= op%nri)
433 else
434 !outer point
435 op%outer%nri = op%outer%nri + 1
436 assert(op%outer%nri <= op%nri)
437 end if
438 end do
439
440 assert(op%inner%nri + op%outer%nri == op%nri)
441
442 if (optional_default(regenerate, .false.)) then
443 safe_deallocate_a(op%inner%imin)
444 safe_deallocate_a(op%inner%imax)
445 safe_deallocate_a(op%inner%ri)
446 safe_deallocate_a(op%outer%imin)
447 safe_deallocate_a(op%outer%imax)
448 safe_deallocate_a(op%outer%ri)
449 end if
450 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
451 safe_allocate(op%inner%imax(1:op%inner%nri))
452 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
453
454 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
455 safe_allocate(op%outer%imax(1:op%outer%nri))
456 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
457
458 !now populate the arrays
459 iinner = 0
460 iouter = 0
461 do ir = 1, op%nri
462 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
463 if (maxp <= np) then
464 !inner point
465 iinner = iinner + 1
466 op%inner%imin(iinner) = op%rimap_inv(ir)
467 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
468 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
469 else
470 !outer point
471 iouter = iouter + 1
472 op%outer%imin(iouter) = op%rimap_inv(ir)
473 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
474 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
475 end if
476 end do
477
478 !verify that all points in the inner operator are actually inner
479 do ir = 1, op%inner%nri
480 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
481 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
482 end do
483 end do
484
485 end if
486
487 if (accel_is_enabled() .and. op%const_w) then
488
489 write(flags, '(i5)') op%stencil%size
490 flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
491
492 if (op%symmetry /= op_general) then
493 block
494 character(len=16) :: npairs_str
495 write(npairs_str, '(i0)') op%stencil%size/2
496 flags = trim(flags)//' -DNPAIRS='//trim(adjustl(npairs_str))
497 end block
498 end if
499
500 if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)
501
502 select case (function_accel)
503 case (op_invmap)
504 if (op%symmetry /= op_general) then
505 call messages_not_implemented("OperateUseSymmetries=yes with OperateAccel=invmap")
506 end if
507 call accel_kernel_build(op%kernel, 'operate.cu', 'operate', flags)
508 case (op_map)
509 select case (op%symmetry)
510 case (op_general)
511 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map', flags)
512 case (op_symmetric)
513 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map_sym', flags)
514 case (op_antisymmetric)
515 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map_antisym', flags)
516 end select
517 end select
518
519 ! conversion to i8 needed to avoid integer overflow
520 call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
521 call accel_write_buffer(op%buff_ri, op%stencil%size, op%nri, op%ri)
522
523 select case (function_accel)
524 case (op_invmap)
525 call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
526 call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
527 call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
528 call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))
529
530 case (op_map)
531
533 call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)
534
535 if (op%symmetry /= op_general) then
536 call accel_create_buffer(op%buff_map_sym, accel_mem_read_only, type_integer, pad(op%mesh%np, accel_max_block_size()))
537 call accel_write_buffer(op%buff_map_sym, op%mesh%np, (op%rimap - 1)*(op%stencil%size/2))
538 end if
539
540 if (op%mesh%parallel_in_domains) then
541
542 safe_allocate(inner_points(1:op%mesh%np))
543 safe_allocate(outer_points(1:op%mesh%np))
544 safe_allocate(all_points(1:op%mesh%np))
545
546 op%ninner = 0
547 op%nouter = 0
548
549 do ii = 1, op%mesh%np
550 all_points(ii) = ii - 1
551 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
552 if (maxp <= op%mesh%np) then
553 op%ninner = op%ninner + 1
554 inner_points(op%ninner) = ii - 1
555 else
556 op%nouter = op%nouter + 1
557 outer_points(op%nouter) = ii - 1
558 end if
559 end do
560
562 call accel_write_buffer(op%buff_all, op%mesh%np, all_points)
563
565 call accel_write_buffer(op%buff_inner, op%ninner, inner_points)
566
568 call accel_write_buffer(op%buff_outer, op%nouter, outer_points)
569
570 safe_deallocate_a(inner_points)
571 safe_deallocate_a(outer_points)
572 safe_deallocate_a(all_points)
573
574 end if
575 end select
576 end if
577
578 pop_sub(nl_operator_build)
579
580 end subroutine nl_operator_build
581
582 ! ---------------------------------------------------------
583 subroutine nl_operator_output_weights(this)
584 type(nl_operator_t), intent(inout) :: this
585
586 integer :: istencil, idir
587
589
590 write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
591 write(message(2), '(a)') ' Spacing:'
592 do idir = 1, this%mesh%box%dim
593 write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(idir)
594 end do
595 call messages_info(2, debug_only=.true.)
596
597 do istencil = 1, this%stencil%size
598 select case(this%mesh%box%dim)
599 case(1)
600 write(message(1), '(a,i3,1i4,f25.10)') ' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
601 case(2)
602 write(message(1), '(a,i3,2i4,f25.10)') ' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
603 case(3)
604 write(message(1), '(a,i3,3i4,f25.10)') ' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
605 end select
606 call messages_info(1, debug_only=.true.)
607 end do
608
610
611 end subroutine nl_operator_output_weights
612
613 ! ---------------------------------------------------------
614 subroutine nl_operator_end(op)
615 type(nl_operator_t), intent(inout) :: op
616
617 push_sub(nl_operator_end)
618
619 if (accel_is_enabled() .and. op%const_w) then
621 end if
622
623 safe_deallocate_a(op%inner%imin)
624 safe_deallocate_a(op%inner%imax)
625 safe_deallocate_a(op%inner%ri)
626 safe_deallocate_a(op%outer%imin)
627 safe_deallocate_a(op%outer%imax)
628 safe_deallocate_a(op%outer%ri)
629
630 safe_deallocate_a(op%w)
631
632 safe_deallocate_a(op%ri)
633 safe_deallocate_a(op%rimap)
634 safe_deallocate_a(op%rimap_inv)
635 safe_deallocate_a(op%nn)
636
637 safe_deallocate_a(op%wpair)
638 safe_deallocate_a(op%ri_pos)
639 safe_deallocate_a(op%ri_neg)
640
641 safe_deallocate_a(op%inner%ri_pos)
642 safe_deallocate_a(op%inner%ri_neg)
643 safe_deallocate_a(op%outer%ri_pos)
644 safe_deallocate_a(op%outer%ri_neg)
645 call stencil_end(op%stencil)
646
647 pop_sub(nl_operator_end)
648 end subroutine nl_operator_end
649
650
651 subroutine nl_operator_clear_gpu_buffers(op)
652 type(nl_operator_t), intent(inout) :: op
653
655
656 call accel_free_buffer(op%buff_ri)
657 select case (function_accel)
658 case (op_invmap)
659 call accel_free_buffer(op%buff_imin)
660 call accel_free_buffer(op%buff_imax)
661
662 case (op_map)
663 call accel_free_buffer(op%buff_map)
664 if (op%symmetry /= op_general) then
665 call accel_free_buffer(op%buff_map_sym)
666 end if
667 if (op%mesh%parallel_in_domains) then
668 call accel_free_buffer(op%buff_all)
669 call accel_free_buffer(op%buff_inner)
670 call accel_free_buffer(op%buff_outer)
671 end if
672
673 case (op_nomap)
674 call accel_free_buffer(op%buff_map)
675 call accel_free_buffer(op%buff_stencil)
676 call accel_free_buffer(op%buff_xyz_to_ip)
677 call accel_free_buffer(op%buff_ip_to_xyz)
678 end select
679
680 call accel_free_buffer(op%buff_weights)
681 call accel_free_buffer(op%buff_half_weights)
682
683 if (op%symmetry /= op_general) then
684 call accel_free_buffer(op%buff_wpair)
685 call accel_free_buffer(op%buff_half_wpair)
686 if (op%max_allocated_ri_pair_gpu > 0) then
687 call accel_free_buffer(op%buff_ri_pos)
688 call accel_free_buffer(op%buff_ri_neg)
689 op%max_allocated_ri_pair_gpu = 0
690 end if
691 end if
692
694 end subroutine nl_operator_clear_gpu_buffers
695
696 ! ---------------------------------------------------------
697 integer pure function nl_operator_get_index(op, is, ip) result(res)
698 type(nl_operator_t), intent(in) :: op
699 integer, intent(in) :: is
700 integer, intent(in) :: ip
701
702 res = ip + op%ri(is, op%rimap(ip))
703 end function nl_operator_get_index
704
705 ! ---------------------------------------------------------
706
708 type(nl_operator_t), intent(inout) :: op
711
712 ! Update the GPU weights
713 if (accel_is_enabled() .and. op%const_w) then
714 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
715 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
716 if (op%symmetry /= op_general) then
717 call accel_create_buffer(op%buff_wpair, accel_mem_read_only, type_float, op%stencil%size/2)
718 call accel_create_buffer(op%buff_half_wpair, accel_mem_read_only, type_float, op%stencil%size/2)
719 end if
720 end if
721
724
725
726 ! ---------------------------------------------------------
727
728 subroutine nl_operator_update_gpu_buffers(op)
729 type(nl_operator_t), intent(inout) :: op
730
731 integer(int64) :: buf_size
732
734
735 ! Update the GPU weights
736 if (accel_is_enabled() .and. op%const_w) then
737 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
738 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
739
740 if (op%symmetry /= op_general) then
741 call accel_write_buffer(op%buff_wpair, op%npairs, op%wpair)
742 call accel_write_buffer(op%buff_half_wpair, op%npairs, -m_half*op%wpair)
743
744 ! (Re)allocate the pair-index buffers if max_allocated_ri_pair has grown
745 if (op%max_allocated_ri_pair > op%max_allocated_ri_pair_gpu) then
746 if (op%max_allocated_ri_pair_gpu > 0) then
747 call accel_free_buffer(op%buff_ri_pos)
748 call accel_free_buffer(op%buff_ri_neg)
749 end if
750 buf_size = int(op%npairs, int64)*op%nri*op%max_allocated_ri_pair
751 call accel_create_buffer(op%buff_ri_pos, accel_mem_read_only, type_integer, buf_size)
752 call accel_create_buffer(op%buff_ri_neg, accel_mem_read_only, type_integer, buf_size)
753 op%max_allocated_ri_pair_gpu = op%max_allocated_ri_pair
754 end if
755
756 if (op%max_allocated_ri_pair > 0) then
757 call accel_write_buffer(op%buff_ri_pos, op%npairs, op%nri, op%max_allocated_ri_pair, op%ri_pos)
758 call accel_write_buffer(op%buff_ri_neg, op%npairs, op%nri, op%max_allocated_ri_pair, op%ri_neg)
759 end if
760 end if
761 end if
762
764 end subroutine nl_operator_update_gpu_buffers
765
766 ! ---------------------------------------------------------
767
768 integer pure function nl_operator_np_zero_bc(op) result(np_bc)
769 type(nl_operator_t), intent(in) :: op
770
771 integer :: jj, ii
772
773 np_bc = 0
774 do jj = 1, op%nri
775 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
776 np_bc = max(np_bc, ii)
777 end do
778
779 end function nl_operator_np_zero_bc
780
781
782 ! ---------------------------------------------------------
784 subroutine nl_operator_remove_zero_weight_points(op, space, mesh)
785 type(nl_operator_t), intent(inout) :: op
786 type(space_t), intent(in) :: space
787 class(mesh_t), intent(in) :: mesh
788
789 integer :: ip, size
790 real(real64), parameter :: tol = 1.0e-14_real64
791 real(real64) :: max_weight, new_w(op%stencil%size)
792 integer :: new_points(space%dim, op%stencil%size)
793
794 if (.not. op%const_w) return
795
797
798 max_weight = maxval(abs(op%w(:, 1)))
799 size = 0
800 do ip = 1, op%stencil%size
801 if (abs(op%w(ip, 1)) > tol * max_weight) then
802 size = size +1
803 new_w(size) = op%w(ip, 1)
804 new_points(:,size) = op%stencil%points(:, ip)
805 end if
806 end do
807
808 ! We regenerate the stencil without the zero-weight points
809 op%stencil%size = size
810 safe_deallocate_a(op%stencil%points)
811 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
812 op%stencil%points(:, :) = new_points(:, 1:size)
813 safe_deallocate_a(op%w)
815 call nl_operator_build(space, mesh, op, mesh%np, const_w=op%const_w, regenerate=.true.)
816 op%w(1:size, 1) = new_w(1:size)
817
818 !Update Stencil%center
819 call stencil_init_center(op%stencil)
820
825 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
826 integer, intent(in) :: size
827 integer, intent(in) :: ldf
828 integer, intent(in) :: offsets(:, :)
829 real(real64), intent(in) :: wre(:)
830 integer, intent(in) :: ri(:, :)
831 integer, intent(in) :: nri
832 integer, intent(out) :: npairs
833 real(real64), intent(inout) :: wpair(:)
834 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
835 real(real64), intent(out) :: wcenter
836
837 logical, allocatable :: used(:)
838 integer :: i, j, ndim, s
839 logical :: same
840 integer, allocatable :: idx(:)
841
842 real(real64), parameter :: tol = 1.0e-12_real64
843
844 push_sub(group_by_pairs_sym)
845
846 assert(mod(size,2) == 1)
847
848 safe_allocate(used(1:size))
849 used = .false.
850 npairs = 0
851
852 ndim = ubound(offsets, dim=1)
853
854 safe_allocate(idx(1:size))
855 call robust_sort_by_abs(wre, offsets, idx)
856
857 do i = 1, size
858 if (used(i)) cycle
859
860 if (all(offsets(:, idx(i))==0)) then
861 wcenter = wre(idx(i))
862 used(i) = .true.
863 cycle
864 end if
865
866 ! Try to find symmetric partner j
867 do j = i+1, size
868 if (used(j)) cycle
869
870 ! Weight equality
871 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
872 if (.not. same) cycle
873
874 ! Offsets equal and opposite
875 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
876
877 npairs = npairs + 1
878 do s = 1, nri
879 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
880 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
881 end do
882 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
883
884 used(i) = .true.
885 used(j) = .true.
886 exit
887 end do
888 end do
889
890 assert(npairs == size/2)
891
892 safe_deallocate_a(idx)
893
894 pop_sub(group_by_pairs_sym)
895 end subroutine group_by_pairs_sym
896
898 subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
899 integer, intent(in) :: size
900 integer, intent(in) :: ldf
901 integer, intent(in) :: offsets(:, :)
902 real(real64), intent(in) :: wre(:)
903 integer, intent(in) :: ri(:, :)
904 integer, intent(in) :: nri
905 integer, intent(out) :: npairs
906 real(real64), intent(inout) :: wpair(:)
907 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
908
909 logical, allocatable :: used(:)
910 integer :: i, j, ndim, s
911 logical :: same
912 integer, allocatable :: idx(:)
913
914 real(real64), parameter :: tol = 1.0e-12_real64
915
916 push_sub(group_by_pairs_antisym)
917
918 assert(mod(size,2) == 0)
919
920 safe_allocate(used(1:size))
921 used = .false.
922 npairs = 0
923
924 ndim = ubound(offsets, dim=1)
925
926 safe_allocate(idx(1:size))
927 call robust_sort_by_abs(wre, offsets, idx)
928
929 ! Max pairs = n/2
930 do i = 1, size
931 if (used(i)) cycle
932
933 ! Try to find symmetric partner j
934 do j = i+1, size
935 if (used(j)) cycle
936
937 ! Weight equality
938 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
939 if (.not. same) cycle
940
941 ! Offsets equal and opposite
942 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
943
944 npairs = npairs + 1
945 do s = 1, nri
946 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
947 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
948 end do
949 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
950
951 used(i) = .true.
952 used(j) = .true.
953 exit
954 end do
955 end do
956
957 assert(npairs == size/2)
958
959 safe_deallocate_a(idx)
961 end subroutine group_by_pairs_antisym
962
964 subroutine nl_operator_build_symmetric_weights(op, max_size)
965 type(nl_operator_t), intent(inout) :: op
966 integer, optional, intent(in) :: max_size
967
968 integer :: ldf, start, end, ipair
969
970 if (op%symmetry == op_general) return
971
973
974 assert(op%const_w)
975 assert(conf%target_states_block_size > 0)
976
977 if(present(max_size)) then
978 start = op%max_allocated_ri_pair + 1
979 end = max_size
980 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
981 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
982 if (op%mesh%parallel_in_domains) then
983 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
984 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
985 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
986 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
987 end if
988 else
989 ! Max pairs = n/2
990 safe_allocate(op%wpair(1:op%stencil%size/2))
991 start = 1
992 end = log2(conf%target_states_block_size)+1
994 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:end))
995 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:end))
996 if (op%mesh%parallel_in_domains) then
997 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
998 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
999 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1000 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1001 end if
1002 end if
1003 op%max_allocated_ri_pair = end
1004
1005 do ldf = start-1, end-1
1006 select case(op%symmetry)
1007 case(op_symmetric)
1008 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1009 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
1010 if (op%mesh%parallel_in_domains) then
1011 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1012 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
1013 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1014 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
1015 end if
1016 case(op_antisymmetric)
1017 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1018 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
1019 if (op%mesh%parallel_in_domains) then
1020 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1021 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
1022 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1023 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
1024 end if
1025 end select
1026 end do
1027
1028 if (.not. present(max_size)) then
1029 write(message(1), '(3a)') 'Debug info: Sorted weights for ', trim(op%label), '.'
1030 call messages_info(1, debug_only=.true.)
1031
1032 do ipair = 1, op%npairs
1033 write(message(1), '(a,i3,f25.10,2(1x,i4))') ' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
1034 call messages_info(1, debug_only=.true.)
1035 end do
1036 end if
1037
1040
1042 subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
1043 integer, allocatable, intent(inout) :: ri(:,:,:)
1044 integer, intent(in) :: stencil_size, nri
1045 integer, intent(in) :: old_size, new_size
1046
1047 integer, allocatable :: tmp(:,:,:)
1048
1049 safe_allocate_source_a(tmp, ri)
1050 safe_deallocate_a(ri)
1051 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
1052 ri(:,:,1:old_size) = tmp
1053 safe_deallocate_a(tmp)
1054 end subroutine reallocate_array
1055
1056#include "undef.F90"
1057#include "real.F90"
1058#include "nl_operator_inc.F90"
1060#include "undef.F90"
1061#include "complex.F90"
1062#include "nl_operator_inc.F90"
1063
1064end module nl_operator_oct_m
1065
1066!! Local Variables:
1067!! mode: f90
1068!! coding: utf-8
1069!! End:
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:1371
pure logical function, public accel_is_enabled()
Definition: accel.F90:402
integer, parameter, public accel_mem_read_only
Definition: accel.F90:185
integer pure function, public accel_max_block_size()
Definition: accel.F90:1182
This module implements batches of mesh functions.
Definition: batch.F90:135
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
real(real64), parameter, public m_zero
Definition: global.F90:191
This module implements the index, used for the mesh points.
Definition: index.F90:124
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:938
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:950
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1067
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:999
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:161
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:690
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1039
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:593
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
integer dfunction_global
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer zfunction_global
integer, parameter op_invmap
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:119
Some general things and nomenclature:
Definition: par_vec.F90:173
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:119
This module defines stencils used in Octopus.
Definition: stencil.F90:137
type(type_t), public type_integer
Definition: types.F90:137
Describes mesh distribution to nodes.
Definition: mesh.F90:187
index type for non-local operators
data type for non local operators
int true(void)