Octopus
nl_operator.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
24 use accel_oct_m
25 use batch_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use index_oct_m
30 use iso_c_binding
31 use math_oct_m
32 use mesh_oct_m
34 use mpi_oct_m
39 use parser_oct_m
41 use space_oct_m
42 use sort_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50 public :: &
72
78 private
79 integer :: nri = 0
80 integer, allocatable :: imin(:)
81 integer, allocatable :: imax(:)
82 integer, allocatable :: ri(:, :)
83 integer, allocatable :: ri_pos(:,:,:)
84 integer, allocatable :: ri_neg(:,:,:)
85 end type nl_operator_index_t
86
87 integer, public, parameter :: &
88 OP_GENERAL = 1, &
89 op_symmetric = 2, &
91
93 type nl_operator_t
94 private
95 type(stencil_t), public :: stencil
96 type(mesh_t), pointer :: mesh => null()
97 integer, allocatable :: nn(:)
98 integer, public :: np = 0
99 ! When running in parallel mode, the next three arrays are unique on each node.
100 real(real64), allocatable, public :: w(:,:)
101
102 logical, public :: const_w = .true.
103
104 type(accel_mem_t), public :: buff_weights
105 type(accel_mem_t), public :: buff_half_weights
106
107 integer :: symmetry = op_general
108
109 character(len=40) :: label
110
112 integer, public :: nri = 0
113 integer, allocatable, public :: ri(:,:)
114 integer, allocatable, public :: rimap(:)
115 integer, allocatable, public :: rimap_inv(:)
116
118 integer :: npairs = 0
119 real(real64),allocatable :: wpair(:)
120 real(real64) :: wcenter
121 integer, allocatable :: ri_pos(:,:,:)
122 integer, allocatable :: ri_neg(:,:,:)
123 integer :: max_allocated_ri_pair = 0
124
125 integer :: ninner = 0
126 integer :: nouter = 0
127
128 type(nl_operator_index_t) :: inner
129 type(nl_operator_index_t) :: outer
130
131 type(accel_kernel_t) :: kernel
132 type(accel_mem_t) :: buff_imin
133 type(accel_mem_t) :: buff_imax
134 type(accel_mem_t) :: buff_ri
135 type(accel_mem_t) :: buff_map
136 type(accel_mem_t) :: buff_all
137 type(accel_mem_t) :: buff_inner
138 type(accel_mem_t) :: buff_outer
139 type(accel_mem_t) :: buff_stencil
140 type(accel_mem_t) :: buff_ip_to_xyz
141 type(accel_mem_t) :: buff_xyz_to_ip
142
143 ! For multigrid solvers
144 type(nl_operator_t), public, pointer :: coarser => null()
145
146 end type nl_operator_t
147
148 integer, parameter :: &
149 OP_FORTRAN = 0, &
150 op_vec = 1, &
151 op_min = op_fortran, &
152 op_max = op_vec
153
154 integer, parameter :: &
155 OP_INVMAP = 1, &
156 op_map = 2, &
157 op_nomap = 3
158
159 integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2
160
161 interface
162 integer function op_is_available(opid, type)
163 implicit none
164 integer, intent(in) :: opid, type
165 end function op_is_available
166 end interface
167
168 integer :: dfunction_global = -1
169 integer :: zfunction_global = -1
170 integer :: function_accel
171 logical :: use_symmetries = .false.
173contains
175 ! ---------------------------------------------------------
178 subroutine nl_operator_global_init(namespace)
179 type(namespace_t), intent(in) :: namespace
180
181 integer :: default
184
185 !%Variable OperateDouble
186 !%Type integer
187 !%Section Execution::Optimization
188 !%Default optimized
189 !%Description
190 !% This variable selects the subroutine used to apply non-local
191 !% operators over the grid for real functions.
192 !%Option fortran 0
193 !% The standard Fortran function.
194 !%Option optimized 1
195 !% This version is optimized using vector primitives (if available).
196 !%End
198 !%Variable OperateComplex
199 !%Type integer
200 !%Section Execution::Optimization
201 !%Default optimized
202 !%Description
203 !% This variable selects the subroutine used to apply non-local
204 !% operators over the grid for complex functions.
205 !%Option fortran 0
206 !% The standard Fortran function.
207 !%Option optimized 1
208 !% This version is optimized using vector primitives (if available).
209 !%End
211 default = op_vec
212
213 call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
214 if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')
216 call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
217 if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')
219 !%Variable OperateUseSymmetries
220 !%Type logical
221 !%Section Execution::Optimization
222 !%Default no
223 !%Description
224 !% This variable selects if the operators are built using symmetries or not.
225 !%End
226 call parse_variable(namespace, 'OperateUseSymmetries', .false., use_symmetries)
227 if (accel_is_enabled() .and. use_symmetries) then
228 call messages_not_implemented("OperateUseSymmetries=yes with GPUs")
229 end if
233 !%Variable OperateAccel
234 !%Type integer
235 !%Default map
236 !%Section Execution::Optimization
237 !%Description
238 !% This variable selects the subroutine used to apply non-local
239 !% operators over the grid when an accelerator device is used.
240 !%Option invmap 1
241 !% The standard implementation ported to GPUs.
242 !%Option map 2
243 !% A different version, more suitable for GPUs.
244 !%End
245 call parse_variable(namespace, 'OperateAccel', op_map, function_accel)
246
247 call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')
248
249 end if
250
252 end subroutine nl_operator_global_init
253
254 ! ---------------------------------------------------------
255
256 subroutine nl_operator_global_end()
258
260 end subroutine nl_operator_global_end
261
262 ! ---------------------------------------------------------
266 subroutine nl_operator_init(op, label, symm)
267 type(nl_operator_t), intent(inout) :: op
268 character(len=*), intent(in) :: label
269 integer, optional, intent(in) :: symm
270
271 push_sub(nl_operator_init)
272
273 op%label = label
274 op%symmetry = op_general
275 if (use_symmetries) then
276 op%symmetry = optional_default(symm, op_general)
277 end if
278
279 pop_sub(nl_operator_init)
280 end subroutine nl_operator_init
281
282
283 ! ---------------------------------------------------------
284 subroutine nl_operator_copy(opo, opi)
285 type(nl_operator_t), intent(inout) :: opo
286 type(nl_operator_t), target, intent(in) :: opi
287
288 push_sub(nl_operator_copy)
289
290 ! We cannot currently copy the GPU kernel for the nl_operator
291 assert(.not. accel_is_enabled())
292
293 call nl_operator_end(opo)
294 call nl_operator_init(opo, opi%label)
295
296 call stencil_copy(opi%stencil, opo%stencil)
297
298 opo%np = opi%np
299 opo%mesh => opi%mesh
300 opo%symmetry = opi%symmetry
301
302 safe_allocate_source_a(opo%nn, opi%nn)
303 safe_allocate_source_a(opo%w, opi%w)
304
305 opo%const_w = opi%const_w
306
307 opo%nri = opi%nri
308 assert(allocated(opi%ri))
309
310 safe_allocate_source_a(opo%ri, opi%ri)
311 safe_allocate_source_a(opo%rimap, opi%rimap)
312 safe_allocate_source_a(opo%rimap_inv, opi%rimap_inv)
313
314 opo%npairs = opi%npairs
315 if (opi%symmetry /= op_general) then
316 opo%wcenter = opi%wcenter
317 safe_allocate_source_a(opo%wpair, opi%wpair)
318 safe_allocate_source_a(opo%ri_pos, opi%ri_pos)
319 safe_allocate_source_a(opo%ri_neg, opi%ri_neg)
320 end if
321
322 if (opi%mesh%parallel_in_domains) then
323 opo%inner%nri = opi%inner%nri
324 safe_allocate_source_a(opo%inner%imin, opi%inner%imin)
325 safe_allocate_source_a(opo%inner%imax, opi%inner%imax)
326 safe_allocate_source_a(opo%inner%ri, opi%inner%ri)
327
328 opo%outer%nri = opi%outer%nri
329 safe_allocate_source_a(opo%outer%imin, opi%outer%imin)
330 safe_allocate_source_a(opo%outer%imax, opi%outer%imax)
331 safe_allocate_source_a(opo%outer%ri, opi%outer%ri)
332
333 if (opi%symmetry /= op_general) then
334 safe_allocate_source_a(opo%inner%ri_pos, opi%inner%ri_pos)
335 safe_allocate_source_a(opo%inner%ri_neg, opi%inner%ri_neg)
336 safe_allocate_source_a(opo%outer%ri_pos, opi%outer%ri_pos)
337 safe_allocate_source_a(opo%outer%ri_neg, opi%outer%ri_neg)
338 end if
339 end if
340
341 ! We create the corresponding GPU buffers
342 if (accel_is_enabled() .and. opo%const_w) then
343 call accel_create_buffer(opo%buff_weights, accel_mem_read_only, type_float, opo%stencil%size)
344 call accel_write_buffer(opo%buff_weights, opo%stencil%size, opo%w(:, 1))
345 call accel_create_buffer(opo%buff_half_weights, accel_mem_read_only, type_float, opo%stencil%size)
346 call accel_write_buffer(opo%buff_half_weights, opo%stencil%size, -m_half*opo%w(:, 1))
347 end if
348
349
350 pop_sub(nl_operator_copy)
351 end subroutine nl_operator_copy
352
353
354 ! ---------------------------------------------------------
364 subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
365 class(space_t), intent(in) :: space
366 type(mesh_t), target, intent(in) :: mesh
367 type(nl_operator_t), intent(inout) :: op
368 integer, intent(in) :: np
369 logical, optional, intent(in) :: const_w
370 logical, optional, intent(in) :: regenerate
371
372 integer :: ii, jj, p1(space%dim), time, current
373 integer, allocatable :: st1(:), st2(:), st1r(:)
374 integer :: ir, maxp, iinner, iouter
375 logical :: change, force_change
376 character(len=200) :: flags
377 integer, allocatable :: inner_points(:), outer_points(:), all_points(:)
378
380
381 if (mesh%parallel_in_domains .and. .not. const_w) then
382 call messages_experimental('Domain parallelization with curvilinear coordinates')
383 end if
384
385 assert(np > 0)
386
387 ! store values in structure
388 op%np = np
389 op%mesh => mesh
390 if (.not. optional_default(regenerate, .false.)) then
391 op%const_w = optional_default(const_w, .false.)
392 end if
393
394 ! allocate weights op%w
395 if (op%const_w) then
396 safe_allocate(op%w(1:op%stencil%size, 1))
397 message(1) = 'Debug: nl_operator_build: working with constant weights.'
398 call messages_info(1, debug_only=.true.)
399 else
400 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
401 message(1) = 'Debug: nl_operator_build: working with non-constant weights.'
402 call messages_info(1, debug_only=.true.)
403 end if
404
405 ! set initially to zero
406 op%w = m_zero
407
408 ! Build lookup table
409 safe_allocate(st1(1:op%stencil%size))
410 safe_allocate(st1r(1:op%stencil%size))
411 safe_allocate(st2(1:op%stencil%size))
412
413 op%nri = 0
414 do time = 1, 2
415 st2 = 0
416 do ii = 1, np
417 p1 = 0
418 call mesh_local_index_to_coords(mesh, ii, p1)
419
420 do jj = 1, op%stencil%size
421 ! Get local index of p1 plus current stencil point.
422 st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))
423
424 assert(st1(jj) > 0)
425 end do
426
427 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
428
429 change = any(st1 /= st2)
430
431 !the next is to detect when we move from a point that does not
432 !have boundary points as neighbours to one that has
433 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
434
435 ! if the stencil changes
436 if (change .or. force_change) then
437 !store it
438 st2(:) = st1(:)
439
440 !first time, just count
441 if (time == 1) op%nri = op%nri + 1
442
443 !second time, store
444 if (time == 2) then
445 current = current + 1
446 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
447 end if
448 end if
449
450 if (time == 2) op%rimap(ii) = current
451
452 end do
453
454 !after counting, allocate
455 if (time == 1) then
456 safe_deallocate_a(op%ri)
457 safe_deallocate_a(op%rimap)
458 safe_deallocate_a(op%rimap_inv)
460 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
461 safe_allocate(op%rimap(1:op%np))
462 safe_allocate(op%rimap_inv(1:op%nri + 1))
463 op%ri = 0
464 op%rimap = 0
465 op%rimap_inv = 0
466 current = 0
467
468 ! the sizes
469 if (mesh%use_curvilinear) then
470 safe_allocate(op%nn(1:op%nri))
471 ! for the moment all the sizes are the same
472 op%nn = op%stencil%size
473 end if
474 end if
475
476 end do
477
478 !the inverse mapping
479 op%rimap_inv(1) = 0
480 do jj = 1, op%np
481 op%rimap_inv(op%rimap(jj) + 1) = jj
482 end do
483 op%rimap_inv(op%nri + 1) = op%np
484
485 safe_deallocate_a(st1)
486 safe_deallocate_a(st1r)
487 safe_deallocate_a(st2)
488
489 if (op%mesh%parallel_in_domains) then
490 !now build the arrays required to apply the nl_operator by parts
491
492 !count points
493 op%inner%nri = 0
494 op%outer%nri = 0
495 do ir = 1, op%nri
496 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
497 if (maxp <= np) then
498 !inner point
499 op%inner%nri = op%inner%nri + 1
500 assert(op%inner%nri <= op%nri)
501 else
502 !outer point
503 op%outer%nri = op%outer%nri + 1
504 assert(op%outer%nri <= op%nri)
505 end if
506 end do
507
508 assert(op%inner%nri + op%outer%nri == op%nri)
509
510 if (optional_default(regenerate, .false.)) then
511 safe_deallocate_a(op%inner%imin)
512 safe_deallocate_a(op%inner%imax)
513 safe_deallocate_a(op%inner%ri)
514 safe_deallocate_a(op%outer%imin)
515 safe_deallocate_a(op%outer%imax)
516 safe_deallocate_a(op%outer%ri)
517 end if
518 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
519 safe_allocate(op%inner%imax(1:op%inner%nri))
520 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
521
522 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
523 safe_allocate(op%outer%imax(1:op%outer%nri))
524 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
525
526 !now populate the arrays
527 iinner = 0
528 iouter = 0
529 do ir = 1, op%nri
530 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
531 if (maxp <= np) then
532 !inner point
533 iinner = iinner + 1
534 op%inner%imin(iinner) = op%rimap_inv(ir)
535 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
536 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
537 else
538 !outer point
539 iouter = iouter + 1
540 op%outer%imin(iouter) = op%rimap_inv(ir)
541 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
542 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
543 end if
544 end do
545
546 !verify that all points in the inner operator are actually inner
547 do ir = 1, op%inner%nri
548 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
549 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
550 end do
551 end do
552
553 end if
554
555 if (accel_is_enabled() .and. op%const_w) then
556
557 write(flags, '(i5)') op%stencil%size
558 flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
559
560 if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)
561
562 select case (function_accel)
563 case (op_invmap)
564 call accel_kernel_build(op%kernel, 'operate.cu', 'operate', flags)
565 case (op_map)
566 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map', flags)
567 end select
568
569 ! conversion to i8 needed to avoid integer overflow
570 call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
571 call accel_write_buffer(op%buff_ri, op%stencil%size, op%nri, op%ri)
572
573 select case (function_accel)
574 case (op_invmap)
575 call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
576 call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
577 call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
578 call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))
579
580 case (op_map)
581
583 call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)
584
585 if (op%mesh%parallel_in_domains) then
586
587 safe_allocate(inner_points(1:op%mesh%np))
588 safe_allocate(outer_points(1:op%mesh%np))
589 safe_allocate(all_points(1:op%mesh%np))
590
591 op%ninner = 0
592 op%nouter = 0
593
594 do ii = 1, op%mesh%np
595 all_points(ii) = ii - 1
596 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
597 if (maxp <= op%mesh%np) then
598 op%ninner = op%ninner + 1
599 inner_points(op%ninner) = ii - 1
600 else
601 op%nouter = op%nouter + 1
602 outer_points(op%nouter) = ii - 1
603 end if
604 end do
605
607 call accel_write_buffer(op%buff_all, op%mesh%np, all_points)
608
610 call accel_write_buffer(op%buff_inner, op%ninner, inner_points)
611
613 call accel_write_buffer(op%buff_outer, op%nouter, outer_points)
614
615 safe_deallocate_a(inner_points)
616 safe_deallocate_a(outer_points)
617 safe_deallocate_a(all_points)
618
619 end if
620 end select
621 end if
622
623 pop_sub(nl_operator_build)
624
625 end subroutine nl_operator_build
626
627 ! ---------------------------------------------------------
628 subroutine nl_operator_output_weights(this)
629 type(nl_operator_t), intent(inout) :: this
630
631 integer :: istencil, idir
632
634
635 write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
636 write(message(2), '(a)') ' Spacing:'
637 do idir = 1, this%mesh%box%dim
638 write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(idir)
639 end do
640 call messages_info(2, debug_only=.true.)
641
642 do istencil = 1, this%stencil%size
643 select case(this%mesh%box%dim)
644 case(1)
645 write(message(1), '(a,i3,1i4,f25.10)') ' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
646 case(2)
647 write(message(1), '(a,i3,2i4,f25.10)') ' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
648 case(3)
649 write(message(1), '(a,i3,3i4,f25.10)') ' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
650 end select
651 call messages_info(1, debug_only=.true.)
652 end do
653
655
656 end subroutine nl_operator_output_weights
657
658 ! ---------------------------------------------------------
659 subroutine nl_operator_end(op)
660 type(nl_operator_t), intent(inout) :: op
661
662 push_sub(nl_operator_end)
663
664 if (accel_is_enabled() .and. op%const_w) then
666 end if
667
668 safe_deallocate_a(op%inner%imin)
669 safe_deallocate_a(op%inner%imax)
670 safe_deallocate_a(op%inner%ri)
671 safe_deallocate_a(op%outer%imin)
672 safe_deallocate_a(op%outer%imax)
673 safe_deallocate_a(op%outer%ri)
674
675 safe_deallocate_a(op%w)
676
677 safe_deallocate_a(op%ri)
678 safe_deallocate_a(op%rimap)
679 safe_deallocate_a(op%rimap_inv)
680 safe_deallocate_a(op%nn)
681
682 safe_deallocate_a(op%wpair)
683 safe_deallocate_a(op%ri_pos)
684 safe_deallocate_a(op%ri_neg)
685
686 safe_deallocate_a(op%inner%ri_pos)
687 safe_deallocate_a(op%inner%ri_neg)
688 safe_deallocate_a(op%outer%ri_pos)
689 safe_deallocate_a(op%outer%ri_neg)
690 call stencil_end(op%stencil)
691
692 pop_sub(nl_operator_end)
693 end subroutine nl_operator_end
694
695
696 subroutine nl_operator_clear_gpu_buffers(op)
697 type(nl_operator_t), intent(inout) :: op
698
700
701 call accel_release_buffer(op%buff_ri)
702 select case (function_accel)
703 case (op_invmap)
704 call accel_release_buffer(op%buff_imin)
705 call accel_release_buffer(op%buff_imax)
706
707 case (op_map)
708 call accel_release_buffer(op%buff_map)
709 if (op%mesh%parallel_in_domains) then
710 call accel_release_buffer(op%buff_all)
711 call accel_release_buffer(op%buff_inner)
712 call accel_release_buffer(op%buff_outer)
713 end if
714
715 case (op_nomap)
716 call accel_release_buffer(op%buff_map)
717 call accel_release_buffer(op%buff_stencil)
718 call accel_release_buffer(op%buff_xyz_to_ip)
719 call accel_release_buffer(op%buff_ip_to_xyz)
720 end select
721
722 call accel_release_buffer(op%buff_weights)
723 call accel_release_buffer(op%buff_half_weights)
724
726 end subroutine nl_operator_clear_gpu_buffers
727
728 ! ---------------------------------------------------------
729 integer pure function nl_operator_get_index(op, is, ip) result(res)
730 type(nl_operator_t), intent(in) :: op
731 integer, intent(in) :: is
732 integer, intent(in) :: ip
733
734 res = ip + op%ri(is, op%rimap(ip))
735 end function nl_operator_get_index
736
737 ! ---------------------------------------------------------
738
740 type(nl_operator_t), intent(inout) :: op
741
743
744 ! Update the GPU weights
745 if (accel_is_enabled() .and. op%const_w) then
746 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
747 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
748 end if
749
752
753
754 ! ---------------------------------------------------------
755
756 subroutine nl_operator_update_gpu_buffers(op)
757 type(nl_operator_t), intent(inout) :: op
758
760
761 ! Update the GPU weights
762 if (accel_is_enabled() .and. op%const_w) then
763 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
764 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
765 end if
766
768 end subroutine nl_operator_update_gpu_buffers
769
770 ! ---------------------------------------------------------
771
772 integer pure function nl_operator_np_zero_bc(op) result(np_bc)
773 type(nl_operator_t), intent(in) :: op
774
775 integer :: jj, ii
776
777 np_bc = 0
778 do jj = 1, op%nri
779 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
780 np_bc = max(np_bc, ii)
781 end do
782
783 end function nl_operator_np_zero_bc
784
785
786 ! ---------------------------------------------------------
788 subroutine nl_operator_remove_zero_weight_points(op, space, mesh)
789 type(nl_operator_t), intent(inout) :: op
790 type(space_t), intent(in) :: space
791 class(mesh_t), intent(in) :: mesh
792
793 integer :: ip, size
794 real(real64), parameter :: tol = 1.0e-14_real64
795 real(real64) :: max_weight, new_w(op%stencil%size)
796 integer :: new_points(space%dim, op%stencil%size)
797
798 if (.not. op%const_w) return
799
801
802 max_weight = maxval(abs(op%w(:, 1)))
803 size = 0
804 do ip = 1, op%stencil%size
805 if (abs(op%w(ip, 1)) > tol * max_weight) then
806 size = size +1
807 new_w(size) = op%w(ip, 1)
808 new_points(:,size) = op%stencil%points(:, ip)
809 end if
810 end do
811
812 ! We regenerate the stencil without the zero-weight points
813 op%stencil%size = size
814 safe_deallocate_a(op%stencil%points)
815 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
816 op%stencil%points(:, :) = new_points(:, 1:size)
817 safe_deallocate_a(op%w)
819 call nl_operator_build(space, mesh, op, mesh%np, const_w=op%const_w, regenerate=.true.)
820 op%w(1:size, 1) = new_w(1:size)
821
826 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
827 integer, intent(in) :: size
828 integer, intent(in) :: ldf
829 integer, intent(in) :: offsets(:, :)
830 real(real64), intent(in) :: wre(:)
831 integer, intent(in) :: ri(:, :)
832 integer, intent(in) :: nri
833 integer, intent(out) :: npairs
834 real(real64), intent(inout) :: wpair(:)
835 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
836 real(real64), intent(out) :: wcenter
837
838 logical, allocatable :: used(:)
839 integer :: i, j, ndim, s
840 logical :: same
841 integer, allocatable :: idx(:)
842
843 real(real64), parameter :: tol = 1.0e-12_real64
844
845 push_sub(group_by_pairs_sym)
846
847 assert(mod(size,2) == 1)
848
849 safe_allocate(used(1:size))
850 used = .false.
851 npairs = 0
852
853 ndim = ubound(offsets, dim=1)
854
855 safe_allocate(idx(1:size))
856 call robust_sort_by_abs(wre, offsets, idx)
857
858 do i = 1, size
859 if (used(i)) cycle
860
861 if (all(offsets(:, idx(i))==0)) then
862 wcenter = wre(idx(i))
863 used(i) = .true.
864 cycle
865 end if
866
867 ! Try to find symmetric partner j
868 do j = i+1, size
869 if (used(j)) cycle
870
871 ! Weight equality
872 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
873 if (.not. same) cycle
874
875 ! Offsets equal and opposite
876 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
877
878 npairs = npairs + 1
879 do s = 1, nri
880 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
881 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
882 end do
883 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
884
885 used(i) = .true.
886 used(j) = .true.
887 exit
888 end do
889 end do
890
891 assert(npairs == size/2)
892
893 safe_deallocate_a(idx)
894
895 pop_sub(group_by_pairs_sym)
896 end subroutine group_by_pairs_sym
897
899 subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
900 integer, intent(in) :: size
901 integer, intent(in) :: ldf
902 integer, intent(in) :: offsets(:, :)
903 real(real64), intent(in) :: wre(:)
904 integer, intent(in) :: ri(:, :)
905 integer, intent(in) :: nri
906 integer, intent(out) :: npairs
907 real(real64), intent(inout) :: wpair(:)
908 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
909
910 logical, allocatable :: used(:)
911 integer :: i, j, ndim, s
912 logical :: same
913 integer, allocatable :: idx(:)
914
915 real(real64), parameter :: tol = 1.0e-12_real64
916
917 push_sub(group_by_pairs_antisym)
918
919 assert(mod(size,2) == 0)
920
921 safe_allocate(used(1:size))
922 used = .false.
923 npairs = 0
924
925 ndim = ubound(offsets, dim=1)
926
927 safe_allocate(idx(1:size))
928 call robust_sort_by_abs(wre, offsets, idx)
929
930 ! Max pairs = n/2
931 do i = 1, size
932 if (used(i)) cycle
933
934 ! Try to find symmetric partner j
935 do j = i+1, size
936 if (used(j)) cycle
937
938 ! Weight equality
939 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
940 if (.not. same) cycle
941
942 ! Offsets equal and opposite
943 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
944
945 npairs = npairs + 1
946 do s = 1, nri
947 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
948 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
949 end do
950 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
951
952 used(i) = .true.
953 used(j) = .true.
954 exit
955 end do
956 end do
957
958 assert(npairs == size/2)
959
960 safe_deallocate_a(idx)
962 end subroutine group_by_pairs_antisym
963
965 subroutine nl_operator_build_symmetric_weights(op, max_size)
966 type(nl_operator_t), intent(inout) :: op
967 integer, optional, intent(in) :: max_size
968
969 integer :: ldf, start, end, ipair
970
971 if (op%symmetry == op_general) return
972
974
975 assert(op%const_w)
976 assert(conf%target_states_block_size > 0)
977
978 if(present(max_size)) then
979 start = op%max_allocated_ri_pair + 1
980 end = max_size
981 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
982 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
983 if (op%mesh%parallel_in_domains) then
984 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
985 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
986 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
987 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
988 end if
989 else
990 ! Max pairs = n/2
991 safe_allocate(op%wpair(1:op%stencil%size/2))
992 start = 1
993 end = log2(conf%target_states_block_size)+1
995 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:end))
996 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:end))
997 if (op%mesh%parallel_in_domains) then
998 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
999 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
1000 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1001 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1002 end if
1003 end if
1004 op%max_allocated_ri_pair = end
1005
1006 do ldf = start-1, end-1
1007 select case(op%symmetry)
1008 case(op_symmetric)
1009 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1010 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
1011 if (op%mesh%parallel_in_domains) then
1012 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1013 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
1014 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1015 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
1016 end if
1017 case(op_antisymmetric)
1018 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1019 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
1020 if (op%mesh%parallel_in_domains) then
1021 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1022 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
1023 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1024 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
1025 end if
1026 end select
1027 end do
1028
1029 if (.not. present(max_size)) then
1030 write(message(1), '(3a)') 'Debug info: Sorted weights for ', trim(op%label), '.'
1031 call messages_info(1, debug_only=.true.)
1032
1033 do ipair = 1, op%npairs
1034 write(message(1), '(a,i3,f25.10,2(1x,i4))') ' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
1035 call messages_info(1, debug_only=.true.)
1036 end do
1037 end if
1038
1041
1043 subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
1044 integer, allocatable, intent(inout) :: ri(:,:,:)
1045 integer, intent(in) :: stencil_size, nri
1046 integer, intent(in) :: old_size, new_size
1047
1048 integer, allocatable :: tmp(:,:,:)
1049
1050 safe_allocate_source_a(tmp, ri)
1051 safe_deallocate_a(ri)
1052 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
1053 ri(:,:,1:old_size) = tmp
1054 safe_deallocate_a(tmp)
1055 end subroutine reallocate_array
1056
1057#include "undef.F90"
1058#include "real.F90"
1059#include "nl_operator_inc.F90"
1061#include "undef.F90"
1062#include "complex.F90"
1063#include "nl_operator_inc.F90"
1064
1065end module nl_operator_oct_m
1066
1067!! Local Variables:
1068!! mode: f90
1069!! coding: utf-8
1070!! End:
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:1227
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:879
pure logical function, public accel_is_enabled()
Definition: accel.F90:392
integer, parameter, public accel_mem_read_only
Definition: accel.F90:182
integer pure function, public accel_max_workgroup_size()
Definition: accel.F90:1038
This module implements batches of mesh functions.
Definition: batch.F90:135
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
real(real64), parameter, public m_zero
Definition: global.F90:191
real(real64), parameter, public m_half
Definition: global.F90:197
This module implements the index, used for the mesh points.
Definition: index.F90:124
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:938
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:950
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1091
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:1023
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1063
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_copy(opo, opi)
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:119
Some general things and nomenclature:
Definition: par_vec.F90:173
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:119
This module defines stencils used in Octopus.
Definition: stencil.F90:137
subroutine, public stencil_end(this)
Definition: stencil.F90:216
subroutine, public stencil_copy(input, output)
Definition: stencil.F90:198
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_integer
Definition: types.F90:137
Describes mesh distribution to nodes.
Definition: mesh.F90:187
index type for non-local operators
data type for non local operators
int true(void)