Octopus
nl_operator.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
24 use accel_oct_m
25 use batch_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use index_oct_m
30 use iso_c_binding
31 use math_oct_m
32 use mesh_oct_m
34 use mpi_oct_m
39 use parser_oct_m
41 use space_oct_m
43 use types_oct_m
45
46 implicit none
47
48 private
49 public :: &
71
77 private
78 integer :: nri = 0
79 integer, allocatable :: imin(:)
80 integer, allocatable :: imax(:)
81 integer, allocatable :: ri(:, :)
82 end type nl_operator_index_t
83
85 type nl_operator_t
86 private
87 type(stencil_t), public :: stencil
88 type(mesh_t), pointer :: mesh => null()
89 integer, allocatable :: nn(:)
90 integer, public :: np = 0
91 ! When running in parallel mode, the next three arrays are unique on each node.
92 integer, allocatable, public :: index(:,:)
93 real(real64), allocatable, public :: w(:,:)
94
95 logical, public :: const_w = .true.
96
97 type(accel_mem_t), public :: buff_weights
98 type(accel_mem_t), public :: buff_half_weights
99
100 character(len=40) :: label
101
103 integer, public :: nri = 0
104 integer, allocatable, public :: ri(:,:)
105 integer, allocatable, public :: rimap(:)
106 integer, allocatable, public :: rimap_inv(:)
107
108 integer :: ninner = 0
109 integer :: nouter = 0
110
111 type(nl_operator_index_t) :: inner
112 type(nl_operator_index_t) :: outer
113
114 type(accel_kernel_t) :: kernel
115 type(accel_mem_t) :: buff_imin
116 type(accel_mem_t) :: buff_imax
117 type(accel_mem_t) :: buff_ri
118 type(accel_mem_t) :: buff_map
119 type(accel_mem_t) :: buff_all
120 type(accel_mem_t) :: buff_inner
121 type(accel_mem_t) :: buff_outer
122 type(accel_mem_t) :: buff_stencil
123 type(accel_mem_t) :: buff_ip_to_xyz
124 type(accel_mem_t) :: buff_xyz_to_ip
125
126 ! For multigrid solvers
127 type(nl_operator_t), public, pointer :: coarser => null()
128
129 end type nl_operator_t
130
131 integer, parameter :: &
132 OP_FORTRAN = 0, &
133 op_vec = 1, &
134 op_min = op_fortran, &
135 op_max = op_vec
136
137 integer, parameter :: &
138 OP_INVMAP = 1, &
139 op_map = 2, &
140 op_nomap = 3
141
142 integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2
143
144 logical :: compact_boundaries
145
146 interface
147 integer function op_is_available(opid, type)
148 implicit none
149 integer, intent(in) :: opid, type
150 end function op_is_available
151 end interface
152
153 integer :: dfunction_global = -1
154 integer :: zfunction_global = -1
155 integer :: function_opencl
156
157contains
158
159 ! ---------------------------------------------------------
162 subroutine nl_operator_global_init(namespace)
163 type(namespace_t), intent(in) :: namespace
164
165 integer :: default
166
168
169 !%Variable OperateDouble
170 !%Type integer
171 !%Section Execution::Optimization
172 !%Default optimized
173 !%Description
174 !% This variable selects the subroutine used to apply non-local
175 !% operators over the grid for real functions.
176 !%Option fortran 0
177 !% The standard Fortran function.
178 !%Option optimized 1
179 !% This version is optimized using vector primitives (if available).
180 !%End
182 !%Variable OperateComplex
183 !%Type integer
184 !%Section Execution::Optimization
185 !%Default optimized
186 !%Description
187 !% This variable selects the subroutine used to apply non-local
188 !% operators over the grid for complex functions.
189 !%Option fortran 0
190 !% The standard Fortran function.
191 !%Option optimized 1
192 !% This version is optimized using vector primitives (if available).
193 !%End
194
195 default = op_vec
197 call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
198 if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')
200 call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
201 if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')
203
206 !%Variable OperateAccel
207 !%Type integer
208 !%Default map
209 !%Section Execution::Optimization
210 !%Description
211 !% This variable selects the subroutine used to apply non-local
212 !% operators over the grid when an accelerator device is used.
213 !%Option invmap 1
214 !% The standard implementation ported to OpenCL.
215 !%Option map 2
216 !% A different version, more suitable for GPUs.
217 !%End
218 call parse_variable(namespace, 'OperateAccel', op_map, function_opencl)
219
220 call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')
221
222 end if
223
224 !%Variable NLOperatorCompactBoundaries
225 !%Type logical
226 !%Default no
227 !%Section Execution::Optimization
228 !%Description
229 !% (Experimental) When set to yes, for finite systems Octopus will
230 !% map boundary points for finite-differences operators to a few
231 !% memory locations. This increases performance, however it is
232 !% experimental and has not been thoroughly tested.
233 !%End
234
235 call parse_variable(namespace, 'NLOperatorCompactBoundaries', .false., compact_boundaries)
236
237 if (compact_boundaries) then
238 call messages_experimental('NLOperatorCompactBoundaries')
239 end if
242 end subroutine nl_operator_global_init
243
244 ! ---------------------------------------------------------
245
250 end subroutine nl_operator_global_end
251
252 ! ---------------------------------------------------------
256 subroutine nl_operator_init(op, label)
257 type(nl_operator_t), intent(inout) :: op
258 character(len=*), intent(in) :: label
259
260 push_sub(nl_operator_init)
261
262 op%label = label
263
264 pop_sub(nl_operator_init)
265 end subroutine nl_operator_init
266
267
268 ! ---------------------------------------------------------
269 subroutine nl_operator_copy(opo, opi)
270 type(nl_operator_t), intent(inout) :: opo
271 type(nl_operator_t), target, intent(in) :: opi
272
273 push_sub(nl_operator_copy)
274
275 ! We cannot currently copy the GPU kernel for the nl_operator
276 assert(.not. accel_is_enabled())
277
278 call nl_operator_end(opo)
279 call nl_operator_init(opo, opi%label)
280
281 call stencil_copy(opi%stencil, opo%stencil)
282
283 opo%np = opi%np
284 opo%mesh => opi%mesh
285
286 safe_allocate_source_a(opo%nn, opi%nn)
287 safe_allocate_source_a(opo%index, opi%index)
288 safe_allocate_source_a(opo%w, opi%w)
289
290 opo%const_w = opi%const_w
291
292 opo%nri = opi%nri
293 assert(allocated(opi%ri))
294
295 safe_allocate_source_a(opo%ri, opi%ri)
296 safe_allocate_source_a(opo%rimap, opi%rimap)
297 safe_allocate_source_a(opo%rimap_inv, opi%rimap_inv)
298
299 if (opi%mesh%parallel_in_domains) then
300 opo%inner%nri = opi%inner%nri
301 safe_allocate_source_a(opo%inner%imin, opi%inner%imin)
302 safe_allocate_source_a(opo%inner%imax, opi%inner%imax)
303 safe_allocate_source_a(opo%inner%ri, opi%inner%ri)
304
305 opo%outer%nri = opi%outer%nri
306 safe_allocate_source_a(opo%outer%imin, opi%outer%imin)
307 safe_allocate_source_a(opo%outer%imax, opi%outer%imax)
308 safe_allocate_source_a(opo%outer%ri, opi%outer%ri)
309 end if
310
311 ! We create the corresponding GPU buffers
312 if (accel_is_enabled() .and. opo%const_w) then
313 call accel_create_buffer(opo%buff_weights, accel_mem_read_only, type_float, opo%stencil%size)
314 call accel_write_buffer(opo%buff_weights, opo%stencil%size, opo%w(:, 1))
315 call accel_create_buffer(opo%buff_half_weights, accel_mem_read_only, type_float, opo%stencil%size)
316 call accel_write_buffer(opo%buff_half_weights, opo%stencil%size, -m_half*opo%w(:, 1))
317 end if
318
319
320 pop_sub(nl_operator_copy)
321 end subroutine nl_operator_copy
322
323
324 ! ---------------------------------------------------------
325 subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
326 class(space_t), intent(in) :: space
327 type(mesh_t), target, intent(in) :: mesh
328 type(nl_operator_t), intent(inout) :: op
329 integer, intent(in) :: np
330 logical, optional, intent(in) :: const_w
331 logical, optional, intent(in) :: regenerate
332
333 integer :: ii, jj, p1(space%dim), time, current
334 integer, allocatable :: st1(:), st2(:), st1r(:)
335 integer :: nn
336 integer :: ir, maxp, iinner, iouter
337 logical :: change, force_change
338 character(len=200) :: flags
339 integer, allocatable :: inner_points(:), outer_points(:), all_points(:)
340
341 push_sub(nl_operator_build)
342
343 if (mesh%parallel_in_domains .and. .not. const_w) then
344 call messages_experimental('Domain parallelization with curvilinear coordinates')
345 end if
346
347 assert(np > 0)
348
349 ! store values in structure
350 op%np = np
351 op%mesh => mesh
352 op%const_w = .false.
353 if (present(const_w)) op%const_w = const_w
354
355 ! allocate weights op%w
356 if (op%const_w) then
357 safe_allocate(op%w(1:op%stencil%size, 1))
358 if (debug%info) then
359 message(1) = 'Info: nl_operator_build: working with constant weights.'
360 call messages_info(1)
361 end if
362 else
363 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
364 if (debug%info) then
365 message(1) = 'Info: nl_operator_build: working with non-constant weights.'
366 call messages_info(1)
367 end if
368 end if
369
370 ! set initially to zero
371 op%w = m_zero
372
373 ! Build lookup table
374 safe_allocate(st1(1:op%stencil%size))
375 safe_allocate(st1r(1:op%stencil%size))
376 safe_allocate(st2(1:op%stencil%size))
377
378 op%nri = 0
379 do time = 1, 2
380 st2 = 0
381 do ii = 1, np
382 p1 = 0
383 call mesh_local_index_to_coords(mesh, ii, p1)
384
385 do jj = 1, op%stencil%size
386 ! Get local index of p1 plus current stencil point.
387 st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))
388
389 ! if boundary conditions are zero, we can remap boundary
390 ! points to reduce memory accesses. We cannot do this for the
391 ! first point, since it is used to build the weights, so it
392 ! has to have the positions right
393 if (ii > 1 .and. compact_boundaries .and. mesh_compact_boundaries(mesh) .and. .not. space%is_periodic()) then
394 st1(jj) = min(st1(jj), mesh%np + 1)
395 end if
396 assert(st1(jj) > 0)
397 end do
398
399 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
400
401 change = any(st1 /= st2)
402
403 !the next is to detect when we move from a point that does not
404 !have boundary points as neighbours to one that has
405 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
406
407 if (change .and. compact_boundaries .and. mesh_compact_boundaries(mesh) .and. .not. space%is_periodic()) then
408 !try to repair it by changing the boundary points
409 do jj = 1, op%stencil%size
410 if (st1(jj) + ii > mesh%np .and. st2(jj) + ii - 1 > mesh%np .and. st2(jj) + ii <= mesh%np_part) then
411 st1r(jj) = st2(jj)
412 else
413 st1r(jj) = st1(jj)
414 end if
415 end do
416
417 change = any(st1r /= st2)
419 if (.not. change) st1 = st1r
420 end if
421
422 ! if the stencil changes
423 if (change .or. force_change) then
424 !store it
425 st2 = st1
426
427 !first time, just count
428 if (time == 1) op%nri = op%nri + 1
429
430 !second time, store
431 if (time == 2) then
432 current = current + 1
433 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
434 end if
435 end if
436
437 if (time == 2) op%rimap(ii) = current
438
439 end do
440
441 !after counting, allocate
442 if (time == 1) then
443 safe_deallocate_a(op%ri)
444 safe_deallocate_a(op%rimap)
445 safe_deallocate_a(op%rimap_inv)
446
447 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
448 safe_allocate(op%rimap(1:op%np))
449 safe_allocate(op%rimap_inv(1:op%nri + 1))
450 op%ri = 0
451 op%rimap = 0
452 op%rimap_inv = 0
453 current = 0
454
455 ! the sizes
456 if (mesh%use_curvilinear) then
457 safe_allocate(op%nn(1:op%nri))
458 ! for the moment all the sizes are the same
459 op%nn = op%stencil%size
460 end if
461 end if
462
463 end do
464
465 !the inverse mapping
466 op%rimap_inv(1) = 0
467 do jj = 1, op%np
468 op%rimap_inv(op%rimap(jj) + 1) = jj
469 end do
470 op%rimap_inv(op%nri + 1) = op%np
471
472 safe_deallocate_a(st1)
473 safe_deallocate_a(st1r)
474 safe_deallocate_a(st2)
475
476 do jj = 1, op%nri
477 nn = op%rimap_inv(jj + 1) - op%rimap_inv(jj)
478 end do
479
480 if (op%mesh%parallel_in_domains) then
481 !now build the arrays required to apply the nl_operator by parts
482
483 !count points
484 op%inner%nri = 0
485 op%outer%nri = 0
486 do ir = 1, op%nri
487 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
488 if (maxp <= np) then
489 !inner point
490 op%inner%nri = op%inner%nri + 1
491 assert(op%inner%nri <= op%nri)
492 else
493 !outer point
494 op%outer%nri = op%outer%nri + 1
495 assert(op%outer%nri <= op%nri)
496 end if
497 end do
498
499 assert(op%inner%nri + op%outer%nri == op%nri)
500
501 if (optional_default(regenerate, .false.)) then
502 safe_deallocate_a(op%inner%imin)
503 safe_deallocate_a(op%inner%imax)
504 safe_deallocate_a(op%inner%ri)
505 safe_deallocate_a(op%outer%imin)
506 safe_deallocate_a(op%outer%imax)
507 safe_deallocate_a(op%outer%ri)
508 end if
509 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
510 safe_allocate(op%inner%imax(1:op%inner%nri))
511 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
512
513 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
514 safe_allocate(op%outer%imax(1:op%outer%nri))
515 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
516
517 !now populate the arrays
518 iinner = 0
519 iouter = 0
520 do ir = 1, op%nri
521 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
522 if (maxp <= np) then
523 !inner point
524 iinner = iinner + 1
525 op%inner%imin(iinner) = op%rimap_inv(ir)
526 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
527 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
528 else
529 !outer point
530 iouter = iouter + 1
531 op%outer%imin(iouter) = op%rimap_inv(ir)
532 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
533 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
534 end if
535 end do
536
537 !verify that all points in the inner operator are actually inner
538 do ir = 1, op%inner%nri
539 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
540 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
541 end do
542 end do
543
544 end if
545
546 if (accel_is_enabled() .and. op%const_w) then
547
548 write(flags, '(i5)') op%stencil%size
549 flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
550
551 if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)
552
553 select case (function_opencl)
554 case (op_invmap)
555 call accel_kernel_build(op%kernel, 'operate.cl', 'operate', flags)
556 case (op_map)
557 call accel_kernel_build(op%kernel, 'operate.cl', 'operate_map', flags)
558 end select
559
560 ! conversion to i8 needed to avoid integer overflow
561 call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
562 call accel_write_buffer(op%buff_ri, int(op%nri, int64)*op%stencil%size, op%ri)
563
564 select case (function_opencl)
565 case (op_invmap)
566 call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
567 call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
568 call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
569 call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))
570
571 case (op_map)
572
574 call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)
575
576 if (op%mesh%parallel_in_domains) then
577
578 safe_allocate(inner_points(1:op%mesh%np))
579 safe_allocate(outer_points(1:op%mesh%np))
580 safe_allocate(all_points(1:op%mesh%np))
581
582 op%ninner = 0
583 op%nouter = 0
584
585 do ii = 1, op%mesh%np
586 all_points(ii) = ii - 1
587 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
588 if (maxp <= op%mesh%np) then
589 op%ninner = op%ninner + 1
590 inner_points(op%ninner) = ii - 1
591 else
592 op%nouter = op%nouter + 1
593 outer_points(op%nouter) = ii - 1
594 end if
595 end do
596
598 call accel_write_buffer(op%buff_all, op%mesh%np, all_points)
599
601 call accel_write_buffer(op%buff_inner, op%ninner, inner_points)
602
604 call accel_write_buffer(op%buff_outer, op%nouter, outer_points)
605
606 safe_deallocate_a(inner_points)
607 safe_deallocate_a(outer_points)
608 safe_deallocate_a(all_points)
609
610 end if
611 end select
612 end if
613
614 pop_sub(nl_operator_build)
615
616 end subroutine nl_operator_build
617
618 ! ---------------------------------------------------------
619 subroutine nl_operator_output_weights(this)
620 type(nl_operator_t), intent(inout) :: this
621
622 integer :: istencil, idir
623
625
626 if (debug%info) then
627
628 write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
629 write(message(2), '(a)') ' Spacing:'
630 do idir = 1, this%mesh%box%dim
631 write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(idir)
632 end do
633 call messages_info(2)
634
635 do istencil = 1, this%stencil%size
636 select case(this%mesh%box%dim)
637 case(1)
638 write(message(1), '(a,i3,1i4,f25.10)') ' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
639 case(2)
640 write(message(1), '(a,i3,2i4,f25.10)') ' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
641 case(3)
642 write(message(1), '(a,i3,3i4,f25.10)') ' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
643 end select
644 call messages_info(1)
645 end do
646
647 end if
648
650
651 end subroutine nl_operator_output_weights
652
653 ! ---------------------------------------------------------
655 subroutine nl_operator_adjoint(op, opt, mesh, self_adjoint)
656 type(nl_operator_t), target, intent(in) :: op
657 type(nl_operator_t), target, intent(out) :: opt
658 type(mesh_t), target, intent(in) :: mesh
659 logical, intent(in) :: self_adjoint
660
661 integer :: ip, jp, kp, lp, index, ii, p1(1:mesh%box%dim)
662 real(real64), pointer, contiguous :: vol_pp(:), weights(:, :), tmp(:)
663 real(real64) :: factor
664
665 push_sub(nl_operator_adjoint)
666
667 if (self_adjoint) then
668 ! make operator self-adjoint
669 factor = m_one
670 else
671 ! make operator skew-adjoint
672 factor = -m_one
673 end if
674
675 call nl_operator_copy(opt, op)
676
677 if (mesh%parallel_in_domains) then
678 ! get ghost point values
679 safe_allocate(vol_pp(1:mesh%np_part))
680 vol_pp(1:mesh%np) = mesh%vol_pp(1:mesh%np)
681 call dpar_vec_ghost_update(mesh%pv, vol_pp)
682
683 if (.not.op%const_w) then
684 safe_allocate(weights(1:op%stencil%size, 1:mesh%np_part))
685 safe_allocate(tmp(1:mesh%np_part))
686 do ii = 1, op%stencil%size
687 tmp(1:mesh%np) = op%w(ii, 1:mesh%np)
688 call dpar_vec_ghost_update(mesh%pv, tmp)
689 weights(ii, 1:mesh%np_part) = tmp(1:mesh%np_part)
690 end do
691 safe_deallocate_p(tmp)
692 end if
693 else
694 vol_pp => mesh%vol_pp
695 weights => op%w
696 end if
697
698 if (.not.op%const_w) then
699 opt%w = m_zero
700 do ip = 1, mesh%np
701 do jp = 1, op%stencil%size
702 index = nl_operator_get_index(op, jp, ip)
703 if (index <= mesh%np) then
704 ! point is in the mesh
705 do lp = 1, op%stencil%size
706 kp = nl_operator_get_index(op, lp, index)
707 if (kp == ip) then
708 opt%w(jp, ip) = m_half*weights(jp, ip) + factor*m_half*(vol_pp(index)/vol_pp(ip))*weights(lp, index)
709 end if
710 end do
711 else if (index <= mesh%np + mesh%pv%np_ghost) then
712 ! point is in the ghost layer
713 call mesh_local_index_to_coords(mesh, index, p1)
714 do lp = 1, op%stencil%size
715 kp = mesh_local_index_from_coords(mesh, p1(1:mesh%box%dim) + &
716 op%stencil%points(1:mesh%box%dim, lp))
717 if (kp == ip) then
718 opt%w(jp, ip) = m_half*weights(jp, ip) + factor*m_half*(vol_pp(index)/vol_pp(ip))*weights(lp, index)
719 end if
720 end do
721 end if
722 end do
723 end do
724 else
725 opt%w(1:op%stencil%size, 1) = op%w(1:op%stencil%size, 1)
726 end if
727
729
730 if (mesh%parallel_in_domains) then
731 safe_deallocate_p(vol_pp)
732 safe_deallocate_p(weights)
733 end if
734
735 pop_sub(nl_operator_adjoint)
736 end subroutine nl_operator_adjoint
737
738 ! ---------------------------------------------------------
739 subroutine nl_operator_end(op)
740 type(nl_operator_t), intent(inout) :: op
741
742 push_sub(nl_operator_end)
743
744 if (accel_is_enabled() .and. op%const_w) then
745
746 call accel_release_buffer(op%buff_ri)
747 select case (function_opencl)
748 case (op_invmap)
749 call accel_release_buffer(op%buff_imin)
750 call accel_release_buffer(op%buff_imax)
751
752 case (op_map)
753 call accel_release_buffer(op%buff_map)
754 if (op%mesh%parallel_in_domains) then
755 call accel_release_buffer(op%buff_all)
756 call accel_release_buffer(op%buff_inner)
757 call accel_release_buffer(op%buff_outer)
758 end if
759
760 case (op_nomap)
761 call accel_release_buffer(op%buff_map)
762 call accel_release_buffer(op%buff_stencil)
763 call accel_release_buffer(op%buff_xyz_to_ip)
764 call accel_release_buffer(op%buff_ip_to_xyz)
765 end select
766
767 call accel_release_buffer(op%buff_weights)
768 call accel_release_buffer(op%buff_half_weights)
769 end if
770
771 safe_deallocate_a(op%inner%imin)
772 safe_deallocate_a(op%inner%imax)
773 safe_deallocate_a(op%inner%ri)
774 safe_deallocate_a(op%outer%imin)
775 safe_deallocate_a(op%outer%imax)
776 safe_deallocate_a(op%outer%ri)
777
778 safe_deallocate_a(op%index)
779 safe_deallocate_a(op%w)
780
781 safe_deallocate_a(op%ri)
782 safe_deallocate_a(op%rimap)
783 safe_deallocate_a(op%rimap_inv)
784 safe_deallocate_a(op%nn)
785
786 call stencil_end(op%stencil)
787
788 pop_sub(nl_operator_end)
789 end subroutine nl_operator_end
790
791
792 ! ---------------------------------------------------------
793 integer pure function nl_operator_get_index(op, is, ip) result(res)
794 type(nl_operator_t), intent(in) :: op
795 integer, intent(in) :: is
796 integer, intent(in) :: ip
797
798 res = ip + op%ri(is, op%rimap(ip))
799 end function nl_operator_get_index
800
801 ! ---------------------------------------------------------
802
804 type(nl_operator_t), intent(inout) :: op
805
807
808 ! Update the GPU weights
809 if (accel_is_enabled() .and. op%const_w) then
810 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
811 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
812 end if
813
816
817
818 ! ---------------------------------------------------------
819
820 subroutine nl_operator_update_gpu_buffers(op)
821 type(nl_operator_t), intent(inout) :: op
822
824
825 ! Update the GPU weights
826 if (accel_is_enabled() .and. op%const_w) then
827 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
828 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
829 end if
830
833
834 ! ---------------------------------------------------------
835
836 integer pure function nl_operator_np_zero_bc(op) result(np_bc)
837 type(nl_operator_t), intent(in) :: op
838
839 integer :: jj, ii
840
841 np_bc = 0
842 do jj = 1, op%nri
843 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
844 np_bc = max(np_bc, ii)
845 end do
846
847 end function nl_operator_np_zero_bc
848
849 ! ------------------------------------------------------
850
851 logical pure function nl_operator_compact_boundaries()
852
855
856
857#include "undef.F90"
858#include "real.F90"
859#include "nl_operator_inc.F90"
860
861#include "undef.F90"
862#include "complex.F90"
863#include "nl_operator_inc.F90"
864
865end module nl_operator_oct_m
866
867!! Local Variables:
868!! mode: f90
869!! coding: utf-8
870!! End:
subroutine, public accel_release_buffer(this)
Definition: accel.F90:1246
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:2067
pure logical function, public accel_is_enabled()
Definition: accel.F90:400
integer, parameter, public accel_mem_read_only
Definition: accel.F90:183
integer pure function, public accel_max_workgroup_size()
Definition: accel.F90:1472
This module implements batches of mesh functions.
Definition: batch.F90:133
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:122
subroutine, public dpar_vec_ghost_update(pv, v_local)
Updates ghost points of every node.
type(debug_t), save, public debug
Definition: debug.F90:154
real(real64), parameter, public m_zero
Definition: global.F90:187
real(real64), parameter, public m_half
Definition: global.F90:193
real(real64), parameter, public m_one
Definition: global.F90:188
This module implements the index, used for the mesh points.
Definition: index.F90:122
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:115
subroutine, public weights(N, M, cc, side)
Compute the weights for finite-difference calculations:
Definition: math.F90:522
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:935
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:947
logical pure function, public mesh_compact_boundaries(mesh)
Definition: mesh.F90:822
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:1057
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
Definition: messages.F90:624
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:723
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1097
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:145
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
subroutine, public nl_operator_init(op, label)
initialize an instance of a non-local operator by setting the label
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_copy(opo, opi)
subroutine, public nl_operator_output_weights(this)
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
logical pure function, public nl_operator_compact_boundaries()
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
logical compact_boundaries
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
subroutine, public nl_operator_adjoint(op, opt, mesh, self_adjoint)
opt has to be initialised and built.
integer pure function, public nl_operator_np_zero_bc(op)
subroutine, public znl_operator_operate_diag(op, fo)
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:117
Some general things and nomenclature:
Definition: par_vec.F90:171
This module defines stencils used in Octopus.
Definition: stencil.F90:135
subroutine, public stencil_end(this)
Definition: stencil.F90:214
subroutine, public stencil_copy(input, output)
Definition: stencil.F90:196
type(type_t), public type_float
Definition: types.F90:133
type(type_t), public type_integer
Definition: types.F90:135
Describes mesh distribution to nodes.
Definition: mesh.F90:186
index type for non-local operators
data type for non local operators
int true(void)