Octopus
batch_ops.F90
Go to the documentation of this file.
1!! Copyright (C) 2008 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
23module batch_ops_oct_m
24 use accel_oct_m
25 use batch_oct_m
26 use blas_oct_m
27 use debug_oct_m
28 use iso_c_binding
29 use global_oct_m
31 use math_oct_m
34 use types_oct_m
35
36 implicit none
37
38 private
39 public :: &
41 batch_axpy, &
42 batch_scal, &
43 batch_xpay, &
49 batch_mul, &
57
59 interface batch_axpy
60 module procedure dbatch_axpy_const
61 module procedure zbatch_axpy_const
62 module procedure dbatch_axpy_vec
63 module procedure zbatch_axpy_vec
64 end interface batch_axpy
65
67 interface batch_scal
68 module procedure dbatch_scal_const
69 module procedure zbatch_scal_const
70 module procedure dbatch_scal_vec
71 module procedure zbatch_scal_vec
72 end interface batch_scal
73
75 interface batch_xpay
76 module procedure dbatch_xpay_vec
77 module procedure zbatch_xpay_vec
78 module procedure dbatch_xpay_const
79 module procedure zbatch_xpay_const
80 end interface batch_xpay
81
82 interface batch_add_with_map
83 module procedure batch_add_with_map_cpu
84 module procedure batch_add_with_map_accel
85 end interface batch_add_with_map
86
87 interface batch_copy_with_map
88 module procedure batch_copy_with_map_cpu
89 module procedure batch_copy_with_map_accel
90 end interface batch_copy_with_map
91
106 interface batch_set_state
107 module procedure dbatch_set_state1
108 module procedure zbatch_set_state1
109 module procedure dbatch_set_state2
110 module procedure zbatch_set_state2
111 module procedure dbatch_set_state3
112 module procedure zbatch_set_state3
113 end interface batch_set_state
114
115 interface batch_get_state
116 module procedure dbatch_get_state1
117 module procedure zbatch_get_state1
118 module procedure dbatch_get_state2
119 module procedure zbatch_get_state2
120 module procedure dbatch_get_state3
121 module procedure zbatch_get_state3
122 end interface batch_get_state
123
124 interface batch_get_points
125 module procedure dbatch_get_points
126 module procedure zbatch_get_points
127 module procedure batch_get_points_accel
128 end interface batch_get_points
129
130 interface batch_set_points
131 module procedure dbatch_set_points
132 module procedure zbatch_set_points
133 module procedure batch_set_points_accel
134 end interface batch_set_points
135
136 interface batch_mul
137 module procedure dbatch_mul
138 module procedure zbatch_mul
139 end interface batch_mul
140
141
142contains
143
144 !--------------------------------------------------------------
146 subroutine batch_set_zero(this, np, async)
147 class(batch_t), intent(inout) :: this
148 integer, optional, intent(in) :: np
150 logical, optional, intent(in) :: async
151
152 integer :: ist_linear, ist, ip, np_
153
154 push_sub(batch_set_zero)
155
156 assert(not_in_openmp())
157
158 call profiling_in("BATCH_SET_ZERO")
159
160 select case (this%status())
162 np_ = optional_default(np, int(this%pack_size(2), int32))
163 assert(np_ <= int(this%pack_size(2), int32))
164 call accel_set_buffer_to_zero(this%ff_device, this%type(), (int(this%pack_size(1), int32) * np_), async=async)
165
166 case (batch_packed)
167 np_ = optional_default(np, int(this%pack_size(2), int32))
168 assert(np_ <= int(this%pack_size(2), int32))
169 if (this%type() == type_float) then
170 !$omp parallel do private(ist) schedule(static)
171 do ip = 1, np_
172 !$omp simd
173 do ist = 1, int(this%pack_size(1), int32)
174 this%dff_pack(ist, ip) = m_zero
175 end do
176 end do
177 else
178 !$omp parallel do private(ist) schedule(static)
179 do ip = 1, np_
180 !$omp simd
181 do ist = 1, int(this%pack_size(1), int32)
182 this%zff_pack(ist, ip) = m_z0
183 end do
184 end do
185 end if
186
187 case (batch_not_packed)
188 if (this%type() == type_float) then
189 np_ = optional_default(np, ubound(this%dff_linear, dim=1))
190 assert(np_ <= ubound(this%dff_linear, dim=1))
191 do ist_linear = 1, this%nst_linear
192 !$omp parallel do schedule(static)
193 do ip = 1, np_
194 this%dff_linear(ip, ist_linear) = m_zero
195 end do
196 end do
197 else
198 np_ = optional_default(np, ubound(this%zff_linear, dim=1))
199 assert(np_ <= ubound(this%zff_linear, dim=1))
200 do ist_linear = 1, this%nst_linear
201 !$omp parallel do schedule(static)
202 do ip = 1, np_
203 this%zff_linear(ip, ist_linear) = m_z0
204 end do
205 end do
206 end if
207
208 case default
209 message(1) = "batch_set_zero: unknown batch status."
210 call messages_fatal(1)
211
212 end select
213
214 call profiling_out("BATCH_SET_ZERO")
215
216 pop_sub(batch_set_zero)
217 end subroutine batch_set_zero
218
219 ! --------------------------------------------------------------
221 !
222 subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
223 class(batch_t), intent(in) :: this
224 integer, intent(in) :: sp
225 integer, intent(in) :: ep
226 type(accel_mem_t), intent(inout) :: psi
227 integer, intent(in) :: ldpsi1
228 integer, intent(in) :: ldpsi2
230 integer :: tsize, ii, it
231 type(accel_kernel_t), save :: kernel
232 integer, allocatable :: linear_to_ist(:), linear_to_idim(:)
233 type(accel_mem_t) :: buff_linear_to_ist, buff_linear_to_idim
234
235 push_sub(batch_get_points_accel)
236 call profiling_in("GET_POINTS")
237
238 select case (this%status())
240 call messages_not_implemented('batch_get_points_accel for non-CL batches')
241
243
244 tsize = types_get_size(this%type())/types_get_size(type_float)
245 safe_allocate(linear_to_ist(1:this%nst_linear*tsize))
246 safe_allocate(linear_to_idim(1:this%nst_linear*tsize))
247 do ii = 1, this%nst_linear
248 do it = 1, tsize
249 linear_to_ist(tsize*(ii-1)+it) = tsize*(this%linear_to_ist(ii) - 1) + it - 1
250 linear_to_idim(tsize*(ii-1)+it) = this%linear_to_idim(ii) - 1
251 end do
252 end do
253
254 call accel_create_buffer(buff_linear_to_ist, accel_mem_read_only, type_integer, this%nst_linear*tsize)
255 call accel_write_buffer(buff_linear_to_ist, this%nst_linear*tsize, linear_to_ist)
256 call accel_create_buffer(buff_linear_to_idim, accel_mem_read_only, type_integer, this%nst_linear*tsize)
257 call accel_write_buffer(buff_linear_to_idim, this%nst_linear*tsize, linear_to_idim)
258
259 call accel_kernel_start_call(kernel, 'points.cl', 'get_points')
260
261 call accel_set_kernel_arg(kernel, 0, sp)
262 call accel_set_kernel_arg(kernel, 1, ep)
263 call accel_set_kernel_arg(kernel, 2, buff_linear_to_ist)
264 call accel_set_kernel_arg(kernel, 3, buff_linear_to_idim)
265 call accel_set_kernel_arg(kernel, 4, this%nst_linear*tsize)
266 call accel_set_kernel_arg(kernel, 5, this%ff_device)
267 call accel_set_kernel_arg(kernel, 6, int(this%pack_size_real(1), int32))
268 call accel_set_kernel_arg(kernel, 7, psi)
269 call accel_set_kernel_arg(kernel, 8, ldpsi1*tsize)
270 call accel_set_kernel_arg(kernel, 9, ldpsi2)
271
272 call accel_kernel_run(kernel, (/this%pack_size_real(1), int(ep - sp + 1, int64)/), (/this%pack_size_real(1), 1_int64/))
273
274 call accel_release_buffer(buff_linear_to_ist)
275 call accel_release_buffer(buff_linear_to_idim)
276 safe_deallocate_a(linear_to_ist)
277 safe_deallocate_a(linear_to_idim)
278
279 end select
280
281 call profiling_out("GET_POINTS")
282
284 end subroutine batch_get_points_accel
285
286 ! --------------------------------------------------------------
288 !
289 subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
290 class(batch_t), intent(inout) :: this
291 integer, intent(in) :: sp
292 integer, intent(in) :: ep
293 type(accel_mem_t), intent(in) :: psi
294 integer, intent(in) :: ldpsi1
295 integer, intent(in) :: ldpsi2
296
297 integer :: tsize, ii, it
298 type(accel_kernel_t), save :: kernel
299 integer, allocatable :: linear_to_ist(:), linear_to_idim(:)
300 type(accel_mem_t) :: buff_linear_to_ist, buff_linear_to_idim
301
302 push_sub(batch_set_points_accel)
303 call profiling_in("SET_POINTS")
304
305 select case (this%status())
307 call messages_not_implemented('batch_set_points_accel for non-CL batches')
308
310
311 tsize = types_get_size(this%type())/types_get_size(type_float)
312 safe_allocate(linear_to_ist(1:this%nst_linear*tsize))
313 safe_allocate(linear_to_idim(1:this%nst_linear*tsize))
314 do ii = 1, this%nst_linear
315 do it = 1, tsize
316 linear_to_ist(tsize*(ii-1)+it) = tsize*(this%linear_to_ist(ii) - 1) + it - 1
317 linear_to_idim(tsize*(ii-1)+it) = this%linear_to_idim(ii) - 1
318 end do
319 end do
320
321 call accel_create_buffer(buff_linear_to_ist, accel_mem_read_only, type_integer, this%nst_linear*tsize)
322 call accel_write_buffer(buff_linear_to_ist, this%nst_linear*tsize, linear_to_ist)
323 call accel_create_buffer(buff_linear_to_idim, accel_mem_read_only, type_integer, this%nst_linear*tsize)
324 call accel_write_buffer(buff_linear_to_idim, this%nst_linear*tsize, linear_to_idim)
325
326 call accel_kernel_start_call(kernel, 'points.cl', 'set_points')
327
328 call accel_set_kernel_arg(kernel, 0, sp)
329 call accel_set_kernel_arg(kernel, 1, ep)
330 call accel_set_kernel_arg(kernel, 2, buff_linear_to_ist)
331 call accel_set_kernel_arg(kernel, 3, buff_linear_to_idim)
332 call accel_set_kernel_arg(kernel, 4, this%nst_linear*tsize)
333 call accel_set_kernel_arg(kernel, 5, psi)
334 call accel_set_kernel_arg(kernel, 6, ldpsi1*tsize)
335 call accel_set_kernel_arg(kernel, 7, ldpsi2)
336 call accel_set_kernel_arg(kernel, 8, this%ff_device)
337 call accel_set_kernel_arg(kernel, 9, int(this%pack_size_real(1), int32))
338
339 call accel_kernel_run(kernel, (/this%pack_size_real(1), int(ep - sp + 1, int64)/), (/this%pack_size_real(1), 1_int64/))
340
341 call accel_release_buffer(buff_linear_to_ist)
342 call accel_release_buffer(buff_linear_to_idim)
343 safe_deallocate_a(linear_to_ist)
344 safe_deallocate_a(linear_to_idim)
345
346 end select
347
348 call profiling_out("SET_POINTS")
349
351 end subroutine batch_set_points_accel
352
353 ! -------------------------
357 !
358 integer pure function batch_points_block_size() result(block_size)
359
360 block_size = 61440
361
362 end function batch_points_block_size
363
364! -------------------------
365 subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
366 integer, intent(in) :: np
367 integer, intent(in) :: map(:)
368 class(batch_t), intent(in) :: xx
369 class(batch_t), intent(in) :: yy
370 class(batch_t), intent(inout) :: zz
371 type(accel_mem_t) :: buff_map
372
373 push_sub(batch_add_with_map_cpu)
374
375 if (xx%status() /= batch_device_packed) then
376 if (xx%type() == type_float) then
377 call dbatch_add_with_map(np, map, xx, yy, zz)
378 else
379 call zbatch_add_with_map(np, map, xx, yy, zz)
380 end if
381 else
382 ! copy map to GPU if not already there
383 call accel_create_buffer(buff_map, accel_mem_read_only, type_integer, np)
384 call accel_write_buffer(buff_map, np, map)
385 call batch_add_with_map_accel(np, buff_map, xx, yy, zz)
386 call accel_release_buffer(buff_map)
387 end if
388
390 end subroutine batch_add_with_map_cpu
391
392! -------------------------
393 subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
394 integer, intent(in) :: np
395 class(accel_mem_t), intent(in) :: map
396 class(batch_t), intent(in) :: xx
397 class(batch_t), intent(in) :: yy
398 class(batch_t), intent(inout) :: zz
399
400 type(accel_kernel_t), save :: kernel
401 integer(int64) :: localsize, dim3, dim2
402
404
405 call accel_kernel_start_call(kernel, 'copy.cl', 'add_with_map')
406
407 call accel_set_kernel_arg(kernel, 0, np)
408 call accel_set_kernel_arg(kernel, 1, map)
409 call accel_set_kernel_arg(kernel, 2, xx%ff_device)
410 call accel_set_kernel_arg(kernel, 3, log2(int(xx%pack_size_real(1), int32)))
411 call accel_set_kernel_arg(kernel, 4, yy%ff_device)
412 call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))
413 call accel_set_kernel_arg(kernel, 6, zz%ff_device)
414 call accel_set_kernel_arg(kernel, 7, log2(int(zz%pack_size_real(1), int32)))
415
416 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size_real(1)
417
418 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
419 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
420
421 call accel_kernel_run(kernel, (/xx%pack_size_real(1), dim2, dim3/), (/xx%pack_size_real(1), localsize, 1_int64/))
422
424 end subroutine batch_add_with_map_accel
425
426! -------------------------
427 subroutine batch_copy_with_map_cpu(np, map, xx, yy)
428 integer, intent(in) :: np
429 integer, intent(in) :: map(:)
430 class(batch_t), intent(in) :: xx
431 class(batch_t), intent(inout) :: yy
432 type(accel_mem_t) :: buff_map
433
435
436 if (xx%status() /= batch_device_packed) then
437 if (xx%type() == type_float) then
438 call dbatch_copy_with_map(np, map, xx, yy)
439 else
440 call zbatch_copy_with_map(np, map, xx, yy)
441 end if
442 else
443 ! copy map to GPU if not already there
444 call accel_create_buffer(buff_map, accel_mem_read_only, type_integer, np)
445 call accel_write_buffer(buff_map, np, map)
446 call batch_copy_with_map_accel(np, buff_map, xx, yy)
447 call accel_release_buffer(buff_map)
448 end if
449
452
453! -------------------------
454 subroutine batch_copy_with_map_accel(np, map, xx, yy)
455 integer, intent(in) :: np
456 class(accel_mem_t), intent(in) :: map
457 class(batch_t), intent(in) :: xx
458 class(batch_t), intent(inout) :: yy
459
460 type(accel_kernel_t), save :: kernel
461 integer(int64) :: localsize, dim3, dim2
462
464
465 call accel_kernel_start_call(kernel, 'copy.cl', 'copy_with_map')
466
467 ! execute only if map has at least one element
468 if (np > 0) then
469 call accel_set_kernel_arg(kernel, 0, np)
470 call accel_set_kernel_arg(kernel, 1, map)
471 call accel_set_kernel_arg(kernel, 2, xx%ff_device)
472 call accel_set_kernel_arg(kernel, 3, log2(int(xx%pack_size_real(1), int32)))
473 call accel_set_kernel_arg(kernel, 4, yy%ff_device)
474 call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))
475
476 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size_real(1)
477
478 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
479 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
480
481 call accel_kernel_run(kernel, (/xx%pack_size_real(1), dim2, dim3/), (/xx%pack_size_real(1), localsize, 1_int64/))
482 end if
483
485 end subroutine batch_copy_with_map_accel
487 ! -------------------------
491 !
492 subroutine batch_split_complex(np, xx, yy, zz)
493 integer, intent(in) :: np
494 class(batch_t), intent(in) :: xx
495 class(batch_t), intent(inout) :: yy
496 class(batch_t), intent(inout) :: zz
497
498 integer :: ist_linear, ip
499 type(accel_kernel_t), save :: kernel
500 integer(int64) :: localsize, dim3, dim2
501
502 push_sub(batch_split_complex)
503
504 assert(xx%type() == type_cmplx)
505 assert(yy%type() == type_float)
506 assert(zz%type() == type_float)
507 assert(xx%status() == yy%status())
508 assert(xx%status() == zz%status())
509
510 select case (xx%status())
511 case (batch_not_packed)
512 do ist_linear = 1, xx%nst_linear
513 !$omp parallel do schedule(static)
514 do ip = 1, np
515 yy%dff_linear(ip, ist_linear) = real(xx%zff_linear(ip, ist_linear), real64)
516 zz%dff_linear(ip, ist_linear) = aimag(xx%zff_linear(ip, ist_linear))
517 end do
518 end do
519 case (batch_packed)
520 !$omp parallel do private(ist_linear) schedule(static)
521 do ip = 1, np
522 do ist_linear = 1, xx%nst_linear
523 yy%dff_pack(ist_linear, ip) = real(xx%zff_pack(ist_linear, ip), real64)
524 zz%dff_pack(ist_linear, ip) = aimag(xx%zff_pack(ist_linear, ip))
525 end do
526 end do
527 case (batch_device_packed)
528 call accel_kernel_start_call(kernel, 'split.cl', 'split_complex')
529
530 call accel_set_kernel_arg(kernel, 0, int(xx%pack_size(2), int32))
531 call accel_set_kernel_arg(kernel, 1, xx%ff_device)
532 call accel_set_kernel_arg(kernel, 2, log2(int(xx%pack_size(1), int32)))
533 call accel_set_kernel_arg(kernel, 3, yy%ff_device)
534 call accel_set_kernel_arg(kernel, 4, log2(int(yy%pack_size(1), int32)))
535 call accel_set_kernel_arg(kernel, 5, zz%ff_device)
536 call accel_set_kernel_arg(kernel, 6, log2(int(zz%pack_size(1), int32)))
537
538 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size(1)
539
540 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
541 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
542
543 call accel_kernel_run(kernel, (/xx%pack_size(1), dim2, dim3/), (/xx%pack_size(1), localsize, 1_int64/))
544 end select
545
546 pop_sub(batch_split_complex)
547 end subroutine batch_split_complex
548
549#include "real.F90"
550#include "batch_ops_inc.F90"
551#include "undef.F90"
552
553#include "complex.F90"
554#include "batch_ops_inc.F90"
555#include "undef.F90"
556
557end module batch_ops_oct_m
558
559!! Local Variables:
560!! mode: f90
561!! coding: utf-8
562!! End:
batchified version of the BLAS axpy routine:
Definition: batch_ops.F90:152
scale a batch by a constant or vector
Definition: batch_ops.F90:160
There are several ways how to call batch_set_state and batch_get_state:
Definition: batch_ops.F90:199
batchified version of
Definition: batch_ops.F90:168
double log2(double __x) __attribute__((__nothrow__
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:2144
subroutine, public accel_release_buffer(this)
Definition: accel.F90:1246
integer, parameter, public accel_mem_read_only
Definition: accel.F90:183
This module implements batches of mesh functions.
Definition: batch.F90:133
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:276
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:276
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:276
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:116
subroutine batch_copy_with_map_accel(np, map, xx, yy)
Definition: batch_ops.F90:548
subroutine, public dbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:1036
subroutine dbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:1462
subroutine dbatch_get_state3(this, ii, np, psi)
Definition: batch_ops.F90:1738
subroutine dbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:691
subroutine dbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:1915
subroutine zbatch_get_state2(this, index, np, psi)
Definition: batch_ops.F90:3166
subroutine zbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3203
subroutine dbatch_get_state2(this, index, np, psi)
Definition: batch_ops.F90:1721
subroutine dbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:1291
subroutine dbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:1155
subroutine dbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:1571
subroutine dbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:2066
subroutine dbatch_get_state1(this, ist, np, psi)
Write a get of state with np points from a batch.
Definition: batch_ops.F90:1609
subroutine dbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies an 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:780
subroutine dbatch_set_state3(this, ii, np, psi)
Write a set of state with np points into a batch.
Definition: batch_ops.F90:1589
subroutine zbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2185
subroutine zbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:2596
subroutine zbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:3360
subroutine zbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:2873
subroutine, public batch_split_complex(np, xx, yy, zz)
extract the real and imaginary parts of a complex batch
Definition: batch_ops.F90:586
subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
Definition: batch_ops.F90:487
subroutine dbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1833
subroutine zbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:3467
subroutine, public batch_set_zero(this, np, async)
fill all mesh functions of the batch with zero
Definition: batch_ops.F90:240
subroutine, public zbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:2513
subroutine zbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies an 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2274
subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_set_points.
Definition: batch_ops.F90:383
subroutine dbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:1430
subroutine dbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1758
integer pure function, public batch_points_block_size()
determine the device block size
Definition: batch_ops.F90:452
subroutine dbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:2024
subroutine zbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:2751
subroutine zbatch_get_state1(this, ist, np, psi)
Write a get of state with np points from a batch.
Definition: batch_ops.F90:3039
subroutine zbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:2905
subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
Definition: batch_ops.F90:459
subroutine zbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:2632
subroutine zbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:3001
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:2408
subroutine zbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3288
subroutine zbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:3425
subroutine, public dbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:931
subroutine zbatch_get_state3(this, ii, np, psi)
Definition: batch_ops.F90:3183
subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_get_points.
Definition: batch_ops.F90:316
subroutine zbatch_set_state3(this, ii, np, psi)
Write a set of state with np points into a batch.
Definition: batch_ops.F90:3019
subroutine batch_copy_with_map_cpu(np, map, xx, yy)
Definition: batch_ops.F90:521
subroutine dbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:1119
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:118
real(real64), parameter, public m_zero
Definition: global.F90:187
logical pure function, public not_in_openmp()
Definition: global.F90:453
complex(real64), parameter, public m_z0
Definition: global.F90:197
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:115
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1125
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:420
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:623
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:552
type(type_t), public type_float
Definition: types.F90:133
type(type_t), public type_integer
Definition: types.F90:135
integer pure function, public types_get_size(this)
Definition: types.F90:152
Class defining batches of mesh functions.
Definition: batch.F90:159