Octopus
batch_ops.F90
Go to the documentation of this file.
1!! Copyright (C) 2008 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
23module batch_ops_oct_m
24 use accel_oct_m
25 use batch_oct_m
26 use blas_oct_m
27 use debug_oct_m
28 use iso_c_binding
29 use global_oct_m
31 use math_oct_m
34 use types_oct_m
35
36 implicit none
37
38 private
39 public :: &
41 batch_axpy, &
42 batch_scal, &
43 batch_xpay, &
49 batch_mul, &
59
61 interface batch_axpy
62 module procedure dbatch_axpy_const
63 module procedure zbatch_axpy_const
64 module procedure dbatch_axpy_vec
65 module procedure zbatch_axpy_vec
66 end interface batch_axpy
67
69 interface batch_scal
70 module procedure dbatch_scal_const
71 module procedure zbatch_scal_const
72 module procedure dbatch_scal_vec
73 module procedure zbatch_scal_vec
74 end interface batch_scal
75
77 interface batch_xpay
78 module procedure dbatch_xpay_vec
79 module procedure zbatch_xpay_vec
80 module procedure dbatch_xpay_const
81 module procedure zbatch_xpay_const
82 end interface batch_xpay
83
84 interface batch_add_with_map
85 module procedure batch_add_with_map_cpu
86 module procedure batch_add_with_map_accel
87 end interface batch_add_with_map
88
89 interface batch_copy_with_map
90 module procedure batch_copy_with_map_cpu
91 module procedure batch_copy_with_map_accel
92 end interface batch_copy_with_map
93
108 interface batch_set_state
109 module procedure dbatch_set_state1
110 module procedure zbatch_set_state1
111 module procedure dbatch_set_state2
112 module procedure zbatch_set_state2
113 module procedure dbatch_set_state3
114 module procedure zbatch_set_state3
115 end interface batch_set_state
116
117 interface batch_get_state
118 module procedure dbatch_get_state1
119 module procedure zbatch_get_state1
120 module procedure dbatch_get_state2
121 module procedure zbatch_get_state2
122 module procedure dbatch_get_state3
123 module procedure zbatch_get_state3
124 end interface batch_get_state
125
126 interface batch_get_points
127 module procedure dbatch_get_points
128 module procedure zbatch_get_points
129 module procedure batch_get_points_accel
130 end interface batch_get_points
131
132 interface batch_set_points
133 module procedure dbatch_set_points
134 module procedure zbatch_set_points
135 module procedure batch_set_points_accel
136 end interface batch_set_points
137
138 interface batch_mul
139 module procedure dbatch_mul
140 module procedure zbatch_mul
141 end interface batch_mul
142
143
144contains
145
146 !--------------------------------------------------------------
148 subroutine batch_set_zero(this, np, async)
149 class(batch_t), intent(inout) :: this
150 integer, optional, intent(in) :: np
152 logical, optional, intent(in) :: async
153
154 integer :: ist_linear, ist, ip, np_
155
157
158 assert(not_in_openmp())
159
160 call profiling_in("BATCH_SET_ZERO")
161
162 select case (this%status())
164 np_ = optional_default(np, int(this%pack_size(2), int32))
165 assert(np_ <= int(this%pack_size(2), int32))
166 call accel_set_buffer_to_zero(this%ff_device, this%type(), (int(this%pack_size(1), int32) * np_), async=async)
167
168 case (batch_packed)
169 np_ = optional_default(np, int(this%pack_size(2), int32))
170 assert(np_ <= int(this%pack_size(2), int32))
171 if (this%type() == type_float) then
172 !$omp parallel do private(ist) schedule(static)
173 do ip = 1, np_
174 !$omp simd
175 do ist = 1, int(this%pack_size(1), int32)
176 this%dff_pack(ist, ip) = m_zero
177 end do
178 end do
179 else
180 !$omp parallel do private(ist) schedule(static)
181 do ip = 1, np_
182 !$omp simd
183 do ist = 1, int(this%pack_size(1), int32)
184 this%zff_pack(ist, ip) = m_z0
185 end do
186 end do
187 end if
188
189 case (batch_not_packed)
190 if (this%type() == type_float) then
191 np_ = optional_default(np, ubound(this%dff_linear, dim=1))
192 assert(np_ <= ubound(this%dff_linear, dim=1))
193 do ist_linear = 1, this%nst_linear
194 !$omp parallel do schedule(static)
195 do ip = 1, np_
196 this%dff_linear(ip, ist_linear) = m_zero
197 end do
198 end do
199 else
200 np_ = optional_default(np, ubound(this%zff_linear, dim=1))
201 assert(np_ <= ubound(this%zff_linear, dim=1))
202 do ist_linear = 1, this%nst_linear
203 !$omp parallel do schedule(static)
204 do ip = 1, np_
205 this%zff_linear(ip, ist_linear) = m_z0
206 end do
207 end do
208 end if
209
210 case default
211 message(1) = "batch_set_zero: unknown batch status."
213
214 end select
215
216 call profiling_out("BATCH_SET_ZERO")
217
218 pop_sub(batch_set_zero)
219 end subroutine batch_set_zero
220
221 ! --------------------------------------------------------------
223 !
224 subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
225 class(batch_t), intent(in) :: this
226 integer, intent(in) :: sp
227 integer, intent(in) :: ep
228 type(accel_mem_t), intent(inout) :: psi
229 integer, intent(in) :: ldpsi1
230 integer, intent(in) :: ldpsi2
231
232 integer :: tsize, ii, it
233 type(accel_kernel_t), save :: kernel
234 integer, allocatable :: linear_to_ist(:), linear_to_idim(:)
235 type(accel_mem_t) :: buff_linear_to_ist, buff_linear_to_idim
236
237 push_sub(batch_get_points_accel)
238 call profiling_in("GET_POINTS")
239
240 select case (this%status())
242 call messages_not_implemented('batch_get_points_accel for non-CL batches')
245
246 tsize = types_get_size(this%type())/types_get_size(type_float)
247 safe_allocate(linear_to_ist(1:this%nst_linear*tsize))
248 safe_allocate(linear_to_idim(1:this%nst_linear*tsize))
249 do ii = 1, this%nst_linear
250 do it = 1, tsize
251 linear_to_ist(tsize*(ii-1)+it) = tsize*(this%linear_to_ist(ii) - 1) + it - 1
252 linear_to_idim(tsize*(ii-1)+it) = this%linear_to_idim(ii) - 1
253 end do
254 end do
255
256 call accel_create_buffer(buff_linear_to_ist, accel_mem_read_only, type_integer, this%nst_linear*tsize)
257 call accel_write_buffer(buff_linear_to_ist, this%nst_linear*tsize, linear_to_ist)
258 call accel_create_buffer(buff_linear_to_idim, accel_mem_read_only, type_integer, this%nst_linear*tsize)
259 call accel_write_buffer(buff_linear_to_idim, this%nst_linear*tsize, linear_to_idim)
260
261 call accel_kernel_start_call(kernel, 'points.cl', 'get_points')
262
263 call accel_set_kernel_arg(kernel, 0, sp)
264 call accel_set_kernel_arg(kernel, 1, ep)
265 call accel_set_kernel_arg(kernel, 2, buff_linear_to_ist)
266 call accel_set_kernel_arg(kernel, 3, buff_linear_to_idim)
267 call accel_set_kernel_arg(kernel, 4, this%nst_linear*tsize)
268 call accel_set_kernel_arg(kernel, 5, this%ff_device)
269 call accel_set_kernel_arg(kernel, 6, int(this%pack_size_real(1), int32))
270 call accel_set_kernel_arg(kernel, 7, psi)
271 call accel_set_kernel_arg(kernel, 8, ldpsi1*tsize)
272 call accel_set_kernel_arg(kernel, 9, ldpsi2)
273
274 call accel_kernel_run(kernel, (/this%pack_size_real(1), int(ep - sp + 1, int64)/), (/this%pack_size_real(1), 1_int64/))
275
276 call accel_release_buffer(buff_linear_to_ist)
277 call accel_release_buffer(buff_linear_to_idim)
278 safe_deallocate_a(linear_to_ist)
279 safe_deallocate_a(linear_to_idim)
280
281 end select
282
283 call profiling_out("GET_POINTS")
284
286 end subroutine batch_get_points_accel
287
288 ! --------------------------------------------------------------
290 !
291 subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
292 class(batch_t), intent(inout) :: this
293 integer, intent(in) :: sp
294 integer, intent(in) :: ep
295 type(accel_mem_t), intent(in) :: psi
296 integer, intent(in) :: ldpsi1
297 integer, intent(in) :: ldpsi2
298
299 integer :: tsize, ii, it
300 type(accel_kernel_t), save :: kernel
301 integer, allocatable :: linear_to_ist(:), linear_to_idim(:)
302 type(accel_mem_t) :: buff_linear_to_ist, buff_linear_to_idim
303
304 push_sub(batch_set_points_accel)
305 call profiling_in("SET_POINTS")
306
307 select case (this%status())
309 call messages_not_implemented('batch_set_points_accel for non-CL batches')
310
312
313 tsize = types_get_size(this%type())/types_get_size(type_float)
314 safe_allocate(linear_to_ist(1:this%nst_linear*tsize))
315 safe_allocate(linear_to_idim(1:this%nst_linear*tsize))
316 do ii = 1, this%nst_linear
317 do it = 1, tsize
318 linear_to_ist(tsize*(ii-1)+it) = tsize*(this%linear_to_ist(ii) - 1) + it - 1
319 linear_to_idim(tsize*(ii-1)+it) = this%linear_to_idim(ii) - 1
320 end do
321 end do
322
323 call accel_create_buffer(buff_linear_to_ist, accel_mem_read_only, type_integer, this%nst_linear*tsize)
324 call accel_write_buffer(buff_linear_to_ist, this%nst_linear*tsize, linear_to_ist)
325 call accel_create_buffer(buff_linear_to_idim, accel_mem_read_only, type_integer, this%nst_linear*tsize)
326 call accel_write_buffer(buff_linear_to_idim, this%nst_linear*tsize, linear_to_idim)
327
328 call accel_kernel_start_call(kernel, 'points.cl', 'set_points')
329
330 call accel_set_kernel_arg(kernel, 0, sp)
331 call accel_set_kernel_arg(kernel, 1, ep)
332 call accel_set_kernel_arg(kernel, 2, buff_linear_to_ist)
333 call accel_set_kernel_arg(kernel, 3, buff_linear_to_idim)
334 call accel_set_kernel_arg(kernel, 4, this%nst_linear*tsize)
335 call accel_set_kernel_arg(kernel, 5, psi)
336 call accel_set_kernel_arg(kernel, 6, ldpsi1*tsize)
337 call accel_set_kernel_arg(kernel, 7, ldpsi2)
338 call accel_set_kernel_arg(kernel, 8, this%ff_device)
339 call accel_set_kernel_arg(kernel, 9, int(this%pack_size_real(1), int32))
340
341 call accel_kernel_run(kernel, (/this%pack_size_real(1), int(ep - sp + 1, int64)/), (/this%pack_size_real(1), 1_int64/))
342
343 call accel_release_buffer(buff_linear_to_ist)
344 call accel_release_buffer(buff_linear_to_idim)
345 safe_deallocate_a(linear_to_ist)
346 safe_deallocate_a(linear_to_idim)
347
348 end select
349
350 call profiling_out("SET_POINTS")
351
353 end subroutine batch_set_points_accel
354
355 ! -------------------------
359 !
360 integer pure function batch_points_block_size() result(block_size)
361
362 block_size = 61440
363
364 end function batch_points_block_size
365
366! -------------------------
367 subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
368 integer, intent(in) :: np
369 integer, intent(in) :: map(:)
370 class(batch_t), intent(in) :: xx
371 class(batch_t), intent(in) :: yy
372 class(batch_t), intent(inout) :: zz
373 type(accel_mem_t) :: buff_map
374
375 push_sub(batch_add_with_map_cpu)
376
377 if (xx%status() /= batch_device_packed) then
378 if (xx%type() == type_float) then
379 call dbatch_add_with_map(np, map, xx, yy, zz)
380 else
381 call zbatch_add_with_map(np, map, xx, yy, zz)
382 end if
383 else
384 ! copy map to GPU if not already there
385 call accel_create_buffer(buff_map, accel_mem_read_only, type_integer, np)
386 call accel_write_buffer(buff_map, np, map)
387 call batch_add_with_map_accel(np, buff_map, xx, yy, zz)
388 call accel_release_buffer(buff_map)
389 end if
390
392 end subroutine batch_add_with_map_cpu
393
394! -------------------------
395 subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
396 integer, intent(in) :: np
397 class(accel_mem_t), intent(in) :: map
398 class(batch_t), intent(in) :: xx
399 class(batch_t), intent(in) :: yy
400 class(batch_t), intent(inout) :: zz
401
402 type(accel_kernel_t), save :: kernel
403 integer(int64) :: localsize, dim3, dim2
404
406
407 call accel_kernel_start_call(kernel, 'copy.cl', 'add_with_map')
408
409 call accel_set_kernel_arg(kernel, 0, np)
410 call accel_set_kernel_arg(kernel, 1, map)
411 call accel_set_kernel_arg(kernel, 2, xx%ff_device)
412 call accel_set_kernel_arg(kernel, 3, log2(int(xx%pack_size_real(1), int32)))
413 call accel_set_kernel_arg(kernel, 4, yy%ff_device)
414 call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))
415 call accel_set_kernel_arg(kernel, 6, zz%ff_device)
416 call accel_set_kernel_arg(kernel, 7, log2(int(zz%pack_size_real(1), int32)))
417
418 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size_real(1)
419
420 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
421 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
422
423 call accel_kernel_run(kernel, (/xx%pack_size_real(1), dim2, dim3/), (/xx%pack_size_real(1), localsize, 1_int64/))
424
426 end subroutine batch_add_with_map_accel
427
428! -------------------------
429 subroutine batch_copy_with_map_cpu(np, map, xx, yy)
430 integer, intent(in) :: np
431 integer, intent(in) :: map(:)
432 class(batch_t), intent(in) :: xx
433 class(batch_t), intent(inout) :: yy
434 type(accel_mem_t) :: buff_map
435
437
438 if (xx%status() /= batch_device_packed) then
439 if (xx%type() == type_float) then
440 call dbatch_copy_with_map(np, map, xx, yy)
441 else
442 call zbatch_copy_with_map(np, map, xx, yy)
443 end if
444 else
445 ! copy map to GPU if not already there
446 call accel_create_buffer(buff_map, accel_mem_read_only, type_integer, np)
447 call accel_write_buffer(buff_map, np, map)
448 call batch_copy_with_map_accel(np, buff_map, xx, yy)
449 call accel_release_buffer(buff_map)
450 end if
451
453 end subroutine batch_copy_with_map_cpu
454
455! -------------------------
456 subroutine batch_copy_with_map_accel(np, map, xx, yy)
457 integer, intent(in) :: np
458 class(accel_mem_t), intent(in) :: map
459 class(batch_t), intent(in) :: xx
460 class(batch_t), intent(inout) :: yy
461
462 type(accel_kernel_t), save :: kernel
463 integer(int64) :: localsize, dim3, dim2
464
466
467 call accel_kernel_start_call(kernel, 'copy.cl', 'copy_with_map')
468
469 ! execute only if map has at least one element
470 if (np > 0) then
471 call accel_set_kernel_arg(kernel, 0, np)
472 call accel_set_kernel_arg(kernel, 1, map)
473 call accel_set_kernel_arg(kernel, 2, xx%ff_device)
474 call accel_set_kernel_arg(kernel, 3, log2(int(xx%pack_size_real(1), int32)))
475 call accel_set_kernel_arg(kernel, 4, yy%ff_device)
476 call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))
477
478 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size_real(1)
479
480 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
481 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
482
483 call accel_kernel_run(kernel, (/xx%pack_size_real(1), dim2, dim3/), (/xx%pack_size_real(1), localsize, 1_int64/))
484 end if
485
487 end subroutine batch_copy_with_map_accel
488
489 ! -------------------------
493 !
494 subroutine batch_split_complex(np, xx, yy, zz)
495 integer, intent(in) :: np
496 class(batch_t), intent(in) :: xx
497 class(batch_t), intent(inout) :: yy
498 class(batch_t), intent(inout) :: zz
499
500 integer :: ist_linear, ip
501 type(accel_kernel_t), save :: kernel
502 integer(int64) :: localsize, dim3, dim2
503
504 push_sub(batch_split_complex)
505
506 assert(xx%type() == type_cmplx)
507 assert(yy%type() == type_float)
508 assert(zz%type() == type_float)
509 assert(xx%status() == yy%status())
510 assert(xx%status() == zz%status())
511
512 select case (xx%status())
513 case (batch_not_packed)
514 do ist_linear = 1, xx%nst_linear
515 !$omp parallel do schedule(static)
516 do ip = 1, np
517 yy%dff_linear(ip, ist_linear) = real(xx%zff_linear(ip, ist_linear), real64)
518 zz%dff_linear(ip, ist_linear) = aimag(xx%zff_linear(ip, ist_linear))
519 end do
520 end do
521 case (batch_packed)
522 !$omp parallel do private(ist_linear) schedule(static)
523 do ip = 1, np
524 do ist_linear = 1, xx%nst_linear
525 yy%dff_pack(ist_linear, ip) = real(xx%zff_pack(ist_linear, ip), real64)
526 zz%dff_pack(ist_linear, ip) = aimag(xx%zff_pack(ist_linear, ip))
527 end do
528 end do
529 case (batch_device_packed)
530 call accel_kernel_start_call(kernel, 'split.cl', 'split_complex')
531
532 call accel_set_kernel_arg(kernel, 0, int(xx%pack_size(2), int32))
533 call accel_set_kernel_arg(kernel, 1, xx%ff_device)
534 call accel_set_kernel_arg(kernel, 2, log2(int(xx%pack_size(1), int32)))
535 call accel_set_kernel_arg(kernel, 3, yy%ff_device)
536 call accel_set_kernel_arg(kernel, 4, log2(int(yy%pack_size(1), int32)))
537 call accel_set_kernel_arg(kernel, 5, zz%ff_device)
538 call accel_set_kernel_arg(kernel, 6, log2(int(zz%pack_size(1), int32)))
539
540 localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size(1)
541
542 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
543 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))
544
545 call accel_kernel_run(kernel, (/xx%pack_size(1), dim2, dim3/), (/xx%pack_size(1), localsize, 1_int64/))
546 end select
547
548 pop_sub(batch_split_complex)
549 end subroutine batch_split_complex
550
551#include "real.F90"
552#include "batch_ops_inc.F90"
553#include "undef.F90"
554
555#include "complex.F90"
556#include "batch_ops_inc.F90"
557#include "undef.F90"
558
559end module batch_ops_oct_m
560
561!! Local Variables:
562!! mode: f90
563!! coding: utf-8
564!! End:
batchified version of the BLAS axpy routine:
Definition: batch_ops.F90:156
scale a batch by a constant or vector
Definition: batch_ops.F90:164
There are several ways how to call batch_set_state and batch_get_state:
Definition: batch_ops.F90:203
batchified version of
Definition: batch_ops.F90:172
double log2(double __x) __attribute__((__nothrow__
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1381
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:920
integer, parameter, public accel_mem_read_only
Definition: accel.F90:196
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
subroutine zbatch_get_state3(this, ii, np, psi, async)
Definition: batch_ops.F90:3248
subroutine dbatch_get_state3(this, ii, np, psi, async)
Definition: batch_ops.F90:1747
subroutine batch_copy_with_map_accel(np, map, xx, yy)
Definition: batch_ops.F90:552
subroutine zbatch_get_state1(this, ist, np, psi, async)
Write a get of state with np points from a batch.
Definition: batch_ops.F90:3102
subroutine, public zbatch_copy_with_map_to_array(np, map, xx, array)
Transfer a batch from the mesh to an array on the submesh (defined by a map)
Definition: batch_ops.F90:3583
subroutine, public dbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:1040
subroutine dbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:1466
subroutine dbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:695
subroutine dbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:1934
subroutine zbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3269
subroutine dbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:1295
subroutine dbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:1159
subroutine dbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:1576
subroutine dbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:2085
subroutine, public dbatch_copy_with_map_to_array(np, map, xx, array)
Transfer a batch from the mesh to an array on the submesh (defined by a map)
Definition: batch_ops.F90:2125
subroutine dbatch_get_state2(this, index, np, psi, async)
Definition: batch_ops.F90:1729
subroutine dbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies an 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:784
subroutine dbatch_set_state3(this, ii, np, psi)
Write a set of state with np points into a batch.
Definition: batch_ops.F90:1594
subroutine zbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2247
subroutine zbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:2658
subroutine zbatch_get_state2(this, index, np, psi, async)
Definition: batch_ops.F90:3230
subroutine zbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:3436
subroutine zbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:2935
subroutine, public batch_split_complex(np, xx, yy, zz)
extract the real and imaginary parts of a complex batch
Definition: batch_ops.F90:590
subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
Definition: batch_ops.F90:491
subroutine dbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1848
subroutine dbatch_get_state1(this, ist, np, psi, async)
Write a get of state with np points from a batch.
Definition: batch_ops.F90:1614
subroutine zbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:3543
subroutine, public batch_set_zero(this, np, async)
fill all mesh functions of the batch with zero
Definition: batch_ops.F90:244
subroutine, public zbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:2575
subroutine zbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies an 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2336
subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_set_points.
Definition: batch_ops.F90:387
subroutine dbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:1434
subroutine dbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1768
integer pure function, public batch_points_block_size()
determine the device block size
Definition: batch_ops.F90:456
subroutine dbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:2043
subroutine zbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:2813
subroutine zbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:2967
subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
Definition: batch_ops.F90:463
subroutine zbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:2694
subroutine zbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:3064
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:2470
subroutine zbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3361
subroutine zbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:3501
subroutine, public dbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:935
subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_get_points.
Definition: batch_ops.F90:320
subroutine zbatch_set_state3(this, ii, np, psi)
Write a set of state with np points into a batch.
Definition: batch_ops.F90:3082
subroutine batch_copy_with_map_cpu(np, map, xx, yy)
Definition: batch_ops.F90:525
subroutine dbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:1123
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:120
real(real64), parameter, public m_zero
Definition: global.F90:191
logical pure function, public not_in_openmp()
Definition: global.F90:493
complex(real64), parameter, public m_z0
Definition: global.F90:201
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1091
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_integer
Definition: types.F90:137
integer pure function, public types_get_size(this)
Definition: types.F90:154
Class defining batches of mesh functions.
Definition: batch.F90:161