! Component declarations of the batch type. Only part of the type is visible in
! this listing (the type header and some components are missing); the leading
! numbers are the line numbers of the original source file.
!
! nst: upper bound of the ist(:) index array allocated at initialisation.
68 integer,
public :: nst
! dim: combined with nst to give nst_linear (= nst*dim at initialisation).
69 integer,
public :: dim
! ist_idim_index(i, 1) is the state index and ist_idim_index(i, 2) the idim
! index that belong to linear index i.
72 integer,
allocatable :: ist_idim_index(:, :)
! ist(:): state indices, one entry per state held in the batch.
77 integer,
allocatable,
public :: ist(:)
! is_allocated: tested before the unpacked host storage is released.
84 logical :: is_allocated
! nst_linear: number of linear (state, idim) entries.
87 integer,
public :: nst_linear
! status_host: status value that is read as an unpack target and updated once
! the host-side unpack has been carried out.
93 integer :: status_host
! type_of: NOTE(review): presumably the value returned by type() - confirm.
102 type(type_t) :: type_of
! Counters that are incremented when packing and decremented when unpacking;
! the batch counts as packed while either of them is positive.
103 integer :: device_buffer_count
104 integer :: host_buffer_count
! special_memory: appears to select a separate release path for the data
! arrays. NOTE(review): presumably hardware-aware/pinned memory - confirm.
105 logical :: special_memory
! needs_finish_unpack: set during unpacking and cleared again after the packed
! device buffer has been released.
106 logical :: needs_finish_unpack
! Unpacked data, real (d) and complex (z) variants, rank 3.
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
111 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
! Rank-2 access to the unpacked data, indexed (point, linear index). These
! pointers are only ever nullified in the visible code.
! NOTE(review): presumably views onto dff/zff - confirm.
112 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
113 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
! Packed data, real and complex variants, indexed (linear index, point).
116 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
117 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
! pack_size(1) is nst_linear padded to a power of two, pack_size(2) the
! (possibly accelerator-padded) number of points.
119 integer(int64),
public :: pack_size(1:2)
! pack_size_real: same as pack_size, with the first extent doubled for complex
! batches.
121 integer(int64),
public :: pack_size_real(1:2)
! ff_device: accelerator buffer used by the accel_* read/write calls.
125 type(accel_mem_t),
public :: ff_device
! Pack-status constants. Only the first entry of the continued parameter
! statement is visible in this listing.
183 integer,
public,
parameter :: &
184 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
! Release of a batch (fragment). `copy` is optional and is not read in the
! lines visible here.
202 class(
batch_t),
intent(inout) :: this
203 logical,
optional,
intent(in) :: copy
! Batch owns its memory and is packed: release the packed device and host
! buffers (the conditions guarding each call are not visible in this listing).
208 if (this%own_memory .and. this%is_packed())
then
211 call this%deallocate_packed_device()
214 call this%deallocate_packed_host()
! Zero both counters so that is_packed() no longer reports the batch as packed.
218 this%host_buffer_count = 0
219 this%device_buffer_count = 0
! Unpacked host storage is only released when it was allocated.
224 if (this%is_allocated)
then
225 call this%deallocate_unpacked_host()
! Release the index arrays.
228 safe_deallocate_a(this%ist_idim_index)
229 safe_deallocate_a(this%ist)
! NOTE(review): the jump in source line numbers suggests that the lines below
! belong to a different routine whose header is missing here - confirm.
244 this%is_allocated = .false.
! special_memory selects how dff/zff are released; the statements inside the
! associated() checks are not visible in this listing.
246 if (this%special_memory)
then
247 if (
associated(this%dff))
then
250 if (
associated(this%zff))
then
254 safe_deallocate_p(this%dff)
255 safe_deallocate_p(this%zff)
! The linear pointers are only nullified, not deallocated, in the visible code.
258 nullify(this%dff_linear)
260 nullify(this%zff_linear)
! Release of the packed host arrays dff_pack/zff_pack (fragment).
271 class(
batch_t),
intent(inout) :: this
! special_memory selects the release path; the statements inside the
! associated() checks are not visible in this listing.
275 if (this%special_memory)
then
276 if (
associated(this%dff_pack))
then
279 if (
associated(this%zff_pack))
then
283 safe_deallocate_p(this%dff_pack)
284 safe_deallocate_p(this%zff_pack)
! Leave both packed pointers disassociated.
286 nullify(this%dff_pack)
287 nullify(this%zff_pack)
296 class(
batch_t),
intent(inout) :: this
! Allocation of the unpacked host arrays (fragment): forwards to the d- or
! z-prefixed variant. The condition that chooses between the two calls is not
! visible in this listing.
311 class(
batch_t),
intent(inout) :: this
316 call this%dallocate_unpacked_host()
318 call this%zallocate_unpacked_host()
! Allocation of the packed host arrays (fragment): forwards to the d- or
! z-prefixed variant. The condition that chooses between the two calls is not
! visible in this listing.
330 class(
batch_t),
intent(inout) :: this
335 call this%dallocate_packed_host()
337 call this%zallocate_packed_host()
349 class(
batch_t),
intent(inout) :: this
354 product(this%pack_size))
! Initialisation of an empty batch (fragment).
!   this : batch to set up; intent(out), so every component starts undefined
!   dim  : inner extent; nst_linear is set to nst*dim
!   nst  : number of states
!   np   : not read in the lines visible here
367 type(
batch_t),
intent(out) :: this
368 integer,
intent(in) :: dim
369 integer,
intent(in) :: nst
370 integer,
intent(in) :: np
! Start with all state flags cleared.
374 this%is_allocated = .false.
375 this%own_memory = .false.
376 this%special_memory = .false.
377 this%needs_finish_unpack = .false.
382 this%nst_linear = nst*dim
! Zero counters: the batch starts out unpacked.
385 this%device_buffer_count = 0
386 this%host_buffer_count = 0
! Index arrays; they are allocated here and filled in elsewhere.
391 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
392 safe_allocate(this%ist(1:this%nst))
! No data storage is attached yet.
394 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
395 nullify(this%dff_pack, this%zff_pack)
! Clone a batch into a newly allocated destination (fragment).
!   this      : source batch
!   dest      : destination, allocated here and then filled by copy_to
!   pack, copy_data, new_np : optional, forwarded unchanged to copy_to
407 class(
batch_t),
intent(in) :: this
408 class(
batch_t),
allocatable,
intent(out) :: dest
409 logical,
optional,
intent(in) :: pack
411 logical,
optional,
intent(in) :: copy_data
413 integer,
optional,
intent(in) :: new_np
! NOTE(review): dest is an allocatable intent(out) dummy, which the Fortran
! standard deallocates on entry, so the "previously allocated" branch below
! looks unreachable - confirm.
417 if (.not.
allocated(dest))
then
418 safe_allocate_type(
batch_t, dest)
420 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
! Absent optionals are passed straight through to copy_to.
424 call this%copy_to(dest, pack, copy_data, new_np)
! Clone a batch into a newly allocated array of n_batches destinations
! (fragment).
!   this      : source batch
!   dest      : destination array, allocated here with bounds 1:n_batches
!   n_batches : number of copies to create
!   pack, copy_data : optional, forwarded unchanged to copy_to
432 class(
batch_t),
intent(in) :: this
433 class(
batch_t),
allocatable,
intent(out) :: dest(:)
434 integer,
intent(in) :: n_batches
435 logical,
optional,
intent(in) :: pack
437 logical,
optional,
intent(in) :: copy_data
! NOTE(review): dest is an allocatable intent(out) dummy, which the Fortran
! standard deallocates on entry, so the "previously allocated" branch below
! looks unreachable - confirm.
444 if (.not.
allocated(dest))
then
445 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
447 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
! Executed per element ib; the enclosing loop is not visible in this listing.
452 call this%copy_to(dest(ib), pack, copy_data)
! Make a copy of a batch (fragment).
!   this      : source batch
!   dest      : destination; intent(out), initialised here via dbatch_init or
!               zbatch_init
!   pack      : optional; defaults to this%is_packed() where it is evaluated
!   copy_data : optional; not read in the lines visible here
!   new_np    : optional; not read in the lines visible here
!   special   : optional; only its presence is tested in the visible lines
463 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special)
464 class(
batch_t),
intent(in) :: this
465 class(
batch_t),
intent(out) :: dest
466 logical,
optional,
intent(in) :: pack
468 logical,
optional,
intent(in) :: copy_data
470 integer,
optional,
intent(in) :: new_np
471 logical,
optional,
intent(in) :: special
474 logical :: host_packed, special_
! The destination is created host-packed when the source holds a packed host
! buffer.
481 host_packed = this%host_buffer_count > 0
! NOTE(review): when `special` is present its value is never read; special_ is
! taken from this%special_memory instead. Confirm whether `special_ = special`
! was intended.
485 if (
present(special))
then
486 special_ = this%special_memory
! Without `special`, special memory is requested only when the source has no
! device buffer.
488 special_ = this%special_memory .and. .not. this%device_buffer_count > 0
! Real or complex initialisation over states 1..nst; the type test selecting
! between the two calls is not visible in this listing.
492 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
494 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
496 message(1) =
"Internal error: unknown batch type in batch_copy_to."
! Pack the destination, without copying data, if its status still differs from
! the source and packing was requested (default: the source's packed state).
500 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
! Overwrite the index arrays of the destination with those of the source.
502 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
503 dest%ist(1:this%nst) = this%ist(1:this%nst)
! With the assertion holding, min(this%np, np_) is simply this%np.
506 assert(np_ == this%np)
507 call this%copy_data_to(min(this%np, np_), dest)
! Pure accessor returning a type_t value for the batch; the body is not
! visible in this listing.
518 type(
type_t)
pure function batch_type(this) result(btype)
519 class(
batch_t),
intent(in) :: this
! Map the batch type to an integer code: 1 for type_float, 2 for type_cmplx.
! The assignment of btype, and any value used for other types, is in lines
! that are not visible in this listing.
527 integer pure function batch_type_as_integer(this) result(itype)
528 class(
batch_t),
intent(in) :: this
530 type(type_t) :: btype
534 if (btype == type_float) itype = 1
535 if (btype == type_cmplx) itype = 2
! Return the current status of the batch, i.e. the status_of component.
544 integer pure function batch_status(this) result(bstatus)
545 class(
batch_t),
intent(in) :: this
547 bstatus = this%status_of
! Return .true. when the batch holds at least one packed buffer, on the device
! or on the host.
552 logical pure function batch_is_packed(this) result(in_buffer)
553 class(
batch_t),
intent(in) :: this
555 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
! Size computation for the packed data (fragment). The initial value of `size`
! is assigned in a line that is not visible in this listing.
561 class(
batch_t),
intent(inout) :: this
! With an accelerator the running size is first replaced by its padded value.
564 if (accel_is_enabled())
size = accel_padded_size(size)
! Multiply by the power-of-two padded number of linear entries and by the
! per-element size reported for the batch type.
565 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
! Pack the data of a batch (fragment).
!   this  : batch to pack
!   copy  : optional, defaults to .true.
!   async : optional, defaults to .false.
577 class(
batch_t),
intent(inout) :: this
578 logical,
optional,
intent(in) :: copy
579 logical,
optional,
intent(in) :: async
584 integer :: source, target
588 call profiling_in(
"BATCH_DO_PACK")
590 copy_ = optional_default(copy, .
true.)
592 async_ = optional_default(async, .false.)
! `source` is the current status; `target` is chosen in lines that are not
! visible here, apparently depending on whether an accelerator is enabled.
595 source = this%status()
598 if (accel_is_enabled())
then
! Work is only needed when the status actually changes.
608 if (source /=
target)
then
611 call this%allocate_packed_device()
625 call this%allocate_packed_host()
! Type-specific branches; their statements are not visible in this listing.
630 if (this%type() == type_float)
then
632 else if (this%type() == type_cmplx)
then
! A batch that owns its memory gives up the unpacked host copy.
636 if (this%own_memory)
call this%deallocate_unpacked_host()
! One of the two counters is incremented; the selecting condition is not
! visible here.
642 this%device_buffer_count = this%device_buffer_count + 1
644 this%host_buffer_count = this%host_buffer_count + 1
647 call profiling_out(
"BATCH_DO_PACK")
! Unpack the data of a batch (fragment).
!   this  : batch to unpack
!   copy  : optional, defaults to .true.
!   force : optional, defaults to .false.; unpack even if the counter is not 1
!   async : optional, defaults to .false.
656 class(
batch_t),
intent(inout) :: this
657 logical,
optional,
intent(in) :: copy
658 logical,
optional,
intent(in) :: force
659 logical,
optional,
intent(in) :: async
662 logical :: copy_, force_, async_
663 integer :: source, target
667 call profiling_in(
"BATCH_DO_UNPACK")
669 copy_ = optional_default(copy, .
true.)
671 force_ = optional_default(force, .false.)
673 async_ = optional_default(async, .false.)
676 source = this%status()
! One of the target choices; the surrounding selection is not visible here.
683 target = this%status_host
687 if (source /=
target)
then
! Host side: only the last holder of the packed buffer (or a forced call)
! performs the unpack.
690 if (this%host_buffer_count == 1 .or. force_)
then
691 if (this%own_memory)
call this%allocate_unpacked_host()
! Data is copied back on request, and always when the batch owns its memory.
693 if (copy_ .or. this%own_memory)
then
694 if (this%type() == type_float)
then
696 else if (this%type() == type_cmplx)
then
700 call this%deallocate_packed_host()
701 this%status_host =
target
702 this%status_of =
target
! NOTE(review): presumably set to 1 so that the decrement that follows leaves
! the counter at zero after a forced unpack - confirm.
703 this%host_buffer_count = 1
705 this%host_buffer_count = this%host_buffer_count - 1
! Device side: same counting scheme as for the host buffer.
707 if (this%device_buffer_count == 1 .or. force_)
then
! Record that a later finish step still has to be carried out.
719 this%needs_finish_unpack = .
true.
721 call this%deallocate_packed_device()
723 this%status_of =
target
724 this%device_buffer_count = 1
726 this%device_buffer_count = this%device_buffer_count - 1
730 call profiling_out(
"BATCH_DO_UNPACK")
! Complete a pending unpack (fragment): when needs_finish_unpack is set, the
! packed device buffer is released and the flag is cleared.
738 class(
batch_t),
intent(inout) :: this
741 if (this%needs_finish_unpack)
then
743 call this%deallocate_packed_device()
744 this%needs_finish_unpack = .false.
! Transfer the unpacked host data into the device buffer ff_device (fragment).
752 class(
batch_t),
intent(inout) :: this
! unroll: number of linear entries handled per kernel launch; its value is
! assigned in a line that is not visible in this listing.
755 integer(int64) :: unroll
756 type(accel_mem_t) :: tmp
757 type(accel_kernel_t),
pointer :: kernel
761 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
! Single entry: the host array is written to the device buffer directly.
762 if (this%nst_linear == 1)
then
764 if (this%type() == type_float)
then
765 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
766 else if (this%type() == type_cmplx)
then
767 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
! Several entries: the kernel is chosen per type (statements not visible here)
! and the data is staged through a temporary device buffer.
775 if (this%type() == type_float)
then
783 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
! Process the linear entries in groups of `unroll`.
785 do ist = 1, this%nst_linear, int(unroll, int32)
! Upload each entry of the group into its own slot of pack_size(2) elements.
788 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
790 if (this%type() == type_float)
then
791 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
792 offset = (ist2 - ist)*this%pack_size(2))
794 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
795 offset = (ist2 - ist)*this%pack_size(2))
! Kernel arguments: the two pack extents, the zero-based start entry, the
! staging buffer and the destination buffer.
800 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
801 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
802 call accel_set_kernel_arg(kernel, 2, ist - 1)
803 call accel_set_kernel_arg(kernel, 3, tmp)
804 call accel_set_kernel_arg(kernel, 4, this%ff_device)
806 call profiling_in(
"CL_PACK")
807 call accel_kernel_run(kernel, (/this%pack_size(2), unroll/), (/accel_max_workgroup_size()/unroll, unroll/))
! Account for the transferred data with a real or complex sample value.
809 if (this%type() == type_float)
then
810 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
812 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
816 call profiling_out(
"CL_PACK")
820 call accel_release_buffer(tmp)
824 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
! Transfer the device buffer ff_device back into the unpacked host arrays
! (fragment).
831 class(
batch_t),
intent(inout) :: this
! unroll: number of linear entries handled per kernel launch; its value is
! assigned in a line that is not visible in this listing.
834 integer(int64) :: unroll
835 type(accel_mem_t) :: tmp
836 type(accel_kernel_t),
pointer :: kernel
839 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
! Single entry: the device buffer is read into the host array directly.
841 if (this%nst_linear == 1)
then
843 if (this%type() == type_float)
then
844 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
846 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
! Several entries: data is staged through a temporary device buffer; the
! kernel is chosen per type in statements that are not visible here.
853 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
855 if (this%type() == type_float)
then
! Process the linear entries in groups of `unroll`.
861 do ist = 1, this%nst_linear, int(unroll, int32)
! Kernel arguments: the two pack extents, the zero-based start entry, the
! source buffer and the staging buffer.
862 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
863 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
864 call accel_set_kernel_arg(kernel, 2, ist - 1)
865 call accel_set_kernel_arg(kernel, 3, this%ff_device)
866 call accel_set_kernel_arg(kernel, 4, tmp)
868 call profiling_in(
"CL_UNPACK")
869 call accel_kernel_run(kernel, (/unroll, this%pack_size(2)/), (/unroll, accel_max_workgroup_size()/unroll/))
! Account for the transferred data with a real or complex sample value.
871 if (this%type() == type_float)
then
872 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
874 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
878 call profiling_out(
"CL_UNPACK")
! Download each entry of the group from its slot of pack_size(2) elements.
881 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
883 if (this%type() == type_float)
then
884 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
885 offset = (ist2 - ist)*this%pack_size(2))
887 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
888 offset = (ist2 - ist)*this%pack_size(2))
894 call accel_release_buffer(tmp)
897 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
! Write the packed host array to the device buffer ff_device (fragment).
!   this  : batch whose dff_pack or zff_pack array is transferred
!   async : optional, forwarded unchanged to accel_write_buffer
903 class(
batch_t),
intent(inout) :: this
904 logical,
optional,
intent(in) :: async
909 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
! The whole packed array, product(pack_size) elements, goes in one transfer.
910 if (this%type() == type_float)
then
911 call accel_write_buffer(this%ff_device, product(this%pack_size), this%dff_pack, async=async)
913 call accel_write_buffer(this%ff_device, product(this%pack_size), this%zff_pack, async=async)
915 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
! Read the device buffer ff_device into the packed host array (fragment).
!   this  : batch whose dff_pack or zff_pack array receives the data
!   async : optional, forwarded unchanged to accel_read_buffer
922 class(
batch_t),
intent(inout) :: this
923 logical,
optional,
intent(in) :: async
928 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
! The whole packed array, product(pack_size) elements, comes in one transfer.
929 if (this%type() == type_float)
then
930 call accel_read_buffer(this%ff_device, product(this%pack_size), this%dff_pack, async=async)
932 call accel_read_buffer(this%ff_device, product(this%pack_size), this%zff_pack, async=async)
934 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
! Inverse index lookup (fragment): find the linear index whose row of
! ist_idim_index equals the combined index cind.
945 class(
batch_t),
intent(in) :: this
946 integer,
intent(in) :: cind(:)
! Linear search that stops at the first matching row.
948 do index = 1, this%nst_linear
949 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
! If no row matched, the loop leaves index at nst_linear + 1 and this fails.
952 assert(index <= this%nst_linear)
! Direct index lookup: convert a combined index cind into a linear index.
! The result for a one-element cind is assigned in a line that is not visible
! in this listing.
961 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
962 class(
batch_t),
intent(in) :: this
963 integer,
intent(in) :: cind(:)
965 if (ubound(cind, dim = 1) == 1)
then
! Two-element case: cind(1) counts blocks of this%dim entries starting at 1,
! and cind(2) is the position inside the block.
968 index = (cind(1) - 1)*this%dim + cind(2)
! Return the state index stored for a linear index (column 1 of
! ist_idim_index). linear_index is used as a subscript without a range check.
979 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
980 class(
batch_t),
intent(in) :: this
981 integer,
intent(in) :: linear_index
983 ist = this%ist_idim_index(linear_index, 1)
! Return the idim index stored for a linear index (column 2 of
! ist_idim_index). linear_index is used as a subscript without a range check.
990 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
991 class(
batch_t),
intent(in) :: this
992 integer,
intent(in) :: linear_index
994 idim = this%ist_idim_index(linear_index, 2)
! Start remote (MPI one-sided) access to the packed data of a batch
! (fragment).
!   this    : batch whose packed host array is exposed
!   mpi_grp : MPI group; a window is only created when it has more than one
!             member
!   rma_win : resulting window handle, or mpi_win_null for a single process
1009 class(
batch_t),
intent(inout) :: this
1010 type(mpi_grp_t),
intent(in) :: mpi_grp
1011 type(mpi_win),
intent(out) :: rma_win
! Not available together with an accelerator.
1015 assert(.not. accel_is_enabled())
1017 if (mpi_grp%size > 1)
then
! The window covers the whole packed array: product(pack_size) elements times
! the element size in bytes, with the element size as displacement unit.
1020 if (this%type() == type_cmplx)
then
1022 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1023 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win, mpi_err)
1025 else if (this%type() == type_float)
then
1027 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1028 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win, mpi_err)
1031 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1032 call messages_fatal(1)
! Single process: hand back the null window so the caller can tell no window
! was created.
1036 rma_win = mpi_win_null
! Stop remote access to a batch (fragment): when a real window was created,
! free it and unpack the batch again.
!   this    : batch that was exposed
!   rma_win : window handle; nothing is done for mpi_win_null
1050 class(
batch_t),
intent(inout) :: this
1051 type(mpi_win),
intent(inout) :: rma_win
1055 if (rma_win /= mpi_win_null)
then
1057 call mpi_win_free(rma_win, mpi_err)
1059 call this%do_unpack()
! Copy the data of one batch into another batch (fragment).
!   this  : source batch
!   np    : number of points to copy
!   dest  : destination batch; must pass check_compatibility_with
!   async : optional, defaults to .false.; only read in the device branch
1069 class(
batch_t),
intent(in) :: this
1070 integer,
intent(in) :: np
1071 class(
batch_t),
intent(inout) :: dest
1072 logical,
optional,
intent(in) :: async
1074 integer(int64) :: localsize, dim2, dim3
1078 call profiling_in(
"BATCH_COPY_DATA_TO")
1080 call this%check_compatibility_with(dest)
! One branch per pack status; the case labels are not visible in this listing.
1082 select case (this%status())
! Device copy. The kernel receives log2 of the first pack extents, which relies
! on these extents being powers of two.
1084 call accel_set_kernel_arg(kernel_copy, 0, np)
1085 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1086 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1087 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1088 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
! Points handled per work group.
1090 localsize = accel_kernel_workgroup_size(kernel_copy)/dest%pack_size_real(1)
! Spread the points over the second and third launch dimensions so that the
! second one stays within the per-dimension limit.
1092 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1093 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1095 call accel_kernel_run(kernel_copy, (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
! Wait for completion unless an asynchronous copy was requested.
1097 if(.not. optional_default(async, .false.))
call accel_finish()
! Packed host copy. If the element count does not fit a default integer, copy
! one point at a time (the loop over ip is not visible here); otherwise use a
! single BLAS copy.
1100 if (np*this%pack_size(1) > huge(0_int32))
then
1103 if (dest%type() == type_float)
then
1104 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1106 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1110 if (dest%type() == type_float)
then
1111 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1113 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
! Unpacked copy: one BLAS copy of np points per linear entry.
1118 do ist = 1, dest%nst_linear
1119 if (dest%type() == type_cmplx)
then
1120 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1122 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1128 call profiling_out(
"BATCH_COPY_DATA_TO")
! Assert that two batches are compatible (fragment).
!   this, target   : batches to compare
!   only_check_dim : optional, defaults to .false.; when .true. the nst_linear
!                    comparison is skipped
1136 class(
batch_t),
intent(in) :: this
1137 class(
batch_t),
intent(in) :: target
1138 logical,
optional,
intent(in) :: only_check_dim
! The types must always agree.
1142 assert(this%type() ==
target%type())
1143 if (.not. optional_default(only_check_dim, .false.))
then
1144 assert(this%nst_linear ==
target%nst_linear)
! NOTE(review): the end of the if block is not visible here, so it cannot be
! told from this listing whether the two checks below are also conditional.
1146 assert(this%status() ==
target%status())
1147 assert(this%dim ==
target%dim)
! Build the index arrays ist(:) and ist_idim_index(:, :) and set the pack
! sizes (fragment).
!   this     : batch to update
!   st_start : first state index stored in the batch
!   st_end   : last state index stored in the batch
1157 class(
batch_t),
intent(inout) :: this
1158 integer,
intent(in) :: st_start
1159 integer,
intent(in) :: st_end
1161 integer :: idim, ii, ist
1165 do ist = st_start, st_end
! Linear index ii runs fastest over idim: the entries of one state are stored
! next to each other.
1167 do idim = 1, this%dim
1168 ii = this%dim*(ist - st_start) + idim
1169 this%ist_idim_index(ii, 1) = ist
1170 this%ist_idim_index(ii, 2) = idim
! Position (ist - st_start + 1) in the batch holds state index ist.
1172 this%ist(ist - st_start + 1) = ist
! First pack extent: number of linear entries padded to a power of two.
! Second pack extent: number of points, padded when an accelerator is enabled.
1176 this%pack_size(1) = pad_pow2(this%nst_linear)
1177 this%pack_size(2) = this%np
1178 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
! pack_size_real doubles the first extent for complex batches.
1180 this%pack_size_real = this%pack_size
1181 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1188#include "batch_inc.F90"
1191#include "complex.F90"
1192#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
integer, parameter, public accel_mem_read_write
subroutine, public accel_release_buffer(this)
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
subroutine batch_check_compatibility_with(this, target, only_check_dim)
check whether two batches have compatible dimensions (and type)
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with a rank-3 array of TYPE_CMPLX valued mesh functions psi.
integer, parameter cl_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the o...
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with a rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debugging purposes only.
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with a rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with a rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special)
make a copy of a batch
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data)
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with a rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine batch_clone_to(this, dest, pack, copy_data, new_np)
clone a batch to a new batch
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with a rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine batch_do_pack(this, copy, async)
pack the data in a batch
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines. You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), public type_float
type(type_t), public type_cmplx
type(type_t), public type_none
Class defining batches of mesh functions.