68 integer,
public :: nst
69 integer,
public :: dim
72 integer,
allocatable :: ist_idim_index(:, :)
77 integer,
allocatable,
public :: ist(:)
84 logical :: is_allocated
87 integer,
public :: nst_linear
93 integer :: status_host
102 type(type_t) :: type_of
103 integer :: device_buffer_count
104 integer :: host_buffer_count
105 logical :: special_memory
106 logical :: needs_finish_unpack
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
112 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
114 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
116 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
120 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
122 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
125 integer(int64),
public :: pack_size(1:2)
127 integer(int64),
public :: pack_size_real(1:2)
131 type(accel_mem_t),
public :: ff_device
141 generic :: do_pack => do_pack_generic, do_pack_target
191 integer,
public,
parameter :: &
192 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
210 class(
batch_t),
intent(inout) :: this
211 logical,
optional,
intent(in) :: copy
216 if (this%own_memory .and. this%is_packed())
then
219 call this%deallocate_packed_device()
222 call this%deallocate_packed_host()
226 this%host_buffer_count = 0
227 this%device_buffer_count = 0
232 if (this%is_allocated)
then
233 call this%deallocate_unpacked_host()
236 safe_deallocate_a(this%ist_idim_index)
237 safe_deallocate_a(this%ist)
252 this%is_allocated = .false.
254 if (this%special_memory)
then
255 if (
associated(this%dff))
then
258 if (
associated(this%zff))
then
262 safe_deallocate_p(this%dff)
263 safe_deallocate_p(this%zff)
266 nullify(this%dff_linear)
268 nullify(this%zff_linear)
279 class(batch_t),
intent(inout) :: this
283 if (this%special_memory)
then
284 if (
associated(this%dff_pack))
then
287 if (
associated(this%zff_pack))
then
291 safe_deallocate_p(this%dff_pack)
292 safe_deallocate_p(this%zff_pack)
294 nullify(this%dff_pack)
295 nullify(this%zff_pack)
319 class(
batch_t),
intent(inout) :: this
324 call this%dallocate_unpacked_host()
326 call this%zallocate_unpacked_host()
338 class(
batch_t),
intent(inout) :: this
343 call this%dallocate_packed_host()
345 call this%zallocate_packed_host()
357 class(
batch_t),
intent(inout) :: this
362 product(this%pack_size))
375 type(
batch_t),
intent(out) :: this
376 integer,
intent(in) :: dim
377 integer,
intent(in) :: nst
378 integer,
intent(in) :: np
382 this%is_allocated = .false.
383 this%own_memory = .false.
384 this%special_memory = .false.
385 this%needs_finish_unpack = .false.
390 this%nst_linear = nst*dim
393 this%device_buffer_count = 0
394 this%host_buffer_count = 0
399 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
400 safe_allocate(this%ist(1:this%nst))
402 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
403 nullify(this%dff_pack, this%zff_pack)
414 subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
415 class(
batch_t),
intent(in) :: this
416 class(
batch_t),
allocatable,
intent(out) :: dest
417 logical,
optional,
intent(in) :: pack
419 logical,
optional,
intent(in) :: copy_data
421 integer,
optional,
intent(in) :: new_np
422 logical,
optional,
intent(in) :: special
424 type(
type_t),
optional,
intent(in) :: dest_type
428 if (.not.
allocated(dest))
then
429 safe_allocate_type(
batch_t, dest)
431 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
435 call this%copy_to(dest, pack, copy_data, new_np, special, dest_type)
442 subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
443 class(
batch_t),
intent(in) :: this
444 class(
batch_t),
allocatable,
intent(out) :: dest(:)
445 integer,
intent(in) :: n_batches
446 logical,
optional,
intent(in) :: pack
448 logical,
optional,
intent(in) :: copy_data
450 integer,
optional,
intent(in) :: new_np
451 logical,
optional,
intent(in) :: special
453 type(
type_t),
optional,
intent(in) :: dest_type
459 if (.not.
allocated(dest))
then
460 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
462 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
467 call this%copy_to(dest(ib), pack, copy_data, new_np, special, dest_type)
478 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
479 class(
batch_t),
intent(in) :: this
480 class(
batch_t),
intent(out) :: dest
481 logical,
optional,
intent(in) :: pack
483 logical,
optional,
intent(in) :: copy_data
485 integer,
optional,
intent(in) :: new_np
486 logical,
optional,
intent(in) :: special
488 type(
type_t),
optional,
intent(in) :: dest_type
490 logical :: host_packed, special_
498 host_packed = this%host_buffer_count > 0
502 if (
present(special))
then
503 special_ = this%special_memory
505 special_ = this%special_memory .and. .not. this%device_buffer_count > 0
508 if (
present(dest_type))
then
515 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
517 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
519 message(1) =
"Internal error: unknown batch type in batch_copy_to."
523 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
525 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
526 dest%ist(1:this%nst) = this%ist(1:this%nst)
529 assert(np_ == this%np)
530 call this%copy_data_to(min(this%np, np_), dest)
541 type(
type_t)
pure function batch_type(this) result(btype)
542 class(
batch_t),
intent(in) :: this
550 integer pure function batch_type_as_integer(this) result(itype)
551 class(
batch_t),
intent(in) :: this
553 type(type_t) :: btype
557 if (btype == type_float) itype = 1
558 if (btype == type_cmplx) itype = 2
567 integer pure function batch_status(this) result(bstatus)
568 class(
batch_t),
intent(in) :: this
570 bstatus = this%status_of
575 logical pure function batch_is_packed(this) result(in_buffer)
576 class(
batch_t),
intent(in) :: this
578 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
584 class(
batch_t),
intent(inout) :: this
587 if (accel_is_enabled())
size = accel_padded_size(size)
588 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
600 class(
batch_t),
intent(inout) :: this
601 logical,
optional,
intent(in) :: copy
602 logical,
optional,
intent(in) :: async
605 integer :: source, target
610 source = this%status()
613 if (accel_is_enabled())
then
622 call this%do_pack(
target, copy, async)
631 class(
batch_t),
intent(inout) :: this
632 integer,
intent(in) ::
target
633 logical,
optional,
intent(in) :: copy
634 logical,
optional,
intent(in) :: async
636 logical,
optional,
intent(in) :: cpu_only
644 call profiling_in(
"BATCH_DO_PACK")
646 copy_ = optional_default(copy, .
true.)
648 async_ = optional_default(async, .false.)
651 source = this%status()
654 if (source /=
target)
then
657 call this%allocate_packed_device()
671 call this%allocate_packed_host()
676 if (this%type() == type_float)
then
678 else if (this%type() == type_cmplx)
then
682 if (this%own_memory)
call this%deallocate_unpacked_host()
684 call messages_not_implemented(
"Error: batch_do_pack called with BATCH_NOT_PACKED as target.")
690 this%device_buffer_count = this%device_buffer_count + 1
692 this%host_buffer_count = this%host_buffer_count + 1
695 call profiling_out(
"BATCH_DO_PACK")
705 class(
batch_t),
intent(inout) :: this
706 logical,
optional,
intent(in) :: copy
707 logical,
optional,
intent(in) :: force
708 logical,
optional,
intent(in) :: async
711 logical :: copy_, force_, async_
712 integer :: source, target
716 call profiling_in(
"BATCH_DO_UNPACK")
718 copy_ = optional_default(copy, .
true.)
720 force_ = optional_default(force, .false.)
722 async_ = optional_default(async, .false.)
725 source = this%status()
732 target = this%status_host
736 if (source /=
target)
then
739 if (this%host_buffer_count == 1 .or. force_)
then
740 if (this%own_memory)
call this%allocate_unpacked_host()
742 if (copy_ .or. this%own_memory)
then
743 if (this%type() == type_float)
then
745 else if (this%type() == type_cmplx)
then
749 call this%deallocate_packed_host()
750 this%status_host =
target
751 this%status_of =
target
752 this%host_buffer_count = 1
754 this%host_buffer_count = this%host_buffer_count - 1
756 if (this%device_buffer_count == 1 .or. force_)
then
768 this%needs_finish_unpack = .
true.
770 call this%deallocate_packed_device()
772 this%status_of =
target
773 this%device_buffer_count = 1
775 this%device_buffer_count = this%device_buffer_count - 1
779 call profiling_out(
"BATCH_DO_UNPACK")
787 class(
batch_t),
intent(inout) :: this
790 if (this%needs_finish_unpack)
then
792 call this%deallocate_packed_device()
793 this%needs_finish_unpack = .false.
801 class(
batch_t),
intent(inout) :: this
804 integer(int64) :: unroll
805 type(accel_mem_t) :: tmp
806 type(accel_kernel_t),
pointer :: kernel
810 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
811 if (this%nst_linear == 1)
then
813 if (this%type() == type_float)
then
814 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
815 else if (this%type() == type_cmplx)
then
816 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
824 if (this%type() == type_float)
then
832 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
834 do ist = 1, this%nst_linear, int(unroll, int32)
837 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
839 if (this%type() == type_float)
then
840 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
841 offset = (ist2 - ist)*this%pack_size(2))
843 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
844 offset = (ist2 - ist)*this%pack_size(2))
849 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
850 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
851 call accel_set_kernel_arg(kernel, 2, ist - 1)
852 call accel_set_kernel_arg(kernel, 3, tmp)
853 call accel_set_kernel_arg(kernel, 4, this%ff_device)
855 call profiling_in(
"CL_PACK")
856 call accel_kernel_run(kernel, (/this%pack_size(2), unroll/), (/accel_max_workgroup_size()/unroll, unroll/))
858 if (this%type() == type_float)
then
859 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
861 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
865 call profiling_out(
"CL_PACK")
869 call accel_release_buffer(tmp)
873 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
880 class(
batch_t),
intent(inout) :: this
883 integer(int64) :: unroll
884 type(accel_mem_t) :: tmp
885 type(accel_kernel_t),
pointer :: kernel
888 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
890 if (this%nst_linear == 1)
then
892 if (this%type() == type_float)
then
893 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
895 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
902 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
904 if (this%type() == type_float)
then
910 do ist = 1, this%nst_linear, int(unroll, int32)
911 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
912 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
913 call accel_set_kernel_arg(kernel, 2, ist - 1)
914 call accel_set_kernel_arg(kernel, 3, this%ff_device)
915 call accel_set_kernel_arg(kernel, 4, tmp)
917 call profiling_in(
"CL_UNPACK")
918 call accel_kernel_run(kernel, (/unroll, this%pack_size(2)/), (/unroll, accel_max_workgroup_size()/unroll/))
920 if (this%type() == type_float)
then
921 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
923 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
927 call profiling_out(
"CL_UNPACK")
930 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
932 if (this%type() == type_float)
then
933 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
934 offset = (ist2 - ist)*this%pack_size(2))
936 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
937 offset = (ist2 - ist)*this%pack_size(2))
943 call accel_release_buffer(tmp)
946 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
952 class(
batch_t),
intent(inout) :: this
953 logical,
optional,
intent(in) :: async
958 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
959 if (this%type() == type_float)
then
960 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
962 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
964 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
971 class(
batch_t),
intent(inout) :: this
972 logical,
optional,
intent(in) :: async
977 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
978 if (this%type() == type_float)
then
979 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
981 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
983 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
994 class(
batch_t),
intent(in) :: this
995 integer,
intent(in) :: cind(:)
997 do index = 1, this%nst_linear
998 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
1001 assert(index <= this%nst_linear)
1010 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
1011 class(
batch_t),
intent(in) :: this
1012 integer,
intent(in) :: cind(:)
1014 if (ubound(cind, dim = 1) == 1)
then
1017 index = (cind(1) - 1)*this%dim + cind(2)
1028 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
1029 class(
batch_t),
intent(in) :: this
1030 integer,
intent(in) :: linear_index
1032 ist = this%ist_idim_index(linear_index, 1)
1039 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
1040 class(
batch_t),
intent(in) :: this
1041 integer,
intent(in) :: linear_index
1043 idim = this%ist_idim_index(linear_index, 2)
1058 class(
batch_t),
intent(inout) :: this
1059 type(mpi_grp_t),
intent(in) :: mpi_grp
1060 type(mpi_win),
intent(out) :: rma_win
1064 if (mpi_grp%size > 1)
then
1066 assert(.not. accel_is_enabled())
1070 if (this%type() == type_cmplx)
then
1072 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1073 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1075 else if (this%type() == type_float)
then
1077 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1078 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1081 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1082 call messages_fatal(1)
1086 rma_win = mpi_win_null
1100 class(
batch_t),
intent(inout) :: this
1101 type(mpi_win),
intent(inout) :: rma_win
1105 if (rma_win /= mpi_win_null)
then
1107 call mpi_win_free(rma_win)
1109 call this%do_unpack()
1119 class(
batch_t),
intent(in) :: this
1120 integer,
intent(in) :: np
1121 class(
batch_t),
intent(inout) :: dest
1122 logical,
optional,
intent(in) :: async
1124 integer(int64) :: localsize, dim2, dim3
1128 call profiling_in(
"BATCH_COPY_DATA_TO")
1131 call this%check_compatibility_with(dest, type_check=.false.)
1133 if (this%type() == dest%type())
then
1134 select case (this%status())
1136 call accel_set_kernel_arg(kernel_copy, 0, np)
1137 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1138 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1139 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1140 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
1142 localsize = accel_kernel_workgroup_size(kernel_copy)/dest%pack_size_real(1)
1144 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1145 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1147 call accel_kernel_run(kernel_copy, (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1149 if(.not. optional_default(async, .false.))
call accel_finish()
1152 if (np*this%pack_size(1) > huge(0_int32))
then
1155 if (dest%type() == type_float)
then
1156 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1158 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1162 if (dest%type() == type_float)
then
1163 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1165 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
1170 do ist = 1, dest%nst_linear
1171 if (dest%type() == type_cmplx)
then
1172 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1174 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1179 else if (this%type() == type_cmplx)
then
1181 select case (this%status())
1183 call accel_set_kernel_arg(kernel_copy_complex_to_real, 0, np)
1184 call accel_set_kernel_arg(kernel_copy_complex_to_real, 1, this%ff_device)
1185 call accel_set_kernel_arg(kernel_copy_complex_to_real, 2,
log2(int(this%pack_size_real(1), int32)))
1186 call accel_set_kernel_arg(kernel_copy_complex_to_real, 3, dest%ff_device)
1187 call accel_set_kernel_arg(kernel_copy_complex_to_real, 4,
log2(int(dest%pack_size_real(1), int32)))
1189 localsize = accel_kernel_workgroup_size(kernel_copy_complex_to_real)/dest%pack_size_real(1)
1191 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1192 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1194 call accel_kernel_run(kernel_copy_complex_to_real, &
1195 (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1197 if(.not. optional_default(async, .false.))
call accel_finish()
1203 do ist = 1, dest%nst_linear
1204 dest%dff_pack(ist, ip) = real(this%zff_pack(ist, ip), real64)
1210 do ist = 1, dest%nst_linear
1213 dest%dff_linear(ip, ist) = real(this%zff_linear(ip, ist), real64)
1219 else if (this%type() == type_float)
then
1221 select case (this%status())
1223 call accel_set_kernel_arg(kernel_copy_real_to_complex, 0, np)
1224 call accel_set_kernel_arg(kernel_copy_real_to_complex, 1, this%ff_device)
1225 call accel_set_kernel_arg(kernel_copy_real_to_complex, 2,
log2(int(this%pack_size_real(1), int32)))
1226 call accel_set_kernel_arg(kernel_copy_real_to_complex, 3, dest%ff_device)
1227 call accel_set_kernel_arg(kernel_copy_real_to_complex, 4,
log2(int(dest%pack_size_real(1), int32)))
1229 localsize = accel_kernel_workgroup_size(kernel_copy_real_to_complex)/this%pack_size_real(1)
1231 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1232 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1234 call accel_kernel_run(kernel_copy_real_to_complex, &
1235 (/this%pack_size_real(1), dim2, dim3/), (/this%pack_size_real(1), localsize, 1_int64/))
1237 if(.not. optional_default(async, .false.))
call accel_finish()
1243 do ist = 1, dest%nst_linear
1244 dest%zff_pack(ist, ip) = cmplx(this%dff_pack(ist, ip), m_zero, real64)
1250 do ist = 1, dest%nst_linear
1253 dest%zff_linear(ip, ist) = cmplx(this%dff_linear(ip, ist), m_zero, real64)
1260 message(1) =
"Error! This should not happen."
1261 call messages_fatal(1)
1264 call profiling_out(
"BATCH_COPY_DATA_TO")
1272 class(
batch_t),
intent(in) :: this
1273 class(
batch_t),
intent(in) :: target
1274 logical,
optional,
intent(in) :: only_check_dim
1275 logical,
optional,
intent(in) :: type_check
1279 if (optional_default(type_check, .
true.))
then
1280 assert(this%type() ==
target%type())
1282 if (.not. optional_default(only_check_dim, .false.))
then
1283 assert(this%nst_linear ==
target%nst_linear)
1285 assert(this%status() ==
target%status())
1286 assert(this%dim ==
target%dim)
1296 class(
batch_t),
intent(inout) :: this
1297 integer,
intent(in) :: st_start
1298 integer,
intent(in) :: st_end
1300 integer :: idim, ii, ist
1304 do ist = st_start, st_end
1306 do idim = 1, this%dim
1307 ii = this%dim*(ist - st_start) + idim
1308 this%ist_idim_index(ii, 1) = ist
1309 this%ist_idim_index(ii, 2) = idim
1311 this%ist(ist - st_start + 1) = ist
1315 this%pack_size(1) = pad_pow2(this%nst_linear)
1316 this%pack_size(2) = this%np
1317 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
1319 this%pack_size_real = this%pack_size
1320 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1327#include "batch_inc.F90"
1330#include "complex.F90"
1331#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
integer, parameter, public accel_mem_read_write
subroutine, public accel_release_buffer(this, async)
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_CMPLX valued mesh functions psi.
subroutine batch_check_compatibility_with(this, target, only_check_dim, type_check)
check whether two batches have compatible dimensions (and type)
integer, parameter cl_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the o...
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debuging purpose only.
subroutine batch_do_pack_generic(this, copy, async)
pack the data in a batch
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
clone a batch to a new batch
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
subroutine batch_do_pack_target(this, target, copy, async, cpu_only)
pack the data in a batch
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
make a copy of a batch
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), public type_float
type(type_t), public type_cmplx
type(type_t), public type_none
Class defining batches of mesh functions.