68 integer,
public :: nst
69 integer,
public :: dim
72 integer,
allocatable :: ist_idim_index(:, :)
77 integer,
allocatable,
public :: ist(:)
84 logical :: is_allocated
87 integer,
public :: nst_linear
93 integer :: status_host
102 type(type_t) :: type_of
103 integer :: device_buffer_count
104 integer :: host_buffer_count
105 logical :: special_memory
106 logical :: needs_finish_unpack
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
112 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
114 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
116 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
120 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
122 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
125 integer(int64),
public :: pack_size(1:2)
127 integer(int64),
public :: pack_size_real(1:2)
131 type(accel_mem_t),
public :: ff_device
141 generic :: do_pack => do_pack_generic, do_pack_target
191 integer,
public,
parameter :: &
192 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
210 class(
batch_t),
intent(inout) :: this
211 logical,
optional,
intent(in) :: copy
216 if (this%own_memory .and. this%is_packed())
then
219 call this%deallocate_packed_device()
222 call this%deallocate_packed_host()
226 this%host_buffer_count = 0
227 this%device_buffer_count = 0
232 if (this%is_allocated)
then
233 call this%deallocate_unpacked_host()
236 safe_deallocate_a(this%ist_idim_index)
237 safe_deallocate_a(this%ist)
252 this%is_allocated = .false.
254 if (this%special_memory)
then
255 if (
associated(this%dff))
then
258 if (
associated(this%zff))
then
262 safe_deallocate_p(this%dff)
263 safe_deallocate_p(this%zff)
266 nullify(this%dff_linear)
268 nullify(this%zff_linear)
279 class(batch_t),
intent(inout) :: this
283 if (this%special_memory)
then
284 if (
associated(this%dff_pack))
then
287 if (
associated(this%zff_pack))
then
291 safe_deallocate_p(this%dff_pack)
292 safe_deallocate_p(this%zff_pack)
294 nullify(this%dff_pack)
295 nullify(this%zff_pack)
319 class(
batch_t),
intent(inout) :: this
324 call this%dallocate_unpacked_host()
326 call this%zallocate_unpacked_host()
338 class(
batch_t),
intent(inout) :: this
343 call this%dallocate_packed_host()
345 call this%zallocate_packed_host()
357 class(
batch_t),
intent(inout) :: this
362 product(this%pack_size))
375 type(
batch_t),
intent(out) :: this
376 integer,
intent(in) :: dim
377 integer,
intent(in) :: nst
378 integer,
intent(in) :: np
382 this%is_allocated = .false.
383 this%own_memory = .false.
384 this%special_memory = .false.
385 this%needs_finish_unpack = .false.
390 this%nst_linear = nst*dim
393 this%device_buffer_count = 0
394 this%host_buffer_count = 0
399 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
400 safe_allocate(this%ist(1:this%nst))
402 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
403 nullify(this%dff_pack, this%zff_pack)
414 subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
415 class(
batch_t),
intent(in) :: this
416 class(
batch_t),
allocatable,
intent(out) :: dest
417 logical,
optional,
intent(in) :: pack
419 logical,
optional,
intent(in) :: copy_data
421 integer,
optional,
intent(in) :: new_np
422 logical,
optional,
intent(in) :: special
424 type(
type_t),
optional,
intent(in) :: dest_type
428 if (.not.
allocated(dest))
then
429 safe_allocate_type(
batch_t, dest)
431 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
435 call this%copy_to(dest, pack, copy_data, new_np, special, dest_type)
442 subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
443 class(
batch_t),
intent(in) :: this
444 class(
batch_t),
allocatable,
intent(out) :: dest(:)
445 integer,
intent(in) :: n_batches
446 logical,
optional,
intent(in) :: pack
448 logical,
optional,
intent(in) :: copy_data
450 integer,
optional,
intent(in) :: new_np
451 logical,
optional,
intent(in) :: special
453 type(
type_t),
optional,
intent(in) :: dest_type
459 if (.not.
allocated(dest))
then
460 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
462 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
467 call this%copy_to(dest(ib), pack, copy_data, new_np, special, dest_type)
478 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
479 class(
batch_t),
intent(in) :: this
480 class(
batch_t),
intent(out) :: dest
481 logical,
optional,
intent(in) :: pack
483 logical,
optional,
intent(in) :: copy_data
485 integer,
optional,
intent(in) :: new_np
486 logical,
optional,
intent(in) :: special
488 type(
type_t),
optional,
intent(in) :: dest_type
490 logical :: host_packed, special_
498 host_packed = this%host_buffer_count > 0
502 if (
present(special))
then
503 special_ = special  ! an explicit request from the caller overrides the source batch's setting
505 special_ = this%special_memory .and. .not. this%device_buffer_count > 0  ! default: inherit special memory only when the source holds no device buffer
508 if (
present(dest_type))
then
515 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
517 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
519 message(1) =
"Internal error: unknown batch type in batch_copy_to."
523 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
525 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
526 dest%ist(1:this%nst) = this%ist(1:this%nst)
529 assert(np_ == this%np)
530 call this%copy_data_to(min(this%np, np_), dest)
541 type(
type_t)
pure function batch_type(this) result(btype)
542 class(
batch_t),
intent(in) :: this
!> Translate the type of a batch into an integer code.
!! The visible statements set itype to 1 when btype equals type_float and to 2
!! when btype equals type_cmplx.
!! NOTE(review): the assignment of btype and any fallback value of itype are not
!! visible in this excerpt - confirm against the full source.
550 integer pure function batch_type_as_integer(this) result(itype)
551 class(
batch_t),
intent(in) :: this
553 type(type_t) :: btype
557 if (btype == type_float) itype = 1
558 if (btype == type_cmplx) itype = 2
!> Return the current status of the batch, i.e. the value stored in this%status_of.
567 integer pure function batch_status(this) result(bstatus)
568 class(
batch_t),
intent(in) :: this
570 bstatus = this%status_of
!> Report whether the batch is packed: .true. as soon as either the device
!! buffer counter or the host buffer counter is positive.
575 logical pure function batch_is_packed(this) result(in_buffer)
576 class(
batch_t),
intent(in) :: this
578 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
584 class(
batch_t),
intent(inout) :: this
587 if (accel_is_enabled())
size = accel_padded_size(size)
588 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
600 class(
batch_t),
intent(inout) :: this
601 logical,
optional,
intent(in) :: copy
602 logical,
optional,
intent(in) :: async
605 integer :: source, target
610 source = this%status()
613 if (accel_is_enabled())
then
622 call this%do_pack(
target, copy, async)
631 class(
batch_t),
intent(inout) :: this
632 integer,
intent(in) ::
target
633 logical,
optional,
intent(in) :: copy
634 logical,
optional,
intent(in) :: async
636 logical,
optional,
intent(in) :: cpu_only
644 call profiling_in(
"BATCH_DO_PACK")
646 copy_ = optional_default(copy, .
true.)
648 async_ = optional_default(async, .false.)
651 source = this%status()
654 if (source /=
target)
then
657 call this%allocate_packed_device()
671 call this%allocate_packed_host()
676 if (this%type() == type_float)
then
678 else if (this%type() == type_cmplx)
then
682 if (this%own_memory)
call this%deallocate_unpacked_host()
684 call messages_not_implemented(
"Error: batch_do_pack called with BATCH_NOT_PACKED as target.")
690 this%device_buffer_count = this%device_buffer_count + 1
692 this%host_buffer_count = this%host_buffer_count + 1
695 call profiling_out(
"BATCH_DO_PACK")
705 class(
batch_t),
intent(inout) :: this
706 logical,
optional,
intent(in) :: copy
707 logical,
optional,
intent(in) :: force
708 logical,
optional,
intent(in) :: async
711 logical :: copy_, force_, async_
712 integer :: source, target
716 call profiling_in(
"BATCH_DO_UNPACK")
718 copy_ = optional_default(copy, .
true.)
720 force_ = optional_default(force, .false.)
722 async_ = optional_default(async, .false.)
725 source = this%status()
732 target = this%status_host
736 if (source /=
target)
then
739 if (this%host_buffer_count == 1 .or. force_)
then
740 if (this%own_memory)
call this%allocate_unpacked_host()
742 if (copy_ .or. this%own_memory)
then
743 if (this%type() == type_float)
then
745 else if (this%type() == type_cmplx)
then
749 call this%deallocate_packed_host()
750 this%status_host =
target
751 this%status_of =
target
752 this%host_buffer_count = 1
754 this%host_buffer_count = this%host_buffer_count - 1
756 if (this%device_buffer_count == 1 .or. force_)
then
768 this%needs_finish_unpack = .
true.
770 call this%deallocate_packed_device()
772 this%status_of =
target
773 this%device_buffer_count = 1
775 this%device_buffer_count = this%device_buffer_count - 1
779 call profiling_out(
"BATCH_DO_UNPACK")
787 class(
batch_t),
intent(inout) :: this
790 if (this%needs_finish_unpack)
then
792 call this%deallocate_packed_device()
793 this%needs_finish_unpack = .false.
801 class(
batch_t),
intent(inout) :: this
804 integer(int64) :: unroll, bsize, gsize
805 type(accel_mem_t) :: tmp
806 type(accel_kernel_t),
pointer :: kernel
810 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
811 if (this%nst_linear == 1)
then
813 if (this%type() == type_float)
then
814 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
815 else if (this%type() == type_cmplx)
then
816 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
824 if (this%type() == type_float)
then
832 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
834 do ist = 1, this%nst_linear, int(unroll, int32)
837 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
839 if (this%type() == type_float)
then
840 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
841 offset = (ist2 - ist)*this%pack_size(2))
843 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
844 offset = (ist2 - ist)*this%pack_size(2))
849 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
850 call accel_set_kernel_arg(kernel, 1, this%np)
851 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
852 call accel_set_kernel_arg(kernel, 3, ist - 1)
853 call accel_set_kernel_arg(kernel, 4, tmp)
854 call accel_set_kernel_arg(kernel, 5, this%ff_device)
856 call profiling_in(
"GPU_PACK")
859 bsize = accel_max_block_size()/unroll
860 call accel_grid_size(this%pack_size(2), bsize, gsize)
862 call accel_kernel_run(kernel, (/gsize, 1_int64/), (/bsize, unroll/))
864 if (this%type() == type_float)
then
865 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
867 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
871 call profiling_out(
"GPU_PACK")
875 call accel_free_buffer(tmp)
879 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
886 class(
batch_t),
intent(inout) :: this
889 integer(int64) :: unroll, bsize, gsize
890 type(accel_mem_t) :: tmp
891 type(accel_kernel_t),
pointer :: kernel
894 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
896 if (this%nst_linear == 1)
then
898 if (this%type() == type_float)
then
899 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
901 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
908 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
910 if (this%type() == type_float)
then
916 do ist = 1, this%nst_linear, int(unroll, int32)
917 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
918 call accel_set_kernel_arg(kernel, 1, this%np)
919 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
920 call accel_set_kernel_arg(kernel, 3, ist - 1)
921 call accel_set_kernel_arg(kernel, 4, this%ff_device)
922 call accel_set_kernel_arg(kernel, 5, tmp)
924 call profiling_in(
"GPU_UNPACK")
927 bsize = accel_max_block_size()/unroll
928 call accel_grid_size(this%pack_size(2), bsize, gsize)
930 call accel_kernel_run(kernel, (/1_int64, gsize/), (/unroll, bsize/))
932 if (this%type() == type_float)
then
933 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
935 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
939 call profiling_out(
"GPU_UNPACK")
942 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
944 if (this%type() == type_float)
then
945 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
946 offset = (ist2 - ist)*this%pack_size(2))
948 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
949 offset = (ist2 - ist)*this%pack_size(2))
955 call accel_free_buffer(tmp)
958 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
964 class(
batch_t),
intent(inout) :: this
965 logical,
optional,
intent(in) :: async
970 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
971 if (this%type() == type_float)
then
972 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
974 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
976 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
983 class(
batch_t),
intent(inout) :: this
984 logical,
optional,
intent(in) :: async
989 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
990 if (this%type() == type_float)
then
991 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
993 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
995 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
1006 class(
batch_t),
intent(in) :: this
1007 integer,
intent(in) :: cind(:)
1009 do index = 1, this%nst_linear
1010 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
1013 assert(index <= this%nst_linear)
!> Direct index lookup: convert the index tuple cind into a linear index.
!! The visible formula is (cind(1) - 1)*this%dim + cind(2).
!! NOTE(review): the statement executed when cind has exactly one entry, and the
!! else separating the two branches, are not visible in this excerpt - the
!! formula is presumably the multi-entry branch; confirm against the full source.
1022 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
1023 class(
batch_t),
intent(in) :: this
1024 integer,
intent(in) :: cind(:)
1026 if (ubound(cind, dim = 1) == 1)
then
1029 index = (cind(1) - 1)*this%dim + cind(2)
!> Return the state index for a linear index: the entry in column 1 of
!! this%ist_idim_index at row linear_index. No bounds check is done here.
1040 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
1041 class(
batch_t),
intent(in) :: this
1042 integer,
intent(in) :: linear_index
1044 ist = this%ist_idim_index(linear_index, 1)
!> Return the dimension index for a linear index: the entry in column 2 of
!! this%ist_idim_index at row linear_index. No bounds check is done here.
1051 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
1052 class(
batch_t),
intent(in) :: this
1053 integer,
intent(in) :: linear_index
1055 idim = this%ist_idim_index(linear_index, 2)
1070 class(
batch_t),
intent(inout) :: this
1071 type(mpi_grp_t),
intent(in) :: mpi_grp
1072 type(mpi_win),
intent(out) :: rma_win
1076 if (mpi_grp%size > 1)
then
1078 assert(.not. accel_is_enabled())
1082 assert(int(product(this%pack_size)*types_get_size(this%type())) > 0)
1083 if (this%type() == type_cmplx)
then
1085 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1086 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1088 else if (this%type() == type_float)
then
1090 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1091 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1094 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1095 call messages_fatal(1)
1099 rma_win = mpi_win_null
1113 class(
batch_t),
intent(inout) :: this
1114 type(mpi_win),
intent(inout) :: rma_win
1118 if (rma_win /= mpi_win_null)
then
1120 call mpi_win_free(rma_win)
1122 call this%do_unpack()
1132 class(
batch_t),
intent(in) :: this
1133 integer,
intent(in) :: np
1134 class(
batch_t),
intent(inout) :: dest
1135 logical,
optional,
intent(in) :: async
1137 integer(int64),
dimension(3) :: gsizes, bsizes
1141 call profiling_in(
"BATCH_COPY_DATA_TO")
1144 call this%check_compatibility_with(dest, type_check=.false.)
1146 if (this%type() == dest%type())
then
1147 select case (this%status())
1149 call accel_set_kernel_arg(kernel_copy, 0, np)
1150 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1151 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1152 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1153 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
1156 call accel_grid_size_extend_dim(int(np, int64), dest%pack_size_real(1), gsizes, bsizes, kernel_copy)
1158 call accel_kernel_run(kernel_copy, gsizes, bsizes)
1160 if(.not. optional_default(async, .false.))
call accel_finish()
1163 if (np*this%pack_size(1) > huge(0_int32))
then
1166 if (dest%type() == type_float)
then
1167 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1169 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1173 if (dest%type() == type_float)
then
1174 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1176 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
1181 do ist = 1, dest%nst_linear
1182 if (dest%type() == type_cmplx)
then
1183 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1185 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1190 else if (this%type() == type_cmplx)
then
1192 select case (this%status())
1194 call accel_set_kernel_arg(kernel_copy_complex_to_real, 0, np)
1195 call accel_set_kernel_arg(kernel_copy_complex_to_real, 1, this%ff_device)
1196 call accel_set_kernel_arg(kernel_copy_complex_to_real, 2,
log2(int(this%pack_size_real(1), int32)))
1197 call accel_set_kernel_arg(kernel_copy_complex_to_real, 3, dest%ff_device)
1198 call accel_set_kernel_arg(kernel_copy_complex_to_real, 4,
log2(int(dest%pack_size_real(1), int32)))
1201 call accel_grid_size_extend_dim(int(np, int64), dest%pack_size_real(1), gsizes, bsizes, kernel_copy_complex_to_real)
1203 call accel_kernel_run(kernel_copy_complex_to_real, gsizes, bsizes)
1205 if(.not. optional_default(async, .false.))
call accel_finish()
1211 do ist = 1, dest%nst_linear
1212 dest%dff_pack(ist, ip) = real(this%zff_pack(ist, ip), real64)
1218 do ist = 1, dest%nst_linear
1221 dest%dff_linear(ip, ist) = real(this%zff_linear(ip, ist), real64)
1227 else if (this%type() == type_float)
then
1229 select case (this%status())
1231 call accel_set_kernel_arg(kernel_copy_real_to_complex, 0, np)
1232 call accel_set_kernel_arg(kernel_copy_real_to_complex, 1, this%ff_device)
1233 call accel_set_kernel_arg(kernel_copy_real_to_complex, 2,
log2(int(this%pack_size_real(1), int32)))
1234 call accel_set_kernel_arg(kernel_copy_real_to_complex, 3, dest%ff_device)
1235 call accel_set_kernel_arg(kernel_copy_real_to_complex, 4,
log2(int(dest%pack_size_real(1), int32)))
1238 call accel_grid_size_extend_dim(int(np, int64), this%pack_size_real(1), gsizes, bsizes, kernel_copy_real_to_complex)
1240 call accel_kernel_run(kernel_copy_real_to_complex, gsizes, bsizes)
1242 if(.not. optional_default(async, .false.))
call accel_finish()
1248 do ist = 1, dest%nst_linear
1249 dest%zff_pack(ist, ip) = cmplx(this%dff_pack(ist, ip), m_zero, real64)
1255 do ist = 1, dest%nst_linear
1258 dest%zff_linear(ip, ist) = cmplx(this%dff_linear(ip, ist), m_zero, real64)
1265 message(1) =
"Error! This should not happen."
1266 call messages_fatal(1)
1269 call profiling_out(
"BATCH_COPY_DATA_TO")
1277 class(
batch_t),
intent(in) :: this
1278 class(
batch_t),
intent(in) :: target
1279 logical,
optional,
intent(in) :: only_check_dim
1280 logical,
optional,
intent(in) :: type_check
1284 if (optional_default(type_check, .
true.))
then
1285 assert(this%type() ==
target%type())
1287 if (.not. optional_default(only_check_dim, .false.))
then
1288 assert(this%nst_linear ==
target%nst_linear)
1290 assert(this%status() ==
target%status())
1291 assert(this%dim ==
target%dim)
1301 class(
batch_t),
intent(inout) :: this
1302 integer,
intent(in) :: st_start
1303 integer,
intent(in) :: st_end
1305 integer :: idim, ii, ist
1309 do ist = st_start, st_end
1311 do idim = 1, this%dim
1312 ii = this%dim*(ist - st_start) + idim
1313 this%ist_idim_index(ii, 1) = ist
1314 this%ist_idim_index(ii, 2) = idim
1316 this%ist(ist - st_start + 1) = ist
1320 this%pack_size(1) = pad_pow2(this%nst_linear)
1321 this%pack_size(2) = this%np
1322 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
1324 this%pack_size_real = this%pack_size
1325 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1332#include "batch_inc.F90"
1335#include "complex.F90"
1336#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
subroutine, public accel_free_buffer(this, async)
integer, parameter, public accel_mem_read_write
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with a rank-3 array of TYPE_CMPLX valued mesh functions psi.
subroutine batch_check_compatibility_with(this, target, only_check_dim, type_check)
check whether two batches have compatible dimensions (and type)
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with a rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debugging purposes only.
subroutine batch_do_pack_generic(this, copy, async)
pack the data in a batch
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with a rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
clone a batch to a new batch
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with a rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
integer, parameter gpu_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the G...
subroutine batch_do_pack_target(this, target, copy, async, cpu_only)
pack the data in a batch
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
make a copy of a batch
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with a rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with a rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines. You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), parameter, public type_cmplx
type(type_t), parameter, public type_float
type(type_t), parameter, public type_none
Class defining batches of mesh functions.