68 integer,
public :: nst
69 integer,
public :: dim
72 integer,
allocatable :: ist_idim_index(:, :)
77 integer,
allocatable,
public :: ist(:)
84 logical :: is_allocated
87 integer,
public :: nst_linear
93 integer :: status_host
102 type(type_t) :: type_of
103 integer :: device_buffer_count
104 integer :: host_buffer_count
105 logical :: special_memory
106 logical :: needs_finish_unpack
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
112 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
114 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
116 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
120 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
122 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
125 integer(int64),
public :: pack_size(1:2)
127 integer(int64),
public :: pack_size_real(1:2)
131 type(accel_mem_t),
public :: ff_device
189 integer,
public,
parameter :: &
190 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
208 class(
batch_t),
intent(inout) :: this
209 logical,
optional,
intent(in) :: copy
214 if (this%own_memory .and. this%is_packed())
then
217 call this%deallocate_packed_device()
220 call this%deallocate_packed_host()
224 this%host_buffer_count = 0
225 this%device_buffer_count = 0
230 if (this%is_allocated)
then
231 call this%deallocate_unpacked_host()
234 safe_deallocate_a(this%ist_idim_index)
235 safe_deallocate_a(this%ist)
250 this%is_allocated = .false.
252 if (this%special_memory)
then
253 if (
associated(this%dff))
then
256 if (
associated(this%zff))
then
260 safe_deallocate_p(this%dff)
261 safe_deallocate_p(this%zff)
264 nullify(this%dff_linear)
266 nullify(this%zff_linear)
277 class(
batch_t),
intent(inout) :: this
281 if (this%special_memory)
then
282 if (
associated(this%dff_pack))
then
285 if (
associated(this%zff_pack))
then
289 safe_deallocate_p(this%dff_pack)
290 safe_deallocate_p(this%zff_pack)
292 nullify(this%dff_pack)
293 nullify(this%zff_pack)
302 class(
batch_t),
intent(inout) :: this
317 class(
batch_t),
intent(inout) :: this
322 call this%dallocate_unpacked_host()
324 call this%zallocate_unpacked_host()
336 class(
batch_t),
intent(inout) :: this
341 call this%dallocate_packed_host()
343 call this%zallocate_packed_host()
355 class(
batch_t),
intent(inout) :: this
360 product(this%pack_size))
373 type(
batch_t),
intent(out) :: this
374 integer,
intent(in) :: dim
375 integer,
intent(in) :: nst
376 integer,
intent(in) :: np
380 this%is_allocated = .false.
381 this%own_memory = .false.
382 this%special_memory = .false.
383 this%needs_finish_unpack = .false.
388 this%nst_linear = nst*dim
391 this%device_buffer_count = 0
392 this%host_buffer_count = 0
397 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
398 safe_allocate(this%ist(1:this%nst))
400 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
401 nullify(this%dff_pack, this%zff_pack)
412 subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
413 class(
batch_t),
intent(in) :: this
414 class(
batch_t),
allocatable,
intent(out) :: dest
415 logical,
optional,
intent(in) :: pack
417 logical,
optional,
intent(in) :: copy_data
419 integer,
optional,
intent(in) :: new_np
420 logical,
optional,
intent(in) :: special
422 type(
type_t),
optional,
intent(in) :: dest_type
426 if (.not.
allocated(dest))
then
427 safe_allocate_type(
batch_t, dest)
429 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
433 call this%copy_to(dest, pack, copy_data, new_np, special, dest_type)
440 subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
441 class(
batch_t),
intent(in) :: this
442 class(
batch_t),
allocatable,
intent(out) :: dest(:)
443 integer,
intent(in) :: n_batches
444 logical,
optional,
intent(in) :: pack
446 logical,
optional,
intent(in) :: copy_data
448 integer,
optional,
intent(in) :: new_np
449 logical,
optional,
intent(in) :: special
451 type(
type_t),
optional,
intent(in) :: dest_type
457 if (.not.
allocated(dest))
then
458 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
460 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
465 call this%copy_to(dest(ib), pack, copy_data, new_np, special, dest_type)
476 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
477 class(
batch_t),
intent(in) :: this
478 class(
batch_t),
intent(out) :: dest
479 logical,
optional,
intent(in) :: pack
481 logical,
optional,
intent(in) :: copy_data
483 integer,
optional,
intent(in) :: new_np
484 logical,
optional,
intent(in) :: special
486 type(
type_t),
optional,
intent(in) :: dest_type
488 logical :: host_packed, special_
496 host_packed = this%host_buffer_count > 0
500 if (
present(special))
then
501 special_ = this%special_memory
503 special_ = this%special_memory .and. .not. this%device_buffer_count > 0
506 if (
present(dest_type))
then
513 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
515 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
517 message(1) =
"Internal error: unknown batch type in batch_copy_to."
521 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
523 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
524 dest%ist(1:this%nst) = this%ist(1:this%nst)
527 assert(np_ == this%np)
528 call this%copy_data_to(min(this%np, np_), dest)
539 type(
type_t)
pure function batch_type(this) result(btype)
540 class(
batch_t),
intent(in) :: this
548 integer pure function batch_type_as_integer(this) result(itype)
549 class(
batch_t),
intent(in) :: this
551 type(type_t) :: btype
555 if (btype == type_float) itype = 1
556 if (btype == type_cmplx) itype = 2
565 integer pure function batch_status(this) result(bstatus)
566 class(
batch_t),
intent(in) :: this
568 bstatus = this%status_of
573 logical pure function batch_is_packed(this) result(in_buffer)
574 class(
batch_t),
intent(in) :: this
576 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
582 class(
batch_t),
intent(inout) :: this
585 if (accel_is_enabled())
size = accel_padded_size(size)
586 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
598 class(
batch_t),
intent(inout) :: this
599 logical,
optional,
intent(in) :: copy
600 logical,
optional,
intent(in) :: async
605 integer :: source, target
609 call profiling_in(
"BATCH_DO_PACK")
611 copy_ = optional_default(copy, .
true.)
613 async_ = optional_default(async, .false.)
616 source = this%status()
619 if (accel_is_enabled())
then
629 if (source /=
target)
then
632 call this%allocate_packed_device()
646 call this%allocate_packed_host()
651 if (this%type() == type_float)
then
653 else if (this%type() == type_cmplx)
then
657 if (this%own_memory)
call this%deallocate_unpacked_host()
663 this%device_buffer_count = this%device_buffer_count + 1
665 this%host_buffer_count = this%host_buffer_count + 1
668 call profiling_out(
"BATCH_DO_PACK")
677 class(
batch_t),
intent(inout) :: this
678 logical,
optional,
intent(in) :: copy
679 logical,
optional,
intent(in) :: force
680 logical,
optional,
intent(in) :: async
683 logical :: copy_, force_, async_
684 integer :: source, target
688 call profiling_in(
"BATCH_DO_UNPACK")
690 copy_ = optional_default(copy, .
true.)
692 force_ = optional_default(force, .false.)
694 async_ = optional_default(async, .false.)
697 source = this%status()
704 target = this%status_host
708 if (source /=
target)
then
711 if (this%host_buffer_count == 1 .or. force_)
then
712 if (this%own_memory)
call this%allocate_unpacked_host()
714 if (copy_ .or. this%own_memory)
then
715 if (this%type() == type_float)
then
717 else if (this%type() == type_cmplx)
then
721 call this%deallocate_packed_host()
722 this%status_host =
target
723 this%status_of =
target
724 this%host_buffer_count = 1
726 this%host_buffer_count = this%host_buffer_count - 1
728 if (this%device_buffer_count == 1 .or. force_)
then
740 this%needs_finish_unpack = .
true.
742 call this%deallocate_packed_device()
744 this%status_of =
target
745 this%device_buffer_count = 1
747 this%device_buffer_count = this%device_buffer_count - 1
751 call profiling_out(
"BATCH_DO_UNPACK")
759 class(
batch_t),
intent(inout) :: this
762 if (this%needs_finish_unpack)
then
764 call this%deallocate_packed_device()
765 this%needs_finish_unpack = .false.
773 class(
batch_t),
intent(inout) :: this
776 integer(int64) :: unroll
777 type(accel_mem_t) :: tmp
778 type(accel_kernel_t),
pointer :: kernel
782 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
783 if (this%nst_linear == 1)
then
785 if (this%type() == type_float)
then
786 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
787 else if (this%type() == type_cmplx)
then
788 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
796 if (this%type() == type_float)
then
804 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
806 do ist = 1, this%nst_linear, int(unroll, int32)
809 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
811 if (this%type() == type_float)
then
812 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
813 offset = (ist2 - ist)*this%pack_size(2))
815 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
816 offset = (ist2 - ist)*this%pack_size(2))
821 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
822 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
823 call accel_set_kernel_arg(kernel, 2, ist - 1)
824 call accel_set_kernel_arg(kernel, 3, tmp)
825 call accel_set_kernel_arg(kernel, 4, this%ff_device)
827 call profiling_in(
"CL_PACK")
828 call accel_kernel_run(kernel, (/this%pack_size(2), unroll/), (/accel_max_workgroup_size()/unroll, unroll/))
830 if (this%type() == type_float)
then
831 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
833 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
837 call profiling_out(
"CL_PACK")
841 call accel_release_buffer(tmp)
845 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
852 class(
batch_t),
intent(inout) :: this
855 integer(int64) :: unroll
856 type(accel_mem_t) :: tmp
857 type(accel_kernel_t),
pointer :: kernel
860 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
862 if (this%nst_linear == 1)
then
864 if (this%type() == type_float)
then
865 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
867 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
874 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
876 if (this%type() == type_float)
then
882 do ist = 1, this%nst_linear, int(unroll, int32)
883 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
884 call accel_set_kernel_arg(kernel, 1, int(this%pack_size(2), int32))
885 call accel_set_kernel_arg(kernel, 2, ist - 1)
886 call accel_set_kernel_arg(kernel, 3, this%ff_device)
887 call accel_set_kernel_arg(kernel, 4, tmp)
889 call profiling_in(
"CL_UNPACK")
890 call accel_kernel_run(kernel, (/unroll, this%pack_size(2)/), (/unroll, accel_max_workgroup_size()/unroll/))
892 if (this%type() == type_float)
then
893 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
895 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
899 call profiling_out(
"CL_UNPACK")
902 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
904 if (this%type() == type_float)
then
905 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
906 offset = (ist2 - ist)*this%pack_size(2))
908 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
909 offset = (ist2 - ist)*this%pack_size(2))
915 call accel_release_buffer(tmp)
918 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
924 class(
batch_t),
intent(inout) :: this
925 logical,
optional,
intent(in) :: async
930 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
931 if (this%type() == type_float)
then
932 call accel_write_buffer(this%ff_device, product(this%pack_size), this%dff_pack, async=async)
934 call accel_write_buffer(this%ff_device, product(this%pack_size), this%zff_pack, async=async)
936 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
943 class(
batch_t),
intent(inout) :: this
944 logical,
optional,
intent(in) :: async
949 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
950 if (this%type() == type_float)
then
951 call accel_read_buffer(this%ff_device, product(this%pack_size), this%dff_pack, async=async)
953 call accel_read_buffer(this%ff_device, product(this%pack_size), this%zff_pack, async=async)
955 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
966 class(
batch_t),
intent(in) :: this
967 integer,
intent(in) :: cind(:)
969 do index = 1, this%nst_linear
970 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
973 assert(index <= this%nst_linear)
982 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
983 class(
batch_t),
intent(in) :: this
984 integer,
intent(in) :: cind(:)
986 if (ubound(cind, dim = 1) == 1)
then
989 index = (cind(1) - 1)*this%dim + cind(2)
1000 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
1001 class(
batch_t),
intent(in) :: this
1002 integer,
intent(in) :: linear_index
1004 ist = this%ist_idim_index(linear_index, 1)
1011 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
1012 class(
batch_t),
intent(in) :: this
1013 integer,
intent(in) :: linear_index
1015 idim = this%ist_idim_index(linear_index, 2)
1030 class(
batch_t),
intent(inout) :: this
1031 type(mpi_grp_t),
intent(in) :: mpi_grp
1032 type(mpi_win),
intent(out) :: rma_win
1036 if (mpi_grp%size > 1)
then
1038 assert(.not. accel_is_enabled())
1042 if (this%type() == type_cmplx)
then
1044 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1045 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win, mpi_err)
1047 else if (this%type() == type_float)
then
1049 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1050 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win, mpi_err)
1053 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1054 call messages_fatal(1)
1058 rma_win = mpi_win_null
1072 class(
batch_t),
intent(inout) :: this
1073 type(mpi_win),
intent(inout) :: rma_win
1077 if (rma_win /= mpi_win_null)
then
1079 call mpi_win_free(rma_win, mpi_err)
1081 call this%do_unpack()
1091 class(
batch_t),
intent(in) :: this
1092 integer,
intent(in) :: np
1094 logical,
optional,
intent(in) :: async
1096 integer(int64) :: localsize, dim2, dim3
1100 call profiling_in(
"BATCH_COPY_DATA_TO")
1103 call this%check_compatibility_with(dest, type_check=.false.)
1105 if (this%type() == dest%type())
then
1106 select case (this%status())
1108 call accel_set_kernel_arg(kernel_copy, 0, np)
1109 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1110 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1111 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1112 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
1114 localsize = accel_kernel_workgroup_size(kernel_copy)/dest%pack_size_real(1)
1116 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1117 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1119 call accel_kernel_run(kernel_copy, (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1121 if(.not. optional_default(async, .false.))
call accel_finish()
1124 if (np*this%pack_size(1) > huge(0_int32))
then
1127 if (dest%type() == type_float)
then
1128 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1130 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1134 if (dest%type() == type_float)
then
1135 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1137 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
1142 do ist = 1, dest%nst_linear
1143 if (dest%type() == type_cmplx)
then
1144 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1146 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1151 else if (this%type() == type_cmplx)
then
1153 select case (this%status())
1155 call accel_set_kernel_arg(kernel_copy_complex_to_real, 0, np)
1156 call accel_set_kernel_arg(kernel_copy_complex_to_real, 1, this%ff_device)
1157 call accel_set_kernel_arg(kernel_copy_complex_to_real, 2,
log2(int(this%pack_size_real(1), int32)))
1158 call accel_set_kernel_arg(kernel_copy_complex_to_real, 3, dest%ff_device)
1159 call accel_set_kernel_arg(kernel_copy_complex_to_real, 4,
log2(int(dest%pack_size_real(1), int32)))
1161 localsize = accel_kernel_workgroup_size(kernel_copy_complex_to_real)/dest%pack_size_real(1)
1163 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1164 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1166 call accel_kernel_run(kernel_copy_complex_to_real, &
1167 (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1169 if(.not. optional_default(async, .false.))
call accel_finish()
1175 do ist = 1, dest%nst_linear
1176 dest%dff_pack(ist, ip) = real(this%zff_pack(ist, ip), real64)
1182 do ist = 1, dest%nst_linear
1185 dest%dff_linear(ip, ist) = real(this%zff_linear(ip, ist), real64)
1191 else if (this%type() == type_float)
then
1193 select case (this%status())
1195 call accel_set_kernel_arg(kernel_copy_real_to_complex, 0, np)
1196 call accel_set_kernel_arg(kernel_copy_real_to_complex, 1, this%ff_device)
1197 call accel_set_kernel_arg(kernel_copy_real_to_complex, 2,
log2(int(this%pack_size_real(1), int32)))
1198 call accel_set_kernel_arg(kernel_copy_real_to_complex, 3, dest%ff_device)
1199 call accel_set_kernel_arg(kernel_copy_real_to_complex, 4,
log2(int(dest%pack_size_real(1), int32)))
1201 localsize = accel_kernel_workgroup_size(kernel_copy_real_to_complex)/this%pack_size_real(1)
1203 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1204 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1206 call accel_kernel_run(kernel_copy_real_to_complex, &
1207 (/this%pack_size_real(1), dim2, dim3/), (/this%pack_size_real(1), localsize, 1_int64/))
1209 if(.not. optional_default(async, .false.))
call accel_finish()
1215 do ist = 1, dest%nst_linear
1216 dest%zff_pack(ist, ip) = cmplx(this%dff_pack(ist, ip), m_zero, real64)
1222 do ist = 1, dest%nst_linear
1225 dest%zff_linear(ip, ist) = cmplx(this%dff_linear(ip, ist), m_zero, real64)
1232 message(1) =
"Error! This should not happen."
1233 call messages_fatal(1)
1236 call profiling_out(
"BATCH_COPY_DATA_TO")
1244 class(
batch_t),
intent(in) :: this
1245 class(
batch_t),
intent(in) :: target
1246 logical,
optional,
intent(in) :: only_check_dim
1247 logical,
optional,
intent(in) :: type_check
1251 if (optional_default(type_check, .
true.))
then
1252 assert(this%type() ==
target%type())
1254 if (.not. optional_default(only_check_dim, .false.))
then
1255 assert(this%nst_linear ==
target%nst_linear)
1257 assert(this%status() ==
target%status())
1258 assert(this%dim ==
target%dim)
1268 class(
batch_t),
intent(inout) :: this
1269 integer,
intent(in) :: st_start
1270 integer,
intent(in) :: st_end
1272 integer :: idim, ii, ist
1276 do ist = st_start, st_end
1278 do idim = 1, this%dim
1279 ii = this%dim*(ist - st_start) + idim
1280 this%ist_idim_index(ii, 1) = ist
1281 this%ist_idim_index(ii, 2) = idim
1283 this%ist(ist - st_start + 1) = ist
1287 this%pack_size(1) = pad_pow2(this%nst_linear)
1288 this%pack_size(2) = this%np
1289 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
1291 this%pack_size_real = this%pack_size
1292 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1299#include "batch_inc.F90"
1302#include "complex.F90"
1303#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
integer, parameter, public accel_mem_read_write
subroutine, public accel_release_buffer(this)
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_CMPLX valued mesh functions psi.
subroutine batch_check_compatibility_with(this, target, only_check_dim, type_check)
check whether two batches have compatible dimensions (and type)
integer, parameter cl_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the o...
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debuging purpose only.
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
clone a batch to a new batch
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
make a copy of a batch
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine batch_do_pack(this, copy, async)
pack the data in a batch
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), public type_float
type(type_t), public type_cmplx
type(type_t), public type_none
Class defining batches of mesh functions.