main/doxygen_doc/batch__ops_8F90_source.html

!! Copyright (C) 2008 X. Andrade

!!

!! This program is free software; you can redistribute it and/or modify

!! it under the terms of the GNU General Public License as published by

!! the Free Software Foundation; either version 2, or (at your option)

!! any later version.

!!

!! This program is distributed in the hope that it will be useful,

!! but WITHOUT ANY WARRANTY; without even the implied warranty of

!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

!! GNU General Public License for more details.

!!

!! You should have received a copy of the GNU General Public License

!! along with this program; if not, write to the Free Software

!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

!! 02110-1301, USA.

!!


#include "global.h"


module batch_ops_oct_m

  use accel_oct_m

  use batch_oct_m

  use blas_oct_m

  use debug_oct_m

  use iso_c_binding

  use global_oct_m

  use lalg_basic_oct_m

  use math_oct_m

  use messages_oct_m

  use profiling_oct_m

  use types_oct_m


  implicit none


  private

  public ::                         &

    batch_set_zero,                 &

    batch_axpy,                     &

    batch_scal,                     &

    batch_xpay,                     &

    batch_set_state,                &

    batch_get_state,                &

    batch_get_points,               &

    batch_set_points,               &

    batch_points_block_size,        &

    batch_mul,                      &

    batch_add_with_map,             &

    batch_copy_with_map,            &

    dbatch_axpy_function,           &

    zbatch_axpy_function,           &

    dbatch_ax_function_py,          &

    zbatch_ax_function_py,          &

    dbatch_copy_with_map_to_array,  &

    zbatch_copy_with_map_to_array,  &

    batch_split_complex


  !> Batchified version of the BLAS axpy routine.
  !!
  !! The specific procedures apply a 'pair-wise' axpy operation to all
  !! functions of the batches xx and yy. The _const variants take
  !! (np, aa, xx, yy); the _vec variants take (np, aa, xx, yy, a_start, a_full).
  !! The d/z prefixes are presumably the real and complex instantiations.
  interface batch_axpy

    module procedure dbatch_axpy_const

    module procedure zbatch_axpy_const

    module procedure dbatch_axpy_vec

    module procedure zbatch_axpy_vec

  end interface batch_axpy


  !> Scale a batch by a constant or vector.
  !!
  !! The _const variants (np, aa, xx) scale all functions in a batch by the
  !! constant aa; the _vec variants (np, aa, xx, a_start, a_full) scale all
  !! functions by a state-dependent constant.
  interface batch_scal

    module procedure dbatch_scal_const

    module procedure zbatch_scal_const

    module procedure dbatch_scal_vec

    module procedure zbatch_scal_vec

  end interface batch_scal


  !> Batchified xpay operation.
  !!
  !! The _const variants (np, xx, aa, yy) calculate
  !!   yy(ist) = xx(ist) + aa*yy(ist)
  !! and the _vec variants (np, xx, aa, yy, a_start, a_full) calculate
  !!   yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:)
  !! for a batch.
  interface batch_xpay

    module procedure dbatch_xpay_vec

    module procedure zbatch_xpay_vec

    module procedure dbatch_xpay_const

    module procedure zbatch_xpay_const

  end interface batch_xpay


  !> Add-with-map operation on batches, called as (np, map, xx, yy, zz).
  !!
  !! The _cpu variant takes the map as a host integer array; the _accel
  !! variant takes a map that is already stored in an accel_mem_t buffer.
  interface batch_add_with_map

    module procedure batch_add_with_map_cpu

    module procedure batch_add_with_map_accel

  end interface batch_add_with_map


  !> Copy-with-map operation on batches, called as (np, map, xx, yy).
  !!
  !! The _cpu variant takes the map as a host integer array; the _accel
  !! variant takes a map that is already stored in an accel_mem_t buffer.
  interface batch_copy_with_map

    module procedure batch_copy_with_map_cpu

    module procedure batch_copy_with_map_accel

  end interface batch_copy_with_map


  !> There are several ways to call batch_set_state and batch_get_state:
  !!
  !! - (this, ist, np, psi):   write a single state with np points into a
  !!                           batch at position ist
  !! - (this, index, np, psi): write a single state with np points into a
  !!                           batch at the position defined by index
  !! - (this, ii, np, psi):    write a set of states with np points into a
  !!                           batch
  interface batch_set_state

    module procedure dbatch_set_state1

    module procedure zbatch_set_state1

    module procedure dbatch_set_state2

    module procedure zbatch_set_state2

    module procedure dbatch_set_state3

    module procedure zbatch_set_state3

  end interface batch_set_state


  !> Get states with np points from a batch.
  !!
  !! The call signatures mirror those of batch_set_state:
  !! (this, ist, np, psi), (this, index, np, psi) and (this, ii, np, psi).
  interface batch_get_state

    module procedure dbatch_get_state1

    module procedure zbatch_get_state1

    module procedure dbatch_get_state2

    module procedure zbatch_get_state2

    module procedure dbatch_get_state3

    module procedure zbatch_get_state3

  end interface batch_get_state


  !> Copy a set of points (sp to ep) between a batch and psi.
  !!
  !! The d/z variants are called as (this, sp, ep, psi). The _accel variant is
  !! the GPU version; it takes psi as an accel_mem_t buffer together with
  !! ldpsi1 and ldpsi2.
  interface batch_get_points

    module procedure dbatch_get_points

    module procedure zbatch_get_points

    module procedure batch_get_points_accel

  end interface batch_get_points


  !> Counterpart of batch_get_points for the point range sp to ep.
  !!
  !! The d/z variants are called as (this, sp, ep, psi). The _accel variant is
  !! the GPU version; it takes psi as an accel_mem_t buffer together with
  !! ldpsi1 and ldpsi2.
  interface batch_set_points

    module procedure dbatch_set_points

    module procedure zbatch_set_points

    module procedure batch_set_points_accel

  end interface batch_set_points


  !> Multiply all functions in a batch pointwise by a given mesh function ff.
  !! Called as (np, ff, xx, yy).
  interface batch_mul

    module procedure dbatch_mul

    module procedure zbatch_mul

  end interface batch_mul


contains


  !--------------------------------------------------------------

  !> Fill all mesh functions of the batch with zero.
  !!
  !! The storage that is cleared depends on this%status(): the device buffer
  !! for device-packed batches, the packed host array for packed batches, and
  !! the linear host array for unpacked batches. Any other status is fatal.
  !!
  !! \param this   batch to be cleared
  !! \param np     optional number of points to clear; defaults to the point
  !!               extent of the storage in use and must not exceed it
  !! \param async  optional flag forwarded to accel_set_buffer_to_zero
  !!               (only used for device-packed batches)
  subroutine batch_set_zero(this, np, async)
    class(batch_t),     intent(inout) :: this
    integer, optional,  intent(in)    :: np
    logical, optional,  intent(in)    :: async

    integer :: istate, ipoint, npoints, max_points, nst_pack

    push_sub(batch_set_zero)

    assert(not_in_openmp())

    call profiling_in("BATCH_SET_ZERO")

    select case (this%status())
    case (batch_device_packed)
      nst_pack = int(this%pack_size(1), int32)
      max_points = int(this%pack_size(2), int32)
      npoints = optional_default(np, max_points)
      assert(npoints <= max_points)
      ! the device buffer is cleared as one range of pack_size(1)*npoints elements
      call accel_set_buffer_to_zero(this%ff_device, this%type(), (nst_pack * npoints), async=async)

    case (batch_packed)
      nst_pack = int(this%pack_size(1), int32)
      max_points = int(this%pack_size(2), int32)
      npoints = optional_default(np, max_points)
      assert(npoints <= max_points)
      ! packed layout: the state index is the first array index, so each
      ! point owns one column that is cleared in a single assignment
      if (this%type() == type_float) then
        !$omp parallel do schedule(static)
        do ipoint = 1, npoints
          this%dff_pack(1:nst_pack, ipoint) = m_zero
        end do
      else
        !$omp parallel do schedule(static)
        do ipoint = 1, npoints
          this%zff_pack(1:nst_pack, ipoint) = m_z0
        end do
      end if

    case (batch_not_packed)
      ! unpacked layout: the point index comes first, one column per state
      if (this%type() == type_float) then
        max_points = ubound(this%dff_linear, dim=1)
        npoints = optional_default(np, max_points)
        assert(npoints <= max_points)
        do istate = 1, this%nst_linear
          !$omp parallel do schedule(static)
          do ipoint = 1, npoints
            this%dff_linear(ipoint, istate) = m_zero
          end do
        end do
      else
        max_points = ubound(this%zff_linear, dim=1)
        npoints = optional_default(np, max_points)
        assert(npoints <= max_points)
        do istate = 1, this%nst_linear
          !$omp parallel do schedule(static)
          do ipoint = 1, npoints
            this%zff_linear(ipoint, istate) = m_z0
          end do
        end do
      end if

    case default
      message(1) = "batch_set_zero: unknown batch status."
      call messages_fatal(1)

    end select

    call profiling_out("BATCH_SET_ZERO")

    pop_sub(batch_set_zero)
  end subroutine batch_set_zero


  ! --------------------------------------------------------------

  !

  !> GPU version of batch_get_points.
  !!
  !! Runs the 'get_points' kernel from points.cl for the point range sp to ep,
  !! passing the batch device buffer and psi as kernel arguments. Only
  !! device-packed batches are supported; host batches abort through
  !! messages_not_implemented.
  !!
  !! \param this    batch whose device buffer (ff_device) is given to the kernel
  !! \param sp      first point of the range
  !! \param ep      last point of the range
  !! \param psi     device buffer given to the kernel
  !! \param ldpsi1  passed to the kernel multiplied by tsize
  !!                (presumably the first leading dimension of psi -- confirm)
  !! \param ldpsi2  passed to the kernel unchanged
  !!                (presumably the second leading dimension of psi -- confirm)
  subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
    class(batch_t),      intent(in)    :: this
    integer,             intent(in)    :: sp
    integer,             intent(in)    :: ep
    type(accel_mem_t),   intent(inout) :: psi
    integer,             intent(in)    :: ldpsi1
    integer,             intent(in)    :: ldpsi2

    integer :: tsize, nmap, ilin, icomp, offset
    integer(int64) :: nst_real
    type(accel_kernel_t), save :: kernel
    integer, allocatable :: ist_map(:), idim_map(:)
    type(accel_mem_t) :: ist_buffer, idim_buffer

    push_sub(batch_get_points_accel)
    call profiling_in("GET_POINTS")

    select case (this%status())
    case (batch_device_packed)

      ! number of real-sized components per element of the batch type
      tsize = types_get_size(this%type())/types_get_size(type_float)
      nmap = this%nst_linear*tsize
      nst_real = this%pack_size_real(1)

      safe_allocate(ist_map(1:nmap))
      safe_allocate(idim_map(1:nmap))

      ! zero-based lookup tables with one entry per real component
      do ilin = 1, this%nst_linear
        offset = tsize*(ilin - 1)
        do icomp = 1, tsize
          ist_map(offset + icomp) = tsize*(this%linear_to_ist(ilin) - 1) + icomp - 1
          idim_map(offset + icomp) = this%linear_to_idim(ilin) - 1
        end do
      end do

      call accel_create_buffer(ist_buffer, accel_mem_read_only, type_integer, nmap)
      call accel_write_buffer(ist_buffer, nmap, ist_map)
      call accel_create_buffer(idim_buffer, accel_mem_read_only, type_integer, nmap)
      call accel_write_buffer(idim_buffer, nmap, idim_map)

      call accel_kernel_start_call(kernel, 'points.cl', 'get_points')

      call accel_set_kernel_arg(kernel, 0, sp)
      call accel_set_kernel_arg(kernel, 1, ep)
      call accel_set_kernel_arg(kernel, 2, ist_buffer)
      call accel_set_kernel_arg(kernel, 3, idim_buffer)
      call accel_set_kernel_arg(kernel, 4, nmap)
      call accel_set_kernel_arg(kernel, 5, this%ff_device)
      call accel_set_kernel_arg(kernel, 6, int(nst_real, int32))
      call accel_set_kernel_arg(kernel, 7, psi)
      call accel_set_kernel_arg(kernel, 8, ldpsi1*tsize)
      call accel_set_kernel_arg(kernel, 9, ldpsi2)

      ! one work-group of pack_size_real(1) items per point in the range
      call accel_kernel_run(kernel, [nst_real, int(ep - sp + 1, int64)], [nst_real, 1_int64])

      call accel_release_buffer(ist_buffer)
      call accel_release_buffer(idim_buffer)
      safe_deallocate_a(ist_map)
      safe_deallocate_a(idim_map)

    case (batch_not_packed, batch_packed)
      call messages_not_implemented('batch_get_points_accel for non-CL batches')

    end select

    call profiling_out("GET_POINTS")

    pop_sub(batch_get_points_accel)
  end subroutine batch_get_points_accel


  ! --------------------------------------------------------------

  !

  !> GPU version of batch_set_points.
  !!
  !! Runs the 'set_points' kernel from points.cl for the point range sp to ep,
  !! passing psi and the batch device buffer as kernel arguments. Only
  !! device-packed batches are supported; host batches abort through
  !! messages_not_implemented.
  !!
  !! \param this    batch whose device buffer (ff_device) is given to the kernel
  !! \param sp      first point of the range
  !! \param ep      last point of the range
  !! \param psi     device buffer given to the kernel
  !! \param ldpsi1  passed to the kernel multiplied by tsize
  !!                (presumably the first leading dimension of psi -- confirm)
  !! \param ldpsi2  passed to the kernel unchanged
  !!                (presumably the second leading dimension of psi -- confirm)
  subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
    class(batch_t),      intent(inout) :: this
    integer,             intent(in)    :: sp
    integer,             intent(in)    :: ep
    type(accel_mem_t),   intent(in)    :: psi
    integer,             intent(in)    :: ldpsi1
    integer,             intent(in)    :: ldpsi2

    integer :: tsize, nmap, imap, ilin, icomp
    integer(int64) :: nst_real
    type(accel_kernel_t), save :: kernel
    integer, allocatable :: ist_map(:), idim_map(:)
    type(accel_mem_t) :: ist_buffer, idim_buffer

    push_sub(batch_set_points_accel)
    call profiling_in("SET_POINTS")

    select case (this%status())
    case (batch_device_packed)

      ! number of real-sized components per element of the batch type
      tsize = types_get_size(this%type())/types_get_size(type_float)
      nmap = this%nst_linear*tsize
      nst_real = this%pack_size_real(1)

      safe_allocate(ist_map(1:nmap))
      safe_allocate(idim_map(1:nmap))

      ! zero-based lookup tables with one entry per real component;
      ! imap runs over (linear state, component) pairs, component fastest
      do imap = 1, nmap
        ilin = (imap - 1)/tsize + 1
        icomp = imap - tsize*(ilin - 1)
        ist_map(imap) = tsize*(this%linear_to_ist(ilin) - 1) + icomp - 1
        idim_map(imap) = this%linear_to_idim(ilin) - 1
      end do

      call accel_create_buffer(ist_buffer, accel_mem_read_only, type_integer, nmap)
      call accel_write_buffer(ist_buffer, nmap, ist_map)
      call accel_create_buffer(idim_buffer, accel_mem_read_only, type_integer, nmap)
      call accel_write_buffer(idim_buffer, nmap, idim_map)

      call accel_kernel_start_call(kernel, 'points.cl', 'set_points')

      call accel_set_kernel_arg(kernel, 0, sp)
      call accel_set_kernel_arg(kernel, 1, ep)
      call accel_set_kernel_arg(kernel, 2, ist_buffer)
      call accel_set_kernel_arg(kernel, 3, idim_buffer)
      call accel_set_kernel_arg(kernel, 4, nmap)
      call accel_set_kernel_arg(kernel, 5, psi)
      call accel_set_kernel_arg(kernel, 6, ldpsi1*tsize)
      call accel_set_kernel_arg(kernel, 7, ldpsi2)
      call accel_set_kernel_arg(kernel, 8, this%ff_device)
      call accel_set_kernel_arg(kernel, 9, int(nst_real, int32))

      ! one work-group of pack_size_real(1) items per point in the range
      call accel_kernel_run(kernel, [nst_real, int(ep - sp + 1, int64)], [nst_real, 1_int64])

      call accel_release_buffer(ist_buffer)
      call accel_release_buffer(idim_buffer)
      safe_deallocate_a(ist_map)
      safe_deallocate_a(idim_map)

    case (batch_not_packed, batch_packed)
      call messages_not_implemented('batch_set_points_accel for non-CL batches')

    end select

    call profiling_out("SET_POINTS")

    pop_sub(batch_set_points_accel)
  end subroutine batch_set_points_accel


  ! -------------------------

  !

  !> Determine the device block size.
  !!
  !! \return the fixed block size (currently the constant 61440)
  integer pure function batch_points_block_size() result(block_size)
    ! named constant instead of a bare literal in the assignment
    integer, parameter :: fixed_block_size = 61440

    block_size = fixed_block_size
  end function batch_points_block_size


! -------------------------

  !> Add-with-map entry point for a map stored in a host integer array.
  !!
  !! If xx is device-packed, the first np map entries are uploaded to a
  !! temporary read-only device buffer and the work is delegated to
  !! batch_add_with_map_accel. Otherwise the real or complex host
  !! implementation is called, chosen by the type of xx.
  !!
  !! \param np   number of map entries to process
  !! \param map  host array holding the map
  !! \param xx   input batch; its status and type select the implementation
  !! \param yy   input batch
  !! \param zz   batch that is updated
  subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
    integer,           intent(in)    :: np
    integer,           intent(in)    :: map(:)
    class(batch_t),    intent(in)    :: xx
    class(batch_t),    intent(in)    :: yy
    class(batch_t),    intent(inout) :: zz

    type(accel_mem_t) :: map_buffer

    push_sub(batch_add_with_map_cpu)

    if (xx%status() == batch_device_packed) then
      ! the device routine needs the map in device memory:
      ! upload it, run the operation, and release the temporary buffer
      call accel_create_buffer(map_buffer, accel_mem_read_only, type_integer, np)
      call accel_write_buffer(map_buffer, np, map)
      call batch_add_with_map_accel(np, map_buffer, xx, yy, zz)
      call accel_release_buffer(map_buffer)
    else if (xx%type() == type_float) then
      call dbatch_add_with_map(np, map, xx, yy, zz)
    else
      call zbatch_add_with_map(np, map, xx, yy, zz)
    end if

    pop_sub(batch_add_with_map_cpu)
  end subroutine batch_add_with_map_cpu


! -------------------------

  !> Add-with-map operation for a map that already resides in device memory.
  !!
  !! Runs the 'add_with_map' kernel from copy.cl on the device buffers of xx,
  !! yy and zz. Nothing is launched when np is not positive, matching
  !! batch_copy_with_map_accel: an empty map would otherwise request a grid
  !! of size zero in the second dimension.
  !!
  !! \param np   number of map entries to process
  !! \param map  device buffer holding the map
  !! \param xx   input batch (device buffer is kernel argument 2)
  !! \param yy   input batch (device buffer is kernel argument 4)
  !! \param zz   batch given to the kernel as argument 6; declared inout
  subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
    integer,            intent(in)    :: np
    class(accel_mem_t), intent(in)    :: map
    class(batch_t),     intent(in)    :: xx
    class(batch_t),     intent(in)    :: yy
    class(batch_t),     intent(inout) :: zz

    type(accel_kernel_t), save :: kernel
    integer(int64) :: localsize, dim3, dim2

    push_sub(batch_add_with_map_accel)

    call accel_kernel_start_call(kernel, 'copy.cl', 'add_with_map')

    ! execute only if map has at least one element
    if (np > 0) then
      call accel_set_kernel_arg(kernel, 0, np)
      call accel_set_kernel_arg(kernel, 1, map)
      call accel_set_kernel_arg(kernel, 2, xx%ff_device)
      ! the kernel is given log2 of each pack size
      ! (presumably pack sizes are powers of two -- confirm)
      call accel_set_kernel_arg(kernel, 3, log2(int(xx%pack_size_real(1), int32)))
      call accel_set_kernel_arg(kernel, 4, yy%ff_device)
      call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))
      call accel_set_kernel_arg(kernel, 6, zz%ff_device)
      call accel_set_kernel_arg(kernel, 7, log2(int(zz%pack_size_real(1), int32)))

      ! points handled per work-group once the state index fills dimension 1
      localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size_real(1)

      ! dimension 2 is capped at accel_max_size_per_dim(2)*localsize;
      ! dimension 3 counts how many such slabs are needed to cover np
      dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
      dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))

      call accel_kernel_run(kernel, (/xx%pack_size_real(1), dim2, dim3/), (/xx%pack_size_real(1), localsize, 1_int64/))
    end if

    pop_sub(batch_add_with_map_accel)
  end subroutine batch_add_with_map_accel


! -------------------------

  !> Copy-with-map entry point for a map stored in a host integer array.
  !!
  !! If xx is device-packed, the first np map entries are uploaded to a
  !! temporary read-only device buffer and the work is delegated to
  !! batch_copy_with_map_accel. Otherwise the real or complex host
  !! implementation is called, chosen by the type of xx.
  !!
  !! \param np   number of map entries to process
  !! \param map  host array holding the map
  !! \param xx   input batch; its status and type select the implementation
  !! \param yy   batch that is updated
  subroutine batch_copy_with_map_cpu(np, map, xx, yy)
    integer,           intent(in)    :: np
    integer,           intent(in)    :: map(:)
    class(batch_t),    intent(in)    :: xx
    class(batch_t),    intent(inout) :: yy

    type(accel_mem_t) :: map_buffer

    push_sub(batch_copy_with_map_cpu)

    if (xx%status() == batch_device_packed) then
      ! the device routine needs the map in device memory:
      ! upload it, run the operation, and release the temporary buffer
      call accel_create_buffer(map_buffer, accel_mem_read_only, type_integer, np)
      call accel_write_buffer(map_buffer, np, map)
      call batch_copy_with_map_accel(np, map_buffer, xx, yy)
      call accel_release_buffer(map_buffer)
    else if (xx%type() == type_float) then
      call dbatch_copy_with_map(np, map, xx, yy)
    else
      call zbatch_copy_with_map(np, map, xx, yy)
    end if

    pop_sub(batch_copy_with_map_cpu)
  end subroutine batch_copy_with_map_cpu


! -------------------------

  !> Copy-with-map operation for a map that already resides in device memory.
  !!
  !! Runs the 'copy_with_map' kernel from copy.cl on the device buffers of xx
  !! and yy. The kernel is only launched when np is positive.
  !!
  !! \param np   number of map entries to process
  !! \param map  device buffer holding the map
  !! \param xx   input batch (device buffer is kernel argument 2)
  !! \param yy   batch given to the kernel as argument 4; declared inout
  subroutine batch_copy_with_map_accel(np, map, xx, yy)
    integer,            intent(in)    :: np
    class(accel_mem_t), intent(in)    :: map
    class(batch_t),     intent(in)    :: xx
    class(batch_t),     intent(inout) :: yy

    type(accel_kernel_t), save :: kernel
    integer(int64) :: nst_real, points_per_group, slab_size, nslabs, npadded

    push_sub(batch_copy_with_map_accel)

    call accel_kernel_start_call(kernel, 'copy.cl', 'copy_with_map')

    ! an empty map means there is nothing to launch
    if (np > 0) then
      nst_real = xx%pack_size_real(1)

      call accel_set_kernel_arg(kernel, 0, np)
      call accel_set_kernel_arg(kernel, 1, map)
      call accel_set_kernel_arg(kernel, 2, xx%ff_device)
      call accel_set_kernel_arg(kernel, 3, log2(int(nst_real, int32)))
      call accel_set_kernel_arg(kernel, 4, yy%ff_device)
      call accel_set_kernel_arg(kernel, 5, log2(int(yy%pack_size_real(1), int32)))

      ! points handled per work-group once the state index fills dimension 1
      points_per_group = accel_kernel_workgroup_size(kernel)/nst_real

      ! dimension 2 is capped at slab_size; dimension 3 counts the slabs
      slab_size = accel_max_size_per_dim(2)*points_per_group
      nslabs = np/slab_size + 1
      npadded = min(slab_size, pad(np, points_per_group))

      call accel_kernel_run(kernel, [nst_real, npadded, nslabs], [nst_real, points_per_group, 1_int64])
    end if

    pop_sub(batch_copy_with_map_accel)
  end subroutine batch_copy_with_map_accel


  ! -------------------------

  !

  !> Extract the real and imaginary parts of a complex batch.
  !!
  !! For the first np points, the real part of xx is stored in yy and the
  !! imaginary part in zz. xx must be complex, yy and zz must be real, and
  !! all three batches must have the same status. For device-packed batches
  !! the 'split_complex' kernel from split.cl is used; it is only launched
  !! when np is positive, so that np = 0 is a no-op in every storage mode
  !! instead of requesting a zero-sized grid.
  !!
  !! \param np  number of points to process
  !! \param xx  complex input batch
  !! \param yy  real batch receiving the real part
  !! \param zz  real batch receiving the imaginary part
  subroutine batch_split_complex(np, xx, yy, zz)
    integer,           intent(in)    :: np
    class(batch_t),    intent(in)    :: xx
    class(batch_t),    intent(inout) :: yy
    class(batch_t),    intent(inout) :: zz

    integer :: ist_linear, ip
    type(accel_kernel_t), save :: kernel
    integer(int64) :: localsize, dim3, dim2

    push_sub(batch_split_complex)

    assert(xx%type() == type_cmplx)
    assert(yy%type() == type_float)
    assert(zz%type() == type_float)
    assert(xx%status() == yy%status())
    assert(xx%status() == zz%status())

    select case (xx%status())
    case (batch_not_packed)
      ! unpacked layout: the point index comes first, one column per state
      do ist_linear = 1, xx%nst_linear
        !$omp parallel do schedule(static)
        do ip = 1, np
          yy%dff_linear(ip, ist_linear) = real(xx%zff_linear(ip, ist_linear), real64)
          zz%dff_linear(ip, ist_linear) = aimag(xx%zff_linear(ip, ist_linear))
        end do
      end do

    case (batch_packed)
      ! packed layout: the state index comes first, so it is the inner loop
      !$omp parallel do private(ist_linear) schedule(static)
      do ip = 1, np
        do ist_linear = 1, xx%nst_linear
          yy%dff_pack(ist_linear, ip) = real(xx%zff_pack(ist_linear, ip), real64)
          zz%dff_pack(ist_linear, ip) = aimag(xx%zff_pack(ist_linear, ip))
        end do
      end do

    case (batch_device_packed)
      call accel_kernel_start_call(kernel, 'split.cl', 'split_complex')

      ! execute only if there is at least one point to process
      if (np > 0) then
        ! NOTE(review): the kernel receives pack_size(2) as its point count
        ! while the grid below is sized from np -- confirm this is intended
        call accel_set_kernel_arg(kernel, 0, int(xx%pack_size(2), int32))
        call accel_set_kernel_arg(kernel, 1, xx%ff_device)
        call accel_set_kernel_arg(kernel, 2, log2(int(xx%pack_size(1), int32)))
        call accel_set_kernel_arg(kernel, 3, yy%ff_device)
        call accel_set_kernel_arg(kernel, 4, log2(int(yy%pack_size(1), int32)))
        call accel_set_kernel_arg(kernel, 5, zz%ff_device)
        call accel_set_kernel_arg(kernel, 6, log2(int(zz%pack_size(1), int32)))

        ! points handled per work-group once the state index fills dimension 1
        localsize = accel_kernel_workgroup_size(kernel)/xx%pack_size(1)

        ! dimension 2 is capped at accel_max_size_per_dim(2)*localsize;
        ! dimension 3 counts how many such slabs are needed to cover np
        dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
        dim2 = min(accel_max_size_per_dim(2)*localsize, pad(np, localsize))

        call accel_kernel_run(kernel, (/xx%pack_size(1), dim2, dim3/), (/xx%pack_size(1), localsize, 1_int64/))
      end if
    end select

    pop_sub(batch_split_complex)
  end subroutine batch_split_complex


#include "real.F90"

#include "batch_ops_inc.F90"

#include "undef.F90"


#include "complex.F90"

#include "batch_ops_inc.F90"

#include "undef.F90"


end module batch_ops_oct_m


!! Local Variables:

!! mode: f90

!! coding: utf-8

!! End:

accel_oct_m::accel_create_buffer
Definition: accel.F90:312

accel_oct_m::accel_kernel_run
Definition: accel.F90:316

accel_oct_m::accel_set_buffer_to_zero
Definition: accel.F90:320

accel_oct_m::accel_set_kernel_arg
Definition: accel.F90:351

accel_oct_m::accel_write_buffer
Definition: accel.F90:324

batch_ops_oct_m::batch_add_with_map
Definition: batch_ops.F90:179

batch_ops_oct_m::batch_axpy
batchified version of the BLAS axpy routine:
Definition: batch_ops.F90:156

batch_ops_oct_m::batch_copy_with_map
Definition: batch_ops.F90:184

batch_ops_oct_m::batch_get_points
Definition: batch_ops.F90:221

batch_ops_oct_m::batch_get_state
Definition: batch_ops.F90:212

batch_ops_oct_m::batch_mul
Definition: batch_ops.F90:233

batch_ops_oct_m::batch_scal
scale a batch by a constant or vector
Definition: batch_ops.F90:164

batch_ops_oct_m::batch_set_points
Definition: batch_ops.F90:227

batch_ops_oct_m::batch_set_state
There are several ways to call batch_set_state and batch_get_state:
Definition: batch_ops.F90:203

batch_ops_oct_m::batch_xpay
batchified version of
Definition: batch_ops.F90:172

global_oct_m::optional_default
Definition: global.F90:275

log2
double log2(double __x) __attribute__((__nothrow__

accel_oct_m
Definition: accel.F90:116

accel_oct_m::accel_kernel_start_call
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1375

accel_oct_m::accel_release_buffer
subroutine, public accel_release_buffer(this, async)
Definition: accel.F90:918

accel_oct_m::accel_mem_read_only
integer, parameter, public accel_mem_read_only
Definition: accel.F90:195

batch_oct_m
This module implements batches of mesh functions.
Definition: batch.F90:135

batch_oct_m::batch_not_packed
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286

batch_oct_m::batch_device_packed
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286

batch_oct_m::batch_packed
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286

batch_ops_oct_m
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118

batch_ops_oct_m::batch_copy_with_map_accel
subroutine batch_copy_with_map_accel(np, map, xx, yy)
Definition: batch_ops.F90:552

batch_ops_oct_m::zbatch_copy_with_map_to_array
subroutine, public zbatch_copy_with_map_to_array(np, map, xx, array)
Transfer a batch from the mesh to an array on the submesh (defined by a map)
Definition: batch_ops.F90:3573

batch_ops_oct_m::dbatch_ax_function_py
subroutine, public dbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:1040

batch_ops_oct_m::dbatch_set_state1
subroutine dbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:1466

batch_ops_oct_m::dbatch_get_state3
subroutine dbatch_get_state3(this, ii, np, psi)
Definition: batch_ops.F90:1742

batch_ops_oct_m::dbatch_axpy_const
subroutine dbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:695

batch_ops_oct_m::dbatch_mul
subroutine dbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:1928

batch_ops_oct_m::zbatch_get_state2
subroutine zbatch_get_state2(this, index, np, psi)
Definition: batch_ops.F90:3222

batch_ops_oct_m::zbatch_get_points
subroutine zbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3259

batch_ops_oct_m::dbatch_get_state2
subroutine dbatch_get_state2(this, index, np, psi)
Definition: batch_ops.F90:1725

batch_ops_oct_m::dbatch_xpay_vec
subroutine dbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:1295

batch_ops_oct_m::dbatch_scal_vec
subroutine dbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:1159

batch_ops_oct_m::dbatch_set_state2
subroutine dbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:1575

batch_ops_oct_m::dbatch_copy_with_map
subroutine dbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:2079

batch_ops_oct_m::dbatch_copy_with_map_to_array
subroutine, public dbatch_copy_with_map_to_array(np, map, xx, array)
Transfer a batch from the mesh to an array on the submesh (defined by a map)
Definition: batch_ops.F90:2119

batch_ops_oct_m::dbatch_get_state1
subroutine dbatch_get_state1(this, ist, np, psi)
Get a state with np points from a batch.
Definition: batch_ops.F90:1613

batch_ops_oct_m::dbatch_axpy_vec
subroutine dbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:784

batch_ops_oct_m::dbatch_set_state3
subroutine dbatch_set_state3(this, ii, np, psi)
Write a set of states with np points into a batch.
Definition: batch_ops.F90:1593

batch_ops_oct_m::zbatch_axpy_const
subroutine zbatch_axpy_const(np, aa, xx, yy)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2241

batch_ops_oct_m::zbatch_scal_const
subroutine zbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:2652

batch_ops_oct_m::zbatch_mul
subroutine zbatch_mul(np, ff, xx, yy)
multiply all functions in a batch pointwise by a given mesh function ff
Definition: batch_ops.F90:3426

batch_ops_oct_m::zbatch_xpay_const
subroutine zbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:2929

batch_ops_oct_m::batch_split_complex
subroutine, public batch_split_complex(np, xx, yy, zz)
extract the real and imaginary parts of a complex batch
Definition: batch_ops.F90:590

batch_ops_oct_m::batch_add_with_map_accel
subroutine batch_add_with_map_accel(np, map, xx, yy, zz)
Definition: batch_ops.F90:491

batch_ops_oct_m::dbatch_set_points
subroutine dbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1842

batch_ops_oct_m::zbatch_copy_with_map
subroutine zbatch_copy_with_map(np, map, xx, yy)
Definition: batch_ops.F90:3533

batch_ops_oct_m::batch_set_zero
subroutine, public batch_set_zero(this, np, async)
fill all mesh functions of the batch with zero
Definition: batch_ops.F90:244

batch_ops_oct_m::zbatch_ax_function_py
subroutine, public zbatch_ax_function_py(np, aa, psi, yy)
This routine performs a set of axpy operations adding the same function psi to all functions of a bat...
Definition: batch_ops.F90:2569

batch_ops_oct_m::zbatch_axpy_vec
subroutine zbatch_axpy_vec(np, aa, xx, yy, a_start, a_full)
This routine applies a 'pair-wise' axpy operation to all functions of the batches xx and yy,...
Definition: batch_ops.F90:2330

batch_ops_oct_m::batch_set_points_accel
subroutine batch_set_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_set_points.
Definition: batch_ops.F90:387

batch_ops_oct_m::dbatch_xpay_const
subroutine dbatch_xpay_const(np, xx, aa, yy)
calculate yy(ist) = xx(ist) + aa*yy(ist) for a batch
Definition: batch_ops.F90:1434

batch_ops_oct_m::dbatch_get_points
subroutine dbatch_get_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:1762

batch_ops_oct_m::batch_points_block_size
integer pure function, public batch_points_block_size()
determine the device block size
Definition: batch_ops.F90:456

batch_ops_oct_m::dbatch_add_with_map
subroutine dbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:2037

batch_ops_oct_m::zbatch_xpay_vec
subroutine zbatch_xpay_vec(np, xx, aa, yy, a_start, a_full)
calculate yy(ist,:) = xx(ist,:) + aa(ist)*yy(ist,:) for a batch
Definition: batch_ops.F90:2807

batch_ops_oct_m::zbatch_get_state1
subroutine zbatch_get_state1(this, ist, np, psi)
Get a state with np points from a batch.
Definition: batch_ops.F90:3095

batch_ops_oct_m::zbatch_set_state1
subroutine zbatch_set_state1(this, ist, np, psi)
Write a single state with np points into a batch at position ist.
Definition: batch_ops.F90:2961

batch_ops_oct_m::batch_add_with_map_cpu
subroutine batch_add_with_map_cpu(np, map, xx, yy, zz)
Definition: batch_ops.F90:463

batch_ops_oct_m::zbatch_scal_vec
subroutine zbatch_scal_vec(np, aa, xx, a_start, a_full)
scale all functions in a batch by state dependent constant
Definition: batch_ops.F90:2688

batch_ops_oct_m::zbatch_set_state2
subroutine zbatch_set_state2(this, index, np, psi)
Write a single state with np points into a batch at position defined by index.
Definition: batch_ops.F90:3057

batch_ops_oct_m::zbatch_axpy_function
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:2464

batch_ops_oct_m::zbatch_set_points
subroutine zbatch_set_points(this, sp, ep, psi)
copy a set of points into a mesh function
Definition: batch_ops.F90:3351

batch_ops_oct_m::zbatch_add_with_map
subroutine zbatch_add_with_map(np, map, xx, yy, zz)
Definition: batch_ops.F90:3491

batch_ops_oct_m::dbatch_axpy_function
subroutine, public dbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:935

batch_ops_oct_m::zbatch_get_state3
subroutine zbatch_get_state3(this, ii, np, psi)
Definition: batch_ops.F90:3239

batch_ops_oct_m::batch_get_points_accel
subroutine batch_get_points_accel(this, sp, ep, psi, ldpsi1, ldpsi2)
GPU version of batch_get_points.
Definition: batch_ops.F90:320

batch_ops_oct_m::zbatch_set_state3
subroutine zbatch_set_state3(this, ii, np, psi)
Write a set of states with np points into a batch.
Definition: batch_ops.F90:3075

batch_ops_oct_m::batch_copy_with_map_cpu
subroutine batch_copy_with_map_cpu(np, map, xx, yy)
Definition: batch_ops.F90:525

batch_ops_oct_m::dbatch_scal_const
subroutine dbatch_scal_const(np, aa, xx)
scale all functions in a batch by constant aa
Definition: batch_ops.F90:1123

blas_oct_m
This module contains interfaces for BLAS routines. You should not use these routines directly....
Definition: blas.F90:120

debug_oct_m
Definition: debug.F90:116

global_oct_m
Definition: global.F90:116

global_oct_m::m_zero
real(real64), parameter, public m_zero
Definition: global.F90:190

global_oct_m::not_in_openmp
logical pure function, public not_in_openmp()
Definition: global.F90:459

global_oct_m::m_z0
complex(real64), parameter, public m_z0
Definition: global.F90:200

lalg_basic_oct_m
Definition: lalg_basic.F90:116

math_oct_m
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117

messages_oct_m
Definition: messages.F90:117

messages_oct_m::messages_not_implemented
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1097

messages_oct_m::message
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162

messages_oct_m::messages_fatal
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:416

profiling_oct_m
Definition: profiling.F90:118

profiling_oct_m::profiling_out
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:625

profiling_oct_m::profiling_in
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554

types_oct_m
Definition: types.F90:116

types_oct_m::type_float
type(type_t), public type_float
Definition: types.F90:135

types_oct_m::type_integer
type(type_t), public type_integer
Definition: types.F90:137

types_oct_m::types_get_size
integer pure function, public types_get_size(this)
Definition: types.F90:154

accel_oct_m::accel_kernel_t
Definition: accel.F90:260

accel_oct_m::accel_mem_t
Definition: accel.F90:247

batch_oct_m::batch_t
Class defining batches of mesh functions.
Definition: batch.F90:161