Octopus
lalg_basic.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
22 use blas_oct_m
23 use debug_oct_m
24 use global_oct_m
25 use, intrinsic :: iso_fortran_env
28 use utils_oct_m
29
30 implicit none
31
32 private
33 public :: &
34 lalg_swap, &
35 lalg_scal, &
36 lalg_axpy, &
37 lalg_copy, &
38 lalg_nrm2, &
39 lalg_symv, &
40 lalg_gemv, &
41 lalg_gemm, &
45 lalg_trmm, &
47 ! ------------------------------------------------------------------
48 ! BLAS level I
49 ! ------------------------------------------------------------------
50
52 interface lalg_swap
53 module procedure swap_1_2
54 module procedure swap_2_2
55 module procedure swap_3_2
56 module procedure swap_4_2
57 module procedure swap_1_4
58 module procedure swap_2_4
59 module procedure swap_3_4
60 module procedure swap_4_4
61 end interface lalg_swap
62
64 interface lalg_scal
65 module procedure scal_1_2
66 module procedure scal_2_2
67 module procedure scal_3_2
68 module procedure scal_4_2
69 module procedure scal_1_4
70 module procedure scal_2_4
71 module procedure scal_3_4
72 module procedure scal_4_4
73 module procedure scal_5_4
74 module procedure scal_6_4
75 end interface lalg_scal
76
78 interface lalg_axpy
79 module procedure axpy_1_2
80 module procedure axpy_2_2
81 module procedure axpy_3_2
82 module procedure axpy_4_2
83 module procedure axpy_1_4
84 module procedure axpy_2_4
85 module procedure axpy_3_4
86 module procedure axpy_4_4
87 module procedure axpy_5_4
88 module procedure axpy_6_4
89 module procedure axpy_7_4
90 end interface lalg_axpy
91
93 interface lalg_copy
94 module procedure copy_1_2
95 module procedure copy_2_2
96 module procedure copy_3_2
97 module procedure copy_4_2
98 module procedure copy_1_4
99 module procedure copy_2_4
100 module procedure copy_3_4
101 module procedure copy_4_4
102 end interface lalg_copy
103
105 interface lalg_nrm2
106 module procedure nrm2_2
107 module procedure nrm2_4
108 end interface lalg_nrm2
109
110 ! ------------------------------------------------------------------
111 ! BLAS level II
112 ! ------------------------------------------------------------------
113
115 interface lalg_symv
116 module procedure symv_1_2
117 module procedure symv_1_4
118 module procedure symv_2_2
119 module procedure symv_2_4
120 end interface lalg_symv
121
122 interface lalg_gemv
123 module procedure gemv_1_2
124 module procedure gemv_1_4
125 module procedure gemv_2_2
126 module procedure gemv_2_4
127 end interface lalg_gemv
128
129 ! ------------------------------------------------------------------
130 ! BLAS level III
131 ! ------------------------------------------------------------------
132
134 interface lalg_gemm
135 module procedure gemm_1_2
136 module procedure gemm_1_4
137 module procedure gemm_2_2
138 module procedure gemm_2_4
139 module procedure dgemm_simple
140 end interface lalg_gemm
141
143 interface lalg_gemm_cn
144 module procedure gemm_cn_1_2
145 module procedure gemm_cn_1_4
146 module procedure gemm_cn_2_2
147 module procedure gemm_cn_2_4
148 end interface lalg_gemm_cn
149
151 interface lalg_gemm_nc
152 module procedure gemm_nc_1_2
153 module procedure gemm_nc_1_4
154 module procedure gemm_nc_2_2
155 module procedure gemm_nc_2_4
156 end interface lalg_gemm_nc
159 interface lalg_gemm_cc
160 module procedure gemm_cc_1_2
161 module procedure gemm_cc_1_4
162 end interface lalg_gemm_cc
163
166 interface lalg_symm
167 module procedure symm_1_2
168 module procedure symm_1_4
169 end interface lalg_symm
170
172 interface lalg_trmm
173 module procedure trmm_1_2
174 module procedure trmm_1_4
175 end interface lalg_trmm
176
177contains
178
192 subroutine dgemm_simple(a, b, c, transa, transb, alpha, beta)
193 real(real64), contiguous, intent(in ) :: a(:,:)
194 real(real64), contiguous, intent(in ) :: b(:,:)
195 real(real64), contiguous, intent(inout) :: c(:,:)
196 ! zeroed on input, as \p beta is 0.
197 character(len=1), optional, intent(in ) :: transa, transb
198 real(real64), optional, intent(in ) :: alpha
199 real(real64), optional, intent(in ) :: beta
200 ! For example, 1.0 allows addition of the MM product to an input \p c.
201 ! The default, 0.0, defines \p c as the MM product.
202
203 integer :: m, k, l, n
204 character(len=1) :: ta, tb
205 real(real64) :: p_alpha, p_beta
206
207 push_sub(dgemm_simple)
209 ta = 'N'
210 tb = 'N'
211 if (present(transa)) ta = transa
212 if (present(transb)) tb = transb
213
214 p_alpha = optional_default(alpha, 1.0_real64)
215 p_beta = optional_default(beta, 0.0_real64)
216
217 if (ta == 'n' .or. ta == 'N') then
218 m = size(a, 1)
219 k = size(a, 2)
220 else
221 m = size(a, 2)
222 k = size(a, 1)
223 end if
224 if (tb == 'n' .or. tb == 'N') then
225 l = size(b, 1)
226 n = size(b, 2)
227 else
228 l = size(b, 2)
229 n = size(b, 1)
230 end if
231 assert(size(c, 1) == m)
232 assert(size(c, 2) == n)
233 assert(k == l)
234
235 call blas_gemm(ta, tb, m, n, k, p_alpha, a(1, 1), lead_dim(a), &
236 b(1, 1), lead_dim(b), p_beta, c(1, 1), lead_dim(c))
237
238 pop_sub(dgemm_simple)
239
240 end subroutine dgemm_simple
241
242# define N_ARG_TYPES 2
243# include "lalg_basic_blas_inc.F90"
244# undef N_ARG_TYPES
245
246# define N_ARG_TYPES 4
247# include "lalg_basic_blas_inc.F90"
248# undef N_ARG_TYPES
249
250end module lalg_basic_oct_m
251
253!! Local Variables:
254!! mode: f90
255!! coding: utf-8
256!! End:
--------------— gemm ---------------— performs one of the matrix-matrix operations
Definition: blas.F90:363
constant times a vector plus a vector
Definition: lalg_basic.F90:171
Copies a vector x, to a vector y.
Definition: lalg_basic.F90:186
lalg_gemm with both the (Hermitian) transpose of A and B.
Definition: lalg_basic.F90:252
The same as above but with (Hermitian) transpose of A.
Definition: lalg_basic.F90:236
The same as lalg_gemm but with (Hermitian) transpose of B.
Definition: lalg_basic.F90:244
Matrix-matrix multiplication plus matrix.
Definition: lalg_basic.F90:227
Returns the euclidean norm of a vector.
Definition: lalg_basic.F90:198
scales a vector by a constant
Definition: lalg_basic.F90:157
The following matrix multiplications all expect upper triangular matrices for a. For real matrices,...
Definition: lalg_basic.F90:259
Matrix-vector multiplication plus vector.
Definition: lalg_basic.F90:208
Matrix-matrix multiplication.
Definition: lalg_basic.F90:265
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:118
subroutine symv_2_4(n1, n2, alpha, a, x, beta, y)
subroutine gemm_cc_1_2(m, n, k, alpha, a, b, beta, c)
GEMM when both a and b are (Hermitian) transposes.
subroutine trmm_1_2(m, n, uplo, transa, side, alpha, a, b)
subroutine axpy_1_4(n1, da, dx, dy)
subroutine axpy_4_2(n1, n2, n3, n4, da, dx, dy)
Definition: lalg_basic.F90:644
subroutine swap_4_2(n1, n2, n3, n4, dx, dy)
Definition: lalg_basic.F90:432
subroutine copy_2_4(n1, n2, dx, dy)
subroutine swap_2_2(n1, n2, dx, dy)
Definition: lalg_basic.F90:385
subroutine gemm_1_2(m, n, k, alpha, a, b, beta, c)
Definition: lalg_basic.F90:910
subroutine gemm_2_4(m1, m2, n, k, alpha, a, b, beta, c)
subroutine axpy_6_4(n1, n2, da, dx, dy)
subroutine axpy_2_2(n1, n2, da, dx, dy)
Definition: lalg_basic.F90:573
subroutine trmm_1_4(m, n, uplo, transa, side, alpha, a, b)
subroutine swap_3_2(n1, n2, n3, dx, dy)
Definition: lalg_basic.F90:411
subroutine copy_3_2(n1, n2, n3, dx, dy)
Definition: lalg_basic.F90:736
subroutine scal_5_4(n1, da, dx)
subroutine copy_4_2(n1, n2, n3, n4, dx, dy)
Definition: lalg_basic.F90:761
subroutine swap_3_4(n1, n2, n3, dx, dy)
subroutine gemm_nc_2_2(m1, m2, n1, n2, k, alpha, a, b, beta, c)
subroutine scal_2_4(n1, n2, da, dx)
subroutine swap_4_4(n1, n2, n3, n4, dx, dy)
subroutine gemm_cn_1_2(m, n, k, alpha, a, b, beta, c)
The same as above but with (Hermitian) transpose of a. Note, in this also works when a and b are real...
Definition: lalg_basic.F90:957
subroutine gemm_cn_2_4(m1, m2, n1, n2, k, alpha, a, b, beta, c)
subroutine scal_1_2(n1, da, dx)
Definition: lalg_basic.F90:459
subroutine scal_4_4(n1, n2, n3, n4, da, dx)
subroutine swap_1_2(n1, dx, dy)
Definition: lalg_basic.F90:368
subroutine axpy_3_4(n1, n2, n3, da, dx, dy)
subroutine swap_1_4(n1, dx, dy)
subroutine axpy_4_4(n1, n2, n3, n4, da, dx, dy)
subroutine gemv_2_2(m1, m2, n, alpha, a, x, beta, y)
Definition: lalg_basic.F90:879
subroutine dgemm_simple(a, b, c, transa, transb, alpha, beta)
GEMM with a simplified API for two matrices of consistent shape and type.
Definition: lalg_basic.F90:286
subroutine scal_6_4(n1, n2, da, dx)
subroutine gemm_2_2(m1, m2, n, k, alpha, a, b, beta, c)
Definition: lalg_basic.F90:931
subroutine gemm_cc_1_4(m, n, k, alpha, a, b, beta, c)
GEMM when both a and b are (Hermitian) transposes.
subroutine scal_1_4(n1, da, dx)
subroutine copy_1_2(n1, dx, dy)
Definition: lalg_basic.F90:682
subroutine gemv_2_4(m1, m2, n, alpha, a, x, beta, y)
subroutine gemm_cn_2_2(m1, m2, n1, n2, k, alpha, a, b, beta, c)
Definition: lalg_basic.F90:978
subroutine scal_2_2(n1, n2, da, dx)
Definition: lalg_basic.F90:476
subroutine axpy_5_4(n1, da, dx, dy)
subroutine copy_2_2(n1, n2, dx, dy)
Definition: lalg_basic.F90:705
subroutine axpy_7_4(n1, n2, n3, da, dx, dy)
subroutine axpy_3_2(n1, n2, n3, da, dx, dy)
Definition: lalg_basic.F90:612
subroutine scal_3_4(n1, n2, n3, da, dx)
subroutine gemm_nc_2_4(m1, m2, n1, n2, k, alpha, a, b, beta, c)
subroutine axpy_1_2(n1, da, dx, dy)
Definition: lalg_basic.F90:544
real(real64) function nrm2_4(n, dx)
subroutine symv_1_2(n, alpha, a, x, beta, y)
Definition: lalg_basic.F90:820
subroutine gemm_1_4(m, n, k, alpha, a, b, beta, c)
subroutine gemv_1_2(m, n, alpha, a, x, beta, y)
Definition: lalg_basic.F90:860
subroutine copy_1_4(n1, dx, dy)
subroutine symm_1_4(m, n, side, alpha, a, b, beta, c)
The following matrix multiplications all expect upper triangular matrices for a. For real matrices,...
subroutine gemm_nc_1_2(m, n, k, alpha, a, b, beta, c)
The same as gemm but with (Hermitian) transpose of b. Note, in this also works when a and b are real.
subroutine symv_2_2(n1, n2, alpha, a, x, beta, y)
Definition: lalg_basic.F90:838
subroutine swap_2_4(n1, n2, dx, dy)
subroutine copy_4_4(n1, n2, n3, n4, dx, dy)
real(real64) function nrm2_2(n, dx)
Definition: lalg_basic.F90:792
subroutine gemm_cn_1_4(m, n, k, alpha, a, b, beta, c)
The same as above but with (Hermitian) transpose of a. Note, in this also works when a and b are real...
subroutine gemm_nc_1_4(m, n, k, alpha, a, b, beta, c)
The same as gemm but with (Hermitian) transpose of b. Note, in this also works when a and b are real.
subroutine symm_1_2(m, n, side, alpha, a, b, beta, c)
The following matrix multiplications all expect upper triangular matrices for a. For real matrices,...
subroutine axpy_2_4(n1, n2, da, dx, dy)
subroutine gemv_1_4(m, n, alpha, a, x, beta, y)
subroutine copy_3_4(n1, n2, n3, dx, dy)
subroutine scal_4_2(n1, n2, n3, n4, da, dx)
Definition: lalg_basic.F90:520
subroutine symv_1_4(n, alpha, a, x, beta, y)
subroutine scal_3_2(n1, n2, n3, da, dx)
Definition: lalg_basic.F90:501
This module is intended to contain simple general-purpose utility functions and procedures.
Definition: utils.F90:118