ergo
template_blas_symm.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_BLAS_SYMM_HEADER
36 #define TEMPLATE_BLAS_SYMM_HEADER
37 
38 
39 template<class Treal>
40 int template_blas_symm(const char *side, const char *uplo, const integer *m, const integer *n,
41  const Treal *alpha, const Treal *a, const integer *lda, const Treal *b,
42  const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
43 {
44  /* System generated locals */
45  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
46  i__3;
47  /* Local variables */
48  integer info;
49  Treal temp1, temp2;
50  integer i__, j, k;
51  integer nrowa;
52  logical upper;
53 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
54 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
55 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
56 /* Purpose
57  =======
58  DSYMM performs one of the matrix-matrix operations
59  C := alpha*A*B + beta*C,
60  or
61  C := alpha*B*A + beta*C,
62  where alpha and beta are scalars, A is a symmetric matrix and B and
63  C are m by n matrices.
64  Parameters
65  ==========
66  SIDE - CHARACTER*1.
67  On entry, SIDE specifies whether the symmetric matrix A
68  appears on the left or right in the operation as follows:
69  SIDE = 'L' or 'l' C := alpha*A*B + beta*C,
70  SIDE = 'R' or 'r' C := alpha*B*A + beta*C,
71  Unchanged on exit.
72  UPLO - CHARACTER*1.
73  On entry, UPLO specifies whether the upper or lower
74  triangular part of the symmetric matrix A is to be
75  referenced as follows:
76  UPLO = 'U' or 'u' Only the upper triangular part of the
77  symmetric matrix is to be referenced.
78  UPLO = 'L' or 'l' Only the lower triangular part of the
79  symmetric matrix is to be referenced.
80  Unchanged on exit.
81  M - INTEGER.
82  On entry, M specifies the number of rows of the matrix C.
83  M must be at least zero.
84  Unchanged on exit.
85  N - INTEGER.
86  On entry, N specifies the number of columns of the matrix C.
87  N must be at least zero.
88  Unchanged on exit.
89  ALPHA - DOUBLE PRECISION.
90  On entry, ALPHA specifies the scalar alpha.
91  Unchanged on exit.
92  A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
93  m when SIDE = 'L' or 'l' and is n otherwise.
94  Before entry with SIDE = 'L' or 'l', the m by m part of
95  the array A must contain the symmetric matrix, such that
96  when UPLO = 'U' or 'u', the leading m by m upper triangular
97  part of the array A must contain the upper triangular part
98  of the symmetric matrix and the strictly lower triangular
99  part of A is not referenced, and when UPLO = 'L' or 'l',
100  the leading m by m lower triangular part of the array A
101  must contain the lower triangular part of the symmetric
102  matrix and the strictly upper triangular part of A is not
103  referenced.
104  Before entry with SIDE = 'R' or 'r', the n by n part of
105  the array A must contain the symmetric matrix, such that
106  when UPLO = 'U' or 'u', the leading n by n upper triangular
107  part of the array A must contain the upper triangular part
108  of the symmetric matrix and the strictly lower triangular
109  part of A is not referenced, and when UPLO = 'L' or 'l',
110  the leading n by n lower triangular part of the array A
111  must contain the lower triangular part of the symmetric
112  matrix and the strictly upper triangular part of A is not
113  referenced.
114  Unchanged on exit.
115  LDA - INTEGER.
116  On entry, LDA specifies the first dimension of A as declared
117  in the calling (sub) program. When SIDE = 'L' or 'l' then
118  LDA must be at least max( 1, m ), otherwise LDA must be at
119  least max( 1, n ).
120  Unchanged on exit.
121  B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
122  Before entry, the leading m by n part of the array B must
123  contain the matrix B.
124  Unchanged on exit.
125  LDB - INTEGER.
126  On entry, LDB specifies the first dimension of B as declared
127  in the calling (sub) program. LDB must be at least
128  max( 1, m ).
129  Unchanged on exit.
130  BETA - DOUBLE PRECISION.
131  On entry, BETA specifies the scalar beta. When BETA is
132  supplied as zero then C need not be set on input.
133  Unchanged on exit.
134  C - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
135  Before entry, the leading m by n part of the array C must
136  contain the matrix C, except when beta is zero, in which
137  case C need not be set on entry.
138  On exit, the array C is overwritten by the m by n updated
139  matrix.
140  LDC - INTEGER.
141  On entry, LDC specifies the first dimension of C as declared
142  in the calling (sub) program. LDC must be at least
143  max( 1, m ).
144  Unchanged on exit.
145  Level 3 Blas routine.
146  -- Written on 8-February-1989.
147  Jack Dongarra, Argonne National Laboratory.
148  Iain Duff, AERE Harwell.
149  Jeremy Du Croz, Numerical Algorithms Group Ltd.
150  Sven Hammarling, Numerical Algorithms Group Ltd.
151  Set NROWA as the number of rows of A.
152  Parameter adjustments */
153  a_dim1 = *lda;
154  a_offset = 1 + a_dim1 * 1;
155  a -= a_offset;
156  b_dim1 = *ldb;
157  b_offset = 1 + b_dim1 * 1;
158  b -= b_offset;
159  c_dim1 = *ldc;
160  c_offset = 1 + c_dim1 * 1;
161  c__ -= c_offset;
162  /* Function Body */
163  if (template_blas_lsame(side, "L")) {
164  nrowa = *m;
165  } else {
166  nrowa = *n;
167  }
168  upper = template_blas_lsame(uplo, "U");
169 /* Test the input parameters. */
170  info = 0;
171  if (! template_blas_lsame(side, "L") && ! template_blas_lsame(side, "R")) {
172  info = 1;
173  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
174  info = 2;
175  } else if (*m < 0) {
176  info = 3;
177  } else if (*n < 0) {
178  info = 4;
179  } else if (*lda < maxMACRO(1,nrowa)) {
180  info = 7;
181  } else if (*ldb < maxMACRO(1,*m)) {
182  info = 9;
183  } else if (*ldc < maxMACRO(1,*m)) {
184  info = 12;
185  }
186  if (info != 0) {
187  template_blas_erbla("SYMM ", &info);
188  return 0;
189  }
190 /* Quick return if possible. */
191  if (*m == 0 || *n == 0 || ( *alpha == 0. && *beta == 1. ) ) {
192  return 0;
193  }
194 /* And when alpha.eq.zero. */
195  if (*alpha == 0.) {
196  if (*beta == 0.) {
197  i__1 = *n;
198  for (j = 1; j <= i__1; ++j) {
199  i__2 = *m;
200  for (i__ = 1; i__ <= i__2; ++i__) {
201  c___ref(i__, j) = 0.;
202 /* L10: */
203  }
204 /* L20: */
205  }
206  } else {
207  i__1 = *n;
208  for (j = 1; j <= i__1; ++j) {
209  i__2 = *m;
210  for (i__ = 1; i__ <= i__2; ++i__) {
211  c___ref(i__, j) = *beta * c___ref(i__, j);
212 /* L30: */
213  }
214 /* L40: */
215  }
216  }
217  return 0;
218  }
219 /* Start the operations. */
220  if (template_blas_lsame(side, "L")) {
221 /* Form C := alpha*A*B + beta*C. */
222  if (upper) {
223  i__1 = *n;
224  for (j = 1; j <= i__1; ++j) {
225  i__2 = *m;
226  for (i__ = 1; i__ <= i__2; ++i__) {
227  temp1 = *alpha * b_ref(i__, j);
228  temp2 = 0.;
229  i__3 = i__ - 1;
230  for (k = 1; k <= i__3; ++k) {
231  c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__);
232  temp2 += b_ref(k, j) * a_ref(k, i__);
233 /* L50: */
234  }
235  if (*beta == 0.) {
236  c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha *
237  temp2;
238  } else {
239  c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 *
240  a_ref(i__, i__) + *alpha * temp2;
241  }
242 /* L60: */
243  }
244 /* L70: */
245  }
246  } else {
247  i__1 = *n;
248  for (j = 1; j <= i__1; ++j) {
249  for (i__ = *m; i__ >= 1; --i__) {
250  temp1 = *alpha * b_ref(i__, j);
251  temp2 = 0.;
252  i__2 = *m;
253  for (k = i__ + 1; k <= i__2; ++k) {
254  c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__);
255  temp2 += b_ref(k, j) * a_ref(k, i__);
256 /* L80: */
257  }
258  if (*beta == 0.) {
259  c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha *
260  temp2;
261  } else {
262  c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 *
263  a_ref(i__, i__) + *alpha * temp2;
264  }
265 /* L90: */
266  }
267 /* L100: */
268  }
269  }
270  } else {
271 /* Form C := alpha*B*A + beta*C. */
272  i__1 = *n;
273  for (j = 1; j <= i__1; ++j) {
274  temp1 = *alpha * a_ref(j, j);
275  if (*beta == 0.) {
276  i__2 = *m;
277  for (i__ = 1; i__ <= i__2; ++i__) {
278  c___ref(i__, j) = temp1 * b_ref(i__, j);
279 /* L110: */
280  }
281  } else {
282  i__2 = *m;
283  for (i__ = 1; i__ <= i__2; ++i__) {
284  c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * b_ref(
285  i__, j);
286 /* L120: */
287  }
288  }
289  i__2 = j - 1;
290  for (k = 1; k <= i__2; ++k) {
291  if (upper) {
292  temp1 = *alpha * a_ref(k, j);
293  } else {
294  temp1 = *alpha * a_ref(j, k);
295  }
296  i__3 = *m;
297  for (i__ = 1; i__ <= i__3; ++i__) {
298  c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k);
299 /* L130: */
300  }
301 /* L140: */
302  }
303  i__2 = *n;
304  for (k = j + 1; k <= i__2; ++k) {
305  if (upper) {
306  temp1 = *alpha * a_ref(j, k);
307  } else {
308  temp1 = *alpha * a_ref(k, j);
309  }
310  i__3 = *m;
311  for (i__ = 1; i__ <= i__3; ++i__) {
312  c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k);
313 /* L150: */
314  }
315 /* L160: */
316  }
317 /* L170: */
318  }
319  }
320  return 0;
321 /* End of DSYMM . */
322 } /* dsymm_ */
323 #undef c___ref
324 #undef b_ref
325 #undef a_ref
326 
327 #endif