ergo
template_lapack_steqr.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_STEQR_HEADER
36 #define TEMPLATE_LAPACK_STEQR_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_steqr(const char *compz, const integer *n, Treal *d__,
41  Treal *e, Treal *z__, const integer *ldz, Treal *work,
42  integer *info)
43 {
44 /* -- LAPACK routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  September 30, 1994
48 
49 
50  Purpose
51  =======
52 
53  DSTEQR computes all eigenvalues and, optionally, eigenvectors of a
54  symmetric tridiagonal matrix using the implicit QL or QR method.
55  The eigenvectors of a full or band symmetric matrix can also be found
56  if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to
57  tridiagonal form.
58 
59  Arguments
60  =========
61 
62  COMPZ (input) CHARACTER*1
63  = 'N': Compute eigenvalues only.
64  = 'V': Compute eigenvalues and eigenvectors of the original
65  symmetric matrix. On entry, Z must contain the
66  orthogonal matrix used to reduce the original matrix
67  to tridiagonal form.
68  = 'I': Compute eigenvalues and eigenvectors of the
69  tridiagonal matrix. Z is initialized to the identity
70  matrix.
71 
72  N (input) INTEGER
73  The order of the matrix. N >= 0.
74 
75  D (input/output) DOUBLE PRECISION array, dimension (N)
76  On entry, the diagonal elements of the tridiagonal matrix.
77  On exit, if INFO = 0, the eigenvalues in ascending order.
78 
79  E (input/output) DOUBLE PRECISION array, dimension (N-1)
80  On entry, the (n-1) subdiagonal elements of the tridiagonal
81  matrix.
82  On exit, E has been destroyed.
83 
84  Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
85  On entry, if COMPZ = 'V', then Z contains the orthogonal
86  matrix used in the reduction to tridiagonal form.
87  On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
88  orthonormal eigenvectors of the original symmetric matrix,
89  and if COMPZ = 'I', Z contains the orthonormal eigenvectors
90  of the symmetric tridiagonal matrix.
91  If COMPZ = 'N', then Z is not referenced.
92 
93  LDZ (input) INTEGER
94  The leading dimension of the array Z. LDZ >= 1, and if
95  eigenvectors are desired, then LDZ >= max(1,N).
96 
97  WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
98  If COMPZ = 'N', then WORK is not referenced.
99 
100  INFO (output) INTEGER
101  = 0: successful exit
102  < 0: if INFO = -i, the i-th argument had an illegal value
103  > 0: the algorithm has failed to find all the eigenvalues in
104  a total of 30*N iterations; if INFO = i, then i
105  elements of E have not converged to zero; on exit, D
106  and E contain the elements of a symmetric tridiagonal
107  matrix which is orthogonally similar to the original
108  matrix.
109 
110  =====================================================================
111 
112 
113  Test the input parameters.
114 
115  Parameter adjustments */
116  /* Table of constant values */
117  Treal c_b9 = 0.;
118  Treal c_b10 = 1.;
119  integer c__0 = 0;
120  integer c__1 = 1;
121  integer c__2 = 2;
122 
123  /* System generated locals */
124  integer z_dim1, z_offset, i__1, i__2;
125  Treal d__1, d__2;
126  /* Local variables */
127  integer lend, jtot;
128  Treal b, c__, f, g;
129  integer i__, j, k, l, m;
130  Treal p, r__, s;
131  Treal anorm;
132  integer l1;
133  integer lendm1, lendp1;
134  integer ii;
135  integer mm, iscale;
136  Treal safmin;
137  Treal safmax;
138  integer lendsv;
139  Treal ssfmin;
140  integer nmaxit, icompz;
141  Treal ssfmax;
142  integer lm1, mm1, nm1;
143  Treal rt1, rt2, eps;
144  integer lsv;
145  Treal tst, eps2;
146 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
147 
148 
149  --d__;
150  --e;
151  z_dim1 = *ldz;
152  z_offset = 1 + z_dim1 * 1;
153  z__ -= z_offset;
154  --work;
155 
156  /* Function Body */
157  *info = 0;
158 
159  if (template_blas_lsame(compz, "N")) {
160  icompz = 0;
161  } else if (template_blas_lsame(compz, "V")) {
162  icompz = 1;
163  } else if (template_blas_lsame(compz, "I")) {
164  icompz = 2;
165  } else {
166  icompz = -1;
167  }
168  if (icompz < 0) {
169  *info = -1;
170  } else if (*n < 0) {
171  *info = -2;
172  } else if (*ldz < 1 || (icompz > 0 && *ldz < maxMACRO(1,*n) ) ) {
173  *info = -6;
174  }
175  if (*info != 0) {
176  i__1 = -(*info);
177  template_blas_erbla("STEQR ", &i__1);
178  return 0;
179  }
180 
181 /* Quick return if possible */
182 
183  if (*n == 0) {
184  return 0;
185  }
186 
187  if (*n == 1) {
188  if (icompz == 2) {
189  z___ref(1, 1) = 1.;
190  }
191  return 0;
192  }
193 
194 /* Determine the unit roundoff and over/underflow thresholds. */
195 
196  eps = template_lapack_lamch("E", (Treal)0);
197 /* Computing 2nd power */
198  d__1 = eps;
199  eps2 = d__1 * d__1;
200  safmin = template_lapack_lamch("S", (Treal)0);
201  safmax = 1. / safmin;
202  ssfmax = template_blas_sqrt(safmax) / 3.;
203  ssfmin = template_blas_sqrt(safmin) / eps2;
204 
205 /* Compute the eigenvalues and eigenvectors of the tridiagonal
206  matrix. */
207 
208  if (icompz == 2) {
209  template_lapack_laset("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz);
210  }
211 
212  nmaxit = *n * 30;
213  jtot = 0;
214 
215 /* Determine where the matrix splits and choose QL or QR iteration
216  for each block, according to whether top or bottom diagonal
217  element is smaller. */
218 
219  l1 = 1;
220  nm1 = *n - 1;
221 
222 L10:
223  if (l1 > *n) {
224  goto L160;
225  }
226  if (l1 > 1) {
227  e[l1 - 1] = 0.;
228  }
229  if (l1 <= nm1) {
230  i__1 = nm1;
231  for (m = l1; m <= i__1; ++m) {
232  tst = (d__1 = e[m], absMACRO(d__1));
233  if (tst == 0.) {
234  goto L30;
235  }
236  if (tst <= template_blas_sqrt((d__1 = d__[m], absMACRO(d__1))) * template_blas_sqrt((d__2 = d__[m
237  + 1], absMACRO(d__2))) * eps) {
238  e[m] = 0.;
239  goto L30;
240  }
241 /* L20: */
242  }
243  }
244  m = *n;
245 
246 L30:
247  l = l1;
248  lsv = l;
249  lend = m;
250  lendsv = lend;
251  l1 = m + 1;
252  if (lend == l) {
253  goto L10;
254  }
255 
256 /* Scale submatrix in rows and columns L to LEND */
257 
258  i__1 = lend - l + 1;
259  anorm = template_lapack_lanst("I", &i__1, &d__[l], &e[l]);
260  iscale = 0;
261  if (anorm == 0.) {
262  goto L10;
263  }
264  if (anorm > ssfmax) {
265  iscale = 1;
266  i__1 = lend - l + 1;
267  template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
268  info);
269  i__1 = lend - l;
270  template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
271  info);
272  } else if (anorm < ssfmin) {
273  iscale = 2;
274  i__1 = lend - l + 1;
275  template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
276  info);
277  i__1 = lend - l;
278  template_lapack_lascl("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
279  info);
280  }
281 
282 /* Choose between QL and QR iteration */
283 
284  if ((d__1 = d__[lend], absMACRO(d__1)) < (d__2 = d__[l], absMACRO(d__2))) {
285  lend = lsv;
286  l = lendsv;
287  }
288 
289  if (lend > l) {
290 
291 /* QL Iteration
292 
293  Look for small subdiagonal element. */
294 
295 L40:
296  if (l != lend) {
297  lendm1 = lend - 1;
298  i__1 = lendm1;
299  for (m = l; m <= i__1; ++m) {
300 /* Computing 2nd power */
301  d__2 = (d__1 = e[m], absMACRO(d__1));
302  tst = d__2 * d__2;
303  if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m
304  + 1], absMACRO(d__2)) + safmin) {
305  goto L60;
306  }
307 /* L50: */
308  }
309  }
310 
311  m = lend;
312 
313 L60:
314  if (m < lend) {
315  e[m] = 0.;
316  }
317  p = d__[l];
318  if (m == l) {
319  goto L80;
320  }
321 
322 /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
323  to compute its eigensystem. */
324 
325  if (m == l + 1) {
326  if (icompz > 0) {
327  template_lapack_laev2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
328  work[l] = c__;
329  work[*n - 1 + l] = s;
330  template_lapack_lasr("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
331  z___ref(1, l), ldz);
332  } else {
333  template_lapack_lae2(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
334  }
335  d__[l] = rt1;
336  d__[l + 1] = rt2;
337  e[l] = 0.;
338  l += 2;
339  if (l <= lend) {
340  goto L40;
341  }
342  goto L140;
343  }
344 
345  if (jtot == nmaxit) {
346  goto L140;
347  }
348  ++jtot;
349 
350 /* Form shift. */
351 
352  g = (d__[l + 1] - p) / (e[l] * 2.);
353  r__ = template_lapack_lapy2(&g, &c_b10);
354  g = d__[m] - p + e[l] / (g + template_lapack_d_sign(&r__, &g));
355 
356  s = 1.;
357  c__ = 1.;
358  p = 0.;
359 
360 /* Inner loop */
361 
362  mm1 = m - 1;
363  i__1 = l;
364  for (i__ = mm1; i__ >= i__1; --i__) {
365  f = s * e[i__];
366  b = c__ * e[i__];
367  template_lapack_lartg(&g, &f, &c__, &s, &r__);
368  if (i__ != m - 1) {
369  e[i__ + 1] = r__;
370  }
371  g = d__[i__ + 1] - p;
372  r__ = (d__[i__] - g) * s + c__ * 2. * b;
373  p = s * r__;
374  d__[i__ + 1] = g + p;
375  g = c__ * r__ - b;
376 
377 /* If eigenvectors are desired, then save rotations. */
378 
379  if (icompz > 0) {
380  work[i__] = c__;
381  work[*n - 1 + i__] = -s;
382  }
383 
384 /* L70: */
385  }
386 
387 /* If eigenvectors are desired, then apply saved rotations. */
388 
389  if (icompz > 0) {
390  mm = m - l + 1;
391  template_lapack_lasr("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &
392  z___ref(1, l), ldz);
393  }
394 
395  d__[l] -= p;
396  e[l] = g;
397  goto L40;
398 
399 /* Eigenvalue found. */
400 
401 L80:
402  d__[l] = p;
403 
404  ++l;
405  if (l <= lend) {
406  goto L40;
407  }
408  goto L140;
409 
410  } else {
411 
412 /* QR Iteration
413 
414  Look for small superdiagonal element. */
415 
416 L90:
417  if (l != lend) {
418  lendp1 = lend + 1;
419  i__1 = lendp1;
420  for (m = l; m >= i__1; --m) {
421 /* Computing 2nd power */
422  d__2 = (d__1 = e[m - 1], absMACRO(d__1));
423  tst = d__2 * d__2;
424  if (tst <= eps2 * (d__1 = d__[m], absMACRO(d__1)) * (d__2 = d__[m
425  - 1], absMACRO(d__2)) + safmin) {
426  goto L110;
427  }
428 /* L100: */
429  }
430  }
431 
432  m = lend;
433 
434 L110:
435  if (m > lend) {
436  e[m - 1] = 0.;
437  }
438  p = d__[l];
439  if (m == l) {
440  goto L130;
441  }
442 
443 /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
444  to compute its eigensystem. */
445 
446  if (m == l - 1) {
447  if (icompz > 0) {
448  template_lapack_laev2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
449  ;
450  work[m] = c__;
451  work[*n - 1 + m] = s;
452  template_lapack_lasr("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
453  z___ref(1, l - 1), ldz);
454  } else {
455  template_lapack_lae2(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
456  }
457  d__[l - 1] = rt1;
458  d__[l] = rt2;
459  e[l - 1] = 0.;
460  l += -2;
461  if (l >= lend) {
462  goto L90;
463  }
464  goto L140;
465  }
466 
467  if (jtot == nmaxit) {
468  goto L140;
469  }
470  ++jtot;
471 
472 /* Form shift. */
473 
474  g = (d__[l - 1] - p) / (e[l - 1] * 2.);
475  r__ = template_lapack_lapy2(&g, &c_b10);
476  g = d__[m] - p + e[l - 1] / (g + template_lapack_d_sign(&r__, &g));
477 
478  s = 1.;
479  c__ = 1.;
480  p = 0.;
481 
482 /* Inner loop */
483 
484  lm1 = l - 1;
485  i__1 = lm1;
486  for (i__ = m; i__ <= i__1; ++i__) {
487  f = s * e[i__];
488  b = c__ * e[i__];
489  template_lapack_lartg(&g, &f, &c__, &s, &r__);
490  if (i__ != m) {
491  e[i__ - 1] = r__;
492  }
493  g = d__[i__] - p;
494  r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b;
495  p = s * r__;
496  d__[i__] = g + p;
497  g = c__ * r__ - b;
498 
499 /* If eigenvectors are desired, then save rotations. */
500 
501  if (icompz > 0) {
502  work[i__] = c__;
503  work[*n - 1 + i__] = s;
504  }
505 
506 /* L120: */
507  }
508 
509 /* If eigenvectors are desired, then apply saved rotations. */
510 
511  if (icompz > 0) {
512  mm = l - m + 1;
513  template_lapack_lasr("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &
514  z___ref(1, m), ldz);
515  }
516 
517  d__[l] -= p;
518  e[lm1] = g;
519  goto L90;
520 
521 /* Eigenvalue found. */
522 
523 L130:
524  d__[l] = p;
525 
526  --l;
527  if (l >= lend) {
528  goto L90;
529  }
530  goto L140;
531 
532  }
533 
534 /* Undo scaling if necessary */
535 
536 L140:
537  if (iscale == 1) {
538  i__1 = lendsv - lsv + 1;
539  template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
540  n, info);
541  i__1 = lendsv - lsv;
542  template_lapack_lascl("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
543  info);
544  } else if (iscale == 2) {
545  i__1 = lendsv - lsv + 1;
546  template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
547  n, info);
548  i__1 = lendsv - lsv;
549  template_lapack_lascl("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
550  info);
551  }
552 
553 /* Check for no convergence to an eigenvalue after a total
554  of N*MAXIT iterations. */
555 
556  if (jtot < nmaxit) {
557  goto L10;
558  }
559  i__1 = *n - 1;
560  for (i__ = 1; i__ <= i__1; ++i__) {
561  if (e[i__] != 0.) {
562  ++(*info);
563  }
564 /* L150: */
565  }
566  goto L190;
567 
568 /* Order eigenvalues and eigenvectors. */
569 
570 L160:
571  if (icompz == 0) {
572 
573 /* Use Quick Sort */
574 
575  template_lapack_lasrt("I", n, &d__[1], info);
576 
577  } else {
578 
579 /* Use Selection Sort to minimize swaps of eigenvectors */
580 
581  i__1 = *n;
582  for (ii = 2; ii <= i__1; ++ii) {
583  i__ = ii - 1;
584  k = i__;
585  p = d__[i__];
586  i__2 = *n;
587  for (j = ii; j <= i__2; ++j) {
588  if (d__[j] < p) {
589  k = j;
590  p = d__[j];
591  }
592 /* L170: */
593  }
594  if (k != i__) {
595  d__[k] = d__[i__];
596  d__[i__] = p;
597  template_blas_swap(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1);
598  }
599 /* L180: */
600  }
601  }
602 
603 L190:
604  return 0;
605 
606 /* End of DSTEQR */
607 
608 } /* dsteqr_ */
609 
610 #undef z___ref
611 
612 
613 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
#define z___ref(a_1, a_2)
int template_lapack_lascl(const char *type__, const integer *kl, const integer *ku, const Treal *cfrom, const Treal *cto, const integer *m, const integer *n, Treal *a, const integer *lda, integer *info)
Definition: template_lapack_lascl.h:40
int template_lapack_laset(const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *beta, Treal *a, const integer *lda)
Definition: template_lapack_laset.h:40
int template_lapack_laev2(Treal *a, Treal *b, Treal *c__, Treal *rt1, Treal *rt2, Treal *cs1, Treal *sn1)
Definition: template_lapack_laev2.h:40
int integer
Definition: template_blas_common.h:38
Treal template_lapack_lapy2(Treal *x, Treal *y)
Definition: template_lapack_lapy2.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_lapack_lasrt(const char *id, const integer *n, Treal *d__, integer *info)
Definition: template_lapack_lasrt.h:40
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:46
int template_lapack_steqr(const char *compz, const integer *n, Treal *d__, Treal *e, Treal *z__, const integer *ldz, Treal *work, integer *info)
Definition: template_lapack_steqr.h:40
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_blas_swap(const integer *n, Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_swap.h:40
int template_lapack_lartg(const Treal *f, const Treal *g, Treal *cs, Treal *sn, Treal *r__)
Definition: template_lapack_lartg.h:40
int template_lapack_lasr(const char *side, const char *pivot, const char *direct, const integer *m, const integer *n, const Treal *c__, const Treal *s, Treal *a, const integer *lda)
Definition: template_lapack_lasr.h:40
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
int template_lapack_lae2(const Treal *a, const Treal *b, const Treal *c__, Treal *rt1, Treal *rt2)
Definition: template_lapack_lae2.h:40
Treal template_lapack_lanst(const char *norm, const integer *n, const Treal *d__, const Treal *e)
Definition: template_lapack_lanst.h:40
Treal template_blas_sqrt(Treal x)
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44