ergo
template_lapack_ggev.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_GGEV_HEADER
36 #define TEMPLATE_LAPACK_GGEV_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_ggev(const char *jobvl, const char *jobvr, const integer *n, Treal *
41  a, const integer *lda, Treal *b, const integer *ldb, Treal *alphar,
42  Treal *alphai, Treal *beta, Treal *vl, const integer *ldvl,
43  Treal *vr, const integer *ldvr, Treal *work, const integer *lwork,
44  integer *info)
45 {
46 /* -- LAPACK driver routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  June 30, 1999
50 
51 
52  Purpose
53  =======
54 
55  DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B)
56  the generalized eigenvalues, and optionally, the left and/or right
57  generalized eigenvectors.
58 
59  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
60  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
61  singular. It is usually represented as the pair (alpha,beta), as
62  there is a reasonable interpretation for beta=0, and even for both
63  being zero.
64 
65  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
66  of (A,B) satisfies
67 
68  A * v(j) = lambda(j) * B * v(j).
69 
70  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
71  of (A,B) satisfies
72 
73  u(j)**H * A = lambda(j) * u(j)**H * B .
74 
75  where u(j)**H is the conjugate-transpose of u(j).
76 
77 
78  Arguments
79  =========
80 
81  JOBVL (input) CHARACTER*1
82  = 'N': do not compute the left generalized eigenvectors;
83  = 'V': compute the left generalized eigenvectors.
84 
85  JOBVR (input) CHARACTER*1
86  = 'N': do not compute the right generalized eigenvectors;
87  = 'V': compute the right generalized eigenvectors.
88 
89  N (input) INTEGER
90  The order of the matrices A, B, VL, and VR. N >= 0.
91 
92  A (input/output) DOUBLE PRECISION array, dimension (LDA, N)
93  On entry, the matrix A in the pair (A,B).
94  On exit, A has been overwritten.
95 
96  LDA (input) INTEGER
97  The leading dimension of A. LDA >= max(1,N).
98 
99  B (input/output) DOUBLE PRECISION array, dimension (LDB, N)
100  On entry, the matrix B in the pair (A,B).
101  On exit, B has been overwritten.
102 
103  LDB (input) INTEGER
104  The leading dimension of B. LDB >= max(1,N).
105 
106  ALPHAR (output) DOUBLE PRECISION array, dimension (N)
107  ALPHAI (output) DOUBLE PRECISION array, dimension (N)
108  BETA (output) DOUBLE PRECISION array, dimension (N)
109  On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
110  be the generalized eigenvalues. If ALPHAI(j) is zero, then
111  the j-th eigenvalue is real; if positive, then the j-th and
112  (j+1)-st eigenvalues are a complex conjugate pair, with
113  ALPHAI(j+1) negative.
114 
115  Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
116  may easily over- or underflow, and BETA(j) may even be zero.
117  Thus, the user should avoid naively computing the ratio
118  alpha/beta. However, ALPHAR and ALPHAI will be always less
119  than and usually comparable with norm(A) in magnitude, and
120  BETA always less than and usually comparable with norm(B).
121 
122  VL (output) DOUBLE PRECISION array, dimension (LDVL,N)
123  If JOBVL = 'V', the left eigenvectors u(j) are stored one
124  after another in the columns of VL, in the same order as
125  their eigenvalues. If the j-th eigenvalue is real, then
126  u(j) = VL(:,j), the j-th column of VL. If the j-th and
127  (j+1)-th eigenvalues form a complex conjugate pair, then
128  u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1).
129  Each eigenvector will be scaled so the largest component have
130  abs(real part)+abs(imag. part)=1.
131  Not referenced if JOBVL = 'N'.
132 
133  LDVL (input) INTEGER
134  The leading dimension of the matrix VL. LDVL >= 1, and
135  if JOBVL = 'V', LDVL >= N.
136 
137  VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
138  If JOBVR = 'V', the right eigenvectors v(j) are stored one
139  after another in the columns of VR, in the same order as
140  their eigenvalues. If the j-th eigenvalue is real, then
141  v(j) = VR(:,j), the j-th column of VR. If the j-th and
142  (j+1)-th eigenvalues form a complex conjugate pair, then
143  v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1).
144  Each eigenvector will be scaled so the largest component have
145  abs(real part)+abs(imag. part)=1.
146  Not referenced if JOBVR = 'N'.
147 
148  LDVR (input) INTEGER
149  The leading dimension of the matrix VR. LDVR >= 1, and
150  if JOBVR = 'V', LDVR >= N.
151 
152  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
153  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
154 
155  LWORK (input) INTEGER
156  The dimension of the array WORK. LWORK >= max(1,8*N).
157  For good performance, LWORK must generally be larger.
158 
159  If LWORK = -1, then a workspace query is assumed; the routine
160  only calculates the optimal size of the WORK array, returns
161  this value as the first entry of the WORK array, and no error
162  message related to LWORK is issued by XERBLA.
163 
164  INFO (output) INTEGER
165  = 0: successful exit
166  < 0: if INFO = -i, the i-th argument had an illegal value.
167  = 1,...,N:
168  The QZ iteration failed. No eigenvectors have been
169  calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
170  should be correct for j=INFO+1,...,N.
171  > N: =N+1: other than QZ iteration failed in DHGEQZ.
172  =N+2: error return from DTGEVC.
173 
174  =====================================================================
175 
176 
177  Decode the input arguments
178 
179  Parameter adjustments */
180  /* Table of constant values */
181  integer c__1 = 1;
182  integer c__0 = 0;
183  Treal c_b26 = 0.;
184  Treal c_b27 = 1.;
185 
186  /* System generated locals */
187  integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1,
188  vr_offset, i__1, i__2;
189  Treal d__1, d__2, d__3, d__4;
190  /* Local variables */
191  Treal anrm, bnrm;
192  integer ierr, itau;
193  Treal temp;
194  logical ilvl, ilvr;
195  integer iwrk;
196  integer ileft, icols, irows;
197  integer jc;
198  integer in;
199  integer jr;
200  logical ilascl, ilbscl;
201  logical ldumma[1];
202  char chtemp[1];
203  Treal bignum;
204  integer ijobvl, iright, ijobvr;
205  Treal anrmto, bnrmto;
206  integer minwrk, maxwrk;
207  Treal smlnum;
208  logical lquery;
209  integer ihi, ilo;
210  Treal eps;
211  logical ilv;
212 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
213 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
214 #define vl_ref(a_1,a_2) vl[(a_2)*vl_dim1 + a_1]
215 #define vr_ref(a_1,a_2) vr[(a_2)*vr_dim1 + a_1]
216 
217 
218  a_dim1 = *lda;
219  a_offset = 1 + a_dim1 * 1;
220  a -= a_offset;
221  b_dim1 = *ldb;
222  b_offset = 1 + b_dim1 * 1;
223  b -= b_offset;
224  --alphar;
225  --alphai;
226  --beta;
227  vl_dim1 = *ldvl;
228  vl_offset = 1 + vl_dim1 * 1;
229  vl -= vl_offset;
230  vr_dim1 = *ldvr;
231  vr_offset = 1 + vr_dim1 * 1;
232  vr -= vr_offset;
233  --work;
234 
235  /* Initialization added by Elias to get rid of compiler warnings. */
236  maxwrk = 0;
237  /* Function Body */
238  if (template_blas_lsame(jobvl, "N")) {
239  ijobvl = 1;
240  ilvl = FALSE_;
241  } else if (template_blas_lsame(jobvl, "V")) {
242  ijobvl = 2;
243  ilvl = TRUE_;
244  } else {
245  ijobvl = -1;
246  ilvl = FALSE_;
247  }
248 
249  if (template_blas_lsame(jobvr, "N")) {
250  ijobvr = 1;
251  ilvr = FALSE_;
252  } else if (template_blas_lsame(jobvr, "V")) {
253  ijobvr = 2;
254  ilvr = TRUE_;
255  } else {
256  ijobvr = -1;
257  ilvr = FALSE_;
258  }
259  ilv = ilvl || ilvr;
260 
261 /* Test the input arguments */
262 
263  *info = 0;
264  lquery = *lwork == -1;
265  if (ijobvl <= 0) {
266  *info = -1;
267  } else if (ijobvr <= 0) {
268  *info = -2;
269  } else if (*n < 0) {
270  *info = -3;
271  } else if (*lda < maxMACRO(1,*n)) {
272  *info = -5;
273  } else if (*ldb < maxMACRO(1,*n)) {
274  *info = -7;
275  } else if (*ldvl < 1 || ( ilvl && *ldvl < *n ) ) {
276  *info = -12;
277  } else if (*ldvr < 1 || ( ilvr && *ldvr < *n ) ) {
278  *info = -14;
279  }
280 
281 /* Compute workspace
282  (Note: Comments in the code beginning "Workspace:" describe the
283  minimal amount of workspace needed at that point in the code,
284  as well as the preferred amount for good performance.
285  NB refers to the optimal block size for the immediately
286  following subroutine, as returned by ILAENV. The workspace is
287  computed assuming ILO = 1 and IHI = N, the worst case.) */
288 
289  minwrk = 1;
290  if (*info == 0 && (*lwork >= 1 || lquery)) {
291  maxwrk = *n * 7 + *n * template_lapack_ilaenv(&c__1, "DGEQRF", " ", n, &c__1, n, &
292  c__0, (ftnlen)6, (ftnlen)1);
293  /* Computing MAX */
294  i__1 = 1, i__2 = *n << 3;
295  minwrk = maxMACRO(i__1,i__2);
296  work[1] = (Treal) maxwrk;
297  }
298 
299  if (*lwork < minwrk && ! lquery) {
300  *info = -16;
301  }
302 
303  if (*info != 0) {
304  i__1 = -(*info);
305  template_blas_erbla("GGEV ", &i__1);
306  return 0;
307  } else if (lquery) {
308  return 0;
309  }
310 
311 /* Quick return if possible */
312 
313  if (*n == 0) {
314  return 0;
315  }
316 
317 /* Get machine constants */
318 
319  eps = template_lapack_lamch("P", (Treal)0);
320  smlnum = template_lapack_lamch("S", (Treal)0);
321  bignum = 1. / smlnum;
322  template_lapack_labad(&smlnum, &bignum);
323  smlnum = template_blas_sqrt(smlnum) / eps;
324  bignum = 1. / smlnum;
325 
326 /* Scale A if max element outside range [SMLNUM,BIGNUM] */
327 
328  anrm = template_lapack_lange("M", n, n, &a[a_offset], lda, &work[1]);
329  ilascl = FALSE_;
330  if (anrm > 0. && anrm < smlnum) {
331  anrmto = smlnum;
332  ilascl = TRUE_;
333  } else if (anrm > bignum) {
334  anrmto = bignum;
335  ilascl = TRUE_;
336  }
337  if (ilascl) {
338  template_lapack_lascl("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
339  ierr);
340  }
341 
342 /* Scale B if max element outside range [SMLNUM,BIGNUM] */
343 
344  bnrm = template_lapack_lange("M", n, n, &b[b_offset], ldb, &work[1]);
345  ilbscl = FALSE_;
346  if (bnrm > 0. && bnrm < smlnum) {
347  bnrmto = smlnum;
348  ilbscl = TRUE_;
349  } else if (bnrm > bignum) {
350  bnrmto = bignum;
351  ilbscl = TRUE_;
352  }
353  if (ilbscl) {
354  template_lapack_lascl("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
355  ierr);
356  }
357 
358 /* Permute the matrices A, B to isolate eigenvalues if possible
359  (Workspace: need 6*N) */
360 
361  ileft = 1;
362  iright = *n + 1;
363  iwrk = iright + *n;
364  template_lapack_ggbal("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
365  ileft], &work[iright], &work[iwrk], &ierr);
366 
367 /* Reduce B to triangular form (QR decomposition of B)
368  (Workspace: need N, prefer N*NB) */
369 
370  irows = ihi + 1 - ilo;
371  if (ilv) {
372  icols = *n + 1 - ilo;
373  } else {
374  icols = irows;
375  }
376  itau = iwrk;
377  iwrk = itau + irows;
378  i__1 = *lwork + 1 - iwrk;
379  template_lapack_geqrf(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], &
380  i__1, &ierr);
381 
382 /* Apply the orthogonal transformation to matrix A
383  (Workspace: need N, prefer N*NB) */
384 
385  i__1 = *lwork + 1 - iwrk;
386  /* Local char arrays added by Elias to get rid of compiler warnings. */
387  char str_L[] = {'L', 0};
388  char str_T[] = {'T', 0};
389  template_lapack_ormqr(str_L, str_T, &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[
390  itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr);
391 
392 /* Initialize VL
393  (Workspace: need N, prefer N*NB) */
394 
395  if (ilvl) {
396  template_lapack_laset("Full", n, n, &c_b26, &c_b27, &vl[vl_offset], ldvl)
397  ;
398  i__1 = irows - 1;
399  i__2 = irows - 1;
400  template_lapack_lacpy("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vl_ref(ilo + 1,
401  ilo), ldvl);
402  i__1 = *lwork + 1 - iwrk;
403  template_lapack_orgqr(&irows, &irows, &irows, &vl_ref(ilo, ilo), ldvl, &work[itau],
404  &work[iwrk], &i__1, &ierr);
405  }
406 
407 /* Initialize VR */
408 
409  if (ilvr) {
410  template_lapack_laset("Full", n, n, &c_b26, &c_b27, &vr[vr_offset], ldvr)
411  ;
412  }
413 
414 /* Reduce to generalized Hessenberg form
415  (Workspace: none needed) */
416 
417  if (ilv) {
418 
419 /* Eigenvectors requested -- work on whole matrix. */
420 
421  template_lapack_gghrd(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset],
422  ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr);
423  } else {
424  template_lapack_gghrd("N", "N", &irows, &c__1, &irows, &a_ref(ilo, ilo), lda, &
425  b_ref(ilo, ilo), ldb, &vl[vl_offset], ldvl, &vr[vr_offset],
426  ldvr, &ierr);
427  }
428 
429 /* Perform QZ algorithm (Compute eigenvalues, and optionally, the
430  Schur forms and Schur vectors)
431  (Workspace: need N) */
432 
433  iwrk = itau;
434  if (ilv) {
435  *(unsigned char *)chtemp = 'S';
436  } else {
437  *(unsigned char *)chtemp = 'E';
438  }
439  i__1 = *lwork + 1 - iwrk;
440  template_lapack_hgeqz(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[
441  b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset],
442  ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr);
443  if (ierr != 0) {
444  if (ierr > 0 && ierr <= *n) {
445  *info = ierr;
446  } else if (ierr > *n && ierr <= *n << 1) {
447  *info = ierr - *n;
448  } else {
449  *info = *n + 1;
450  }
451  goto L110;
452  }
453 
454 /* Compute Eigenvectors
455  (Workspace: need 6*N) */
456 
457  if (ilv) {
458  if (ilvl) {
459  if (ilvr) {
460  *(unsigned char *)chtemp = 'B';
461  } else {
462  *(unsigned char *)chtemp = 'L';
463  }
464  } else {
465  *(unsigned char *)chtemp = 'R';
466  }
467  template_lapack_tgevc(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb,
468  &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[
469  iwrk], &ierr);
470  if (ierr != 0) {
471  *info = *n + 2;
472  goto L110;
473  }
474 
475 /* Undo balancing on VL and VR and normalization
476  (Workspace: none needed) */
477 
478  if (ilvl) {
479  template_lapack_ggbak("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
480  vl[vl_offset], ldvl, &ierr);
481  i__1 = *n;
482  for (jc = 1; jc <= i__1; ++jc) {
483  if (alphai[jc] < 0.) {
484  goto L50;
485  }
486  temp = 0.;
487  if (alphai[jc] == 0.) {
488  i__2 = *n;
489  for (jr = 1; jr <= i__2; ++jr) {
490 /* Computing MAX */
491  d__2 = temp, d__3 = (d__1 = vl_ref(jr, jc), absMACRO(d__1))
492  ;
493  temp = maxMACRO(d__2,d__3);
494 /* L10: */
495  }
496  } else {
497  i__2 = *n;
498  for (jr = 1; jr <= i__2; ++jr) {
499 /* Computing MAX */
500  d__3 = temp, d__4 = (d__1 = vl_ref(jr, jc), absMACRO(d__1))
501  + (d__2 = vl_ref(jr, jc + 1), absMACRO(d__2));
502  temp = maxMACRO(d__3,d__4);
503 /* L20: */
504  }
505  }
506  if (temp < smlnum) {
507  goto L50;
508  }
509  temp = 1. / temp;
510  if (alphai[jc] == 0.) {
511  i__2 = *n;
512  for (jr = 1; jr <= i__2; ++jr) {
513  vl_ref(jr, jc) = vl_ref(jr, jc) * temp;
514 /* L30: */
515  }
516  } else {
517  i__2 = *n;
518  for (jr = 1; jr <= i__2; ++jr) {
519  vl_ref(jr, jc) = vl_ref(jr, jc) * temp;
520  vl_ref(jr, jc + 1) = vl_ref(jr, jc + 1) * temp;
521 /* L40: */
522  }
523  }
524 L50:
525  ;
526  }
527  }
528  if (ilvr) {
529  template_lapack_ggbak("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
530  vr[vr_offset], ldvr, &ierr);
531  i__1 = *n;
532  for (jc = 1; jc <= i__1; ++jc) {
533  if (alphai[jc] < 0.) {
534  goto L100;
535  }
536  temp = 0.;
537  if (alphai[jc] == 0.) {
538  i__2 = *n;
539  for (jr = 1; jr <= i__2; ++jr) {
540 /* Computing MAX */
541  d__2 = temp, d__3 = (d__1 = vr_ref(jr, jc), absMACRO(d__1))
542  ;
543  temp = maxMACRO(d__2,d__3);
544 /* L60: */
545  }
546  } else {
547  i__2 = *n;
548  for (jr = 1; jr <= i__2; ++jr) {
549 /* Computing MAX */
550  d__3 = temp, d__4 = (d__1 = vr_ref(jr, jc), absMACRO(d__1))
551  + (d__2 = vr_ref(jr, jc + 1), absMACRO(d__2));
552  temp = maxMACRO(d__3,d__4);
553 /* L70: */
554  }
555  }
556  if (temp < smlnum) {
557  goto L100;
558  }
559  temp = 1. / temp;
560  if (alphai[jc] == 0.) {
561  i__2 = *n;
562  for (jr = 1; jr <= i__2; ++jr) {
563  vr_ref(jr, jc) = vr_ref(jr, jc) * temp;
564 /* L80: */
565  }
566  } else {
567  i__2 = *n;
568  for (jr = 1; jr <= i__2; ++jr) {
569  vr_ref(jr, jc) = vr_ref(jr, jc) * temp;
570  vr_ref(jr, jc + 1) = vr_ref(jr, jc + 1) * temp;
571 /* L90: */
572  }
573  }
574 L100:
575  ;
576  }
577  }
578 
579 /* End of eigenvector calculation */
580 
581  }
582 
583 /* Undo scaling if necessary */
584 
585  if (ilascl) {
586  template_lapack_lascl("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
587  ierr);
588  template_lapack_lascl("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
589  ierr);
590  }
591 
592  if (ilbscl) {
593  template_lapack_lascl("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
594  ierr);
595  }
596 
597 L110:
598 
599  work[1] = (Treal) maxwrk;
600 
601  return 0;
602 
603 /* End of DGGEV */
604 
605 } /* dggev_ */
606 
607 #undef vr_ref
608 #undef vl_ref
609 #undef b_ref
610 #undef a_ref
611 
612 
613 #endif