ergo
template_lapack_ggbal.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER
36 #define TEMPLATE_LAPACK_GGBAL_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
41  lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi,
42  Treal *lscale, Treal *rscale, Treal *work, integer *
43  info)
44 {
45 /* -- LAPACK routine (version 3.0) --
46  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
47  Courant Institute, Argonne National Lab, and Rice University
48  September 30, 1994
49 
50 
51  Purpose
52  =======
53 
54  DGGBAL balances a pair of general real matrices (A,B). This
55  involves, first, permuting A and B by similarity transformations to
56  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
57  elements on the diagonal; and second, applying a diagonal similarity
58  transformation to rows and columns ILO to IHI to make the rows
59  and columns as close in norm as possible. Both steps are optional.
60 
61  Balancing may reduce the 1-norm of the matrices, and improve the
62  accuracy of the computed eigenvalues and/or eigenvectors in the
63  generalized eigenvalue problem A*x = lambda*B*x.
64 
65  Arguments
66  =========
67 
68  JOB (input) CHARACTER*1
69  Specifies the operations to be performed on A and B:
70  = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
71  and RSCALE(I) = 1.0 for i = 1,...,N.
72  = 'P': permute only;
73  = 'S': scale only;
74  = 'B': both permute and scale.
75 
76  N (input) INTEGER
77  The order of the matrices A and B. N >= 0.
78 
79  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
80  On entry, the input matrix A.
81  On exit, A is overwritten by the balanced matrix.
82  If JOB = 'N', A is not referenced.
83 
84  LDA (input) INTEGER
85  The leading dimension of the array A. LDA >= max(1,N).
86 
87  B (input/output) DOUBLE PRECISION array, dimension (LDB,N)
88  On entry, the input matrix B.
89  On exit, B is overwritten by the balanced matrix.
90  If JOB = 'N', B is not referenced.
91 
92  LDB (input) INTEGER
93  The leading dimension of the array B. LDB >= max(1,N).
94 
95  ILO (output) INTEGER
96  IHI (output) INTEGER
97  ILO and IHI are set to integers such that on exit
98  A(i,j) = 0 and B(i,j) = 0 if i > j and
99  j = 1,...,ILO-1 or i = IHI+1,...,N.
100  If JOB = 'N' or 'S', ILO = 1 and IHI = N.
101 
102  LSCALE (output) DOUBLE PRECISION array, dimension (N)
103  Details of the permutations and scaling factors applied
104  to the left side of A and B. If P(j) is the index of the
105  row interchanged with row j, and D(j)
106  is the scaling factor applied to row j, then
107  LSCALE(j) = P(j) for J = 1,...,ILO-1
108  = D(j) for J = ILO,...,IHI
109  = P(j) for J = IHI+1,...,N.
110  The order in which the interchanges are made is N to IHI+1,
111  then 1 to ILO-1.
112 
113  RSCALE (output) DOUBLE PRECISION array, dimension (N)
114  Details of the permutations and scaling factors applied
115  to the right side of A and B. If P(j) is the index of the
116  column interchanged with column j, and D(j)
117  is the scaling factor applied to column j, then
118  LSCALE(j) = P(j) for J = 1,...,ILO-1
119  = D(j) for J = ILO,...,IHI
120  = P(j) for J = IHI+1,...,N.
121  The order in which the interchanges are made is N to IHI+1,
122  then 1 to ILO-1.
123 
124  WORK (workspace) DOUBLE PRECISION array, dimension (6*N)
125 
126  INFO (output) INTEGER
127  = 0: successful exit
128  < 0: if INFO = -i, the i-th argument had an illegal value.
129 
130  Further Details
131  ===============
132 
133  See R.C. WARD, Balancing the generalized eigenvalue problem,
134  SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
135 
136  =====================================================================
137 
138 
139  Test the input parameters
140 
141  Parameter adjustments */
142  /* Table of constant values */
143  integer c__1 = 1;
144  Treal c_b34 = 10.;
145  Treal c_b70 = .5;
146 
147  /* System generated locals */
148  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
149  Treal d__1, d__2, d__3;
150  /* Local variables */
151  integer lcab;
152  Treal beta, coef;
153  integer irab, lrab;
154  Treal basl, cmax;
155  Treal coef2, coef5;
156  integer i__, j, k, l, m;
157  Treal gamma, t, alpha;
158  Treal sfmin, sfmax;
159  integer iflow;
160  integer kount, jc;
161  Treal ta, tb, tc;
162  integer ir, it;
163  Treal ew;
164  integer nr;
165  Treal pgamma;
166  integer lsfmin, lsfmax, ip1, jp1, lm1;
167  Treal cab, rab, ewc, cor, sum;
168  integer nrp2, icab;
169 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
170 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
171 
172 
173  a_dim1 = *lda;
174  a_offset = 1 + a_dim1 * 1;
175  a -= a_offset;
176  b_dim1 = *ldb;
177  b_offset = 1 + b_dim1 * 1;
178  b -= b_offset;
179  --lscale;
180  --rscale;
181  --work;
182 
183  /* Initialization added by Elias to get rid of compiler warnings. */
184  pgamma = 0;
185  /* Function Body */
186  *info = 0;
187  if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S")
188  && ! template_blas_lsame(job, "B")) {
189  *info = -1;
190  } else if (*n < 0) {
191  *info = -2;
192  } else if (*lda < maxMACRO(1,*n)) {
193  *info = -4;
194  } else if (*ldb < maxMACRO(1,*n)) {
195  *info = -5;
196  }
197  if (*info != 0) {
198  i__1 = -(*info);
199  template_blas_erbla("GGBAL ", &i__1);
200  return 0;
201  }
202 
203  k = 1;
204  l = *n;
205 
206 /* Quick return if possible */
207 
208  if (*n == 0) {
209  return 0;
210  }
211 
212  if (template_blas_lsame(job, "N")) {
213  *ilo = 1;
214  *ihi = *n;
215  i__1 = *n;
216  for (i__ = 1; i__ <= i__1; ++i__) {
217  lscale[i__] = 1.;
218  rscale[i__] = 1.;
219 /* L10: */
220  }
221  return 0;
222  }
223 
224  if (k == l) {
225  *ilo = 1;
226  *ihi = 1;
227  lscale[1] = 1.;
228  rscale[1] = 1.;
229  return 0;
230  }
231 
232  if (template_blas_lsame(job, "S")) {
233  goto L190;
234  }
235 
236  goto L30;
237 
238 /* Permute the matrices A and B to isolate the eigenvalues.
239 
240  Find row with one nonzero in columns 1 through L */
241 
242 L20:
243  l = lm1;
244  if (l != 1) {
245  goto L30;
246  }
247 
248  rscale[1] = 1.;
249  lscale[1] = 1.;
250  goto L190;
251 
252 L30:
253  lm1 = l - 1;
254  for (i__ = l; i__ >= 1; --i__) {
255  i__1 = lm1;
256  for (j = 1; j <= i__1; ++j) {
257  jp1 = j + 1;
258  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
259  goto L50;
260  }
261 /* L40: */
262  }
263  j = l;
264  goto L70;
265 
266 L50:
267  i__1 = l;
268  for (j = jp1; j <= i__1; ++j) {
269  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
270  goto L80;
271  }
272 /* L60: */
273  }
274  j = jp1 - 1;
275 
276 L70:
277  m = l;
278  iflow = 1;
279  goto L160;
280 L80:
281  ;
282  }
283  goto L100;
284 
285 /* Find column with one nonzero in rows K through N */
286 
287 L90:
288  ++k;
289 
290 L100:
291  i__1 = l;
292  for (j = k; j <= i__1; ++j) {
293  i__2 = lm1;
294  for (i__ = k; i__ <= i__2; ++i__) {
295  ip1 = i__ + 1;
296  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
297  goto L120;
298  }
299 /* L110: */
300  }
301  i__ = l;
302  goto L140;
303 L120:
304  i__2 = l;
305  for (i__ = ip1; i__ <= i__2; ++i__) {
306  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
307  goto L150;
308  }
309 /* L130: */
310  }
311  i__ = ip1 - 1;
312 L140:
313  m = k;
314  iflow = 2;
315  goto L160;
316 L150:
317  ;
318  }
319  goto L190;
320 
321 /* Permute rows M and I */
322 
323 L160:
324  lscale[m] = (Treal) i__;
325  if (i__ == m) {
326  goto L170;
327  }
328  i__1 = *n - k + 1;
329  template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
330  i__1 = *n - k + 1;
331  template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
332 
333 /* Permute columns M and J */
334 
335 L170:
336  rscale[m] = (Treal) j;
337  if (j == m) {
338  goto L180;
339  }
340  template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
341  template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
342 
343 L180:
344  switch (iflow) {
345  case 1: goto L20;
346  case 2: goto L90;
347  }
348 
349 L190:
350  *ilo = k;
351  *ihi = l;
352 
353  if (*ilo == *ihi) {
354  return 0;
355  }
356 
357  if (template_blas_lsame(job, "P")) {
358  return 0;
359  }
360 
361 /* Balance the submatrix in rows ILO to IHI. */
362 
363  nr = *ihi - *ilo + 1;
364  i__1 = *ihi;
365  for (i__ = *ilo; i__ <= i__1; ++i__) {
366  rscale[i__] = 0.;
367  lscale[i__] = 0.;
368 
369  work[i__] = 0.;
370  work[i__ + *n] = 0.;
371  work[i__ + (*n << 1)] = 0.;
372  work[i__ + *n * 3] = 0.;
373  work[i__ + (*n << 2)] = 0.;
374  work[i__ + *n * 5] = 0.;
375 /* L200: */
376  }
377 
378 /* Compute right side vector in resulting linear equations */
379 
380  basl = template_blas_lg10(&c_b34);
381  i__1 = *ihi;
382  for (i__ = *ilo; i__ <= i__1; ++i__) {
383  i__2 = *ihi;
384  for (j = *ilo; j <= i__2; ++j) {
385  tb = b_ref(i__, j);
386  ta = a_ref(i__, j);
387  if (ta == 0.) {
388  goto L210;
389  }
390  d__1 = absMACRO(ta);
391  ta = template_blas_lg10(&d__1) / basl;
392 L210:
393  if (tb == 0.) {
394  goto L220;
395  }
396  d__1 = absMACRO(tb);
397  tb = template_blas_lg10(&d__1) / basl;
398 L220:
399  work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
400  work[j + *n * 5] = work[j + *n * 5] - ta - tb;
401 /* L230: */
402  }
403 /* L240: */
404  }
405 
406  coef = 1. / (Treal) (nr << 1);
407  coef2 = coef * coef;
408  coef5 = coef2 * .5;
409  nrp2 = nr + 2;
410  beta = 0.;
411  it = 1;
412 
413 /* Start generalized conjugate gradient iteration */
414 
415 L250:
416 
417  gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
418  , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
419  n * 5], &c__1);
420 
421  ew = 0.;
422  ewc = 0.;
423  i__1 = *ihi;
424  for (i__ = *ilo; i__ <= i__1; ++i__) {
425  ew += work[i__ + (*n << 2)];
426  ewc += work[i__ + *n * 5];
427 /* L260: */
428  }
429 
430 /* Computing 2nd power */
431  d__1 = ew;
432 /* Computing 2nd power */
433  d__2 = ewc;
434 /* Computing 2nd power */
435  d__3 = ew - ewc;
436  gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
437  d__3 * d__3);
438  if (gamma == 0.) {
439  goto L350;
440  }
441  if (it != 1) {
442  beta = gamma / pgamma;
443  }
444  t = coef5 * (ewc - ew * 3.);
445  tc = coef5 * (ew - ewc * 3.);
446 
447  template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
448  template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
449 
450  template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
451  c__1);
452  template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
453 
454  i__1 = *ihi;
455  for (i__ = *ilo; i__ <= i__1; ++i__) {
456  work[i__] += tc;
457  work[i__ + *n] += t;
458 /* L270: */
459  }
460 
461 /* Apply matrix to vector */
462 
463  i__1 = *ihi;
464  for (i__ = *ilo; i__ <= i__1; ++i__) {
465  kount = 0;
466  sum = 0.;
467  i__2 = *ihi;
468  for (j = *ilo; j <= i__2; ++j) {
469  if (a_ref(i__, j) == 0.) {
470  goto L280;
471  }
472  ++kount;
473  sum += work[j];
474 L280:
475  if (b_ref(i__, j) == 0.) {
476  goto L290;
477  }
478  ++kount;
479  sum += work[j];
480 L290:
481  ;
482  }
483  work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
484 /* L300: */
485  }
486 
487  i__1 = *ihi;
488  for (j = *ilo; j <= i__1; ++j) {
489  kount = 0;
490  sum = 0.;
491  i__2 = *ihi;
492  for (i__ = *ilo; i__ <= i__2; ++i__) {
493  if (a_ref(i__, j) == 0.) {
494  goto L310;
495  }
496  ++kount;
497  sum += work[i__ + *n];
498 L310:
499  if (b_ref(i__, j) == 0.) {
500  goto L320;
501  }
502  ++kount;
503  sum += work[i__ + *n];
504 L320:
505  ;
506  }
507  work[j + *n * 3] = (Treal) kount * work[j] + sum;
508 /* L330: */
509  }
510 
511  sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
512  + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
513  alpha = gamma / sum;
514 
515 /* Determine correction to current iteration */
516 
517  cmax = 0.;
518  i__1 = *ihi;
519  for (i__ = *ilo; i__ <= i__1; ++i__) {
520  cor = alpha * work[i__ + *n];
521  if (absMACRO(cor) > cmax) {
522  cmax = absMACRO(cor);
523  }
524  lscale[i__] += cor;
525  cor = alpha * work[i__];
526  if (absMACRO(cor) > cmax) {
527  cmax = absMACRO(cor);
528  }
529  rscale[i__] += cor;
530 /* L340: */
531  }
532  if (cmax < .5) {
533  goto L350;
534  }
535 
536  d__1 = -alpha;
537  template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
538  , &c__1);
539  d__1 = -alpha;
540  template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
541  c__1);
542 
543  pgamma = gamma;
544  ++it;
545  if (it <= nrp2) {
546  goto L250;
547  }
548 
549 /* End generalized conjugate gradient iteration */
550 
551 L350:
552  sfmin = template_lapack_lamch("S", (Treal)0);
553  sfmax = 1. / sfmin;
554  lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
555  lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
556  i__1 = *ihi;
557  for (i__ = *ilo; i__ <= i__1; ++i__) {
558  i__2 = *n - *ilo + 1;
559  irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
560  rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
561  i__2 = *n - *ilo + 1;
562  irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
563 /* Computing MAX */
564  d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
565  rab = maxMACRO(d__2,d__3);
566  d__1 = rab + sfmin;
567  lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
568  ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
569 /* Computing MIN */
570  i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
571  ir = minMACRO(i__2,i__3);
572  lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
573  icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
574  cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
575  icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
576 /* Computing MAX */
577  d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
578  cab = maxMACRO(d__2,d__3);
579  d__1 = cab + sfmin;
580  lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
581  jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
582 /* Computing MIN */
583  i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
584  jc = minMACRO(i__2,i__3);
585  rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
586 /* L360: */
587  }
588 
589 /* Row scaling of matrices A and B */
590 
591  i__1 = *ihi;
592  for (i__ = *ilo; i__ <= i__1; ++i__) {
593  i__2 = *n - *ilo + 1;
594  template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
595  i__2 = *n - *ilo + 1;
596  template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
597 /* L370: */
598  }
599 
600 /* Column scaling of matrices A and B */
601 
602  i__1 = *ihi;
603  for (j = *ilo; j <= i__1; ++j) {
604  template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
605  template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
606 /* L380: */
607  }
608 
609  return 0;
610 
611 /* End of DGGBAL */
612 
613 } /* dggbal_ */
614 
615 #undef b_ref
616 #undef a_ref
617 
618 
619 #endif
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:41
#define absMACRO(x)
Definition: template_blas_common.h:45
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:40
int integer
Definition: template_blas_common.h:38
#define b_ref(a_1, a_2)
Treal template_blas_lg10(Treal *x)
Definition: template_lapack_lamch.h:57
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:46
#define a_ref(a_1, a_2)
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_blas_swap(const integer *n, Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_swap.h:40
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
double template_lapack_pow_di(Treal *ap, integer *bp)
Definition: template_lapack_lamch.h:165
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:41
int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi, Treal *lscale, Treal *rscale, Treal *work, integer *info)
Definition: template_lapack_ggbal.h:40
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:41