ergo
template_lapack_laln2.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LALN2_HEADER
36 #define TEMPLATE_LAPACK_LALN2_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw,
41  const Treal *smin, const Treal *ca, const Treal *a, const integer *lda,
42  const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb,
43  const Treal *wr, const Treal *wi, Treal *x, const integer *ldx,
44  Treal *scale, Treal *xnorm, integer *info)
45 {
46 /* -- LAPACK auxiliary routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  October 31, 1992
50 
51 
52  Purpose
53  =======
54 
55  DLALN2 solves a system of the form (ca A - w D ) X = s B
56  or (ca A' - w D) X = s B with possible scaling ("s") and
57  perturbation of A. (A' means A-transpose.)
58 
59  A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
60  real diagonal matrix, w is a real or complex value, and X and B are
61  NA x 1 matrices -- real if w is real, complex if w is complex. NA
62  may be 1 or 2.
63 
64  If w is complex, X and B are represented as NA x 2 matrices,
65  the first column of each being the real part and the second
66  being the imaginary part.
67 
68  "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
69  so chosen that X can be computed without overflow. X is further
70  scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
71  than overflow.
72 
73  If both singular values of (ca A - w D) are less than SMIN,
74  SMIN*identity will be used instead of (ca A - w D). If only one
75  singular value is less than SMIN, one element of (ca A - w D) will be
76  perturbed enough to make the smallest singular value roughly SMIN.
77  If both singular values are at least SMIN, (ca A - w D) will not be
78  perturbed. In any case, the perturbation will be at most some small
79  multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values
80  are computed by infinity-norm approximations, and thus will only be
81  correct to a factor of 2 or so.
82 
83  Note: all input quantities are assumed to be smaller than overflow
84  by a reasonable factor. (See BIGNUM.)
85 
86  Arguments
87  ==========
88 
89  LTRANS (input) LOGICAL
90  =.TRUE.: A-transpose will be used.
91  =.FALSE.: A will be used (not transposed.)
92 
93  NA (input) INTEGER
94  The size of the matrix A. It may (only) be 1 or 2.
95 
96  NW (input) INTEGER
97  1 if "w" is real, 2 if "w" is complex. It may only be 1
98  or 2.
99 
100  SMIN (input) DOUBLE PRECISION
101  The desired lower bound on the singular values of A. This
102  should be a safe distance away from underflow or overflow,
103  say, between (underflow/machine precision) and (machine
104  precision * overflow ). (See BIGNUM and ULP.)
105 
106  CA (input) DOUBLE PRECISION
107  The coefficient c, which A is multiplied by.
108 
109  A (input) DOUBLE PRECISION array, dimension (LDA,NA)
110  The NA x NA matrix A.
111 
112  LDA (input) INTEGER
113  The leading dimension of A. It must be at least NA.
114 
115  D1 (input) DOUBLE PRECISION
116  The 1,1 element in the diagonal matrix D.
117 
118  D2 (input) DOUBLE PRECISION
119  The 2,2 element in the diagonal matrix D. Not used if NW=1.
120 
121  B (input) DOUBLE PRECISION array, dimension (LDB,NW)
122  The NA x NW matrix B (right-hand side). If NW=2 ("w" is
123  complex), column 1 contains the real part of B and column 2
124  contains the imaginary part.
125 
126  LDB (input) INTEGER
127  The leading dimension of B. It must be at least NA.
128 
129  WR (input) DOUBLE PRECISION
130  The real part of the scalar "w".
131 
132  WI (input) DOUBLE PRECISION
133  The imaginary part of the scalar "w". Not used if NW=1.
134 
135  X (output) DOUBLE PRECISION array, dimension (LDX,NW)
136  The NA x NW matrix X (unknowns), as computed by DLALN2.
137  If NW=2 ("w" is complex), on exit, column 1 will contain
138  the real part of X and column 2 will contain the imaginary
139  part.
140 
141  LDX (input) INTEGER
142  The leading dimension of X. It must be at least NA.
143 
144  SCALE (output) DOUBLE PRECISION
145  The scale factor that B must be multiplied by to insure
146  that overflow does not occur when computing X. Thus,
147  (ca A - w D) X will be SCALE*B, not B (ignoring
148  perturbations of A.) It will be at most 1.
149 
150  XNORM (output) DOUBLE PRECISION
151  The infinity-norm of X, when X is regarded as an NA x NW
152  real matrix.
153 
154  INFO (output) INTEGER
155  An error flag. It will be set to zero if no error occurs,
156  a negative number if an argument is in error, or a positive
157  number if ca A - w D had to be perturbed.
158  The possible values are:
159  = 0: No error occurred, and (ca A - w D) did not have to be
160  perturbed.
161  = 1: (ca A - w D) had to be perturbed to make its smallest
162  (or only) singular value greater than SMIN.
163  NOTE: In the interests of speed, this routine does not
164  check the inputs for errors.
165 
166  =====================================================================
167 
168  Parameter adjustments */
169  /* Initialized data */
170  logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ };
171  logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ };
172  integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2,
173  4,3,2,1 };
174  /* System generated locals */
175  integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset;
176  Treal d__1, d__2, d__3, d__4, d__5, d__6;
177  Treal equiv_0[4], equiv_1[4];
178  /* Local variables */
179  Treal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s;
180  integer j;
181  Treal u22abs;
182  integer icmax;
183  Treal bnorm, cnorm, smini;
184 #define ci (equiv_0)
185 #define cr (equiv_1)
186  Treal bignum, bi1, bi2, br1, br2, smlnum, xi1, xi2, xr1, xr2,
187  ci21, ci22, cr21, cr22, li21, csi, ui11, lr21, ui12, ui22;
188 #define civ (equiv_0)
189  Treal csr, ur11, ur12, ur22;
190 #define crv (equiv_1)
191 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
192 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
193 #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
194 #define ci_ref(a_1,a_2) ci[(a_2)*2 + a_1 - 3]
195 #define cr_ref(a_1,a_2) cr[(a_2)*2 + a_1 - 3]
196 #define ipivot_ref(a_1,a_2) ipivot[(a_2)*4 + a_1 - 5]
197 
198  a_dim1 = *lda;
199  a_offset = 1 + a_dim1 * 1;
200  a -= a_offset;
201  b_dim1 = *ldb;
202  b_offset = 1 + b_dim1 * 1;
203  b -= b_offset;
204  x_dim1 = *ldx;
205  x_offset = 1 + x_dim1 * 1;
206  x -= x_offset;
207 
208  /* Function Body
209 
210  Compute BIGNUM */
211 
212  smlnum = 2. * template_lapack_lamch("Safe minimum", (Treal)0);
213  bignum = 1. / smlnum;
214  smini = maxMACRO(*smin,smlnum);
215 
216 /* Don't check for input errors */
217 
218  *info = 0;
219 
220 /* Standard Initializations */
221 
222  *scale = 1.;
223 
224  if (*na == 1) {
225 
226 /* 1 x 1 (i.e., scalar) system C X = B */
227 
228  if (*nw == 1) {
229 
230 /* Real 1x1 system.
231 
232  C = ca A - w D */
233 
234  csr = *ca * a_ref(1, 1) - *wr * *d1;
235  cnorm = absMACRO(csr);
236 
237 /* If | C | < SMINI, use C = SMINI */
238 
239  if (cnorm < smini) {
240  csr = smini;
241  cnorm = smini;
242  *info = 1;
243  }
244 
245 /* Check scaling for X = B / C */
246 
247  bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1));
248  if (cnorm < 1. && bnorm > 1.) {
249  if (bnorm > bignum * cnorm) {
250  *scale = 1. / bnorm;
251  }
252  }
253 
254 /* Compute X */
255 
256  x_ref(1, 1) = b_ref(1, 1) * *scale / csr;
257  *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1));
258  } else {
259 
260 /* Complex 1x1 system (w is complex)
261 
262  C = ca A - w D */
263 
264  csr = *ca * a_ref(1, 1) - *wr * *d1;
265  csi = -(*wi) * *d1;
266  cnorm = absMACRO(csr) + absMACRO(csi);
267 
268 /* If | C | < SMINI, use C = SMINI */
269 
270  if (cnorm < smini) {
271  csr = smini;
272  csi = 0.;
273  cnorm = smini;
274  *info = 1;
275  }
276 
277 /* Check scaling for X = B / C */
278 
279  bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
280  absMACRO(d__2));
281  if (cnorm < 1. && bnorm > 1.) {
282  if (bnorm > bignum * cnorm) {
283  *scale = 1. / bnorm;
284  }
285  }
286 
287 /* Compute X */
288 
289  d__1 = *scale * b_ref(1, 1);
290  d__2 = *scale * b_ref(1, 2);
291  template_lapack_ladiv(&d__1, &d__2, &csr, &csi, &x_ref(1, 1), &x_ref(1, 2));
292  *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1)) + (d__2 = x_ref(1, 2),
293  absMACRO(d__2));
294  }
295 
296  } else {
297 
298 /* 2x2 System
299 
300  Compute the real part of C = ca A - w D (or ca A' - w D ) */
301 
302  cr_ref(1, 1) = *ca * a_ref(1, 1) - *wr * *d1;
303  cr_ref(2, 2) = *ca * a_ref(2, 2) - *wr * *d2;
304  if (*ltrans) {
305  cr_ref(1, 2) = *ca * a_ref(2, 1);
306  cr_ref(2, 1) = *ca * a_ref(1, 2);
307  } else {
308  cr_ref(2, 1) = *ca * a_ref(2, 1);
309  cr_ref(1, 2) = *ca * a_ref(1, 2);
310  }
311 
312  if (*nw == 1) {
313 
314 /* Real 2x2 system (w is real)
315 
316  Find the largest element in C */
317 
318  cmax = 0.;
319  icmax = 0;
320 
321  for (j = 1; j <= 4; ++j) {
322  if ((d__1 = crv[j - 1], absMACRO(d__1)) > cmax) {
323  cmax = (d__1 = crv[j - 1], absMACRO(d__1));
324  icmax = j;
325  }
326 /* L10: */
327  }
328 
329 /* If norm(C) < SMINI, use SMINI*identity. */
330 
331  if (cmax < smini) {
332 /* Computing MAX */
333  d__3 = (d__1 = b_ref(1, 1), absMACRO(d__1)), d__4 = (d__2 = b_ref(
334  2, 1), absMACRO(d__2));
335  bnorm = maxMACRO(d__3,d__4);
336  if (smini < 1. && bnorm > 1.) {
337  if (bnorm > bignum * smini) {
338  *scale = 1. / bnorm;
339  }
340  }
341  temp = *scale / smini;
342  x_ref(1, 1) = temp * b_ref(1, 1);
343  x_ref(2, 1) = temp * b_ref(2, 1);
344  *xnorm = temp * bnorm;
345  *info = 1;
346  return 0;
347  }
348 
349 /* Gaussian elimination with complete pivoting. */
350 
351  ur11 = crv[icmax - 1];
352  cr21 = crv[ipivot_ref(2, icmax) - 1];
353  ur12 = crv[ipivot_ref(3, icmax) - 1];
354  cr22 = crv[ipivot_ref(4, icmax) - 1];
355  ur11r = 1. / ur11;
356  lr21 = ur11r * cr21;
357  ur22 = cr22 - ur12 * lr21;
358 
359 /* If smaller pivot < SMINI, use SMINI */
360 
361  if (absMACRO(ur22) < smini) {
362  ur22 = smini;
363  *info = 1;
364  }
365  if (rswap[icmax - 1]) {
366  br1 = b_ref(2, 1);
367  br2 = b_ref(1, 1);
368  } else {
369  br1 = b_ref(1, 1);
370  br2 = b_ref(2, 1);
371  }
372  br2 -= lr21 * br1;
373 /* Computing MAX */
374  d__2 = (d__1 = br1 * (ur22 * ur11r), absMACRO(d__1)), d__3 = absMACRO(br2);
375  bbnd = maxMACRO(d__2,d__3);
376  if (bbnd > 1. && absMACRO(ur22) < 1.) {
377  if (bbnd >= bignum * absMACRO(ur22)) {
378  *scale = 1. / bbnd;
379  }
380  }
381 
382  xr2 = br2 * *scale / ur22;
383  xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12);
384  if (zswap[icmax - 1]) {
385  x_ref(1, 1) = xr2;
386  x_ref(2, 1) = xr1;
387  } else {
388  x_ref(1, 1) = xr1;
389  x_ref(2, 1) = xr2;
390  }
391 /* Computing MAX */
392  d__1 = absMACRO(xr1), d__2 = absMACRO(xr2);
393  *xnorm = maxMACRO(d__1,d__2);
394 
395 /* Further scaling if norm(A) norm(X) > overflow */
396 
397  if (*xnorm > 1. && cmax > 1.) {
398  if (*xnorm > bignum / cmax) {
399  temp = cmax / bignum;
400  x_ref(1, 1) = temp * x_ref(1, 1);
401  x_ref(2, 1) = temp * x_ref(2, 1);
402  *xnorm = temp * *xnorm;
403  *scale = temp * *scale;
404  }
405  }
406  } else {
407 
408 /* Complex 2x2 system (w is complex)
409 
410  Find the largest element in C */
411 
412  ci_ref(1, 1) = -(*wi) * *d1;
413  ci_ref(2, 1) = 0.;
414  ci_ref(1, 2) = 0.;
415  ci_ref(2, 2) = -(*wi) * *d2;
416  cmax = 0.;
417  icmax = 0;
418 
419  for (j = 1; j <= 4; ++j) {
420  if ((d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1], absMACRO(
421  d__2)) > cmax) {
422  cmax = (d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1]
423  , absMACRO(d__2));
424  icmax = j;
425  }
426 /* L20: */
427  }
428 
429 /* If norm(C) < SMINI, use SMINI*identity. */
430 
431  if (cmax < smini) {
432 /* Computing MAX */
433  d__5 = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
434  absMACRO(d__2)), d__6 = (d__3 = b_ref(2, 1), absMACRO(d__3)) + (
435  d__4 = b_ref(2, 2), absMACRO(d__4));
436  bnorm = maxMACRO(d__5,d__6);
437  if (smini < 1. && bnorm > 1.) {
438  if (bnorm > bignum * smini) {
439  *scale = 1. / bnorm;
440  }
441  }
442  temp = *scale / smini;
443  x_ref(1, 1) = temp * b_ref(1, 1);
444  x_ref(2, 1) = temp * b_ref(2, 1);
445  x_ref(1, 2) = temp * b_ref(1, 2);
446  x_ref(2, 2) = temp * b_ref(2, 2);
447  *xnorm = temp * bnorm;
448  *info = 1;
449  return 0;
450  }
451 
452 /* Gaussian elimination with complete pivoting. */
453 
454  ur11 = crv[icmax - 1];
455  ui11 = civ[icmax - 1];
456  cr21 = crv[ipivot_ref(2, icmax) - 1];
457  ci21 = civ[ipivot_ref(2, icmax) - 1];
458  ur12 = crv[ipivot_ref(3, icmax) - 1];
459  ui12 = civ[ipivot_ref(3, icmax) - 1];
460  cr22 = crv[ipivot_ref(4, icmax) - 1];
461  ci22 = civ[ipivot_ref(4, icmax) - 1];
462  if (icmax == 1 || icmax == 4) {
463 
464 /* Code when off-diagonals of pivoted C are real */
465 
466  if (absMACRO(ur11) > absMACRO(ui11)) {
467  temp = ui11 / ur11;
468 /* Computing 2nd power */
469  d__1 = temp;
470  ur11r = 1. / (ur11 * (d__1 * d__1 + 1.));
471  ui11r = -temp * ur11r;
472  } else {
473  temp = ur11 / ui11;
474 /* Computing 2nd power */
475  d__1 = temp;
476  ui11r = -1. / (ui11 * (d__1 * d__1 + 1.));
477  ur11r = -temp * ui11r;
478  }
479  lr21 = cr21 * ur11r;
480  li21 = cr21 * ui11r;
481  ur12s = ur12 * ur11r;
482  ui12s = ur12 * ui11r;
483  ur22 = cr22 - ur12 * lr21;
484  ui22 = ci22 - ur12 * li21;
485  } else {
486 
487 /* Code when diagonals of pivoted C are real */
488 
489  ur11r = 1. / ur11;
490  ui11r = 0.;
491  lr21 = cr21 * ur11r;
492  li21 = ci21 * ur11r;
493  ur12s = ur12 * ur11r;
494  ui12s = ui12 * ur11r;
495  ur22 = cr22 - ur12 * lr21 + ui12 * li21;
496  ui22 = -ur12 * li21 - ui12 * lr21;
497  }
498  u22abs = absMACRO(ur22) + absMACRO(ui22);
499 
500 /* If smaller pivot < SMINI, use SMINI */
501 
502  if (u22abs < smini) {
503  ur22 = smini;
504  ui22 = 0.;
505  *info = 1;
506  }
507  if (rswap[icmax - 1]) {
508  br2 = b_ref(1, 1);
509  br1 = b_ref(2, 1);
510  bi2 = b_ref(1, 2);
511  bi1 = b_ref(2, 2);
512  } else {
513  br1 = b_ref(1, 1);
514  br2 = b_ref(2, 1);
515  bi1 = b_ref(1, 2);
516  bi2 = b_ref(2, 2);
517  }
518  br2 = br2 - lr21 * br1 + li21 * bi1;
519  bi2 = bi2 - li21 * br1 - lr21 * bi1;
520 /* Computing MAX */
521  d__1 = (absMACRO(br1) + absMACRO(bi1)) * (u22abs * (absMACRO(ur11r) + absMACRO(ui11r))
522  ), d__2 = absMACRO(br2) + absMACRO(bi2);
523  bbnd = maxMACRO(d__1,d__2);
524  if (bbnd > 1. && u22abs < 1.) {
525  if (bbnd >= bignum * u22abs) {
526  *scale = 1. / bbnd;
527  br1 = *scale * br1;
528  bi1 = *scale * bi1;
529  br2 = *scale * br2;
530  bi2 = *scale * bi2;
531  }
532  }
533 
534  template_lapack_ladiv(&br2, &bi2, &ur22, &ui22, &xr2, &xi2);
535  xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2;
536  xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2;
537  if (zswap[icmax - 1]) {
538  x_ref(1, 1) = xr2;
539  x_ref(2, 1) = xr1;
540  x_ref(1, 2) = xi2;
541  x_ref(2, 2) = xi1;
542  } else {
543  x_ref(1, 1) = xr1;
544  x_ref(2, 1) = xr2;
545  x_ref(1, 2) = xi1;
546  x_ref(2, 2) = xi2;
547  }
548 /* Computing MAX */
549  d__1 = absMACRO(xr1) + absMACRO(xi1), d__2 = absMACRO(xr2) + absMACRO(xi2);
550  *xnorm = maxMACRO(d__1,d__2);
551 
552 /* Further scaling if norm(A) norm(X) > overflow */
553 
554  if (*xnorm > 1. && cmax > 1.) {
555  if (*xnorm > bignum / cmax) {
556  temp = cmax / bignum;
557  x_ref(1, 1) = temp * x_ref(1, 1);
558  x_ref(2, 1) = temp * x_ref(2, 1);
559  x_ref(1, 2) = temp * x_ref(1, 2);
560  x_ref(2, 2) = temp * x_ref(2, 2);
561  *xnorm = temp * *xnorm;
562  *scale = temp * *scale;
563  }
564  }
565  }
566  }
567 
568  return 0;
569 
570 /* End of DLALN2 */
571 
572 } /* dlaln2_ */
573 
574 #undef ipivot_ref
575 #undef cr_ref
576 #undef ci_ref
577 #undef x_ref
578 #undef b_ref
579 #undef a_ref
580 #undef crv
581 #undef civ
582 #undef cr
583 #undef ci
584 
585 
586 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
#define crv
int integer
Definition: template_blas_common.h:38
int template_lapack_ladiv(const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, Treal *p, Treal *q)
Definition: template_lapack_ladiv.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define a_ref(a_1, a_2)
#define ipivot_ref(a_1, a_2)
#define b_ref(a_1, a_2)
#define x_ref(a_1, a_2)
#define cr_ref(a_1, a_2)
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
#define TRUE_
Definition: template_lapack_common.h:40
int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw, const Treal *smin, const Treal *ca, const Treal *a, const integer *lda, const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb, const Treal *wr, const Treal *wi, Treal *x, const integer *ldx, Treal *scale, Treal *xnorm, integer *info)
Definition: template_lapack_laln2.h:40
#define FALSE_
Definition: template_lapack_common.h:41
#define civ
#define ci_ref(a_1, a_2)