ergo
template_lapack_larrj.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRJ_HEADER
36 #define TEMPLATE_LAPACK_LARRJ_HEADER
37 
38 template<class Treal>
39 int template_lapack_larrj(integer *n, Treal *d__, Treal *e2,
40  integer *ifirst, integer *ilast, Treal *rtol, integer *offset,
41  Treal *w, Treal *werr, Treal *work, integer *iwork,
42  Treal *pivmin, Treal *spdiam, integer *info)
43 {
44  /* System generated locals */
45  integer i__1, i__2;
46  Treal d__1, d__2;
47 
48 
49  /* Local variables */
50  integer i__, j, k, p;
51  Treal s;
52  integer i1, i2, ii;
53  Treal fac, mid;
54  integer cnt;
55  Treal tmp, left;
56  integer iter, nint, prev, next, savi1;
57  Treal right, width, dplus;
58  integer olnint, maxitr;
59 
60 
61 /* -- LAPACK auxiliary routine (version 3.2) -- */
62 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
63 /* November 2006 */
64 
65 /* .. Scalar Arguments .. */
66 /* .. */
67 /* .. Array Arguments .. */
68 /* .. */
69 
70 /* Purpose */
71 /* ======= */
72 
73 /* Given the initial eigenvalue approximations of T, DLARRJ */
74 /* does bisection to refine the eigenvalues of T, */
75 /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
76 /* guesses for these eigenvalues are input in W, the corresponding estimate */
77 /* of the error in these guesses in WERR. During bisection, intervals */
78 /* [left, right] are maintained by storing their mid-points and */
79 /* semi-widths in the arrays W and WERR respectively. */
80 
81 /* Arguments */
82 /* ========= */
83 
84 /* N (input) INTEGER */
85 /* The order of the matrix. */
86 
87 /* D (input) DOUBLE PRECISION array, dimension (N) */
88 /* The N diagonal elements of T. */
89 
90 /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */
91 /* The Squares of the (N-1) subdiagonal elements of T. */
92 
93 /* IFIRST (input) INTEGER */
94 /* The index of the first eigenvalue to be computed. */
95 
96 /* ILAST (input) INTEGER */
97 /* The index of the last eigenvalue to be computed. */
98 
99 /* RTOL (input) DOUBLE PRECISION */
100 /* Tolerance for the convergence of the bisection intervals. */
101 /* An interval [LEFT,RIGHT] has converged if */
102 /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */
103 
104 /* OFFSET (input) INTEGER */
105 /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */
106 /* through ILAST-OFFSET elements of these arrays are to be used. */
107 
108 /* W (input/output) DOUBLE PRECISION array, dimension (N) */
109 /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
110 /* estimates of the eigenvalues of L D L^T indexed IFIRST through */
111 /* ILAST. */
112 /* On output, these estimates are refined. */
113 
114 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
115 /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
116 /* the errors in the estimates of the corresponding elements in W. */
117 /* On output, these errors are refined. */
118 
119 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */
120 /* Workspace. */
121 
122 /* IWORK (workspace) INTEGER array, dimension (2*N) */
123 /* Workspace. */
124 
125 /* PIVMIN (input) DOUBLE PRECISION */
126 /* The minimum pivot in the Sturm sequence for T. */
127 
128 /* SPDIAM (input) DOUBLE PRECISION */
129 /* The spectral diameter of T. */
130 
131 /* INFO (output) INTEGER */
132 /* Error flag. */
133 
134 /* Further Details */
135 /* =============== */
136 
137 /* Based on contributions by */
138 /* Beresford Parlett, University of California, Berkeley, USA */
139 /* Jim Demmel, University of California, Berkeley, USA */
140 /* Inderjit Dhillon, University of Texas, Austin, USA */
141 /* Osni Marques, LBNL/NERSC, USA */
142 /* Christof Voemel, University of California, Berkeley, USA */
143 
144 /* ===================================================================== */
145 
146 /* .. Parameters .. */
147 /* .. */
148 /* .. Local Scalars .. */
149 
150 /* .. */
151 /* .. Intrinsic Functions .. */
152 /* .. */
153 /* .. Executable Statements .. */
154 
155  /* Parameter adjustments */
156  --iwork;
157  --work;
158  --werr;
159  --w;
160  --e2;
161  --d__;
162 
163  /* Function Body */
164  *info = 0;
165 
166  maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) +
167  2;
168 
169 /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
170 /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
171 /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */
172 /* for an unconverged interval is set to the index of the next unconverged */
173 /* interval, and is -1 or 0 for a converged interval. Thus a linked */
174 /* list of unconverged intervals is set up. */
175 
176  i1 = *ifirst;
177  i2 = *ilast;
178 /* The number of unconverged intervals */
179  nint = 0;
180 /* The last unconverged interval found */
181  prev = 0;
182  i__1 = i2;
183  for (i__ = i1; i__ <= i__1; ++i__) {
184  k = i__ << 1;
185  ii = i__ - *offset;
186  left = w[ii] - werr[ii];
187  mid = w[ii];
188  right = w[ii] + werr[ii];
189  width = right - mid;
190 /* Computing MAX */
191  d__1 = absMACRO(left), d__2 = absMACRO(right);
192  tmp = maxMACRO(d__1,d__2);
193 /* The following test prevents the test of converged intervals */
194  if (width < *rtol * tmp) {
195 /* This interval has already converged and does not need refinement. */
196 /* (Note that the gaps might change through refining the */
197 /* eigenvalues, however, they can only get bigger.) */
198 /* Remove it from the list. */
199  iwork[k - 1] = -1;
200 /* Make sure that I1 always points to the first unconverged interval */
201  if (i__ == i1 && i__ < i2) {
202  i1 = i__ + 1;
203  }
204  if (prev >= i1 && i__ <= i2) {
205  iwork[(prev << 1) - 1] = i__ + 1;
206  }
207  } else {
208 /* unconverged interval found */
209  prev = i__;
210 /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
211 
212 /* Do while( CNT(LEFT).GT.I-1 ) */
213 
214  fac = 1.;
215 L20:
216  cnt = 0;
217  s = left;
218  dplus = d__[1] - s;
219  if (dplus < 0.) {
220  ++cnt;
221  }
222  i__2 = *n;
223  for (j = 2; j <= i__2; ++j) {
224  dplus = d__[j] - s - e2[j - 1] / dplus;
225  if (dplus < 0.) {
226  ++cnt;
227  }
228 /* L30: */
229  }
230  if (cnt > i__ - 1) {
231  left -= werr[ii] * fac;
232  fac *= 2.;
233  goto L20;
234  }
235 
236 /* Do while( CNT(RIGHT).LT.I ) */
237 
238  fac = 1.;
239 L50:
240  cnt = 0;
241  s = right;
242  dplus = d__[1] - s;
243  if (dplus < 0.) {
244  ++cnt;
245  }
246  i__2 = *n;
247  for (j = 2; j <= i__2; ++j) {
248  dplus = d__[j] - s - e2[j - 1] / dplus;
249  if (dplus < 0.) {
250  ++cnt;
251  }
252 /* L60: */
253  }
254  if (cnt < i__) {
255  right += werr[ii] * fac;
256  fac *= 2.;
257  goto L50;
258  }
259  ++nint;
260  iwork[k - 1] = i__ + 1;
261  iwork[k] = cnt;
262  }
263  work[k - 1] = left;
264  work[k] = right;
265 /* L75: */
266  }
267  savi1 = i1;
268 
269 /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
270 /* and while (ITER.LT.MAXITR) */
271 
272  iter = 0;
273 L80:
274  prev = i1 - 1;
275  i__ = i1;
276  olnint = nint;
277  i__1 = olnint;
278  for (p = 1; p <= i__1; ++p) {
279  k = i__ << 1;
280  ii = i__ - *offset;
281  next = iwork[k - 1];
282  left = work[k - 1];
283  right = work[k];
284  mid = (left + right) * .5;
285 /* semiwidth of interval */
286  width = right - mid;
287 /* Computing MAX */
288  d__1 = absMACRO(left), d__2 = absMACRO(right);
289  tmp = maxMACRO(d__1,d__2);
290  if (width < *rtol * tmp || iter == maxitr) {
291 /* reduce number of unconverged intervals */
292  --nint;
293 /* Mark interval as converged. */
294  iwork[k - 1] = 0;
295  if (i1 == i__) {
296  i1 = next;
297  } else {
298 /* Prev holds the last unconverged interval previously examined */
299  if (prev >= i1) {
300  iwork[(prev << 1) - 1] = next;
301  }
302  }
303  i__ = next;
304  goto L100;
305  }
306  prev = i__;
307 
308 /* Perform one bisection step */
309 
310  cnt = 0;
311  s = mid;
312  dplus = d__[1] - s;
313  if (dplus < 0.) {
314  ++cnt;
315  }
316  i__2 = *n;
317  for (j = 2; j <= i__2; ++j) {
318  dplus = d__[j] - s - e2[j - 1] / dplus;
319  if (dplus < 0.) {
320  ++cnt;
321  }
322 /* L90: */
323  }
324  if (cnt <= i__ - 1) {
325  work[k - 1] = mid;
326  } else {
327  work[k] = mid;
328  }
329  i__ = next;
330 L100:
331  ;
332  }
333  ++iter;
334 /* do another loop if there are still unconverged intervals */
335 /* However, in the last iteration, all intervals are accepted */
336 /* since this is the best we can do. */
337  if (nint > 0 && iter <= maxitr) {
338  goto L80;
339  }
340 
341 
342 /* At this point, all the intervals have converged */
343  i__1 = *ilast;
344  for (i__ = savi1; i__ <= i__1; ++i__) {
345  k = i__ << 1;
346  ii = i__ - *offset;
347 /* All intervals marked by '0' have been refined. */
348  if (iwork[k - 1] == 0) {
349  w[ii] = (work[k - 1] + work[k]) * .5;
350  werr[ii] = work[k] - w[ii];
351  }
352 /* L110: */
353  }
354 
355  return 0;
356 
357 /* End of DLARRJ */
358 
359 } /* dlarrj_ */
360 
361 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
Definition: Matrix.h:73
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
Treal template_blas_log(Treal x)
int template_lapack_larrj(integer *n, Treal *d__, Treal *e2, integer *ifirst, integer *ilast, Treal *rtol, integer *offset, Treal *w, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *info)
Definition: template_lapack_larrj.h:39
Definition: Matrix.h:73