001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.analysis.interpolation; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 022 import org.apache.commons.math.MathException; 023 import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction; 024 025 /** 026 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression"> 027 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of 028 * real univariate functions. 029 * <p/> 030 * For reference, see 031 * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf"> 032 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing 033 * Scatterplots</a> 034 * <p/> 035 * This class implements both the loess method and serves as an interpolation 036 * adapter to it, allowing to build a spline on the obtained loess fit. 037 * 038 * @version $Revision: 794709 $ $Date: 2009-07-16 11:09:02 -0400 (Thu, 16 Jul 2009) $ 039 * @since 2.0 040 */ 041 public class LoessInterpolator 042 implements UnivariateRealInterpolator, Serializable { 043 044 /** serializable version identifier. */ 045 private static final long serialVersionUID = 5204927143605193821L; 046 047 /** 048 * Default value of the bandwidth parameter. 049 */ 050 public static final double DEFAULT_BANDWIDTH = 0.3; 051 /** 052 * Default value of the number of robustness iterations. 053 */ 054 public static final int DEFAULT_ROBUSTNESS_ITERS = 2; 055 056 /** 057 * The bandwidth parameter: when computing the loess fit at 058 * a particular point, this fraction of source points closest 059 * to the current point is taken into account for computing 060 * a least-squares regression. 061 * <p/> 062 * A sensible value is usually 0.25 to 0.5. 063 */ 064 private final double bandwidth; 065 066 /** 067 * The number of robustness iterations parameter: this many 068 * robustness iterations are done. 069 * <p/> 070 * A sensible value is usually 0 (just the initial fit without any 071 * robustness iterations) to 4. 072 */ 073 private final int robustnessIters; 074 075 /** 076 * Constructs a new {@link LoessInterpolator} 077 * with a bandwidth of {@link #DEFAULT_BANDWIDTH} and 078 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations. 079 * See {@link #LoessInterpolator(double, int)} for an explanation of 080 * the parameters. 081 */ 082 public LoessInterpolator() { 083 this.bandwidth = DEFAULT_BANDWIDTH; 084 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS; 085 } 086 087 /** 088 * Constructs a new {@link LoessInterpolator} 089 * with given bandwidth and number of robustness iterations. 090 * 091 * @param bandwidth when computing the loess fit at 092 * a particular point, this fraction of source points closest 093 * to the current point is taken into account for computing 094 * a least-squares regression.</br> 095 * A sensible value is usually 0.25 to 0.5, the default value is 096 * {@link #DEFAULT_BANDWIDTH}. 097 * @param robustnessIters This many robustness iterations are done.</br> 098 * A sensible value is usually 0 (just the initial fit without any 099 * robustness iterations) to 4, the default value is 100 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 101 * @throws MathException if bandwidth does not lie in the interval [0,1] 102 * or if robustnessIters is negative. 103 */ 104 public LoessInterpolator(double bandwidth, int robustnessIters) throws MathException { 105 if (bandwidth < 0 || bandwidth > 1) { 106 throw new MathException("bandwidth must be in the interval [0,1], but got {0}", 107 bandwidth); 108 } 109 this.bandwidth = bandwidth; 110 if (robustnessIters < 0) { 111 throw new MathException("the number of robustness iterations must " + 112 "be non-negative, but got {0}", 113 robustnessIters); 114 } 115 this.robustnessIters = robustnessIters; 116 } 117 118 /** 119 * Compute an interpolating function by performing a loess fit 120 * on the data at the original abscissae and then building a cubic spline 121 * with a 122 * {@link org.apache.commons.math.analysis.interpolation.SplineInterpolator} 123 * on the resulting fit. 124 * 125 * @param xval the arguments for the interpolation points 126 * @param yval the values for the interpolation points 127 * @return A cubic spline built upon a loess fit to the data at the original abscissae 128 * @throws MathException if some of the following conditions are false: 129 * <ul> 130 * <li> Arguments and values are of the same size that is greater than zero</li> 131 * <li> The arguments are in a strictly increasing order</li> 132 * <li> All arguments and values are finite real numbers</li> 133 * </ul> 134 */ 135 public final PolynomialSplineFunction interpolate( 136 final double[] xval, final double[] yval) throws MathException { 137 return new SplineInterpolator().interpolate(xval, smooth(xval, yval)); 138 } 139 140 /** 141 * Compute a loess fit on the data at the original abscissae. 142 * 143 * @param xval the arguments for the interpolation points 144 * @param yval the values for the interpolation points 145 * @return values of the loess fit at corresponding original abscissae 146 * @throws MathException if some of the following conditions are false: 147 * <ul> 148 * <li> Arguments and values are of the same size that is greater than zero</li> 149 * <li> The arguments are in a strictly increasing order</li> 150 * <li> All arguments and values are finite real numbers</li> 151 * </ul> 152 */ 153 public final double[] smooth(final double[] xval, final double[] yval) 154 throws MathException { 155 if (xval.length != yval.length) { 156 throw new MathException( 157 "Loess expects the abscissa and ordinate arrays " + 158 "to be of the same size, " + 159 "but got {0} abscisssae and {1} ordinatae", 160 xval.length, yval.length); 161 } 162 163 final int n = xval.length; 164 165 if (n == 0) { 166 throw new MathException("Loess expects at least 1 point"); 167 } 168 169 checkAllFiniteReal(xval, true); 170 checkAllFiniteReal(yval, false); 171 172 checkStrictlyIncreasing(xval); 173 174 if (n == 1) { 175 return new double[]{yval[0]}; 176 } 177 178 if (n == 2) { 179 return new double[]{yval[0], yval[1]}; 180 } 181 182 int bandwidthInPoints = (int) (bandwidth * n); 183 184 if (bandwidthInPoints < 2) { 185 throw new MathException( 186 "the bandwidth must be large enough to " + 187 "accomodate at least 2 points. There are {0} " + 188 " data points, and bandwidth must be at least {1} " + 189 " but it is only {2}", 190 n, 2.0 / n, bandwidth); 191 } 192 193 final double[] res = new double[n]; 194 195 final double[] residuals = new double[n]; 196 final double[] sortedResiduals = new double[n]; 197 198 final double[] robustnessWeights = new double[n]; 199 200 // Do an initial fit and 'robustnessIters' robustness iterations. 201 // This is equivalent to doing 'robustnessIters+1' robustness iterations 202 // starting with all robustness weights set to 1. 203 Arrays.fill(robustnessWeights, 1); 204 205 for (int iter = 0; iter <= robustnessIters; ++iter) { 206 final int[] bandwidthInterval = {0, bandwidthInPoints - 1}; 207 // At each x, compute a local weighted linear regression 208 for (int i = 0; i < n; ++i) { 209 final double x = xval[i]; 210 211 // Find out the interval of source points on which 212 // a regression is to be made. 213 if (i > 0) { 214 updateBandwidthInterval(xval, i, bandwidthInterval); 215 } 216 217 final int ileft = bandwidthInterval[0]; 218 final int iright = bandwidthInterval[1]; 219 220 // Compute the point of the bandwidth interval that is 221 // farthest from x 222 final int edge; 223 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) { 224 edge = ileft; 225 } else { 226 edge = iright; 227 } 228 229 // Compute a least-squares linear fit weighted by 230 // the product of robustness weights and the tricube 231 // weight function. 232 // See http://en.wikipedia.org/wiki/Linear_regression 233 // (section "Univariate linear case") 234 // and http://en.wikipedia.org/wiki/Weighted_least_squares 235 // (section "Weighted least squares") 236 double sumWeights = 0; 237 double sumX = 0, sumXSquared = 0, sumY = 0, sumXY = 0; 238 double denom = Math.abs(1.0 / (xval[edge] - x)); 239 for (int k = ileft; k <= iright; ++k) { 240 final double xk = xval[k]; 241 final double yk = yval[k]; 242 double dist; 243 if (k < i) { 244 dist = (x - xk); 245 } else { 246 dist = (xk - x); 247 } 248 final double w = tricube(dist * denom) * robustnessWeights[k]; 249 final double xkw = xk * w; 250 sumWeights += w; 251 sumX += xkw; 252 sumXSquared += xk * xkw; 253 sumY += yk * w; 254 sumXY += yk * xkw; 255 } 256 257 final double meanX = sumX / sumWeights; 258 final double meanY = sumY / sumWeights; 259 final double meanXY = sumXY / sumWeights; 260 final double meanXSquared = sumXSquared / sumWeights; 261 262 final double beta; 263 if (meanXSquared == meanX * meanX) { 264 beta = 0; 265 } else { 266 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX); 267 } 268 269 final double alpha = meanY - beta * meanX; 270 271 res[i] = beta * x + alpha; 272 residuals[i] = Math.abs(yval[i] - res[i]); 273 } 274 275 // No need to recompute the robustness weights at the last 276 // iteration, they won't be needed anymore 277 if (iter == robustnessIters) { 278 break; 279 } 280 281 // Recompute the robustness weights. 282 283 // Find the median residual. 284 // An arraycopy and a sort are completely tractable here, 285 // because the preceding loop is a lot more expensive 286 System.arraycopy(residuals, 0, sortedResiduals, 0, n); 287 Arrays.sort(sortedResiduals); 288 final double medianResidual = sortedResiduals[n / 2]; 289 290 if (medianResidual == 0) { 291 break; 292 } 293 294 for (int i = 0; i < n; ++i) { 295 final double arg = residuals[i] / (6 * medianResidual); 296 robustnessWeights[i] = (arg >= 1) ? 0 : Math.pow(1 - arg * arg, 2); 297 } 298 } 299 300 return res; 301 } 302 303 /** 304 * Given an index interval into xval that embraces a certain number of 305 * points closest to xval[i-1], update the interval so that it embraces 306 * the same number of points closest to xval[i] 307 * 308 * @param xval arguments array 309 * @param i the index around which the new interval should be computed 310 * @param bandwidthInterval a two-element array {left, right} such that: <p/> 311 * <tt>(left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])</tt> 312 * <p/> and also <p/> 313 * <tt>(right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])</tt>. 314 * The array will be updated. 315 */ 316 private static void updateBandwidthInterval(final double[] xval, final int i, 317 final int[] bandwidthInterval) { 318 final int left = bandwidthInterval[0]; 319 final int right = bandwidthInterval[1]; 320 321 // The right edge should be adjusted if the next point to the right 322 // is closer to xval[i] than the leftmost point of the current interval 323 if (right < xval.length - 1 && 324 xval[right+1] - xval[i] < xval[i] - xval[left]) { 325 bandwidthInterval[0]++; 326 bandwidthInterval[1]++; 327 } 328 } 329 330 /** 331 * Compute the 332 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a> 333 * weight function 334 * 335 * @param x the argument 336 * @return (1-|x|^3)^3 337 */ 338 private static double tricube(final double x) { 339 final double tmp = 1 - x * x * x; 340 return tmp * tmp * tmp; 341 } 342 343 /** 344 * Check that all elements of an array are finite real numbers. 345 * 346 * @param values the values array 347 * @param isAbscissae if true, elements are abscissae otherwise they are ordinatae 348 * @throws MathException if one of the values is not 349 * a finite real number 350 */ 351 private static void checkAllFiniteReal(final double[] values, final boolean isAbscissae) 352 throws MathException { 353 for (int i = 0; i < values.length; i++) { 354 final double x = values[i]; 355 if (Double.isInfinite(x) || Double.isNaN(x)) { 356 final String pattern = isAbscissae ? 357 "all abscissae must be finite real numbers, but {0}-th is {1}" : 358 "all ordinatae must be finite real numbers, but {0}-th is {1}"; 359 throw new MathException(pattern, i, x); 360 } 361 } 362 } 363 364 /** 365 * Check that elements of the abscissae array are in a strictly 366 * increasing order. 367 * 368 * @param xval the abscissae array 369 * @throws MathException if the abscissae array 370 * is not in a strictly increasing order 371 */ 372 private static void checkStrictlyIncreasing(final double[] xval) 373 throws MathException { 374 for (int i = 0; i < xval.length; ++i) { 375 if (i >= 1 && xval[i - 1] >= xval[i]) { 376 throw new MathException( 377 "the abscissae array must be sorted in a strictly " + 378 "increasing order, but the {0}-th element is {1} " + 379 "whereas {2}-th is {3}", 380 i - 1, xval[i - 1], i, xval[i]); 381 } 382 } 383 } 384 }