001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive.rank; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic; 024 025 /** 026 * Provides percentile computation. 027 * <p> 028 * There are several commonly used methods for estimating percentiles (a.k.a. 029 * quantiles) based on sample data. For large samples, the different methods 030 * agree closely, but when sample sizes are small, different methods will give 031 * significantly different results. The algorithm implemented here works as follows: 032 * <ol> 033 * <li>Let <code>n</code> be the length of the (sorted) array and 034 * <code>0 < p <= 100</code> be the desired percentile.</li> 035 * <li>If <code> n = 1 </code> return the unique array element (regardless of 036 * the value of <code>p</code>); otherwise </li> 037 * <li>Compute the estimated percentile position 038 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> 039 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional 040 * part of <code>pos</code>). If <code>pos >= n</code> return the largest 041 * element in the array; otherwise</li> 042 * <li>Let <code>lower</code> be the element in position 043 * <code>floor(pos)</code> in the array and let <code>upper</code> be the 044 * next element in the array. Return <code>lower + d * (upper - lower)</code> 045 * </li> 046 * </ol></p> 047 * <p> 048 * To compute percentiles, the data must be (totally) ordered. Input arrays 049 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}. 050 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined 051 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes 052 * <code>Double.NaN</code> larger than any other value (including 053 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median 054 * (50th percentile) of 055 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p> 056 * <p> 057 * Since percentile estimation usually involves interpolation between array 058 * elements, arrays containing <code>NaN</code> or infinite values will often 059 * result in <code>NaN<code> or infinite values returned.</p> 060 * <p> 061 * <strong>Note that this implementation is not synchronized.</strong> If 062 * multiple threads access an instance of this class concurrently, and at least 063 * one of the threads invokes the <code>increment()</code> or 064 * <code>clear()</code> method, it must be synchronized externally.</p> 065 * 066 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $ 067 */ 068 public class Percentile extends AbstractUnivariateStatistic implements Serializable { 069 070 /** Serializable version identifier */ 071 private static final long serialVersionUID = -8091216485095130416L; 072 073 /** Determines what percentile is computed when evaluate() is activated 074 * with no quantile argument */ 075 private double quantile = 0.0; 076 077 /** 078 * Constructs a Percentile with a default quantile 079 * value of 50.0. 080 */ 081 public Percentile() { 082 this(50.0); 083 } 084 085 /** 086 * Constructs a Percentile with the specific quantile value. 087 * @param p the quantile 088 * @throws IllegalArgumentException if p is not greater than 0 and less 089 * than or equal to 100 090 */ 091 public Percentile(final double p) { 092 setQuantile(p); 093 } 094 095 /** 096 * Copy constructor, creates a new {@code Percentile} identical 097 * to the {@code original} 098 * 099 * @param original the {@code Percentile} instance to copy 100 */ 101 public Percentile(Percentile original) { 102 copy(original, this); 103 } 104 105 /** 106 * Returns an estimate of the <code>p</code>th percentile of the values 107 * in the <code>values</code> array. 108 * <p> 109 * Calls to this method do not modify the internal <code>quantile</code> 110 * state of this statistic.</p> 111 * <p> 112 * <ul> 113 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 114 * <code>0</code></li> 115 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 116 * if <code>values</code> has length <code>1</code></li> 117 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 118 * is null or p is not a valid quantile value (p must be greater than 0 119 * and less than or equal to 100) </li> 120 * </ul></p> 121 * <p> 122 * See {@link Percentile} for a description of the percentile estimation 123 * algorithm used.</p> 124 * 125 * @param values input array of values 126 * @param p the percentile value to compute 127 * @return the percentile value or Double.NaN if the array is empty 128 * @throws IllegalArgumentException if <code>values</code> is null 129 * or p is invalid 130 */ 131 public double evaluate(final double[] values, final double p) { 132 test(values, 0, 0); 133 return evaluate(values, 0, values.length, p); 134 } 135 136 /** 137 * Returns an estimate of the <code>quantile</code>th percentile of the 138 * designated values in the <code>values</code> array. The quantile 139 * estimated is determined by the <code>quantile</code> property. 140 * <p> 141 * <ul> 142 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 143 * <li>Returns (for any value of <code>quantile</code>) 144 * <code>values[begin]</code> if <code>length = 1 </code></li> 145 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 146 * is null, or <code>start</code> or <code>length</code> 147 * is invalid</li> 148 * </ul></p> 149 * <p> 150 * See {@link Percentile} for a description of the percentile estimation 151 * algorithm used.</p> 152 * 153 * @param values the input array 154 * @param start index of the first array element to include 155 * @param length the number of elements to include 156 * @return the percentile value 157 * @throws IllegalArgumentException if the parameters are not valid 158 * 159 */ 160 @Override 161 public double evaluate( final double[] values, final int start, final int length) { 162 return evaluate(values, start, length, quantile); 163 } 164 165 /** 166 * Returns an estimate of the <code>p</code>th percentile of the values 167 * in the <code>values</code> array, starting with the element in (0-based) 168 * position <code>begin</code> in the array and including <code>length</code> 169 * values. 170 * <p> 171 * Calls to this method do not modify the internal <code>quantile</code> 172 * state of this statistic.</p> 173 * <p> 174 * <ul> 175 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 176 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 177 * if <code>length = 1 </code></li> 178 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 179 * is null , <code>begin</code> or <code>length</code> is invalid, or 180 * <code>p</code> is not a valid quantile value (p must be greater than 0 181 * and less than or equal to 100)</li> 182 * </ul></p> 183 * <p> 184 * See {@link Percentile} for a description of the percentile estimation 185 * algorithm used.</p> 186 * 187 * @param values array of input values 188 * @param p the percentile to compute 189 * @param begin the first (0-based) element to include in the computation 190 * @param length the number of array elements to include 191 * @return the percentile value 192 * @throws IllegalArgumentException if the parameters are not valid or the 193 * input array is null 194 */ 195 public double evaluate(final double[] values, final int begin, 196 final int length, final double p) { 197 198 test(values, begin, length); 199 200 if ((p > 100) || (p <= 0)) { 201 throw MathRuntimeException.createIllegalArgumentException( 202 "out of bounds quantile value: {0}, must be in (0, 100]", p); 203 } 204 if (length == 0) { 205 return Double.NaN; 206 } 207 if (length == 1) { 208 return values[begin]; // always return single value for n = 1 209 } 210 double n = length; 211 double pos = p * (n + 1) / 100; 212 double fpos = Math.floor(pos); 213 int intPos = (int) fpos; 214 double dif = pos - fpos; 215 double[] sorted = new double[length]; 216 System.arraycopy(values, begin, sorted, 0, length); 217 Arrays.sort(sorted); 218 219 if (pos < 1) { 220 return sorted[0]; 221 } 222 if (pos >= n) { 223 return sorted[length - 1]; 224 } 225 double lower = sorted[intPos - 1]; 226 double upper = sorted[intPos]; 227 return lower + dif * (upper - lower); 228 } 229 230 /** 231 * Returns the value of the quantile field (determines what percentile is 232 * computed when evaluate() is called with no quantile argument). 233 * 234 * @return quantile 235 */ 236 public double getQuantile() { 237 return quantile; 238 } 239 240 /** 241 * Sets the value of the quantile field (determines what percentile is 242 * computed when evaluate() is called with no quantile argument). 243 * 244 * @param p a value between 0 < p <= 100 245 * @throws IllegalArgumentException if p is not greater than 0 and less 246 * than or equal to 100 247 */ 248 public void setQuantile(final double p) { 249 if (p <= 0 || p > 100) { 250 throw MathRuntimeException.createIllegalArgumentException( 251 "out of bounds quantile value: {0}, must be in (0, 100]", p); 252 } 253 quantile = p; 254 } 255 256 /** 257 * {@inheritDoc} 258 */ 259 @Override 260 public Percentile copy() { 261 Percentile result = new Percentile(); 262 copy(this, result); 263 return result; 264 } 265 266 /** 267 * Copies source to dest. 268 * <p>Neither source nor dest can be null.</p> 269 * 270 * @param source Percentile to copy 271 * @param dest Percentile to copy to 272 * @throws NullPointerException if either source or dest is null 273 */ 274 public static void copy(Percentile source, Percentile dest) { 275 dest.quantile = source.quantile; 276 } 277 278 }