001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.distribution; 019 020 import java.io.Serializable; 021 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.exception.util.LocalizedFormats; 024 import org.apache.commons.math.util.FastMath; 025 026 /** 027 * Implementation for the {@link ZipfDistribution}. 028 * 029 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $ 030 */ 031 public class ZipfDistributionImpl extends AbstractIntegerDistribution 032 implements ZipfDistribution, Serializable { 033 034 /** Serializable version identifier. */ 035 private static final long serialVersionUID = -140627372283420404L; 036 037 /** Number of elements. */ 038 private int numberOfElements; 039 040 /** Exponent parameter of the distribution. */ 041 private double exponent; 042 043 /** 044 * Create a new Zipf distribution with the given number of elements and 045 * exponent. Both values must be positive; otherwise an 046 * <code>IllegalArgumentException</code> is thrown. 047 * 048 * @param numberOfElements the number of elements 049 * @param exponent the exponent 050 * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0 051 */ 052 public ZipfDistributionImpl(final int numberOfElements, final double exponent) 053 throws IllegalArgumentException { 054 setNumberOfElementsInternal(numberOfElements); 055 setExponentInternal(exponent); 056 } 057 058 /** 059 * Get the number of elements (e.g. corpus size) for the distribution. 060 * 061 * @return the number of elements 062 */ 063 public int getNumberOfElements() { 064 return numberOfElements; 065 } 066 067 /** 068 * Set the number of elements (e.g. corpus size) for the distribution. 069 * The parameter value must be positive; otherwise an 070 * <code>IllegalArgumentException</code> is thrown. 071 * 072 * @param n the number of elements 073 * @exception IllegalArgumentException if n ≤ 0 074 * @deprecated as of 2.1 (class will become immutable in 3.0) 075 */ 076 @Deprecated 077 public void setNumberOfElements(final int n) { 078 setNumberOfElementsInternal(n); 079 } 080 /** 081 * Set the number of elements (e.g. corpus size) for the distribution. 082 * The parameter value must be positive; otherwise an 083 * <code>IllegalArgumentException</code> is thrown. 084 * 085 * @param n the number of elements 086 * @exception IllegalArgumentException if n ≤ 0 087 */ 088 private void setNumberOfElementsInternal(final int n) 089 throws IllegalArgumentException { 090 if (n <= 0) { 091 throw MathRuntimeException.createIllegalArgumentException( 092 LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0); 093 } 094 this.numberOfElements = n; 095 } 096 097 /** 098 * Get the exponent characterising the distribution. 099 * 100 * @return the exponent 101 */ 102 public double getExponent() { 103 return exponent; 104 } 105 106 /** 107 * Set the exponent characterising the distribution. 108 * The parameter value must be positive; otherwise an 109 * <code>IllegalArgumentException</code> is thrown. 110 * 111 * @param s the exponent 112 * @exception IllegalArgumentException if s ≤ 0.0 113 * @deprecated as of 2.1 (class will become immutable in 3.0) 114 */ 115 @Deprecated 116 public void setExponent(final double s) { 117 setExponentInternal(s); 118 } 119 120 /** 121 * Set the exponent characterising the distribution. 122 * The parameter value must be positive; otherwise an 123 * <code>IllegalArgumentException</code> is thrown. 124 * 125 * @param s the exponent 126 * @exception IllegalArgumentException if s ≤ 0.0 127 */ 128 private void setExponentInternal(final double s) 129 throws IllegalArgumentException { 130 if (s <= 0.0) { 131 throw MathRuntimeException.createIllegalArgumentException( 132 LocalizedFormats.NOT_POSITIVE_EXPONENT, 133 s); 134 } 135 this.exponent = s; 136 } 137 138 /** 139 * The probability mass function P(X = x) for a Zipf distribution. 140 * 141 * @param x the value at which the probability density function is evaluated. 142 * @return the value of the probability mass function at x 143 */ 144 public double probability(final int x) { 145 if (x <= 0 || x > numberOfElements) { 146 return 0.0; 147 } 148 149 return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); 150 151 } 152 153 /** 154 * The probability distribution function P(X <= x) for a Zipf distribution. 155 * 156 * @param x the value at which the PDF is evaluated. 157 * @return Zipf distribution function evaluated at x 158 */ 159 @Override 160 public double cumulativeProbability(final int x) { 161 if (x <= 0) { 162 return 0.0; 163 } else if (x >= numberOfElements) { 164 return 1.0; 165 } 166 167 return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); 168 169 } 170 171 /** 172 * Access the domain value lower bound, based on <code>p</code>, used to 173 * bracket a PDF root. 174 * 175 * @param p the desired probability for the critical value 176 * @return domain value lower bound, i.e. 177 * P(X < <i>lower bound</i>) < <code>p</code> 178 */ 179 @Override 180 protected int getDomainLowerBound(final double p) { 181 return 0; 182 } 183 184 /** 185 * Access the domain value upper bound, based on <code>p</code>, used to 186 * bracket a PDF root. 187 * 188 * @param p the desired probability for the critical value 189 * @return domain value upper bound, i.e. 190 * P(X < <i>upper bound</i>) > <code>p</code> 191 */ 192 @Override 193 protected int getDomainUpperBound(final double p) { 194 return numberOfElements; 195 } 196 197 198 /** 199 * Calculates the Nth generalized harmonic number. See 200 * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic 201 * Series</a>. 202 * 203 * @param n the term in the series to calculate (must be ≥ 1) 204 * @param m the exponent; special case m == 1.0 is the harmonic series 205 * @return the nth generalized harmonic number 206 */ 207 private double generalizedHarmonic(final int n, final double m) { 208 double value = 0; 209 for (int k = n; k > 0; --k) { 210 value += 1.0 / FastMath.pow(k, m); 211 } 212 return value; 213 } 214 215 /** 216 * Returns the lower bound of the support for the distribution. 217 * 218 * The lower bound of the support is always 1 no matter the parameters. 219 * 220 * @return lower bound of the support (always 1) 221 * @since 2.2 222 */ 223 public int getSupportLowerBound() { 224 return 1; 225 } 226 227 /** 228 * Returns the upper bound of the support for the distribution. 229 * 230 * The upper bound of the support is the number of elements 231 * 232 * @return upper bound of the support 233 * @since 2.2 234 */ 235 public int getSupportUpperBound() { 236 return getNumberOfElements(); 237 } 238 239 /** 240 * Returns the mean. 241 * 242 * For number of elements N and exponent s, the mean is 243 * <code>Hs1 / Hs</code> where 244 * <ul> 245 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> 246 * <li><code>Hs = generalizedHarmonic(N, s)</code></li> 247 * </ul> 248 * 249 * @return the mean 250 * @since 2.2 251 */ 252 protected double getNumericalMean() { 253 final int N = getNumberOfElements(); 254 final double s = getExponent(); 255 256 final double Hs1 = generalizedHarmonic(N, s - 1); 257 final double Hs = generalizedHarmonic(N, s); 258 259 return Hs1 / Hs; 260 } 261 262 /** 263 * Returns the variance. 264 * 265 * For number of elements N and exponent s, the mean is 266 * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where 267 * <ul> 268 * <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li> 269 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> 270 * <li><code>Hs = generalizedHarmonic(N, s)</code></li> 271 * </ul> 272 * 273 * @return the variance 274 * @since 2.2 275 */ 276 protected double getNumericalVariance() { 277 final int N = getNumberOfElements(); 278 final double s = getExponent(); 279 280 final double Hs2 = generalizedHarmonic(N, s - 2); 281 final double Hs1 = generalizedHarmonic(N, s - 1); 282 final double Hs = generalizedHarmonic(N, s); 283 284 return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); 285 } 286 }