001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.distribution; 019 020 import java.io.Serializable; 021 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.util.MathUtils; 024 025 /** 026 * The default implementation of {@link HypergeometricDistribution}. 027 * 028 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $ 029 */ 030 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution 031 implements HypergeometricDistribution, Serializable 032 { 033 034 /** Serializable version identifier */ 035 private static final long serialVersionUID = -436928820673516179L; 036 037 /** The number of successes in the population. */ 038 private int numberOfSuccesses; 039 040 /** The population size. */ 041 private int populationSize; 042 043 /** The sample size. */ 044 private int sampleSize; 045 046 /** 047 * Construct a new hypergeometric distribution with the given the population 048 * size, the number of successes in the population, and the sample size. 049 * @param populationSize the population size. 050 * @param numberOfSuccesses number of successes in the population. 051 * @param sampleSize the sample size. 052 */ 053 public HypergeometricDistributionImpl(int populationSize, 054 int numberOfSuccesses, int sampleSize) { 055 super(); 056 if (numberOfSuccesses > populationSize) { 057 throw MathRuntimeException.createIllegalArgumentException( 058 "number of successes ({0}) must be less than or equal to population size ({1})", 059 numberOfSuccesses, populationSize); 060 } 061 if (sampleSize > populationSize) { 062 throw MathRuntimeException.createIllegalArgumentException( 063 "sample size ({0}) must be less than or equal to population size ({1})", 064 sampleSize, populationSize); 065 } 066 setPopulationSize(populationSize); 067 setSampleSize(sampleSize); 068 setNumberOfSuccesses(numberOfSuccesses); 069 } 070 071 /** 072 * For this distribution, X, this method returns P(X ≤ x). 073 * @param x the value at which the PDF is evaluated. 074 * @return PDF for this distribution. 075 */ 076 @Override 077 public double cumulativeProbability(int x) { 078 double ret; 079 080 int n = getPopulationSize(); 081 int m = getNumberOfSuccesses(); 082 int k = getSampleSize(); 083 084 int[] domain = getDomain(n, m, k); 085 if (x < domain[0]) { 086 ret = 0.0; 087 } else if(x >= domain[1]) { 088 ret = 1.0; 089 } else { 090 ret = innerCumulativeProbability(domain[0], x, 1, n, m, k); 091 } 092 093 return ret; 094 } 095 096 /** 097 * Return the domain for the given hypergeometric distribution parameters. 098 * @param n the population size. 099 * @param m number of successes in the population. 100 * @param k the sample size. 101 * @return a two element array containing the lower and upper bounds of the 102 * hypergeometric distribution. 103 */ 104 private int[] getDomain(int n, int m, int k){ 105 return new int[]{ 106 getLowerDomain(n, m, k), 107 getUpperDomain(m, k) 108 }; 109 } 110 111 /** 112 * Access the domain value lower bound, based on <code>p</code>, used to 113 * bracket a PDF root. 114 * 115 * @param p the desired probability for the critical value 116 * @return domain value lower bound, i.e. 117 * P(X < <i>lower bound</i>) < <code>p</code> 118 */ 119 @Override 120 protected int getDomainLowerBound(double p) { 121 return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(), 122 getSampleSize()); 123 } 124 125 /** 126 * Access the domain value upper bound, based on <code>p</code>, used to 127 * bracket a PDF root. 128 * 129 * @param p the desired probability for the critical value 130 * @return domain value upper bound, i.e. 131 * P(X < <i>upper bound</i>) > <code>p</code> 132 */ 133 @Override 134 protected int getDomainUpperBound(double p) { 135 return getUpperDomain(getSampleSize(), getNumberOfSuccesses()); 136 } 137 138 /** 139 * Return the lowest domain value for the given hypergeometric distribution 140 * parameters. 141 * @param n the population size. 142 * @param m number of successes in the population. 143 * @param k the sample size. 144 * @return the lowest domain value of the hypergeometric distribution. 145 */ 146 private int getLowerDomain(int n, int m, int k) { 147 return Math.max(0, m - (n - k)); 148 } 149 150 /** 151 * Access the number of successes. 152 * @return the number of successes. 153 */ 154 public int getNumberOfSuccesses() { 155 return numberOfSuccesses; 156 } 157 158 /** 159 * Access the population size. 160 * @return the population size. 161 */ 162 public int getPopulationSize() { 163 return populationSize; 164 } 165 166 /** 167 * Access the sample size. 168 * @return the sample size. 169 */ 170 public int getSampleSize() { 171 return sampleSize; 172 } 173 174 /** 175 * Return the highest domain value for the given hypergeometric distribution 176 * parameters. 177 * @param m number of successes in the population. 178 * @param k the sample size. 179 * @return the highest domain value of the hypergeometric distribution. 180 */ 181 private int getUpperDomain(int m, int k){ 182 return Math.min(k, m); 183 } 184 185 /** 186 * For this distribution, X, this method returns P(X = x). 187 * 188 * @param x the value at which the PMF is evaluated. 189 * @return PMF for this distribution. 190 */ 191 public double probability(int x) { 192 double ret; 193 194 int n = getPopulationSize(); 195 int m = getNumberOfSuccesses(); 196 int k = getSampleSize(); 197 198 int[] domain = getDomain(n, m, k); 199 if(x < domain[0] || x > domain[1]){ 200 ret = 0.0; 201 } else { 202 ret = probability(n, m, k, x); 203 } 204 205 return ret; 206 } 207 208 /** 209 * For the distribution, X, defined by the given hypergeometric distribution 210 * parameters, this method returns P(X = x). 211 * 212 * @param n the population size. 213 * @param m number of successes in the population. 214 * @param k the sample size. 215 * @param x the value at which the PMF is evaluated. 216 * @return PMF for the distribution. 217 */ 218 private double probability(int n, int m, int k, int x) { 219 return Math.exp(MathUtils.binomialCoefficientLog(m, x) + 220 MathUtils.binomialCoefficientLog(n - m, k - x) - 221 MathUtils.binomialCoefficientLog(n, k)); 222 } 223 224 /** 225 * Modify the number of successes. 226 * @param num the new number of successes. 227 * @throws IllegalArgumentException if <code>num</code> is negative. 228 */ 229 public void setNumberOfSuccesses(int num) { 230 if(num < 0){ 231 throw MathRuntimeException.createIllegalArgumentException( 232 "number of successes must be non-negative ({0})", 233 num); 234 } 235 numberOfSuccesses = num; 236 } 237 238 /** 239 * Modify the population size. 240 * @param size the new population size. 241 * @throws IllegalArgumentException if <code>size</code> is not positive. 242 */ 243 public void setPopulationSize(int size) { 244 if(size <= 0){ 245 throw MathRuntimeException.createIllegalArgumentException( 246 "population size must be positive ({0})", 247 size); 248 } 249 populationSize = size; 250 } 251 252 /** 253 * Modify the sample size. 254 * @param size the new sample size. 255 * @throws IllegalArgumentException if <code>size</code> is negative. 256 */ 257 public void setSampleSize(int size) { 258 if (size < 0) { 259 throw MathRuntimeException.createIllegalArgumentException( 260 "sample size must be positive ({0})", 261 size); 262 } 263 sampleSize = size; 264 } 265 266 /** 267 * For this distribution, X, this method returns P(X ≥ x). 268 * @param x the value at which the CDF is evaluated. 269 * @return upper tail CDF for this distribution. 270 * @since 1.1 271 */ 272 public double upperCumulativeProbability(int x) { 273 double ret; 274 275 int n = getPopulationSize(); 276 int m = getNumberOfSuccesses(); 277 int k = getSampleSize(); 278 279 int[] domain = getDomain(n, m, k); 280 if (x < domain[0]) { 281 ret = 1.0; 282 } else if(x > domain[1]) { 283 ret = 0.0; 284 } else { 285 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k); 286 } 287 288 return ret; 289 } 290 291 /** 292 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This 293 * probability is computed by summing the point probabilities for the values 294 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 295 * @param x0 the inclusive, lower bound 296 * @param x1 the inclusive, upper bound 297 * @param dx the direction of summation. 1 indicates summing from x0 to x1. 298 * 0 indicates summing from x1 to x0. 299 * @param n the population size. 300 * @param m number of successes in the population. 301 * @param k the sample size. 302 * @return P(x0 ≤ X ≤ x1). 303 */ 304 private double innerCumulativeProbability( 305 int x0, int x1, int dx, int n, int m, int k) 306 { 307 double ret = probability(n, m, k, x0); 308 while (x0 != x1) { 309 x0 += dx; 310 ret += probability(n, m, k, x0); 311 } 312 return ret; 313 } 314 }