001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.util.FastMath;
025    
026    /**
027     * Implementation for the {@link ZipfDistribution}.
028     *
029     * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
030     */
031    public class ZipfDistributionImpl extends AbstractIntegerDistribution
032        implements ZipfDistribution, Serializable {
033    
034        /** Serializable version identifier. */
035        private static final long serialVersionUID = -140627372283420404L;
036    
037        /** Number of elements. */
038        private int numberOfElements;
039    
040        /** Exponent parameter of the distribution. */
041        private double exponent;
042    
043        /**
044         * Create a new Zipf distribution with the given number of elements and
045         * exponent. Both values must be positive; otherwise an
046         * <code>IllegalArgumentException</code> is thrown.
047         *
048         * @param numberOfElements the number of elements
049         * @param exponent the exponent
050         * @exception IllegalArgumentException if n &le; 0 or s &le; 0.0
051         */
052        public ZipfDistributionImpl(final int numberOfElements, final double exponent)
053            throws IllegalArgumentException {
054            setNumberOfElementsInternal(numberOfElements);
055            setExponentInternal(exponent);
056        }
057    
058        /**
059         * Get the number of elements (e.g. corpus size) for the distribution.
060         *
061         * @return the number of elements
062         */
063        public int getNumberOfElements() {
064            return numberOfElements;
065        }
066    
067        /**
068         * Set the number of elements (e.g. corpus size) for the distribution.
069         * The parameter value must be positive; otherwise an
070         * <code>IllegalArgumentException</code> is thrown.
071         *
072         * @param n the number of elements
073         * @exception IllegalArgumentException if n &le; 0
074         * @deprecated as of 2.1 (class will become immutable in 3.0)
075         */
076        @Deprecated
077        public void setNumberOfElements(final int n) {
078            setNumberOfElementsInternal(n);
079        }
080        /**
081         * Set the number of elements (e.g. corpus size) for the distribution.
082         * The parameter value must be positive; otherwise an
083         * <code>IllegalArgumentException</code> is thrown.
084         *
085         * @param n the number of elements
086         * @exception IllegalArgumentException if n &le; 0
087         */
088        private void setNumberOfElementsInternal(final int n)
089            throws IllegalArgumentException {
090            if (n <= 0) {
091                throw MathRuntimeException.createIllegalArgumentException(
092                        LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0);
093            }
094            this.numberOfElements = n;
095        }
096    
097        /**
098         * Get the exponent characterising the distribution.
099         *
100         * @return the exponent
101         */
102        public double getExponent() {
103            return exponent;
104        }
105    
106        /**
107         * Set the exponent characterising the distribution.
108         * The parameter value must be positive; otherwise an
109         * <code>IllegalArgumentException</code> is thrown.
110         *
111         * @param s the exponent
112         * @exception IllegalArgumentException if s &le; 0.0
113         * @deprecated as of 2.1 (class will become immutable in 3.0)
114         */
115        @Deprecated
116        public void setExponent(final double s) {
117            setExponentInternal(s);
118        }
119    
120        /**
121         * Set the exponent characterising the distribution.
122         * The parameter value must be positive; otherwise an
123         * <code>IllegalArgumentException</code> is thrown.
124         *
125         * @param s the exponent
126         * @exception IllegalArgumentException if s &le; 0.0
127         */
128        private void setExponentInternal(final double s)
129            throws IllegalArgumentException {
130            if (s <= 0.0) {
131                throw MathRuntimeException.createIllegalArgumentException(
132                        LocalizedFormats.NOT_POSITIVE_EXPONENT,
133                        s);
134            }
135            this.exponent = s;
136        }
137    
138        /**
139         * The probability mass function P(X = x) for a Zipf distribution.
140         *
141         * @param x the value at which the probability density function is evaluated.
142         * @return the value of the probability mass function at x
143         */
144        public double probability(final int x) {
145            if (x <= 0 || x > numberOfElements) {
146                return 0.0;
147            }
148    
149            return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
150    
151        }
152    
153        /**
154         * The probability distribution function P(X <= x) for a Zipf distribution.
155         *
156         * @param x the value at which the PDF is evaluated.
157         * @return Zipf distribution function evaluated at x
158         */
159        @Override
160        public double cumulativeProbability(final int x) {
161            if (x <= 0) {
162                return 0.0;
163            } else if (x >= numberOfElements) {
164                return 1.0;
165            }
166    
167            return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
168    
169        }
170    
171        /**
172         * Access the domain value lower bound, based on <code>p</code>, used to
173         * bracket a PDF root.
174         *
175         * @param p the desired probability for the critical value
176         * @return domain value lower bound, i.e.
177         *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code>
178         */
179        @Override
180        protected int getDomainLowerBound(final double p) {
181            return 0;
182        }
183    
184        /**
185         * Access the domain value upper bound, based on <code>p</code>, used to
186         * bracket a PDF root.
187         *
188         * @param p the desired probability for the critical value
189         * @return domain value upper bound, i.e.
190         *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
191         */
192        @Override
193        protected int getDomainUpperBound(final double p) {
194            return numberOfElements;
195        }
196    
197    
198        /**
199         * Calculates the Nth generalized harmonic number. See
200         * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
201         * Series</a>.
202         *
203         * @param n the term in the series to calculate (must be &ge; 1)
204         * @param m the exponent; special case m == 1.0 is the harmonic series
205         * @return the nth generalized harmonic number
206         */
207        private double generalizedHarmonic(final int n, final double m) {
208            double value = 0;
209            for (int k = n; k > 0; --k) {
210                value += 1.0 / FastMath.pow(k, m);
211            }
212            return value;
213        }
214    
215        /**
216         * Returns the lower bound of the support for the distribution.
217         *
218         * The lower bound of the support is always 1 no matter the parameters.
219         *
220         * @return lower bound of the support (always 1)
221         * @since 2.2
222         */
223        public int getSupportLowerBound() {
224            return 1;
225        }
226    
227        /**
228         * Returns the upper bound of the support for the distribution.
229         *
230         * The upper bound of the support is the number of elements
231         *
232         * @return upper bound of the support
233         * @since 2.2
234         */
235        public int getSupportUpperBound() {
236            return getNumberOfElements();
237        }
238    
239        /**
240         * Returns the mean.
241         *
242         * For number of elements N and exponent s, the mean is
243         * <code>Hs1 / Hs</code> where
244         * <ul>
245         *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
246         *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
247         * </ul>
248         *
249         * @return the mean
250         * @since 2.2
251         */
252        protected double getNumericalMean() {
253            final int N = getNumberOfElements();
254            final double s = getExponent();
255    
256            final double Hs1 = generalizedHarmonic(N, s - 1);
257            final double Hs = generalizedHarmonic(N, s);
258    
259            return Hs1 / Hs;
260        }
261    
262        /**
263         * Returns the variance.
264         *
265         * For number of elements N and exponent s, the mean is
266         * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
267         * <ul>
268         *  <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
269         *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
270         *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
271         * </ul>
272         *
273         * @return the variance
274         * @since 2.2
275         */
276        protected double getNumericalVariance() {
277            final int N = getNumberOfElements();
278            final double s = getExponent();
279    
280            final double Hs2 = generalizedHarmonic(N, s - 2);
281            final double Hs1 = generalizedHarmonic(N, s - 1);
282            final double Hs = generalizedHarmonic(N, s);
283    
284            return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
285        }
286    }