001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathRuntimeException;
022    import org.apache.commons.math.exception.util.LocalizedFormats;
023    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
024    import org.apache.commons.math.stat.descriptive.moment.Mean;
025    import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
026    import org.apache.commons.math.stat.descriptive.moment.Variance;
027    import org.apache.commons.math.stat.descriptive.rank.Max;
028    import org.apache.commons.math.stat.descriptive.rank.Min;
029    import org.apache.commons.math.stat.descriptive.summary.Sum;
030    import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
031    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
032    import org.apache.commons.math.util.MathUtils;
033    import org.apache.commons.math.util.FastMath;
034    
035    /**
036     * <p>
037     * Computes summary statistics for a stream of data values added using the
038     * {@link #addValue(double) addValue} method. The data values are not stored in
039     * memory, so this class can be used to compute statistics for very large data
040     * streams.
041     * </p>
042     * <p>
043     * The {@link StorelessUnivariateStatistic} instances used to maintain summary
044     * state and compute statistics are configurable via setters. For example, the
045     * default implementation for the variance can be overridden by calling
046     * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
047     * these methods must implement the {@link StorelessUnivariateStatistic}
048     * interface and configuration must be completed before <code>addValue</code>
049     * is called. No configuration is necessary to use the default, commons-math
050     * provided implementations.
051     * </p>
052     * <p>
053     * Note: This class is not thread-safe. Use
054     * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
055     * threads is required.
056     * </p>
057     * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 d??c. 2010) $
058     */
059    public class SummaryStatistics implements StatisticalSummary, Serializable {
060    
061        /** Serialization UID */
062        private static final long serialVersionUID = -2021321786743555871L;
063    
064        /** count of values that have been added */
065        protected long n = 0;
066    
067        /** SecondMoment is used to compute the mean and variance */
068        protected SecondMoment secondMoment = new SecondMoment();
069    
070        /** sum of values that have been added */
071        protected Sum sum = new Sum();
072    
073        /** sum of the square of each value that has been added */
074        protected SumOfSquares sumsq = new SumOfSquares();
075    
076        /** min of values that have been added */
077        protected Min min = new Min();
078    
079        /** max of values that have been added */
080        protected Max max = new Max();
081    
082        /** sumLog of values that have been added */
083        protected SumOfLogs sumLog = new SumOfLogs();
084    
085        /** geoMean of values that have been added */
086        protected GeometricMean geoMean = new GeometricMean(sumLog);
087    
088        /** mean of values that have been added */
089        protected Mean mean = new Mean();
090    
091        /** variance of values that have been added */
092        protected Variance variance = new Variance();
093    
094        /** Sum statistic implementation - can be reset by setter. */
095        private StorelessUnivariateStatistic sumImpl = sum;
096    
097        /** Sum of squares statistic implementation - can be reset by setter. */
098        private StorelessUnivariateStatistic sumsqImpl = sumsq;
099    
100        /** Minimum statistic implementation - can be reset by setter. */
101        private StorelessUnivariateStatistic minImpl = min;
102    
103        /** Maximum statistic implementation - can be reset by setter. */
104        private StorelessUnivariateStatistic maxImpl = max;
105    
106        /** Sum of log statistic implementation - can be reset by setter. */
107        private StorelessUnivariateStatistic sumLogImpl = sumLog;
108    
109        /** Geometric mean statistic implementation - can be reset by setter. */
110        private StorelessUnivariateStatistic geoMeanImpl = geoMean;
111    
112        /** Mean statistic implementation - can be reset by setter. */
113        private StorelessUnivariateStatistic meanImpl = mean;
114    
115        /** Variance statistic implementation - can be reset by setter. */
116        private StorelessUnivariateStatistic varianceImpl = variance;
117    
118        /**
119         * Construct a SummaryStatistics instance
120         */
121        public SummaryStatistics() {
122        }
123    
124        /**
125         * A copy constructor. Creates a deep-copy of the {@code original}.
126         *
127         * @param original the {@code SummaryStatistics} instance to copy
128         */
129        public SummaryStatistics(SummaryStatistics original) {
130            copy(original, this);
131        }
132    
133        /**
134         * Return a {@link StatisticalSummaryValues} instance reporting current
135         * statistics.
136         * @return Current values of statistics
137         */
138        public StatisticalSummary getSummary() {
139            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
140                    getMax(), getMin(), getSum());
141        }
142    
143        /**
144         * Add a value to the data
145         * @param value the value to add
146         */
147        public void addValue(double value) {
148            sumImpl.increment(value);
149            sumsqImpl.increment(value);
150            minImpl.increment(value);
151            maxImpl.increment(value);
152            sumLogImpl.increment(value);
153            secondMoment.increment(value);
154            // If mean, variance or geomean have been overridden,
155            // need to increment these
156            if (!(meanImpl instanceof Mean)) {
157                meanImpl.increment(value);
158            }
159            if (!(varianceImpl instanceof Variance)) {
160                varianceImpl.increment(value);
161            }
162            if (!(geoMeanImpl instanceof GeometricMean)) {
163                geoMeanImpl.increment(value);
164            }
165            n++;
166        }
167    
168        /**
169         * Returns the number of available values
170         * @return The number of available values
171         */
172        public long getN() {
173            return n;
174        }
175    
176        /**
177         * Returns the sum of the values that have been added
178         * @return The sum or <code>Double.NaN</code> if no values have been added
179         */
180        public double getSum() {
181            return sumImpl.getResult();
182        }
183    
184        /**
185         * Returns the sum of the squares of the values that have been added.
186         * <p>
187         * Double.NaN is returned if no values have been added.
188         * </p>
189         * @return The sum of squares
190         */
191        public double getSumsq() {
192            return sumsqImpl.getResult();
193        }
194    
195        /**
196         * Returns the mean of the values that have been added.
197         * <p>
198         * Double.NaN is returned if no values have been added.
199         * </p>
200         * @return the mean
201         */
202        public double getMean() {
203            if (mean == meanImpl) {
204                return new Mean(secondMoment).getResult();
205            } else {
206                return meanImpl.getResult();
207            }
208        }
209    
210        /**
211         * Returns the standard deviation of the values that have been added.
212         * <p>
213         * Double.NaN is returned if no values have been added.
214         * </p>
215         * @return the standard deviation
216         */
217        public double getStandardDeviation() {
218            double stdDev = Double.NaN;
219            if (getN() > 0) {
220                if (getN() > 1) {
221                    stdDev = FastMath.sqrt(getVariance());
222                } else {
223                    stdDev = 0.0;
224                }
225            }
226            return stdDev;
227        }
228    
229        /**
230         * Returns the variance of the values that have been added.
231         * <p>
232         * Double.NaN is returned if no values have been added.
233         * </p>
234         * @return the variance
235         */
236        public double getVariance() {
237            if (varianceImpl == variance) {
238                return new Variance(secondMoment).getResult();
239            } else {
240                return varianceImpl.getResult();
241            }
242        }
243    
244        /**
245         * Returns the maximum of the values that have been added.
246         * <p>
247         * Double.NaN is returned if no values have been added.
248         * </p>
249         * @return the maximum
250         */
251        public double getMax() {
252            return maxImpl.getResult();
253        }
254    
255        /**
256         * Returns the minimum of the values that have been added.
257         * <p>
258         * Double.NaN is returned if no values have been added.
259         * </p>
260         * @return the minimum
261         */
262        public double getMin() {
263            return minImpl.getResult();
264        }
265    
266        /**
267         * Returns the geometric mean of the values that have been added.
268         * <p>
269         * Double.NaN is returned if no values have been added.
270         * </p>
271         * @return the geometric mean
272         */
273        public double getGeometricMean() {
274            return geoMeanImpl.getResult();
275        }
276    
277        /**
278         * Returns the sum of the logs of the values that have been added.
279         * <p>
280         * Double.NaN is returned if no values have been added.
281         * </p>
282         * @return the sum of logs
283         * @since 1.2
284         */
285        public double getSumOfLogs() {
286            return sumLogImpl.getResult();
287        }
288    
289        /**
290         * Returns a statistic related to the Second Central Moment.  Specifically,
291         * what is returned is the sum of squared deviations from the sample mean
292         * among the values that have been added.
293         * <p>
294         * Returns <code>Double.NaN</code> if no data values have been added and
295         * returns <code>0</code> if there is just one value in the data set.</p>
296         * <p>
297         * @return second central moment statistic
298         * @since 2.0
299         */
300        public double getSecondMoment() {
301            return secondMoment.getResult();
302        }
303    
304        /**
305         * Generates a text report displaying summary statistics from values that
306         * have been added.
307         * @return String with line feeds displaying statistics
308         * @since 1.2
309         */
310        @Override
311        public String toString() {
312            StringBuilder outBuffer = new StringBuilder();
313            String endl = "\n";
314            outBuffer.append("SummaryStatistics:").append(endl);
315            outBuffer.append("n: ").append(getN()).append(endl);
316            outBuffer.append("min: ").append(getMin()).append(endl);
317            outBuffer.append("max: ").append(getMax()).append(endl);
318            outBuffer.append("mean: ").append(getMean()).append(endl);
319            outBuffer.append("geometric mean: ").append(getGeometricMean())
320                .append(endl);
321            outBuffer.append("variance: ").append(getVariance()).append(endl);
322            outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
323            outBuffer.append("standard deviation: ").append(getStandardDeviation())
324                .append(endl);
325            outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
326            return outBuffer.toString();
327        }
328    
329        /**
330         * Resets all statistics and storage
331         */
332        public void clear() {
333            this.n = 0;
334            minImpl.clear();
335            maxImpl.clear();
336            sumImpl.clear();
337            sumLogImpl.clear();
338            sumsqImpl.clear();
339            geoMeanImpl.clear();
340            secondMoment.clear();
341            if (meanImpl != mean) {
342                meanImpl.clear();
343            }
344            if (varianceImpl != variance) {
345                varianceImpl.clear();
346            }
347        }
348    
349        /**
350         * Returns true iff <code>object</code> is a
351         * <code>SummaryStatistics</code> instance and all statistics have the
352         * same values as this.
353         * @param object the object to test equality against.
354         * @return true if object equals this
355         */
356        @Override
357        public boolean equals(Object object) {
358            if (object == this) {
359                return true;
360            }
361            if (object instanceof SummaryStatistics == false) {
362                return false;
363            }
364            SummaryStatistics stat = (SummaryStatistics)object;
365            return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
366                   MathUtils.equalsIncludingNaN(stat.getMax(),           getMax())           &&
367                   MathUtils.equalsIncludingNaN(stat.getMean(),          getMean())          &&
368                   MathUtils.equalsIncludingNaN(stat.getMin(),           getMin())           &&
369                   MathUtils.equalsIncludingNaN(stat.getN(),             getN())             &&
370                   MathUtils.equalsIncludingNaN(stat.getSum(),           getSum())           &&
371                   MathUtils.equalsIncludingNaN(stat.getSumsq(),         getSumsq())         &&
372                   MathUtils.equalsIncludingNaN(stat.getVariance(),      getVariance());
373        }
374    
375        /**
376         * Returns hash code based on values of statistics
377         * @return hash code
378         */
379        @Override
380        public int hashCode() {
381            int result = 31 + MathUtils.hash(getGeometricMean());
382            result = result * 31 + MathUtils.hash(getGeometricMean());
383            result = result * 31 + MathUtils.hash(getMax());
384            result = result * 31 + MathUtils.hash(getMean());
385            result = result * 31 + MathUtils.hash(getMin());
386            result = result * 31 + MathUtils.hash(getN());
387            result = result * 31 + MathUtils.hash(getSum());
388            result = result * 31 + MathUtils.hash(getSumsq());
389            result = result * 31 + MathUtils.hash(getVariance());
390            return result;
391        }
392    
393        // Getters and setters for statistics implementations
394        /**
395         * Returns the currently configured Sum implementation
396         * @return the StorelessUnivariateStatistic implementing the sum
397         * @since 1.2
398         */
399        public StorelessUnivariateStatistic getSumImpl() {
400            return sumImpl;
401        }
402    
403        /**
404         * <p>
405         * Sets the implementation for the Sum.
406         * </p>
407         * <p>
408         * This method must be activated before any data has been added - i.e.,
409         * before {@link #addValue(double) addValue} has been used to add data;
410         * otherwise an IllegalStateException will be thrown.
411         * </p>
412         * @param sumImpl the StorelessUnivariateStatistic instance to use for
413         *        computing the Sum
414         * @throws IllegalStateException if data has already been added (i.e if n >
415         *         0)
416         * @since 1.2
417         */
418        public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
419            checkEmpty();
420            this.sumImpl = sumImpl;
421        }
422    
423        /**
424         * Returns the currently configured sum of squares implementation
425         * @return the StorelessUnivariateStatistic implementing the sum of squares
426         * @since 1.2
427         */
428        public StorelessUnivariateStatistic getSumsqImpl() {
429            return sumsqImpl;
430        }
431    
432        /**
433         * <p>
434         * Sets the implementation for the sum of squares.
435         * </p>
436         * <p>
437         * This method must be activated before any data has been added - i.e.,
438         * before {@link #addValue(double) addValue} has been used to add data;
439         * otherwise an IllegalStateException will be thrown.
440         * </p>
441         * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
442         *        computing the sum of squares
443         * @throws IllegalStateException if data has already been added (i.e if n >
444         *         0)
445         * @since 1.2
446         */
447        public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
448            checkEmpty();
449            this.sumsqImpl = sumsqImpl;
450        }
451    
452        /**
453         * Returns the currently configured minimum implementation
454         * @return the StorelessUnivariateStatistic implementing the minimum
455         * @since 1.2
456         */
457        public StorelessUnivariateStatistic getMinImpl() {
458            return minImpl;
459        }
460    
461        /**
462         * <p>
463         * Sets the implementation for the minimum.
464         * </p>
465         * <p>
466         * This method must be activated before any data has been added - i.e.,
467         * before {@link #addValue(double) addValue} has been used to add data;
468         * otherwise an IllegalStateException will be thrown.
469         * </p>
470         * @param minImpl the StorelessUnivariateStatistic instance to use for
471         *        computing the minimum
472         * @throws IllegalStateException if data has already been added (i.e if n >
473         *         0)
474         * @since 1.2
475         */
476        public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477            checkEmpty();
478            this.minImpl = minImpl;
479        }
480    
481        /**
482         * Returns the currently configured maximum implementation
483         * @return the StorelessUnivariateStatistic implementing the maximum
484         * @since 1.2
485         */
486        public StorelessUnivariateStatistic getMaxImpl() {
487            return maxImpl;
488        }
489    
490        /**
491         * <p>
492         * Sets the implementation for the maximum.
493         * </p>
494         * <p>
495         * This method must be activated before any data has been added - i.e.,
496         * before {@link #addValue(double) addValue} has been used to add data;
497         * otherwise an IllegalStateException will be thrown.
498         * </p>
499         * @param maxImpl the StorelessUnivariateStatistic instance to use for
500         *        computing the maximum
501         * @throws IllegalStateException if data has already been added (i.e if n >
502         *         0)
503         * @since 1.2
504         */
505        public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
506            checkEmpty();
507            this.maxImpl = maxImpl;
508        }
509    
510        /**
511         * Returns the currently configured sum of logs implementation
512         * @return the StorelessUnivariateStatistic implementing the log sum
513         * @since 1.2
514         */
515        public StorelessUnivariateStatistic getSumLogImpl() {
516            return sumLogImpl;
517        }
518    
519        /**
520         * <p>
521         * Sets the implementation for the sum of logs.
522         * </p>
523         * <p>
524         * This method must be activated before any data has been added - i.e.,
525         * before {@link #addValue(double) addValue} has been used to add data;
526         * otherwise an IllegalStateException will be thrown.
527         * </p>
528         * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
529         *        computing the log sum
530         * @throws IllegalStateException if data has already been added (i.e if n >
531         *         0)
532         * @since 1.2
533         */
534        public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
535            checkEmpty();
536            this.sumLogImpl = sumLogImpl;
537            geoMean.setSumLogImpl(sumLogImpl);
538        }
539    
540        /**
541         * Returns the currently configured geometric mean implementation
542         * @return the StorelessUnivariateStatistic implementing the geometric mean
543         * @since 1.2
544         */
545        public StorelessUnivariateStatistic getGeoMeanImpl() {
546            return geoMeanImpl;
547        }
548    
549        /**
550         * <p>
551         * Sets the implementation for the geometric mean.
552         * </p>
553         * <p>
554         * This method must be activated before any data has been added - i.e.,
555         * before {@link #addValue(double) addValue} has been used to add data;
556         * otherwise an IllegalStateException will be thrown.
557         * </p>
558         * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
559         *        computing the geometric mean
560         * @throws IllegalStateException if data has already been added (i.e if n >
561         *         0)
562         * @since 1.2
563         */
564        public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
565            checkEmpty();
566            this.geoMeanImpl = geoMeanImpl;
567        }
568    
569        /**
570         * Returns the currently configured mean implementation
571         * @return the StorelessUnivariateStatistic implementing the mean
572         * @since 1.2
573         */
574        public StorelessUnivariateStatistic getMeanImpl() {
575            return meanImpl;
576        }
577    
578        /**
579         * <p>
580         * Sets the implementation for the mean.
581         * </p>
582         * <p>
583         * This method must be activated before any data has been added - i.e.,
584         * before {@link #addValue(double) addValue} has been used to add data;
585         * otherwise an IllegalStateException will be thrown.
586         * </p>
587         * @param meanImpl the StorelessUnivariateStatistic instance to use for
588         *        computing the mean
589         * @throws IllegalStateException if data has already been added (i.e if n >
590         *         0)
591         * @since 1.2
592         */
593        public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
594            checkEmpty();
595            this.meanImpl = meanImpl;
596        }
597    
598        /**
599         * Returns the currently configured variance implementation
600         * @return the StorelessUnivariateStatistic implementing the variance
601         * @since 1.2
602         */
603        public StorelessUnivariateStatistic getVarianceImpl() {
604            return varianceImpl;
605        }
606    
607        /**
608         * <p>
609         * Sets the implementation for the variance.
610         * </p>
611         * <p>
612         * This method must be activated before any data has been added - i.e.,
613         * before {@link #addValue(double) addValue} has been used to add data;
614         * otherwise an IllegalStateException will be thrown.
615         * </p>
616         * @param varianceImpl the StorelessUnivariateStatistic instance to use for
617         *        computing the variance
618         * @throws IllegalStateException if data has already been added (i.e if n >
619         *         0)
620         * @since 1.2
621         */
622        public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
623            checkEmpty();
624            this.varianceImpl = varianceImpl;
625        }
626    
627        /**
628         * Throws IllegalStateException if n > 0.
629         */
630        private void checkEmpty() {
631            if (n > 0) {
632                throw MathRuntimeException.createIllegalStateException(
633                        LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
634                        n);
635            }
636        }
637    
638        /**
639         * Returns a copy of this SummaryStatistics instance with the same internal state.
640         *
641         * @return a copy of this
642         */
643        public SummaryStatistics copy() {
644            SummaryStatistics result = new SummaryStatistics();
645            copy(this, result);
646            return result;
647        }
648    
649        /**
650         * Copies source to dest.
651         * <p>Neither source nor dest can be null.</p>
652         *
653         * @param source SummaryStatistics to copy
654         * @param dest SummaryStatistics to copy to
655         * @throws NullPointerException if either source or dest is null
656         */
657        public static void copy(SummaryStatistics source, SummaryStatistics dest) {
658            dest.maxImpl = source.maxImpl.copy();
659            dest.meanImpl = source.meanImpl.copy();
660            dest.minImpl = source.minImpl.copy();
661            dest.sumImpl = source.sumImpl.copy();
662            dest.varianceImpl = source.varianceImpl.copy();
663            dest.sumLogImpl = source.sumLogImpl.copy();
664            dest.sumsqImpl = source.sumsqImpl.copy();
665            if (source.getGeoMeanImpl() instanceof GeometricMean) {
666                // Keep geoMeanImpl, sumLogImpl in synch
667                dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
668            } else {
669                dest.geoMeanImpl = source.geoMeanImpl.copy();
670            }
671            SecondMoment.copy(source.secondMoment, dest.secondMoment);
672            dest.n = source.n;
673    
674            // Make sure that if stat == statImpl in source, same
675            // holds in dest; otherwise copy stat
676            if (source.geoMean == source.geoMeanImpl) {
677                dest.geoMean = (GeometricMean) dest.geoMeanImpl;
678            } else {
679                GeometricMean.copy(source.geoMean, dest.geoMean);
680            }
681            if (source.max == source.maxImpl) {
682                dest.max = (Max) dest.maxImpl;
683            } else {
684                Max.copy(source.max, dest.max);
685            }
686            if (source.mean == source.meanImpl) {
687                dest.mean = (Mean) dest.meanImpl;
688            } else {
689                Mean.copy(source.mean, dest.mean);
690            }
691            if (source.min == source.minImpl) {
692                dest.min = (Min) dest.minImpl;
693            } else {
694                Min.copy(source.min, dest.min);
695            }
696            if (source.sum == source.sumImpl) {
697                dest.sum = (Sum) dest.sumImpl;
698            } else {
699                Sum.copy(source.sum, dest.sum);
700            }
701            if (source.variance == source.varianceImpl) {
702                dest.variance = (Variance) dest.varianceImpl;
703            } else {
704                Variance.copy(source.variance, dest.variance);
705            }
706            if (source.sumLog == source.sumLogImpl) {
707                dest.sumLog = (SumOfLogs) dest.sumLogImpl;
708            } else {
709                SumOfLogs.copy(source.sumLog, dest.sumLog);
710            }
711            if (source.sumsq == source.sumsqImpl) {
712                dest.sumsq = (SumOfSquares) dest.sumsqImpl;
713            } else {
714                SumOfSquares.copy(source.sumsq, dest.sumsq);
715            }
716        }
717    }