001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.correlation;
019    
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.exception.util.LocalizedFormats;
022    import org.apache.commons.math.linear.BlockRealMatrix;
023    import org.apache.commons.math.linear.RealMatrix;
024    import org.apache.commons.math.stat.ranking.NaturalRanking;
025    import org.apache.commons.math.stat.ranking.RankingAlgorithm;
026    
027    /**
028     * <p>Spearman's rank correlation. This implementation performs a rank
029     * transformation on the input data and then computes {@link PearsonsCorrelation}
030     * on the ranked data.</p>
031     *
032     * <p>By default, ranks are computed using {@link NaturalRanking} with default
033     * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
034     * The ranking algorithm can be set using a constructor argument.</p>
035     *
036     * @since 2.0
037     * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 ao??t 2010) $
038     */
039    
040    public class SpearmansCorrelation {
041    
042        /** Input data */
043        private final RealMatrix data;
044    
045        /** Ranking algorithm  */
046        private final RankingAlgorithm rankingAlgorithm;
047    
048        /** Rank correlation */
049        private final PearsonsCorrelation rankCorrelation;
050    
051        /**
052         * Create a SpearmansCorrelation with the given input data matrix
053         * and ranking algorithm.
054         *
055         * @param dataMatrix matrix of data with columns representing
056         * variables to correlate
057         * @param rankingAlgorithm ranking algorithm
058         */
059        public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
060            this.data = dataMatrix.copy();
061            this.rankingAlgorithm = rankingAlgorithm;
062            rankTransform(data);
063            rankCorrelation = new PearsonsCorrelation(data);
064        }
065    
066        /**
067         * Create a SpearmansCorrelation from the given data matrix.
068         *
069         * @param dataMatrix matrix of data with columns representing
070         * variables to correlate
071         */
072        public SpearmansCorrelation(final RealMatrix dataMatrix) {
073            this(dataMatrix, new NaturalRanking());
074        }
075    
076        /**
077         * Create a SpearmansCorrelation without data.
078         */
079        public SpearmansCorrelation() {
080            data = null;
081            this.rankingAlgorithm = new NaturalRanking();
082            rankCorrelation = null;
083        }
084    
085        /**
086         * Calculate the Spearman Rank Correlation Matrix.
087         *
088         * @return Spearman Rank Correlation Matrix
089         */
090        public RealMatrix getCorrelationMatrix() {
091            return rankCorrelation.getCorrelationMatrix();
092        }
093    
094        /**
095         * Returns a {@link PearsonsCorrelation} instance constructed from the
096         * ranked input data. That is,
097         * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
098         * is equivalent to
099         * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
100         * <code>rankTransform(matrix)</code> is the result of applying the
101         * configured <code>RankingAlgorithm</code> to each of the columns of
102         * <code>matrix.</code>
103         *
104         * @return PearsonsCorrelation among ranked column data
105         */
106        public PearsonsCorrelation getRankCorrelation() {
107            return rankCorrelation;
108        }
109    
110        /**
111         * Computes the Spearman's rank correlation matrix for the columns of the
112         * input matrix.
113         *
114         * @param matrix matrix with columns representing variables to correlate
115         * @return correlation matrix
116         */
117        public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
118            RealMatrix matrixCopy = matrix.copy();
119            rankTransform(matrixCopy);
120            return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
121        }
122    
123        /**
124         * Computes the Spearman's rank correlation matrix for the columns of the
125         * input rectangular array.  The columns of the array represent values
126         * of variables to be correlated.
127         *
128         * @param matrix matrix with columns representing variables to correlate
129         * @return correlation matrix
130         */
131        public RealMatrix computeCorrelationMatrix(double[][] matrix) {
132           return computeCorrelationMatrix(new BlockRealMatrix(matrix));
133        }
134    
135        /**
136         * Computes the Spearman's rank correlation coefficient between the two arrays.
137         *
138         * </p>Throws IllegalArgumentException if the arrays do not have the same length
139         * or their common length is less than 2</p>
140         *
141         * @param xArray first data array
142         * @param yArray second data array
143         * @return Returns Spearman's rank correlation coefficient for the two arrays
144         * @throws  IllegalArgumentException if the arrays lengths do not match or
145         * there is insufficient data
146         */
147        public double correlation(final double[] xArray, final double[] yArray)
148        throws IllegalArgumentException {
149            if (xArray.length != yArray.length) {
150                throw MathRuntimeException.createIllegalArgumentException(
151                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, xArray.length, yArray.length);
152            } else if (xArray.length < 2) {
153                throw MathRuntimeException.createIllegalArgumentException(
154                      LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2);
155            } else {
156                return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
157                        rankingAlgorithm.rank(yArray));
158            }
159        }
160    
161        /**
162         * Applies rank transform to each of the columns of <code>matrix</code>
163         * using the current <code>rankingAlgorithm</code>
164         *
165         * @param matrix matrix to transform
166         */
167        private void rankTransform(RealMatrix matrix) {
168            for (int i = 0; i < matrix.getColumnDimension(); i++) {
169                matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
170            }
171        }
172    }