001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.correlation; 018 019 import org.apache.commons.math.MathRuntimeException; 020 import org.apache.commons.math.linear.RealMatrix; 021 import org.apache.commons.math.linear.BlockRealMatrix; 022 import org.apache.commons.math.stat.descriptive.moment.Mean; 023 import org.apache.commons.math.stat.descriptive.moment.Variance; 024 025 /** 026 * Computes covariances for pairs of arrays or columns of a matrix. 027 * 028 * <p>The constructors that take <code>RealMatrix</code> or 029 * <code>double[][]</code> arguments generate covariance matrices. The 030 * columns of the input matrices are assumed to represent variable values.</p> 031 * 032 * <p>The constructor argument <code>biasCorrected</code> determines whether or 033 * not computed covariances are bias-corrected.</p> 034 * 035 * <p>Unbiased covariances are given by the formula</p> 036 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> 037 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code> 038 * is the mean of the <code>Y</code> values. 039 * 040 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code> 041 * 042 * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $ 043 * @since 2.0 044 */ 045 public class Covariance { 046 047 /** covariance matrix */ 048 private final RealMatrix covarianceMatrix; 049 050 /** 051 * Create an empty covariance matrix. 052 */ 053 /** Number of observations (length of covariate vectors) */ 054 private final int n; 055 056 /** 057 * Create a Covariance with no data 058 */ 059 public Covariance() { 060 super(); 061 covarianceMatrix = null; 062 n = 0; 063 } 064 065 /** 066 * Create a Covariance matrix from a rectangular array 067 * whose columns represent covariates. 068 * 069 * <p>The <code>biasCorrected</code> parameter determines whether or not 070 * covariance estimates are bias-corrected.</p> 071 * 072 * <p>The input array must be rectangular with at least two columns 073 * and two rows.</p> 074 * 075 * @param data rectangular array with columns representing covariates 076 * @param biasCorrected true means covariances are bias-corrected 077 * @throws IllegalArgumentException if the input data array is not 078 * rectangular with at least two rows and two columns. 079 */ 080 public Covariance(double[][] data, boolean biasCorrected) { 081 this(new BlockRealMatrix(data), biasCorrected); 082 } 083 084 /** 085 * Create a Covariance matrix from a rectangular array 086 * whose columns represent covariates. 087 * 088 * <p>The input array must be rectangular with at least two columns 089 * and two rows</p> 090 * 091 * @param data rectangular array with columns representing covariates 092 * @throws IllegalArgumentException if the input data array is not 093 * rectangular with at least two rows and two columns. 094 */ 095 public Covariance(double[][] data) { 096 this(data, true); 097 } 098 099 /** 100 * Create a covariance matrix from a matrix whose columns 101 * represent covariates. 102 * 103 * <p>The <code>biasCorrected</code> parameter determines whether or not 104 * covariance estimates are bias-corrected.</p> 105 * 106 * <p>The matrix must have at least two columns and two rows</p> 107 * 108 * @param matrix matrix with columns representing covariates 109 * @param biasCorrected true means covariances are bias-corrected 110 * @throws IllegalArgumentException if the input matrix does not have 111 * at least two rows and two columns 112 */ 113 public Covariance(RealMatrix matrix, boolean biasCorrected) { 114 checkSufficientData(matrix); 115 n = matrix.getRowDimension(); 116 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); 117 } 118 119 /** 120 * Create a covariance matrix from a matrix whose columns 121 * represent covariates. 122 * 123 * <p>The matrix must have at least two columns and two rows</p> 124 * 125 * @param matrix matrix with columns representing covariates 126 * @throws IllegalArgumentException if the input matrix does not have 127 * at least two rows and two columns 128 */ 129 public Covariance(RealMatrix matrix) { 130 this(matrix, true); 131 } 132 133 /** 134 * Returns the covariance matrix 135 * 136 * @return covariance matrix 137 */ 138 public RealMatrix getCovarianceMatrix() { 139 return covarianceMatrix; 140 } 141 142 /** 143 * Returns the number of observations (length of covariate vectors) 144 * 145 * @return number of observations 146 */ 147 148 public int getN() { 149 return n; 150 } 151 152 /** 153 * Compute a covariance matrix from a matrix whose columns represent 154 * covariates. 155 * @param matrix input matrix (must have at least two columns and two rows) 156 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 157 * @return covariance matrix 158 */ 159 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) { 160 int dimension = matrix.getColumnDimension(); 161 Variance variance = new Variance(biasCorrected); 162 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); 163 for (int i = 0; i < dimension; i++) { 164 for (int j = 0; j < i; j++) { 165 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); 166 outMatrix.setEntry(i, j, cov); 167 outMatrix.setEntry(j, i, cov); 168 } 169 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); 170 } 171 return outMatrix; 172 } 173 174 /** 175 * Create a covariance matrix from a matrix whose columns represent 176 * covariates. Covariances are computed using the bias-corrected formula. 177 * @param matrix input matrix (must have at least two columns and two rows) 178 * @return covariance matrix 179 * @see #Covariance 180 */ 181 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) { 182 return computeCovarianceMatrix(matrix, true); 183 } 184 185 /** 186 * Compute a covariance matrix from a rectangular array whose columns represent 187 * covariates. 188 * @param data input array (must have at least two columns and two rows) 189 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 190 * @return covariance matrix 191 */ 192 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) { 193 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); 194 } 195 196 /** 197 * Create a covariance matrix from a rectangual array whose columns represent 198 * covariates. Covariances are computed using the bias-corrected formula. 199 * @param data input array (must have at least two columns and two rows) 200 * @return covariance matrix 201 * @see #Covariance 202 */ 203 protected RealMatrix computeCovarianceMatrix(double[][] data) { 204 return computeCovarianceMatrix(data, true); 205 } 206 207 /** 208 * Computes the covariance between the two arrays. 209 * 210 * <p>Array lengths must match and the common length must be at least 2.</p> 211 * 212 * @param xArray first data array 213 * @param yArray second data array 214 * @param biasCorrected if true, returned value will be bias-corrected 215 * @return returns the covariance for the two arrays 216 * @throws IllegalArgumentException if the arrays lengths do not match or 217 * there is insufficient data 218 */ 219 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 220 throws IllegalArgumentException { 221 Mean mean = new Mean(); 222 double result = 0d; 223 int length = xArray.length; 224 if(length == yArray.length && length > 1) { 225 double xMean = mean.evaluate(xArray); 226 double yMean = mean.evaluate(yArray); 227 for (int i = 0; i < length; i++) { 228 double xDev = xArray[i] - xMean; 229 double yDev = yArray[i] - yMean; 230 result += (xDev * yDev - result) / (i + 1); 231 } 232 } 233 else { 234 throw MathRuntimeException.createIllegalArgumentException( 235 "arrays must have the same length and both must have at " + 236 "least two elements. xArray has size {0}, yArray has {1} elements", 237 length, yArray.length); 238 } 239 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; 240 } 241 242 /** 243 * Computes the covariance between the two arrays, using the bias-corrected 244 * formula. 245 * 246 * <p>Array lengths must match and the common length must be at least 2.</p> 247 * 248 * @param xArray first data array 249 * @param yArray second data array 250 * @return returns the covariance for the two arrays 251 * @throws IllegalArgumentException if the arrays lengths do not match or 252 * there is insufficient data 253 */ 254 public double covariance(final double[] xArray, final double[] yArray) 255 throws IllegalArgumentException { 256 return covariance(xArray, yArray, true); 257 } 258 259 /** 260 * Throws IllegalArgumentException of the matrix does not have at least 261 * two columns and two rows 262 * @param matrix matrix to check 263 */ 264 private void checkSufficientData(final RealMatrix matrix) { 265 int nRows = matrix.getRowDimension(); 266 int nCols = matrix.getColumnDimension(); 267 if (nRows < 2 || nCols < 2) { 268 throw MathRuntimeException.createIllegalArgumentException( 269 "insufficient data: only {0} rows and {1} columns.", 270 nRows, nCols); 271 } 272 } 273 }