# -*- coding: utf-8 -*-
"""
Created on Fri Aug 17 13:10:52 2012
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
def clip_evals(x, value=0): #threshold=0, value=0):
evals, evecs = np.linalg.eigh(x)
clipped = np.any(evals < 0)
x_new = np.dot(evecs * np.maximum(evals, value), evecs.T)
return x_new, clipped
[docs]def corr_nearest(corr, threshold=1e-15, n_fact=100):
'''
Find the nearest correlation matrix that is positive semi-definite.
The function iteratively adjust the correlation matrix by clipping the
eigenvalues of a difference matrix. The diagonal elements are set to one.
Parameters
----------
corr : ndarray, (k, k)
initial correlation matrix
threshold : float
clipping threshold for smallest eigenvalue, see Notes
n_fact : int or float
factor to determine the maximum number of iterations. The maximum
number of iterations is the integer part of the number of columns in
the correlation matrix times n_fact.
Returns
-------
corr_new : ndarray, (optional)
corrected correlation matrix
Notes
-----
The smallest eigenvalue of the corrected correlation matrix is
approximately equal to the ``threshold``.
If the threshold=0, then the smallest eigenvalue of the correlation matrix
might be negative, but zero within a numerical error, for example in the
range of -1e-16.
Assumes input correlation matrix is symmetric.
Stops after the first step if correlation matrix is already positive
semi-definite or positive definite, so that smallest eigenvalue is above
threshold. In this case, the returned array is not the original, but
is equal to it within numerical precision.
See Also
--------
corr_clipped
cov_nearest
'''
k_vars = corr.shape[0]
if k_vars != corr.shape[1]:
raise ValueError("matrix is not square")
diff = np.zeros(corr.shape)
x_new = corr.copy()
diag_idx = np.arange(k_vars)
for ii in range(int(len(corr) * n_fact)):
x_adj = x_new - diff
x_psd, clipped = clip_evals(x_adj, value=threshold)
if not clipped:
x_new = x_psd
break
diff = x_psd - x_adj
x_new = x_psd.copy()
x_new[diag_idx, diag_idx] = 1
else:
import warnings
warnings.warn('maximum iteration reached')
return x_new
[docs]def corr_clipped(corr, threshold=1e-15):
'''
Find a near correlation matrix that is positive semi-definite
This function clips the eigenvalues, replacing eigenvalues smaller than
the threshold by the threshold. The new matrix is normalized, so that the
diagonal elements are one.
Compared to corr_nearest, the distance between the original correlation
matrix and the positive definite correlation matrix is larger, however,
it is much faster since it only computes eigenvalues only once.
Parameters
----------
corr : ndarray, (k, k)
initial correlation matrix
threshold : float
clipping threshold for smallest eigenvalue, see Notes
Returns
-------
corr_new : ndarray, (optional)
corrected correlation matrix
Notes
-----
The smallest eigenvalue of the corrected correlation matrix is
approximately equal to the ``threshold``. In examples, the
smallest eigenvalue can be by a factor of 10 smaller than the threshold,
e.g. threshold 1e-8 can result in smallest eigenvalue in the range
between 1e-9 and 1e-8.
If the threshold=0, then the smallest eigenvalue of the correlation matrix
might be negative, but zero within a numerical error, for example in the
range of -1e-16.
Assumes input correlation matrix is symmetric. The diagonal elements of
returned correlation matrix is set to ones.
If the correlation matrix is already positive semi-definite given the
threshold, then the original correlation matrix is returned.
``cov_clipped`` is 40 or more times faster than ``cov_nearest`` in simple
example, but has a slightly larger approximation error.
See Also
--------
corr_nearest
cov_nearest
'''
x_new, clipped = clip_evals(corr, value=threshold)
if not clipped:
return corr
#cov2corr
x_std = np.sqrt(np.diag(x_new))
x_new = x_new / x_std / x_std[:,None]
return x_new
[docs]def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
return_all=False):
'''
Find the nearest covariance matrix that is postive (semi-) definite
This leaves the diagonal, i.e. the variance, unchanged
Parameters
----------
cov : ndarray, (k,k)
initial covariance matrix
method : string
if "clipped", then the faster but less accurate ``corr_clipped`` is used.
if "nearest", then ``corr_nearest`` is used
threshold : float
clipping threshold for smallest eigen value, see Notes
nfact : int or float
factor to determine the maximum number of iterations in
``corr_nearest``. See its doc string
return_all : bool
if False (default), then only the covariance matrix is returned.
If True, then correlation matrix and standard deviation are
additionally returned.
Returns
-------
cov_ : ndarray
corrected covariance matrix
corr_ : ndarray, (optional)
corrected correlation matrix
std_ : ndarray, (optional)
standard deviation
Notes
-----
This converts the covariance matrix to a correlation matrix. Then, finds
the nearest correlation matrix that is positive semidefinite and converts
it back to a covariance matrix using the initial standard deviation.
The smallest eigenvalue of the intermediate correlation matrix is
approximately equal to the ``threshold``.
If the threshold=0, then the smallest eigenvalue of the correlation matrix
might be negative, but zero within a numerical error, for example in the
range of -1e-16.
Assumes input covariance matrix is symmetric.
See Also
--------
corr_nearest
corr_clipped
'''
from statsmodels.stats.moment_helpers import cov2corr, corr2cov
cov_, std_ = cov2corr(cov, return_std=True)
if method == 'clipped':
corr_ = corr_clipped(cov_, threshold=threshold)
elif method == 'nearest':
corr_ = corr_nearest(cov_, threshold=threshold, n_fact=n_fact)
cov_ = corr2cov(corr_, std_)
if return_all:
return cov_, corr_, std_
else:
return cov_
if __name__ == '__main__':
pass