Python source code examples: numpy.cov()
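The following examples, collected from open-source projects, show numpy.cov() in a range of settings: PCA, Mahalanobis distance, FID statistics, masked arrays, pandas rolling/expanding windows, and domain adaptation. As a quick refresher, np.cov treats each row of its input as a variable and each column as an observation by default; rowvar=False flips that convention, and bias=True divides by N instead of N - 1. A minimal illustration:

import numpy as np

x = np.array([[0.0, 1.0, 2.0],    # variable 1, three observations
              [2.0, 1.0, 0.0]])   # variable 2, perfectly anti-correlated
print(np.cov(x))                  # [[ 1. -1.]
                                  #  [-1.  1.]]
print(np.cov(x, rowvar=False))    # 3x3: columns treated as variables instead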
Example 1
def PCA_components(x):
    """
    Principal Component Analysis helper to check out eigenvalues of components.

    **Args:**

    * `x` : input matrix (2d array), every row represents a new sample

    **Returns:**

    * `components` : sorted array of principal component eigenvalues
    """
    # validate inputs
    try:
        x = np.array(x)
    except Exception:
        raise ValueError('Impossible to convert x to a numpy array.')
    # eigenvalues and eigenvectors of the data covariance matrix
    eigen_values, eigen_vectors = np.linalg.eig(np.cov(x.T))
    # sort eigenvectors by descending eigenvalue (computed but unused here)
    eigen_order = eigen_vectors.T[(-eigen_values).argsort()]
    # form output - the eigenvalues in descending order
    return eigen_values[(-eigen_values).argsort()]
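A hypothetical call (the data is invented for illustration; the helper assumes numpy is imported as np):

data = np.random.randn(200, 3)    # 200 samples, 3 features
print(PCA_components(data))       # eigenvalues sorted from largest to smallest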
Example 2
def get_ma_dist(A, B):
    Y = A.copy()
    X = B.copy()

    S = np.cov(X.T)
    try:
        SI = np.linalg.inv(S)
    except np.linalg.LinAlgError:
        print("Singular Matrix: using np.linalg.pinv")
        SI = np.linalg.pinv(S)
    mu = np.mean(X, axis=0)
    diff = Y - mu
    Dct_c = np.diag(diff @ SI @ diff.T)
    return Dct_c
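Dct_c is the squared Mahalanobis distance of each row of A from the empirical distribution of B. A hypothetical call (arrays invented for illustration):

import numpy as np

B = np.random.randn(500, 3)        # reference samples
A = np.array([[0.0, 0.0, 0.0],
              [3.0, 3.0, 3.0]])    # query points
print(get_ma_dist(A, B))           # the second point is far from B's bulk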
Example 3
def calculate_activation_statistics(pointclouds, model, batch_size=100,
                                    dims=1808, device=None, verbose=False):
    """Calculation of the statistics used by the FID.
    Params:
    -- pointclouds : pytorch Tensor of pointclouds.
    -- model       : Instance of inception model
    -- batch_size  : The pointclouds tensor is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device on which to run the model
    -- verbose     : If set to True and parameter out_step is given, the
                     number of calculated batches is reported.
    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """
    act = get_activations(pointclouds, model, batch_size, dims, device, verbose)
    mu = np.mean(act, axis=0)
    # rowvar=False: rows are samples, columns are features
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
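The mu/sigma pair computed here feeds the Fréchet distance between two Gaussians, FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 (sigma1 sigma2)^(1/2)). A minimal sketch of that final step (simplified; production FID code adds numerical-stability guards around sqrtm):

import numpy as np
from scipy import linalg

def frechet_distance(mu1, sigma1, mu2, sigma2):
    # Matrix square root of the product of the two covariance matrices
    covmean = linalg.sqrtm(sigma1.dot(sigma2))
    if np.iscomplexobj(covmean):
        covmean = covmean.real   # discard negligible imaginary parts
    diff = mu1 - mu2
    return diff.dot(diff) + np.trace(sigma1 + sigma2 - 2.0 * covmean)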
Example 4
def test_1d_with_missing(self):
    # Test cov 1 1D variable w/missing values
    x = self.data
    x[-1] = masked
    x -= x.mean()
    nx = x.compressed()
    assert_almost_equal(np.cov(nx), cov(x))
    assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False))
    assert_almost_equal(np.cov(nx, rowvar=False, bias=True),
                        cov(x, rowvar=False, bias=True))
    #
    try:
        cov(x, allow_masked=False)
    except ValueError:
        pass
    #
    # 2 1D variables w/ missing values
    nx = x[1:-1]
    assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1]))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False),
                        cov(x, x[::-1], rowvar=False))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True),
                        cov(x, x[::-1], rowvar=False, bias=True))
Example 5
def test_2d_with_missing(self):
    # Test cov on 2D variable w/ missing value
    x = self.data
    x[-1] = masked
    x = x.reshape(3, 4)

    valid = np.logical_not(getmaskarray(x)).astype(int)
    frac = np.dot(valid, valid.T)
    xf = (x - x.mean(1)[:, None]).filled(0)
    assert_almost_equal(cov(x),
                        np.cov(xf) * (x.shape[1] - 1) / (frac - 1.))
    assert_almost_equal(cov(x, bias=True),
                        np.cov(xf, bias=True) * x.shape[1] / frac)

    frac = np.dot(valid.T, valid)
    xf = (x - x.mean(0)).filled(0)
    assert_almost_equal(cov(x, rowvar=False),
                        (np.cov(xf, rowvar=False) *
                         (x.shape[0] - 1) / (frac - 1.)))
    assert_almost_equal(cov(x, rowvar=False, bias=True),
                        (np.cov(xf, rowvar=False, bias=True) *
                         x.shape[0] / frac))
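The key quantity in this test is frac: with valid a 0/1 indicator of observed entries, np.dot(valid, valid.T) counts, for every pair of rows, how many columns are observed in both, which is the effective sample size for that entry of the covariance. A minimal sketch (data invented for illustration):

import numpy as np

valid = np.array([[1, 1, 1, 1],
                  [1, 1, 1, 1],
                  [1, 1, 1, 0]])   # one missing entry in the last row
frac = valid @ valid.T
# frac[i, j] = number of columns observed in both row i and row j:
# [[4 4 3]
#  [4 4 3]
#  [3 3 3]]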
Example 6
def test_expanding_cov_diff_index(self):
    # GH 7512
    s1 = Series([1, 2, 3], index=[0, 1, 2])
    s2 = Series([1, 3], index=[0, 2])
    result = s1.expanding().cov(s2)
    expected = Series([None, None, 2.0])
    tm.assert_series_equal(result, expected)

    s2a = Series([1, None, 3], index=[0, 1, 2])
    result = s1.expanding().cov(s2a)
    tm.assert_series_equal(result, expected)

    s1 = Series([7, 8, 10], index=[0, 1, 3])
    s2 = Series([7, 9, 10], index=[0, 2, 3])
    result = s1.expanding().cov(s2)
    expected = Series([None, None, None, 4.5])
    tm.assert_series_equal(result, expected)
Example 7
def test_rolling_functions_window_non_shrinkage_binary(self):
    # corr/cov return a MI DataFrame
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]],
                   columns=Index(['A', 'B'], name='foo'),
                   index=Index(range(4), name='bar'))
    df_expected = DataFrame(
        columns=Index(['A', 'B'], name='foo'),
        index=pd.MultiIndex.from_product([df.index, df.columns],
                                         names=['bar', 'foo']),
        dtype='float64')
    functions = [lambda x: (x.rolling(window=10, min_periods=5)
                            .cov(x, pairwise=True)),
                 lambda x: (x.rolling(window=10, min_periods=5)
                            .corr(x, pairwise=True))]
    for f in functions:
        df_result = f(df)
        tm.assert_frame_equal(df_result, df_expected)
Example 8
def test_expanding_cov_pairwise_diff_length(self):
    # GH 7512
    df1 = DataFrame([[1, 5], [3, 2], [3, 9]],
                    columns=Index(['A', 'B'], name='foo'))
    df1a = DataFrame([[1, 5], [3, 9]],
                     index=[0, 2],
                     columns=Index(['A', 'B'], name='foo'))
    df2 = DataFrame([[5, 6], [None, None], [2, 1]],
                    columns=Index(['X', 'Y'], name='foo'))
    df2a = DataFrame([[5, 6], [2, 1]],
                     index=[0, 2],
                     columns=Index(['X', 'Y'], name='foo'))
    # TODO: xref gh-15826
    # .loc is not preserving the names
    result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
    result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
    result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
    result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
    expected = DataFrame([[-3.0, -6.0], [-5.0, -10.0]],
                         columns=Index(['A', 'B'], name='foo'),
                         index=Index(['X', 'Y'], name='foo'))
    tm.assert_frame_equal(result1, expected)
    tm.assert_frame_equal(result2, expected)
    tm.assert_frame_equal(result3, expected)
    tm.assert_frame_equal(result4, expected)
Example 9
def test_expanding_corr_cov(self):
    g = self.frame.groupby('A')
    r = g.expanding()

    for f in ['corr', 'cov']:
        result = getattr(r, f)(self.frame)

        def func(x):
            return getattr(x.expanding(), f)(self.frame)

        expected = g.apply(func)
        tm.assert_frame_equal(result, expected)

        result = getattr(r.B, f)(pairwise=True)

        def func(x):
            return getattr(x.B.expanding(), f)(pairwise=True)

        expected = g.apply(func)
        tm.assert_series_equal(result, expected)
Example 10
def test_rolling_cov_offset(self):
    # GH16058
    idx = pd.date_range('2017-01-01', periods=24, freq='1h')
    ss = Series(np.arange(len(idx)), index=idx)

    result = ss.rolling('2h').cov()
    expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
    tm.assert_series_equal(result, expected)

    expected2 = ss.rolling(2, min_periods=1).cov()
    tm.assert_series_equal(result, expected2)

    result = ss.rolling('3h').cov()
    expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
    tm.assert_series_equal(result, expected)

    expected2 = ss.rolling(3, min_periods=1).cov()
    tm.assert_series_equal(result, expected2)
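The expected values can be read directly off np.cov: with no second series, rolling cov is the rolling unbiased variance, and for a window of consecutive integers that is 0.5 with two points and 1.0 with three:

import numpy as np

print(np.cov([0, 1]))      # 0.5, the 2h-window expectation above
print(np.cov([0, 1, 2]))   # 1.0, the 3h-window expectation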
Example 11
def nancov(a, b, min_periods=None):
    if len(a) != len(b):
        raise AssertionError('Operands to nancov must have same size')

    if min_periods is None:
        min_periods = 1

    valid = notna(a) & notna(b)
    if not valid.all():
        a = a[valid]
        b = b[valid]

    if len(a) < min_periods:
        return np.nan

    return np.cov(a, b)[0, 1]
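A hypothetical call (nancov relies on pandas' notna being imported in its module; the arrays are invented for illustration):

import numpy as np

a = np.array([1.0, 2.0, np.nan, 4.0])
b = np.array([2.0, 4.0, 6.0, 8.0])
print(nancov(a, b))   # pairwise covariance over the three valid pairs only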
Example 12
def test_shape_inference(self):
    with self.session(use_gpu=True):
        # Static
        mean = 10 * np.random.normal(size=(10, 11, 2)).astype('d')
        cov = np.zeros((10, 11, 2, 2))
        dst = MultivariateNormalCholesky(
            tf.constant(mean), tf.constant(cov))
        self.assertEqual(dst.get_batch_shape().as_list(), [10, 11])
        self.assertEqual(dst.get_value_shape().as_list(), [2])
        # Dynamic
        unk_mean = tf.placeholder(tf.float32, None)
        unk_cov = tf.placeholder(tf.float32, None)
        dst = MultivariateNormalCholesky(unk_mean, unk_cov)
        self.assertEqual(dst.get_value_shape().as_list(), [None])
        feed_dict = {unk_mean: np.ones(2), unk_cov: np.eye(2)}
        self.assertEqual(list(dst.batch_shape.eval(feed_dict)), [])
        self.assertEqual(list(dst.value_shape.eval(feed_dict)), [2])
Example 13
def test_sample(self):
    with self.fixed_randomness_session(233):
        def test_sample_with(seed):
            mean, cov, cov_chol = self._gen_test_params(seed)
            dst = MultivariateNormalCholesky(
                tf.constant(mean), tf.constant(cov_chol))
            n_exp = 20000
            samples = dst.sample(n_exp)
            sample_shape = (n_exp, 10, 11, 3)
            self.assertEqual(samples.shape.as_list(), list(sample_shape))
            samples = dst.sample(n_exp).eval()
            self.assertEqual(samples.shape, sample_shape)
            self.assertAllClose(
                np.mean(samples, axis=0), mean, rtol=5e-2, atol=5e-2)
            for i in range(10):
                for j in range(11):
                    self.assertAllClose(
                        np.cov(samples[:, i, j, :].T), cov[i, j],
                        rtol=1e-1, atol=1e-1)

        for seed in [23, 233, 2333]:
            test_sample_with(seed)
Example 14
def test_prob(self):
    with self.fixed_randomness_session(233):
        def test_prob_with(seed):
            mean, cov, cov_chol = self._gen_test_params(seed)
            dst = MultivariateNormalCholesky(
                tf.constant(mean), tf.constant(cov_chol),
                check_numerics=True)
            n_exp = 200
            samples = dst.sample(n_exp).eval()
            log_pdf = dst.log_prob(tf.constant(samples))
            pdf_shape = (n_exp, 10, 11)
            self.assertEqual(log_pdf.shape.as_list(), list(pdf_shape))
            log_pdf = log_pdf.eval()
            self.assertEqual(log_pdf.shape, pdf_shape)
            for i in range(10):
                for j in range(11):
                    log_pdf_exact = stats.multivariate_normal.logpdf(
                        samples[:, i, j, :], mean[i, j], cov[i, j])
                    self.assertAllClose(
                        log_pdf_exact, log_pdf[:, i, j])
            self.assertAllClose(
                np.exp(log_pdf), dst.prob(tf.constant(samples)).eval())

        for seed in [23, 233, 2333]:
            test_prob_with(seed)
Example 15
def calculate_activation_statistics(images, model, batch_size=64, dims=2048, device=None):
    """Calculation of the statistics used by the FID.
    Params:
    -- images      : Numpy array of dimension (n_images, 3, hi, wi). The values
                     must lie between 0 and 1.
    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device on which to run the model
    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """
    act = get_activations(images, model, batch_size, dims, device)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
Example 16
def testCovExecution(self):
    data = np.array([[0, 2], [1, 1], [2, 0]]).T
    x = tensor(data, chunk_size=1)

    t = cov(x)

    res = self.executor.execute_tensor(t, concat=True)[0]
    expected = np.cov(data)
    np.testing.assert_equal(res, expected)

    data_x = [-2.1, -1, 4.3]
    data_y = [3, 1.1, 0.12]

    x = tensor(data_x, chunk_size=1)
    y = tensor(data_y, chunk_size=1)

    X = stack((x, y), axis=0)
    t = cov(x, y)
    r = tall(t == cov(X))
    self.assertTrue(self.executor.execute_tensor(r)[0])
Example 17
def calculate_activation_statistics(
    dataloader, device="cuda", num_images=500, real_dataset=False
):
    """Calculate the activation statistics for a dataset.

    Args:
        dataloader: Dataloader of data from which to obtain activations.
        device: device on which to perform the evaluation (e.g. 'cuda' for GPU).
        num_images: number of images to evaluate.
        real_dataset: bool (whether the dataset is real or generated).

    Returns:
        Mean of activations (np.array), covariance of activations (np.array).
    """
    act = get_activations(
        dataloader,
        device,
        dims=2048,
        num_images=num_images,
        real_dataset=real_dataset,
    )
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
Example 18
def calculate_activation_statistics(images, model, batch_size=50,
                                    dims=2048, cuda=False, verbose=False):
    """Calculation of the statistics used by the FID.
    Params:
    -- images      : Numpy array of dimension (n_images, 3, hi, wi). The values
                     must lie between 0 and 1.
    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the
                     number of calculated batches is reported.
    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """
    act = get_activations(images, model, batch_size, dims, cuda, verbose)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
Example 19
def _internal_tell_candidate(self, candidate: p.Parameter, value: float) -> None:
    self.children.append(candidate)
    if self._POPSIZE_ADAPTATION:
        self.popsize.add_value(value)
    if len(self.children) >= self.popsize.llambda:
        self.children = sorted(self.children, key=lambda c: c.loss)
        population_data = [c.get_standardized_data(reference=self.parametrization) for c in self.children]
        mu = self.popsize.mu
        arrays = population_data[:mu]
        # covariance
        # TODO: check actual covariance that should be used
        centered_arrays = np.array([x - self.current_center for x in arrays])
        cov = centered_arrays.T.dot(centered_arrays)
        # cov = np.cov(np.array(population_data).T)
        mem_factor = 0.9 if self._COVARIANCE_MEMORY else 0
        self.covariance *= mem_factor
        self.covariance += (1 - mem_factor) * cov
        # Computing the new parent
        self.current_center = sum(arrays) / mu  # type: ignore
        self.sigma = np.exp(sum([np.log(c._meta["sigma"]) for c in self.children[:mu]]) / mu)
        self.parents = self.children[:mu]
        self.children = []
Example 20
def test_1d_w_missing(self):
    # Test cov 1 1D variable w/missing values
    x = self.data
    x[-1] = masked
    x -= x.mean()
    nx = x.compressed()
    assert_almost_equal(np.cov(nx), cov(x))
    assert_almost_equal(np.cov(nx, rowvar=False), cov(x, rowvar=False))
    assert_almost_equal(np.cov(nx, rowvar=False, bias=True),
                        cov(x, rowvar=False, bias=True))
    #
    try:
        cov(x, allow_masked=False)
    except ValueError:
        pass
    #
    # 2 1D variables w/ missing values
    nx = x[1:-1]
    assert_almost_equal(np.cov(nx, nx[::-1]), cov(x, x[::-1]))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False),
                        cov(x, x[::-1], rowvar=False))
    assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True),
                        cov(x, x[::-1], rowvar=False, bias=True))
Example 21
def test_2d_w_missing(self):
    # Test cov on 2D variable w/ missing value
    x = self.data
    x[-1] = masked
    x = x.reshape(3, 4)

    valid = np.logical_not(getmaskarray(x)).astype(int)
    frac = np.dot(valid, valid.T)
    xf = (x - x.mean(1)[:, None]).filled(0)
    assert_almost_equal(cov(x),
                        np.cov(xf) * (x.shape[1] - 1) / (frac - 1.))
    assert_almost_equal(cov(x, bias=True),
                        np.cov(xf, bias=True) * x.shape[1] / frac)

    frac = np.dot(valid.T, valid)
    xf = (x - x.mean(0)).filled(0)
    assert_almost_equal(cov(x, rowvar=False),
                        (np.cov(xf, rowvar=False) *
                         (x.shape[0] - 1) / (frac - 1.)))
    assert_almost_equal(cov(x, rowvar=False, bias=True),
                        (np.cov(xf, rowvar=False, bias=True) *
                         x.shape[0] / frac))
Example 22
def test_cholesky_and_cholesky_grad_shape():
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")

    rng = numpy.random.RandomState(utt.fetch_seed())
    x = tensor.matrix()
    for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
        f_chol = theano.function([x], l.shape)
        g = tensor.grad(l.sum(), x)
        f_cholgrad = theano.function([x], g.shape)

        topo_chol = f_chol.maker.fgraph.toposort()
        topo_cholgrad = f_cholgrad.maker.fgraph.toposort()
        if config.mode != 'FAST_COMPILE':
            assert sum([node.op.__class__ == Cholesky
                        for node in topo_chol]) == 0
            assert sum([node.op.__class__ == CholeskyGrad
                        for node in topo_cholgrad]) == 0

        for shp in [2, 3, 5]:
            # np.cov of a (shp, shp + 10) sample yields a (shp, shp) SPD matrix
            m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
            yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
            yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)
Example 23
def _compute_covariance(self):
    self.factor = self.scotts_factor()
    # Cache covariance and inverse covariance of the data
    if not hasattr(self, '_data_inv_cov'):
        self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
                                                  bias=False))
        self._data_inv_cov = linalg.inv(self._data_covariance)

    self.covariance = self._data_covariance * self.factor**2
    self.inv_cov = self._data_inv_cov / self.factor**2
    self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n
Example 24
def reg_cov(self, X):
    """
    Regularize covariance matrix until non-singular.

    Parameters
    ----------
    X : array
        data matrix (M samples x D features).

    Returns
    -------
    SX : array
        regularized covariance matrix.
    """
    # Compute mean of data
    muX = np.mean(X, axis=0, keepdims=1)

    # Compute covariance matrix without regularization
    SX = np.cov((X - muX).T)

    # Initialize regularization parameter
    reg = 1e-6

    # Keep going until non-singular
    while not self.is_pos_def(SX):
        # Compute covariance matrix with regularization
        SX = np.cov((X - muX).T) + reg*np.eye(X.shape[1])

        # Increment reg
        reg *= 10

    # Report regularization
    print('Final regularization parameter = {}'.format(reg))

    return SX
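reg_cov delegates the singularity check to an is_pos_def helper that is not shown in this snippet. A common implementation, assumed here rather than taken from the source, tests the eigenvalue spectrum:

def is_pos_def(self, A):
    # Hypothetical helper (not from the source): a symmetric matrix is
    # positive definite iff all of its eigenvalues are strictly positive.
    return bool(np.all(np.linalg.eigvalsh(A) > 0))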
Example 25
def zca_whiten(self, X):
    """
    Perform ZCA whitening (aka Mahalanobis whitening).

    Parameters
    ----------
    X : array (M samples x D features)
        data matrix.

    Returns
    -------
    X : array (M samples x D features)
        whitened data.
    """
    # Covariance matrix
    Sigma = np.cov(X.T)

    # Singular value decomposition
    U, S, V = svd(Sigma)

    # Whitening constant to prevent division by zero
    epsilon = 1e-5

    # ZCA whitening matrix
    W = np.dot(U, np.dot(np.diag(1.0 / np.sqrt(S + epsilon)), V))

    # Apply whitening matrix
    return np.dot(X, W)
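A quick sanity check (hypothetical usage; model stands in for an instance of the class defining zca_whiten): after whitening, the sample covariance should be close to the identity.

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((2000, 5)) @ np.tril(np.ones((5, 5)))  # correlated features
Xw = model.zca_whiten(X)   # model: hypothetical instance of the class above
print(np.allclose(np.cov(Xw.T), np.eye(5), atol=1e-3))         # True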
Example 26
def PCA(x, n=False):
    """
    Principal component analysis function.

    **Args:**

    * `x` : input matrix (2d array), every row represents a new sample

    **Kwargs:**

    * `n` : number of features returned (integer) - how many columns
      should the output keep

    **Returns:**

    * `new_x` : matrix with reduced size (lower number of columns)
    """
    # select n if not provided
    if not n:
        n = x.shape[1] - 1
    # validate inputs
    try:
        x = np.array(x)
    except Exception:
        raise ValueError('Impossible to convert x to a numpy array.')
    assert type(n) == int, "Provided n is not an integer."
    assert x.shape[1] > n, "The requested n is bigger than \
        number of features in x."
    # eigenvalues and eigenvectors of the data covariance matrix
    eigen_values, eigen_vectors = np.linalg.eig(np.cov(x.T))
    # sort eigenvectors by descending eigenvalue
    eigen_order = eigen_vectors.T[(-eigen_values).argsort()]
    # form output - reduced x matrix
    return eigen_order[:n].dot(x.T).T
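A hypothetical call, reducing four features to two (data invented for illustration):

data = np.random.randn(100, 4)
reduced = PCA(data, n=2)
print(reduced.shape)   # (100, 2)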
Example 27
def fit(self, Xs, Xt):
    '''
    Perform CORAL on the source domain features
    :param Xs: ns * n_feature, source feature
    :param Xt: nt * n_feature, target feature
    :return: New source domain features
    '''
    cov_src = np.cov(Xs.T) + np.eye(Xs.shape[1])
    cov_tar = np.cov(Xt.T) + np.eye(Xt.shape[1])
    A_coral = np.dot(scipy.linalg.fractional_matrix_power(cov_src, -0.5),
                     scipy.linalg.fractional_matrix_power(cov_tar, 0.5))
    Xs_new = np.dot(Xs, A_coral)
    return Xs_new
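CORAL aligns second-order statistics: after the transform, the covariance of the source features approximately matches the target's. A hypothetical usage, with coral standing in for an instance of the surrounding class:

import numpy as np

Xs = np.random.randn(300, 8)          # source domain
Xt = 2.0 * np.random.randn(400, 8)    # target domain with a different scale
Xs_new = coral.fit(Xs, Xt)            # coral: hypothetical instance
# np.cov(Xs_new.T) is now close to np.cov(Xt.T), up to the +I regularization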
Example 28
def fit(self, Xs, Xt):
    '''
    find pivot features and transfer the Xs and Xt
    Param Xs: source data
    Param Xt: target data
    output Xs_new: new source data features
    output Xt_new: new target data features
    output W: transform matrix
    '''
    _, ds = Xs.shape
    _, dt = Xt.shape
    assert ds == dt
    X = np.concatenate((Xs, Xt), axis=0)
    ix = np.argsort(np.sum(X, axis=0))
    ix = ix[::-1][:self.num_pivots]
    pivots = (X[:, ix] > 0).astype('float')
    p = np.zeros((ds, self.num_pivots))
    # train the pivot classifiers
    for i in range(self.num_pivots):
        clf = linear_model.SGDClassifier(loss="modified_huber", alpha=self.l2)
        clf.fit(X, pivots[:, i])
        p[:, i] = clf.coef_.ravel()  # coef_ has shape (1, ds); flatten it
    _, W = np.linalg.eig(np.cov(p))
    W = W[:, :self.num_pivots].astype('float')
    self.W = W
    Xs_new = np.concatenate((np.dot(Xs, W), Xs), axis=1)
    Xt_new = np.concatenate((np.dot(Xt, W), Xt), axis=1)
    return Xs_new, Xt_new, W