Python source code examples: sklearn.neighbors.KernelDensity()
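Before the collected examples, here is a minimal, self-contained sketch of the typical KernelDensity workflow (fit on training points, evaluate the log-density, draw new samples). The data, bandwidth and kernel below are illustrative choices, not taken from any of the examples.

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.randn(200, 2)  # 200 two-dimensional training points

kde = KernelDensity(kernel='gaussian', bandwidth=0.5)
kde.fit(X)

log_density = kde.score_samples(X[:5])   # score_samples() returns log-densities
density = np.exp(log_density)            # convert to actual density values

new_points = kde.sample(10, random_state=0)  # sampling is supported for 'gaussian' and 'tophat' kernels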
Example 1
def __init__(self, hybrid=False, kernel='gaussian', n_jobs=-1, seed=None, **kwargs):
    """Init Kernel Density Estimation instance."""
    self.kernel = kernel
    self.n_jobs = n_jobs
    self.seed = seed
    self.model = KernelDensity(kernel=kernel, **kwargs)
    self.bandwidth = self.model.bandwidth
    self.hybrid = hybrid
    self.ae_net = None  # autoencoder network for the case of a hybrid model
    self.results = {
        'train_time': None,
        'test_time': None,
        'test_auc': None,
        'test_scores': None
    }
Example 2
def gen_exp_name(model_class, model_kwargs):
    """Generates an experiment name from the model class and its parameters.

    :param model_class: (type) the class, one of GaussianMixture, PCAPreDensity or KernelDensity.
    :param model_kwargs: (dict) constructor arguments to the class.
    :return: A string succinctly encoding the class and parameters."""
    if model_class == GaussianMixture:
        n_components = model_kwargs.get("n_components", 1)
        covariance_type = model_kwargs.get("covariance_type", "full")
        return f"gmm_{n_components}_components_{covariance_type}"
    elif model_class == PCAPreDensity:
        if model_kwargs["density_class"] == KernelDensity:
            return "pca_kde"
        elif model_kwargs["density_class"] == GaussianMixture:
            return "pca_gmm"
        else:
            return "pca_unknown"
    elif model_class == KernelDensity:
        return "kde"
    else:
        return "default"
Example 3
def test_kde_badargs():
    assert_raises(ValueError, KernelDensity,
                  algorithm='blah')
    assert_raises(ValueError, KernelDensity,
                  bandwidth=0)
    assert_raises(ValueError, KernelDensity,
                  kernel='blah')
    assert_raises(ValueError, KernelDensity,
                  metric='blah')
    assert_raises(ValueError, KernelDensity,
                  algorithm='kd_tree', metric='blah')
    kde = KernelDensity()
    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
                  sample_weight=np.random.random((200, 10)))
    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
                  sample_weight=-np.random.random(200))
Example 4
def test_pickling(tmpdir, sample_weight):
    # Make sure that predictions are the same before and after pickling. Used
    # to be a bug because sample_weights wasn't pickled and the resulting tree
    # would miss some info.
    kde = KernelDensity()
    data = np.reshape([1., 2., 3.], (-1, 1))
    kde.fit(data, sample_weight=sample_weight)
    X = np.reshape([1.1, 2.1], (-1, 1))
    scores = kde.score_samples(X)
    file_path = str(tmpdir.join('dump.pkl'))
    _joblib.dump(kde, file_path)
    kde = _joblib.load(file_path)
    scores_pickled = kde.score_samples(X)
    assert_allclose(scores, scores_pickled)
Example 5
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate: a length-`m` array with one density
        value per grid point.
    """
    kde_skl = KernelDensity(**kwargs)
    kde_skl.fit(data)
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(grid)
    return np.exp(log_pdf)
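A hypothetical call to kde_sklearn, estimating a one-dimensional density on a regular grid; the data and bandwidth are made up for illustration, and the extra keyword arguments are simply forwarded to KernelDensity:

import numpy as np

data = np.random.randn(500, 1)                  # n x p sample, here p = 1
grid = np.linspace(-4, 4, 200).reshape(-1, 1)   # m x p evaluation points
density = kde_sklearn(data, grid, bandwidth=0.3, kernel='gaussian')  # shape (200,)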
Example 6
def __init__(self, D_d_sample, D_delta_t_sample, kde_type='scipy_gaussian', bandwidth=1):
    """
    :param D_d_sample: 1-d numpy array of angular diameter distances to the lens plane
    :param D_delta_t_sample: 1-d numpy array of time-delay distances
    :param kde_type: the kernel to use. Valid kernels are
        'scipy_gaussian' or
        ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine'].
        Default is 'scipy_gaussian'.
    :param bandwidth: width of kernel (in same units as the angular diameter quantities)
    """
    values = np.vstack([D_d_sample, D_delta_t_sample])
    if kde_type == 'scipy_gaussian':
        self._PDF_kernel = stats.gaussian_kde(values)
    else:
        from sklearn.neighbors import KernelDensity
        self._kde = KernelDensity(bandwidth=bandwidth, kernel=kde_type)
        self._kde.fit(values.T)
    self._kde_type = kde_type
Example 7
def test_optuna_search_invalid_param_dist():
    # type: () -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = ["kernel", distributions.CategoricalDistribution(("gaussian", "linear"))]
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,  # type: ignore
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )
    with pytest.raises(ValueError, match="param_distributions must be a dictionary."):
        optuna_search.fit(X)
Example 8
def test_optuna_search_pruning_without_partial_fit():
    # type: () -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        enable_pruning=True,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )
    with pytest.raises(ValueError, match="estimator must support partial_fit."):
        optuna_search.fit(X)
Example 9
def test_optuna_search_negative_max_iter():
    # type: () -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        max_iter=-1,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )
    with pytest.raises(ValueError, match="max_iter must be > 0"):
        optuna_search.fit(X)
Example 10
def test_optuna_search_tuple_instead_of_distribution():
    # type: () -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {"kernel": ("gaussian", "linear")}
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,  # type: ignore
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )
    with pytest.raises(ValueError, match="must be a optuna distribution."):
        optuna_search.fit(X)
Example 11
def test_optuna_search_verbosity(verbose):
    # type: (int) -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
        verbose=verbose,
    )
    optuna_search.fit(X)
Example 12
def test_optuna_search_subsample():
    # type: () -> None
    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
        subsample=5,
    )
    optuna_search.fit(X)
Example 13
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.neighbors.NearestNeighbors,
                  neighbors.NearestNeighbors)
    self.assertIs(df.neighbors.KNeighborsClassifier,
                  neighbors.KNeighborsClassifier)
    self.assertIs(df.neighbors.RadiusNeighborsClassifier,
                  neighbors.RadiusNeighborsClassifier)
    self.assertIs(df.neighbors.KNeighborsRegressor,
                  neighbors.KNeighborsRegressor)
    self.assertIs(df.neighbors.RadiusNeighborsRegressor,
                  neighbors.RadiusNeighborsRegressor)
    self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
    self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
    self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
    self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
    self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity)
Example 14
def display(self, output_filename):
    fig, (self.ax) = plt.subplots(1, 1)
    self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
    has_legend = False
    for dataset in self.datasets:
        self._display_dataset(dataset)
        if dataset.label is not None:
            has_legend = True
    if self.title is not None:
        self.ax.set_xlabel(self.title)
    self.ax.set_ylabel('Density')
    if has_legend:
        self.ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3,
                       mode='expand', borderaxespad=0.)
    fig.savefig(output_filename)
    plt.close(fig)
Example 15
def test_kde_algorithm_metric_choice():
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)  # 2 features required for haversine dist.
    Y = rng.randn(10, 2)
    for algorithm in ['auto', 'ball_tree', 'kd_tree']:
        for metric in ['euclidean', 'minkowski', 'manhattan',
                       'chebyshev', 'haversine']:
            if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
                assert_raises(ValueError, KernelDensity,
                              algorithm=algorithm, metric=metric)
            else:
                kde = KernelDensity(algorithm=algorithm, metric=metric)
                kde.fit(X)
                y_dens = kde.score_samples(Y)
                assert_equal(y_dens.shape, Y.shape[:1])
Example 16
def kde(data, rng, grid_size=10, **kwargs):
    """Kernel Density Estimation with Scikit-learn"""
    n_samples = data.shape[0]
    n_dims = data.shape[1]
    # Silverman's rule-of-thumb bandwidth (assumes roughly unit-variance data)
    bandwidth = (n_samples * (n_dims + 2) / 4.)**(-1. / (n_dims + 4.))
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(data)
    space = [linspace(i[0], i[1], grid_size) for i in rng]
    grid = meshgrid(*tuple(space))
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(vstack(map(ravel, grid)).T)
    return exp(log_pdf), space
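A hypothetical call to this kde helper (it relies on numpy names such as linspace, meshgrid, vstack, ravel and exp being imported in the surrounding module). Here rng is a list of (min, max) bounds per dimension, and the made-up kernel argument is forwarded to KernelDensity:

import numpy as np

data = np.random.randn(500, 2)
density, space = kde(data, rng=[(-3, 3), (-3, 3)], grid_size=10, kernel='gaussian')
# density has grid_size ** n_dims entries (here 100), one per grid point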
Example 17
def pca_kde():
    model_class = PCAPreDensity
    model_kwargs = {"density_class": KernelDensity}
    _ = locals()  # quieten flake8 unused variable warning
    del _
Example 18
def kde():
    model_class = KernelDensity
    _ = locals()  # quieten flake8 unused variable warning
    del _
Example 19
def _fit_single_density(self, flat_transitions):
    # This bandwidth was chosen to make sense with standardised inputs that
    # have unit variance in each component. There might be a better way to
    # choose it automatically.
    density_model = KernelDensity(
        kernel=self.kernel, bandwidth=self.kernel_bandwidth
    )
    density_model.fit(flat_transitions)
    return density_model
Example 20
def test_gridsearch_no_predict():
    # Test grid search with an estimator without predict.
    # Slight duplication of a test from KDE.
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42)
Example 21
def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
                        atol=atol, rtol=rtol)
    log_dens = kde.fit(X).score_samples(Y)
    assert_allclose(np.exp(log_dens), dens_true,
                    atol=atol, rtol=max(1E-7, rtol))
    assert_allclose(np.exp(kde.score(Y)),
                    np.prod(dens_true),
                    atol=atol, rtol=max(1E-7, rtol))
Example 22
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)
        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
Example 23
def test_kde_algorithm_metric_choice(algorithm, metric):
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)  # 2 features required for haversine dist.
    Y = rng.randn(10, 2)
    if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
        assert_raises(ValueError, KernelDensity,
                      algorithm=algorithm, metric=metric)
    else:
        kde = KernelDensity(algorithm=algorithm, metric=metric)
        kde.fit(X)
        y_dens = kde.score_samples(Y)
        assert_equal(y_dens.shape, Y.shape[:1])
Example 24
def _fit(self, X):
    self.estimator_ = KernelDensity(
        algorithm=self.algorithm,
        atol=self.atol,
        bandwidth=self.bandwidth,
        breadth_first=self.breadth_first,
        kernel=self.kernel,
        leaf_size=self.leaf_size,
        metric=self.metric,
        rtol=self.rtol,
        metric_params=self.metric_params
    ).fit(X)
    return self
Example 25
def setup(self):
    self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
    height, width = self.shape
    self.uniform_density = -np.log(width * height)
    self.kde_constant = np.log(1 - self.regularization)
    self.uniform_constant = np.log(self.regularization)
Example 26
def setup(self):
    self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
    self.kde_constant = np.log(1 - self.regularization)
    self.uniform_constant = np.log(self.regularization)
Example 27
def setup(self):
    self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
Example 28
def __init__(self, stimuli, fixations, bandwidth, eps=1e-20, **kwargs):
    super(CrossvalidatedBaselineModel, self).__init__(**kwargs)
    self.stimuli = stimuli
    self.fixations = fixations
    self.bandwidth = bandwidth
    self.eps = eps
    self.xs, self.ys = normalize_fixations(stimuli, fixations)
    # self.kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(np.vstack([self.xs, self.ys]).T)
    self.shape_cache = {}
Example 29
def display(x, color, list_save=None):
    kde = KernelDensity(kernel='gaussian', bandwidth=.005).fit(x.data.cpu().numpy())
    dens = np.exp(kde.score_samples(t_plot))
    dens[0] = 0
    dens[-1] = 0
    plt.fill(t_plot, dens, color=color)
    if list_save is not None:
        list_save.append(dens.ravel())  # We'll save a csv at the end
Example 30
def optimize_kde(X):
    # use grid search cross-validation to optimize the bandwidth
    params = {'bandwidth': np.logspace(-3, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params, n_jobs=8, cv=5, verbose=1)
    grid.fit(X)
    print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
    # use the best estimator to compute the kernel density estimate
    kde = grid.best_estimator_
    return kde
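A hypothetical usage of optimize_kde, assuming GridSearchCV, KernelDensity and numpy are imported as in the surrounding module: tune the bandwidth on a sample, then evaluate the fitted estimator on new points.

import numpy as np

X = np.random.randn(300, 2)
kde = optimize_kde(X)                                # runs the 5-fold grid search over bandwidths
log_density = kde.score_samples(np.zeros((1, 2)))    # log-density at the origin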