Python源码示例:sklearn.neighbors.BallTree()
示例1
def avgdigamma(data, dvec, leaf_size=16):
    """Return the mean digamma of per-point neighbor counts.

    A ball tree is built over ``data`` with ``p=inf``. For every row of
    ``data`` the number of tree points within that row's radius
    (``dvec - EPS``) is counted, ``digamma`` is applied to the counts and
    the results are averaged.

    Parameters
    ----------
    data : numpy.ndarray
        Points of the marginal space; used both to build the tree and as
        the query points.
    dvec : array_like (n_points,)
        Search radius for each point.
    leaf_size : int, optional
        Leaf size forwarded to ``BallTree``.

    Returns
    -------
    avgdigamma : float
        Expectation value of <psi(nx)>.
    """
    # Shrink every radius by EPS before counting neighbors.
    radii = dvec - EPS
    marginal_tree = BallTree(data, leaf_size=leaf_size, p=float('inf'))
    neighbor_counts = marginal_tree.query_radius(data, radii, count_only=True)
    return digamma(neighbor_counts).mean()
示例2
def test_unsupervised_inputs():
    """Check the kinds of input ``NearestNeighbors.fit`` accepts.

    A model fitted on the raw array provides the reference neighbors; a
    second model is then fitted in turn on that fitted estimator, on a
    ``BallTree`` and on a ``KDTree`` and must reproduce the reference.
    """
    samples = rng.random_sample((10, 3))

    # Reference result: fit directly on the sample array.
    reference = neighbors.NearestNeighbors(n_neighbors=1)
    reference.fit(samples)
    expected_dist, expected_ind = reference.kneighbors(samples)

    candidate = neighbors.NearestNeighbors(n_neighbors=1)
    fit_sources = (
        reference,
        neighbors.BallTree(samples),
        neighbors.KDTree(samples),
    )
    for source in fit_sources:
        candidate.fit(source)
        actual_dist, actual_ind = candidate.kneighbors(samples)

        assert_array_almost_equal(expected_dist, actual_dist)
        assert_array_almost_equal(expected_ind, actual_ind)
示例3
def test_haversine():
    """Compare BallTree haversine distances with pairwise ``dist.haversine``.

    Uses the first two columns of the module-level ``spatial_data`` array.
    """
    coords = spatial_data[:, :2]
    n_rows = spatial_data.shape[0]

    tree = BallTree(coords, metric="haversine")
    dist_matrix, _ = tree.query(coords, k=n_rows)

    # Brute-force reference: all pairwise distances, then each row sorted
    # so it can be compared element-wise with the query result.
    expected = np.array(
        [
            [dist.haversine(coords[i], coords[j]) for j in range(n_rows)]
            for i in range(n_rows)
        ]
    )
    expected.sort(axis=1)

    assert_array_almost_equal(
        expected,
        dist_matrix,
        err_msg="Distances don't match for metric haversine",
    )
示例4
def test_haversine(spatial_data):
    """Compare BallTree haversine distances with pairwise ``dist.haversine``.

    Parameters
    ----------
    spatial_data : array
        Only the first two columns are used as coordinates.
    """
    coords = spatial_data[:, :2]
    n_rows = spatial_data.shape[0]

    tree = BallTree(coords, metric="haversine")
    dist_matrix, _ = tree.query(coords, k=n_rows)

    # Brute-force reference: all pairwise distances, then each row sorted
    # so it can be compared element-wise with the query result.
    pairwise_rows = []
    for i in range(n_rows):
        pairwise_rows.append(
            [dist.haversine(coords[i], coords[j]) for j in range(n_rows)]
        )
    expected = np.array(pairwise_rows)
    expected.sort(axis=1)

    assert_array_almost_equal(
        expected,
        dist_matrix,
        err_msg="Distances don't match for metric haversine",
    )
示例5
def make_propensity_lists(self, train_ids, benchmark):
    """Split the training rows by treatment and index each group.

    Rows are fetched through ``benchmark.get_data_access().get_rows`` and
    each row's treatment comes from ``benchmark.get_assignment``. The
    method stores, as lists with one entry per treatment index:

    * ``self.original_data`` - the ``pair_data`` rows of that treatment,
    * ``self.ball_trees`` - a ``BallTree`` built over those rows,
    * ``self.treatment_ids`` - the matching entries of ``ids``.

    ``self.pca`` is also set (see the note on the disabled branch below).
    """
    input_data, ids, pair_data = benchmark.get_data_access().get_rows(train_ids)
    treatment_data, _ = zip(*map(benchmark.get_assignment, ids, input_data))
    treatment_data = np.array(treatment_data)

    # NOTE: the trailing ``and False`` keeps the PCA branch permanently
    # disabled, so ``self.pca`` always ends up as None.
    if pair_data.shape[-1] > 200 and False:
        self.pca = PCA(50, svd_solver="randomized")
        pair_data = self.pca.fit_transform(pair_data)
    else:
        self.pca = None

    def rows_per_treatment(rows):
        # One boolean-mask selection per treatment index.
        return [rows[treatment_data == t]
                for t in range(benchmark.get_num_treatments())]

    self.original_data = rows_per_treatment(pair_data)
    # Trees use BallTree's default metric; an earlier, commented-out
    # variant passed metric="mahalanobis" with the covariance of pair_data.
    self.ball_trees = [BallTree(group)
                       for group in rows_per_treatment(pair_data)]
    self.treatment_ids = rows_per_treatment(ids)
示例6
def test_objectmapper(self):
    """Each listed ``neighbors`` class is exposed unchanged on ``ModelFrame.neighbors``."""
    df = pdml.ModelFrame([])

    mapped_names = (
        'NearestNeighbors',
        'KNeighborsClassifier',
        'RadiusNeighborsClassifier',
        'KNeighborsRegressor',
        'RadiusNeighborsRegressor',
        'NearestCentroid',
        'BallTree',
        'KDTree',
        'DistanceMetric',
        'KernelDensity',
    )
    for name in mapped_names:
        # The accessor must hand back the very same object, not a wrapper.
        self.assertIs(getattr(df.neighbors, name), getattr(neighbors, name))
def calc_vert_vals(verts, pts, vals, method='max', k_points=100):
    """Assign a value to every vertex from the values of its nearest points.

    A ``BallTree`` is built over ``pts`` and queried for the ``k_points``
    nearest points of each vertex. The fraction of points that are a
    neighbor of at least one vertex is printed.

    Parameters
    ----------
    verts : array_like
        Query locations, one row per vertex.
    pts : array_like
        Locations of the known points, one row per point.
    vals : numpy.ndarray
        Values indexed by point (``vals[i]`` belongs to ``pts[i]``).
    method : {'max', 'dist'}
        ``'max'`` picks, per vertex, the neighbor value with the largest
        absolute value (sign preserved). ``'dist'`` returns the
        inverse-squared-distance weighted average of the neighbor values;
        points that coincide with the vertex (distance 0) receive all the
        weight, shared equally among them.
    k_points : int
        Number of neighbors per vertex, capped at ``len(pts)``.

    Returns
    -------
    numpy.ndarray
        One value per vertex.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'dist'`` nor ``'max'``.
    """
    # Validate first: previously an unknown method fell through both
    # branches and crashed with an UnboundLocalError on return.
    if method not in ('dist', 'max'):
        raise ValueError(
            "method must be 'dist' or 'max', got {!r}".format(method))

    from sklearn.neighbors import BallTree

    ball_tree = BallTree(pts)
    k_points = min(k_points, len(pts))
    dists, pts_inds = ball_tree.query(verts, k=k_points, return_distance=True)
    near_vals = vals[pts_inds]

    cover = len(np.unique(pts_inds.ravel())) / float(len(pts))
    print('{}% of the points are covered'.format(cover * 100))

    if method == 'dist':
        # A zero distance gives an infinite weight; normalising that row
        # naively would produce inf * 0 = nan. Rows containing exact
        # matches are therefore replaced by 0/1 indicator weights.
        with np.errstate(divide='ignore'):
            weights = 1 / (dists ** 2)
        exact = np.isinf(weights)
        has_exact = exact.any(axis=1)
        weights[has_exact] = exact[has_exact]
        weights = weights / np.sum(weights, 1, keepdims=True)
        return np.sum(near_vals * weights, 1)

    # method == 'max'
    strongest = np.argmax(abs(near_vals), 1)
    return near_vals[np.arange(near_vals.shape[0]), strongest]
示例8
def test_unsupervised_inputs():
    """NearestNeighbors must give the same neighbors for every accepted fit input.

    The inputs tried are an already fitted ``NearestNeighbors``, a
    ``BallTree`` and a ``KDTree``, all built from the same random sample.
    """
    data = rng.random_sample((10, 3))

    baseline = neighbors.NearestNeighbors(n_neighbors=1)
    baseline.fit(data)
    base_dist, base_ind = baseline.kneighbors(data)

    model = neighbors.NearestNeighbors(n_neighbors=1)
    for fitted_on in (baseline, neighbors.BallTree(data), neighbors.KDTree(data)):
        model.fit(fitted_on)
        got_dist, got_ind = model.kneighbors(data)

        # Both distances and indices have to match the baseline.
        assert_array_almost_equal(base_dist, got_dist)
        assert_array_almost_equal(base_ind, got_ind)
示例9
def test_barnes_hut_angle():
    """With ``angle=0`` the Barnes-Hut computation must match the exact method.

    For 2 and 3 components, the joint probabilities and the KL divergence
    obtained through the nearest-neighbor / Barnes-Hut code path are
    compared with those from the exact code path.
    """
    angle = 0.0
    perplexity = 10
    n_samples = 100
    n_features = 5
    k = n_samples - 1

    for n_components in (2, 3):
        degrees_of_freedom = float(n_components - 1.0)

        # Same seed every round, so both rounds use identical "distances".
        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)

        # Exact path.
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, _ = _kl_divergence(
            params, P, degrees_of_freedom, n_samples, n_components
        )

        # Neighbor path: query k + 1 neighbors and drop the first column.
        bt = BallTree(distances)
        _, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        # Look the distances up in the matrix instead of using the ones
        # returned by the tree query.
        distances_nn = np.array(
            [row[idx] for row, idx in zip(distances, neighbors_nn)]
        )
        first_row = distances[0, neighbors_nn[0]]
        assert np.all(first_row == distances_nn[0]), \
            abs(first_row - distances_nn[0])

        P_bh = _joint_probabilities_nn(
            distances_nn, neighbors_nn, perplexity, verbose=0
        )
        kl_bh, _ = _kl_divergence_bh(
            params, P_bh, degrees_of_freedom, n_samples, n_components,
            angle=angle, skip_num_points=0, verbose=0,
        )

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
示例10
def fit(self, X):
    """Validate ``X``, index it in a Hamming-metric ``BallTree`` and return ``self``.

    The tree is stored on ``self._tree`` and uses ``self.leaf_size``.
    """
    validated = self._validate_input(X, return_compact=False)
    self._tree = BallTree(
        validated,
        metric='hamming',
        leaf_size=self.leaf_size,
    )
    return self
示例11
def build_tree(points):
    """Build a Chebyshev-metric neighbor tree over ``points``.

    A ``BallTree`` is used when ``points`` has 20 or more columns,
    a ``KDTree`` otherwise.
    """
    tree_cls = BallTree if points.shape[1] >= 20 else KDTree
    return tree_cls(points, metric='chebyshev')
# TESTS
示例12
def update_tree(self, time):
    """Rebuild the ``BallTree`` over the stored items and record when it happened.

    The tree is built from the first ``self.items`` rows of ``self.state``
    with ``leaf_size=self.size`` and stored on ``self.tree``.

    Parameters
    ----------
    time : object
        Value stored in ``self.last_tree_built_time``.
    """
    # Parenthesised single-argument print: valid on Python 3 and prints
    # the same text on Python 2 (the original print statements were a
    # SyntaxError on Python 3).
    print('rebuild tree')
    self.tree = BallTree(self.state[:self.items, :], leaf_size=self.size)
    self.last_tree_built_time = time
    print('rebuild done')
示例13
def transform(self, documents):
    """Return a one-element list holding a ``BallTree`` built from ``documents``."""
    tree = BallTree(documents)
    return [tree]
示例14
def fit_transform(self, documents):
    """Fit the text pipeline on ``documents`` and index the result, once.

    Work is only done while ``self.transformer`` still compares equal to
    ``False`` (i.e. the pipeline has not been fit yet). In that case the
    pipeline is created and fitted, its output is stored on
    ``self.lexicon``, a ``BallTree`` over it on ``self.tree``, and
    ``self.save()`` is called. Nothing is returned.
    """
    if self.transformer == False:  # noqa: E712 - equality test kept on purpose
        normalizer = TextNormalizer(minimum=50, maximum=200)
        vectorizer = Pipeline([
            ('tfidf', TfidfVectorizer()),
            ('svd', TruncatedSVD(n_components=200)),
        ])
        self.transformer = Pipeline([
            ('norm', normalizer),
            ('transform', vectorizer),
        ])

        self.lexicon = self.transformer.fit_transform(documents)
        self.tree = BallTree(self.lexicon)
        self.save()
示例15
def knn_interpolation(cumulated_pc: np.ndarray, full_sized_data: np.ndarray, k=5):
    """
    Label every point of the full sized point cloud by a k-nearest-neighbor vote.

    Rows of ``cumulated_pc`` whose last column is -1 are ignored. For each
    row of ``full_sized_data`` the ``k`` nearest remaining rows (Euclidean
    distance on the first three columns) are looked up and the most frequent
    value of their last column, cast to int, becomes the label.

    :param cumulated_pc: cumulated pointcloud results after running the network
    :param full_sized_data: full sized point cloud
    :param k: k for k nearest neighbor interpolation
    :return: copy of ``full_sized_data`` with the predicted labels appended as a
        new last column (per the original documentation the previous last
        column holds the ground truth labels)
    """
    labeled = cumulated_pc[cumulated_pc[:, -1] != -1]
    targets = full_sized_data.copy()

    tree = BallTree(labeled[:, :3], metric='euclidean')
    _, neighbor_idx = tree.query(targets[:, :3], k=k)
    # One row of k neighbor labels per target point.
    neighbor_labels = labeled[neighbor_idx][:, :, -1].astype(int)

    votes = np.zeros(neighbor_labels.shape[0])
    for row, labels in enumerate(neighbor_labels):
        # bincount + argmax picks the most frequent label of the row.
        votes[row] = np.bincount(labels).argmax()

    output = np.zeros((targets.shape[0], targets.shape[1] + 1))
    output[:, :-1] = targets
    output[:, -1] = votes
    return output
示例16
def _calc_ball_trees(self, metric='euclidean'):
    """Build one ``BallTree`` per entry of ``self.dataset.data``.

    Each tree indexes the first two columns of its entry using ``metric``.
    Progress is reported through ``tqdm``. Returns the trees as a list in
    dataset order.
    """
    progress = tqdm(self.dataset.data,
                    desc='Ball trees have to be calculated from scratch')
    return [BallTree(cloud[:, :2], metric=metric) for cloud in progress]
示例17
def __init__(self, num_corrections=10, num_basic_results=10,
             home_dir=".",
             embedding_json=None,
             vocab_int_json=None, *args, **kwargs):
    """Set up the searcher, optionally with an embedding-based neighbor index.

    ``num_basic_results`` is forwarded to the parent initializer as
    ``num_res_return`` together with any extra positional and keyword
    arguments.

    When both ``embedding_json`` and ``vocab_int_json`` are given they are
    resolved relative to ``home_dir`` and loaded as JSON; the embeddings are
    indexed in a ``BallTree`` stored on ``self.searcher`` and
    ``self.use_embedding`` is set to True. Otherwise ``self.use_embedding``
    is False and no embedding attributes are created.
    """
    super().__init__(num_res_return=num_basic_results, *args, **kwargs)

    self.use_embedding = bool(embedding_json and vocab_int_json)
    if self.use_embedding:
        embedding_path = path.join(home_dir, embedding_json)
        vocab_path = path.join(home_dir, vocab_int_json)

        print("Loading JSON files, may take a while.")
        with open(embedding_path, 'r') as embedding_file:
            self.embeddings = np.array(json.load(embedding_file))
        with open(vocab_path, 'r') as vocab_file:
            self.vocab_int = json.load(vocab_file)
        # Reverse lookup: integer id -> word.
        self.int_vocab = {index: word
                          for word, index in self.vocab_int.items()}

        print("Training BallTree k-nearest neighbor searcher...")
        self.searcher = BallTree(self.embeddings, leaf_size=10)

    self.checker = Spell.Spell()
    self.num_corrections = num_corrections
    self.num_basic_search_results = num_basic_results
    # Total number of results is capped at 10.
    self.max_total_res = min(10, num_basic_results + num_corrections)
    print("Ready to use.")
示例18
def fit(self, X):
    """Fit detector on a dataframe.

    Builds ``self.tree_`` (a ``BallTree`` using ``self.leaf_size``,
    ``self.metric`` and, when set, ``self.metric_params``), fits
    ``self.neigh_`` and derives ``self.decision_scores_`` from the
    neighbor distances of the training samples.

    Parameters
    ----------
    X : dataframe of shape (n_samples, n_features)
        The input samples; converted with ``to_numpy()``.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    X = X.to_numpy()

    # Extra metric arguments are only forwarded when they were configured.
    metric_kwargs = {} if self.metric_params is None else self.metric_params
    self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                          metric=self.metric,
                          **metric_kwargs)

    self.neigh_.fit(X)
    neighbor_dists, _ = self.neigh_.kneighbors(
        n_neighbors=self.n_neighbors, return_distance=True)

    scores = self._get_dist_by_method(neighbor_dists)
    self.decision_scores_ = scores.ravel()
    self._process_decision_scores()
    return self
示例19
def test_barnes_hut_angle():
    """Barnes-Hut with ``angle=0`` has to reproduce the exact method.

    Runs once for 2 and once for 3 components and checks both the joint
    probability matrices and the KL divergence values against each other.
    """
    angle = 0.0
    perplexity = 10
    n_samples = 100
    n_features = 5
    k = n_samples - 1

    for n_components in (2, 3):
        degrees_of_freedom = float(n_components - 1.0)

        # Re-seeded each round, so the generated data is the same both times.
        rs = check_random_state(0)
        distances = rs.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = rs.randn(n_samples, n_components)

        # Exact method.
        P = _joint_probabilities(distances, perplexity, verbose=0)
        exact = _kl_divergence(params, P, degrees_of_freedom,
                               n_samples, n_components)
        kl_exact = exact[0]

        # Neighbor-based method: ask for k + 1 neighbors, discard column 0.
        tree = BallTree(distances)
        _, neighbors_nn = tree.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        # Distances are read from the matrix rather than taken from the query.
        picked = []
        for i in range(n_samples):
            picked.append(distances[i, neighbors_nn[i]])
        distances_nn = np.array(picked)
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]), \
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])

        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        barnes_hut = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                       n_samples, n_components,
                                       angle=angle, skip_num_points=0,
                                       verbose=0)
        kl_bh = barnes_hut[0]

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
示例20
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Fits ``self.neigh_`` on the validated input, stores a neighbor tree on
    ``self.tree_`` and derives ``self.decision_scores_`` from the neighbor
    distances of the training samples.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    X = check_array(X)
    self._set_n_classes(y)

    self.neigh_.fit(X)

    # In certain cases, _tree does not exist for NearestNeighbors
    # See Issue #158 (https://github.com/yzhao062/pyod/issues/158)
    fitted_tree = self.neigh_._tree
    if fitted_tree is None:
        # Fall back to building a BallTree ourselves.
        metric_kwargs = {} if self.metric_params is None else self.metric_params
        fitted_tree = BallTree(X, leaf_size=self.leaf_size,
                               metric=self.metric,
                               **metric_kwargs)
    self.tree_ = fitted_tree

    neighbor_dists, _ = self.neigh_.kneighbors(
        n_neighbors=self.n_neighbors, return_distance=True)
    scores = self._get_dist_by_method(neighbor_dists)

    self.decision_scores_ = scores.ravel()
    self._process_decision_scores()
    return self
示例21
def test_ball_tree(N=1):
    """Compare the local ``BallTree`` with scikit-learn's on random problems.

    Each trial draws a random data matrix and query point, asks both
    implementations for the ``k`` nearest neighbors and checks that the
    neighbors and their distances agree.

    Parameters
    ----------
    N : int
        Number of random trials to run.
    """
    np.random.seed(12345)

    # The trial count is kept separate from the per-trial sample count.
    # Previously the sample count overwrote ``N`` inside the loop, so the
    # number of trials actually run was random rather than the requested N.
    for _ in range(N):
        n_rows = np.random.randint(2, 100)
        n_cols = np.random.randint(2, 100)
        k = np.random.randint(1, n_rows)
        ls = np.min([np.random.randint(1, 10), n_rows - 1])
        X = np.random.rand(n_rows, n_cols)

        BT = BallTree(leaf_size=ls, metric=euclidean)
        BT.fit(X)

        x = np.random.rand(n_cols)
        mine = BT.nearest_neighbors(k, x)
        assert len(mine) == k

        # Sort our results by distance so they can be compared in order.
        mine_neighb = np.array([n.key for n in mine])
        mine_dist = np.array([n.distance for n in mine])
        sort_ix = np.argsort(mine_dist)
        mine_dist = mine_dist[sort_ix]
        mine_neighb = mine_neighb[sort_ix]

        sk = sk_BallTree(X, leaf_size=ls)
        theirs_dist, ind = sk.query(x.reshape(1, -1), k=k)
        sort_ix = np.argsort(theirs_dist.flatten())
        theirs_dist = theirs_dist.flatten()[sort_ix]
        theirs_neighb = X[ind.flatten()[sort_ix]]

        for j in range(len(theirs_dist)):
            np.testing.assert_almost_equal(mine_neighb[j], theirs_neighb[j])
            np.testing.assert_almost_equal(mine_dist[j], theirs_dist[j])

        print("PASSED")
#######################################################################
# Graphs #
#######################################################################