Python源码示例:sklearn.neighbors.BallTree()

示例1
def avgdigamma(data, dvec, leaf_size=16):
    """Convenience function for finding expectation value of <psi(nx)> given
    some number of neighbors in some radius in a marginal space.

    Parameters
    ----------
    points : numpy.ndarray
    dvec : array_like (n_points,)
    Returns
    -------
    avgdigamma : float
        expectation value of <psi(nx)>
    """
    tree = BallTree(data, leaf_size=leaf_size, p=float('inf'))

    n_points = tree.query_radius(data, dvec - EPS, count_only=True)

    return digamma(n_points).mean() 
示例2
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
示例3
def test_haversine():
    tree = BallTree(spatial_data[:, :2], metric="haversine")
    dist_matrix, _ = tree.query(spatial_data[:, :2], k=spatial_data.shape[0])
    test_matrix = np.array(
        [
            [
                dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
                for j in range(spatial_data.shape[0])
            ]
            for i in range(spatial_data.shape[0])
        ]
    )
    test_matrix.sort(axis=1)
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match " "for metric haversine",
    ) 
示例4
def test_haversine(spatial_data):
    tree = BallTree(spatial_data[:, :2], metric="haversine")
    dist_matrix, _ = tree.query(spatial_data[:, :2], k=spatial_data.shape[0])
    test_matrix = np.array(
        [
            [
                dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
                for j in range(spatial_data.shape[0])
            ]
            for i in range(spatial_data.shape[0])
        ]
    )
    test_matrix.sort(axis=1)
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match " "for metric haversine",
    ) 
示例5
def make_propensity_lists(self, train_ids, benchmark):
        input_data, ids, pair_data = benchmark.get_data_access().get_rows(train_ids)
        assignments = map(benchmark.get_assignment, ids, input_data)
        treatment_data, batch_y = zip(*assignments)
        treatment_data = np.array(treatment_data)

        if pair_data.shape[-1] > 200 and False:
            self.pca = PCA(50, svd_solver="randomized")
            pair_data = self.pca.fit_transform(pair_data)
        else:
            self.pca = None

        # covariance_matrix = np.cov(pair_data, rowvar=False)
        self.original_data = [pair_data[treatment_data == t]
                              for t in range(benchmark.get_num_treatments())]
        # self.ball_trees = [BallTree(pair_data[treatment_data == t], metric="mahalanobis",
        #                             V=covariance_matrix)
        #                    for t in range(benchmark.get_num_treatments())]
        self.ball_trees = [BallTree(pair_data[treatment_data == t])
                           for t in range(benchmark.get_num_treatments())]
        self.treatment_ids = [ids[treatment_data == t]
                              for t in range(benchmark.get_num_treatments())] 
示例6
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.neighbors.NearestNeighbors,
                      neighbors.NearestNeighbors)
        self.assertIs(df.neighbors.KNeighborsClassifier,
                      neighbors.KNeighborsClassifier)
        self.assertIs(df.neighbors.RadiusNeighborsClassifier,
                      neighbors.RadiusNeighborsClassifier)
        self.assertIs(df.neighbors.KNeighborsRegressor,
                      neighbors.KNeighborsRegressor)
        self.assertIs(df.neighbors.RadiusNeighborsRegressor,
                      neighbors.RadiusNeighborsRegressor)
        self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
        self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
        self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
        self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
        self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity) 
示例7
def calc_vert_vals(verts, pts, vals, method='max', k_points=100):
    from sklearn.neighbors import BallTree
    ball_tree = BallTree(pts)
    k_points = min([k_points, len(pts)])
    dists, pts_inds = ball_tree.query(verts, k=k_points, return_distance=True)
    near_vals = vals[pts_inds]
    # sig_dists = dists[np.where(abs(near_vals)>2)]
    cover = len(np.unique(pts_inds.ravel()))/float(len(pts))
    print('{}% of the points are covered'.format(cover*100))
    if method == 'dist':
        n_dists = 1/(dists**2)
        norm = 1/np.sum(n_dists, 1)
        norm = np.reshape(norm, (len(norm), 1))
        n_dists = norm * n_dists
        verts_vals = np.sum(near_vals * n_dists, 1)
    elif method == 'max':
        verts_vals = near_vals[range(near_vals.shape[0]), np.argmax(abs(near_vals), 1)]
    return verts_vals 
示例8
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
示例9
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3) 
示例10
def fit(self, X):
        X = self._validate_input(X, return_compact=False)
        self._tree = BallTree(X, metric='hamming', leaf_size=self.leaf_size)
        return self 
示例11
def build_tree(points):
    if points.shape[1] >= 20:
        return BallTree(points, metric='chebyshev')
    return KDTree(points, metric='chebyshev')

# TESTS 
示例12
def update_tree(self, time):
        print 'rebuild tree'
        self.tree = BallTree(self.state[:self.items, :], leaf_size=self.size)
        self.last_tree_built_time = time
        print 'rebuild done' 
示例13
def transform(self, documents):
        return [
            BallTree(documents)
        ] 
示例14
def fit_transform(self, documents):
        # Transformer will be False if pipeline hasn't been fit yet,
        # Trigger fit_transform and save the transformer and lexicon.
        if self.transformer == False:
            self.transformer = Pipeline([
                ('norm', TextNormalizer(minimum=50, maximum=200)),
                ('transform', Pipeline([
                    ('tfidf', TfidfVectorizer()),
                    ('svd', TruncatedSVD(n_components=200))
                ])
                 )
            ])
            self.lexicon = self.transformer.fit_transform(documents)
            self.tree = BallTree(self.lexicon)
            self.save() 
示例15
def knn_interpolation(cumulated_pc: np.ndarray, full_sized_data: np.ndarray, k=5):
    """
    Using k-nn interpolation to find labels of points of the full sized pointcloud
    :param cumulated_pc: cumulated pointcloud results after running the network
    :param full_sized_data: full sized point cloud
    :param k: k for k nearest neighbor interpolation
    :return: pointcloud with predicted labels in last column and ground truth labels in last but one column
    """

    labeled = cumulated_pc[cumulated_pc[:, -1] != -1]
    to_be_predicted = full_sized_data.copy()

    ball_tree = BallTree(labeled[:, :3], metric='euclidean')

    knn_classes = labeled[ball_tree.query(to_be_predicted[:, :3], k=k)[1]][:, :, -1].astype(int)

    interpolated = np.zeros(knn_classes.shape[0])

    for i in range(knn_classes.shape[0]):
        interpolated[i] = np.bincount(knn_classes[i]).argmax()

    output = np.zeros((to_be_predicted.shape[0], to_be_predicted.shape[1]+1))
    output[:, :-1] = to_be_predicted

    output[:, -1] = interpolated

    return output 
示例16
def _calc_ball_trees(self, metric='euclidean'):
        ball_trees = []
        for pointcloud_data in tqdm(self.dataset.data, desc='Ball trees have to be calculated from scratch'):
            ball_trees.append(BallTree(pointcloud_data[:, :2], metric=metric))
        return ball_trees 
示例17
def __init__(self, num_corrections=10, num_basic_results=10,
                 home_dir=".",
                 embedding_json=None,
                 vocab_int_json=None, *args, **kwargs):
        super().__init__(num_res_return=num_basic_results, *args, **kwargs)

        self.use_embedding = False

        if embedding_json and vocab_int_json:
            self.use_embedding = True
            embedding_json = path.join(home_dir, embedding_json)
            vocab_int_json = path.join(home_dir, vocab_int_json)
            # load json files
            print("Loading JSON files, may take a while.")
            with open(embedding_json, 'r') as read_file:
                self.embeddings = np.array(json.load(read_file))
            with open(vocab_int_json, 'r') as read_file:
                self.vocab_int = json.load(read_file)
            self.int_vocab = {i: word for word, i in self.vocab_int.items()}

            # train k nearest neighbor model
            print("Training BallTree k-nearest neighbor searcher...")
            self.searcher = BallTree(self.embeddings, leaf_size=10)

        self.checker = Spell.Spell()
        self.num_corrections = num_corrections
        self.num_basic_search_results = num_basic_results
        self.max_total_res = min(10, num_basic_results+num_corrections)

        print("Ready to use.") 
示例18
def fit(self, X):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : dataframe of shape (n_samples, n_features)
            The input samples.
        """

        # validate inputs X and y (optional)
        X = X.to_numpy()

        if self.metric_params is not None:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric,
                                  **self.metric_params)
        else:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric)
        self.neigh_.fit(X)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
示例19
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3) 
示例20
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """

        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.neigh_.fit(X)

        # In certain cases, _tree does not exist for NearestNeighbors
        # See Issue #158 (https://github.com/yzhao062/pyod/issues/158)
        # n_neighbors = 100
        if self.neigh_._tree is not None:
            self.tree_ = self.neigh_._tree

        else:
            if self.metric_params is not None:
                self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                      metric=self.metric,
                                      **self.metric_params)
            else:
                self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                      metric=self.metric)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
示例21
def test_ball_tree(N=1):
    np.random.seed(12345)
    i = 0
    while i < N:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        ls = np.min([np.random.randint(1, 10), N - 1])

        X = np.random.rand(N, M)
        BT = BallTree(leaf_size=ls, metric=euclidean)
        BT.fit(X)

        x = np.random.rand(M)
        mine = BT.nearest_neighbors(k, x)
        assert len(mine) == k

        mine_neighb = np.array([n.key for n in mine])
        mine_dist = np.array([n.distance for n in mine])

        sort_ix = np.argsort(mine_dist)
        mine_dist = mine_dist[sort_ix]
        mine_neighb = mine_neighb[sort_ix]

        sk = sk_BallTree(X, leaf_size=ls)
        theirs_dist, ind = sk.query(x.reshape(1, -1), k=k)
        sort_ix = np.argsort(theirs_dist.flatten())

        theirs_dist = theirs_dist.flatten()[sort_ix]
        theirs_neighb = X[ind.flatten()[sort_ix]]

        for j in range(len(theirs_dist)):
            np.testing.assert_almost_equal(mine_neighb[j], theirs_neighb[j])
            np.testing.assert_almost_equal(mine_dist[j], theirs_dist[j])

        print("PASSED")
        i += 1


#######################################################################
#                               Graphs                                #
#######################################################################