Python source code examples: sklearn.neighbors.KernelDensity()

Example 1
def __init__(self, hybrid=False, kernel='gaussian', n_jobs=-1, seed=None, **kwargs):
        """Init Kernel Density Estimation instance."""
        self.kernel = kernel
        self.n_jobs = n_jobs
        self.seed = seed

        self.model = KernelDensity(kernel=kernel, **kwargs)
        self.bandwidth = self.model.bandwidth

        self.hybrid = hybrid
        self.ae_net = None  # autoencoder network for the case of a hybrid model

        self.results = {
            'train_time': None,
            'test_time': None,
            'test_auc': None,
            'test_scores': None
        } 
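The wrapper keeps its own kernel, n_jobs and seed attributes, forwards everything else (notably bandwidth) to KernelDensity through **kwargs, and reads the bandwidth back from the underlying estimator. A minimal sketch of the equivalent direct usage, with an assumed bandwidth of 0.5 and synthetic data:

import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X = rng.randn(100, 2)  # 100 samples, 2 features

# Equivalent direct usage: bandwidth would arrive via **kwargs in the wrapper above.
model = KernelDensity(kernel='gaussian', bandwidth=0.5)
model.fit(X)
log_density = model.score_samples(X[:5])  # log-density at the first 5 training points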
Example 2
def gen_exp_name(model_class, model_kwargs):
    """Generates experiment name from model class and parameters.

    :param model_class: (type) the class, one of GaussianMixture, PCAPreDensity or KernelDensity.
    :param model_kwargs: (dict) constructor arguments to the class.
    :return: A string succinctly encoding the class and parameters."""
    if model_class == GaussianMixture:
        n_components = model_kwargs.get("n_components", 1)
        covariance_type = model_kwargs.get("covariance_type", "full")
        return f"gmm_{n_components}_components_{covariance_type}"
    elif model_class == PCAPreDensity:
        if model_kwargs["density_class"] == KernelDensity:
            return "pca_kde"
        elif model_kwargs["density_class"] == GaussianMixture:
            return "pca_gmm"
        else:
            return "pca_unknown"
    elif model_class == KernelDensity:
        return "kde"
    else:
        return "default" 
Example 3
def test_kde_badargs():
    assert_raises(ValueError, KernelDensity,
                  algorithm='blah')
    assert_raises(ValueError, KernelDensity,
                  bandwidth=0)
    assert_raises(ValueError, KernelDensity,
                  kernel='blah')
    assert_raises(ValueError, KernelDensity,
                  metric='blah')
    assert_raises(ValueError, KernelDensity,
                  algorithm='kd_tree', metric='blah')
    kde = KernelDensity()
    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
                  sample_weight=np.random.random((200, 10)))
    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
                  sample_weight=-np.random.random(200)) 
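The last two assertions demonstrate the constraints on sample_weight: it must be a 1-d array with one positive weight per sample, so a 2-d array or negative weights raise ValueError. A valid call, sketched with arbitrary positive weights:

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.random.random((200, 10))
w = np.random.uniform(0.1, 1.0, 200)  # one strictly positive weight per sample
KernelDensity().fit(X, sample_weight=w)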
Example 4
def test_pickling(tmpdir, sample_weight):
    # Make sure that predictions are the same before and after pickling. This
    # used to be a bug: sample_weight wasn't pickled, so the resulting tree
    # was missing some info.

    kde = KernelDensity()
    data = np.reshape([1., 2., 3.], (-1, 1))
    kde.fit(data, sample_weight=sample_weight)

    X = np.reshape([1.1, 2.1], (-1, 1))
    scores = kde.score_samples(X)

    file_path = str(tmpdir.join('dump.pkl'))
    _joblib.dump(kde, file_path)
    kde = _joblib.load(file_path)
    scores_pickled = kde.score_samples(X)

    assert_allclose(scores, scores_pickled) 
Example 5
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde_skl = KernelDensity(**kwargs)
    kde_skl.fit(data)
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(grid)
    return np.exp(log_pdf) 
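Because score_samples returns log-densities, the final np.exp recovers the density itself. A usage sketch under assumed inputs (a 1-d sample and an evenly spaced evaluation grid; kernel and bandwidth are illustrative values passed through **kwargs):

import numpy as np

data = np.random.randn(500, 1)            # 500 samples, 1 variable
grid = np.linspace(-4, 4, 200)[:, None]   # 200 evaluation points, same number of columns
density = kde_sklearn(data, grid, kernel='gaussian', bandwidth=0.3)
print(density.shape)  # (200,)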
Example 6
def __init__(self, D_d_sample, D_delta_t_sample, kde_type='scipy_gaussian', bandwidth=1):
        """

        :param D_d_sample: 1-d numpy array of angular diameter distances to the lens plane
        :param D_delta_t_sample: 1-d numpy array of time-delay distances
        :param kde_type: the kernel to use. Valid options are 'scipy_gaussian' or one of
            sklearn's kernels: 'gaussian', 'tophat', 'epanechnikov', 'exponential',
            'linear', 'cosine'. Default is 'scipy_gaussian'.
        :param bandwidth: width of kernel (in same units as the angular diameter quantities)
        """
        values = np.vstack([D_d_sample, D_delta_t_sample])
        if kde_type == 'scipy_gaussian':
            self._PDF_kernel = stats.gaussian_kde(values)
        else:
            from sklearn.neighbors import KernelDensity
            self._kde = KernelDensity(bandwidth=bandwidth, kernel=kde_type)
            self._kde.fit(values.T)
        self._kde_type = kde_type 
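The transpose in fit(values.T) reflects the two libraries' differing conventions: scipy.stats.gaussian_kde expects data of shape (n_dims, n_samples), whereas sklearn's KernelDensity.fit expects (n_samples, n_dims). A small sketch of the sklearn branch with made-up distance samples:

import numpy as np
from sklearn.neighbors import KernelDensity

D_d_sample = np.random.randn(1000) + 1000.0         # illustrative values only
D_delta_t_sample = np.random.randn(1000) + 3000.0
values = np.vstack([D_d_sample, D_delta_t_sample])   # shape (2, 1000)

kde = KernelDensity(bandwidth=1.0, kernel='gaussian')
kde.fit(values.T)                                    # shape (1000, 2): samples as rows
log_pdf = kde.score_samples(np.array([[1000.0, 3000.0]]))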
Example 7
def test_optuna_search_invalid_param_dist():
    # type: () -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = ["kernel", distributions.CategoricalDistribution(("gaussian", "linear"))]
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,  # type: ignore
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )

    with pytest.raises(ValueError, match="param_distributions must be a dictionary."):
        optuna_search.fit(X) 
Example 8
def test_optuna_search_pruning_without_partial_fit():
    # type: () -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        enable_pruning=True,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )

    with pytest.raises(ValueError, match="estimator must support partial_fit."):
        optuna_search.fit(X) 
Example 9
def test_optuna_search_negative_max_iter():
    # type: () -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        max_iter=-1,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )

    with pytest.raises(ValueError, match="max_iter must be > 0"):
        optuna_search.fit(X) 
Example 10
def test_optuna_search_tuple_instead_of_distribution():
    # type: () -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {"kernel": ("gaussian", "linear")}
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,  # type: ignore
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
    )

    with pytest.raises(ValueError, match="must be a optuna distribution."):
        optuna_search.fit(X) 
Example 11
def test_optuna_search_verbosity(verbose):
    # type: (int) -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
        verbose=verbose,
    )
    optuna_search.fit(X) 
Example 12
def test_optuna_search_subsample():
    # type: () -> None

    X, y = make_blobs(n_samples=10)
    est = KernelDensity()
    param_dist = {}  # type: ignore
    optuna_search = integration.OptunaSearchCV(
        est,
        param_dist,
        cv=3,
        error_score="raise",
        random_state=0,
        return_train_score=True,
        subsample=5,
    )
    optuna_search.fit(X) 
Example 13
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.neighbors.NearestNeighbors,
                      neighbors.NearestNeighbors)
        self.assertIs(df.neighbors.KNeighborsClassifier,
                      neighbors.KNeighborsClassifier)
        self.assertIs(df.neighbors.RadiusNeighborsClassifier,
                      neighbors.RadiusNeighborsClassifier)
        self.assertIs(df.neighbors.KNeighborsRegressor,
                      neighbors.KNeighborsRegressor)
        self.assertIs(df.neighbors.RadiusNeighborsRegressor,
                      neighbors.RadiusNeighborsRegressor)
        self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
        self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
        self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
        self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
        self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity) 
Example 14
def display(self, output_filename):
        fig, (self.ax) = plt.subplots(1, 1)
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
        has_legend = False
        for dataset in self.datasets:
            self._display_dataset(dataset)
            if dataset.label is not None:
                has_legend = True
        if self.title is not None:
            self.ax.set_xlabel(self.title)
        self.ax.set_ylabel('Density')
        if has_legend:
            self.ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3,
                           mode='expand', borderaxespad=0.)
        fig.savefig(output_filename)
        plt.close(fig) 
Example 15
def test_kde_algorithm_metric_choice():
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)    # 2 features required for haversine dist.
    Y = rng.randn(10, 2)

    for algorithm in ['auto', 'ball_tree', 'kd_tree']:
        for metric in ['euclidean', 'minkowski', 'manhattan',
                       'chebyshev', 'haversine']:
            if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
                assert_raises(ValueError, KernelDensity,
                              algorithm=algorithm, metric=metric)
            else:
                kde = KernelDensity(algorithm=algorithm, metric=metric)
                kde.fit(X)
                y_dens = kde.score_samples(Y)
                assert_equal(y_dens.shape, Y.shape[:1]) 
Example 16
def kde(data, rng, grid_size=10, **kwargs):
    """Kernel Density Estimation with Scikit-learn"""
    n_samples = data.shape[0]
    n_dims = data.shape[1]

    bandwidth = (n_samples * (n_dims + 2) / 4.)**(-1. / (n_dims + 4.))
    kde_skl = KernelDensity(bandwidth=bandwidth, **kwargs)
    kde_skl.fit(data)

    space = [linspace(i[0], i[1], grid_size) for i in rng]
    grid = meshgrid(*tuple(space))

    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(vstack([ravel(g) for g in grid]).T)
    return exp(log_pdf), space 
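The bandwidth line is Silverman's rule-of-thumb factor, which is reasonable for roughly unit-variance data; the helper then evaluates the density on a regular grid built from rng. A hedged usage sketch (the function itself assumes linspace, meshgrid, vstack, ravel and exp are imported from numpy):

import numpy as np

data = np.random.randn(300, 2)      # 300 samples, 2 dimensions
rng = [(-3, 3), (-3, 3)]            # one (min, max) pair per dimension
density, space = kde(data, rng, grid_size=20, kernel='gaussian')
print(density.shape)  # (400,): one value per flattened grid point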
Example 17
def pca_kde():
    model_class = PCAPreDensity
    model_kwargs = {"density_class": KernelDensity}
    _ = locals()  # quieten flake8 unused variable warning
    del _ 
Example 18
def kde():
    model_class = KernelDensity
    _ = locals()  # quieten flake8 unused variable warning
    del _ 
Example 19
def _fit_single_density(self, flat_transitions):
        # This bandwidth was chosen to make sense with standardised inputs that
        # have unit variance in each component. There might be a better way to
        # choose it automatically.
        density_model = KernelDensity(
            kernel=self.kernel, bandwidth=self.kernel_bandwidth
        )
        density_model.fit(flat_transitions)
        return density_model 
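The comment presupposes that the flattened transitions have already been standardised to unit variance per component, so a single fixed kernel_bandwidth is comparable across dimensions. A hedged sketch of such preprocessing (StandardScaler is an illustrative choice, not necessarily what the surrounding codebase uses):

import numpy as np
from sklearn.preprocessing import StandardScaler

raw_transitions = np.random.randn(500, 4) * np.array([1.0, 5.0, 0.1, 2.0])
flat_transitions = StandardScaler().fit_transform(raw_transitions)  # unit variance per column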
Example 20
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42) 
Example 21
def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
                        atol=atol, rtol=rtol)
    log_dens = kde.fit(X).score_samples(Y)
    assert_allclose(np.exp(log_dens), dens_true,
                    atol=atol, rtol=max(1E-7, rtol))
    assert_allclose(np.exp(kde.score(Y)),
                    np.prod(dens_true),
                    atol=atol, rtol=max(1E-7, rtol)) 
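The second assertion relies on KernelDensity.score(Y) returning the total log-likelihood, i.e. the sum of score_samples(Y), so its exponential equals the product of the pointwise densities. A quick sketch of that identity on synthetic data:

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.random.randn(50, 1)
Y = np.random.randn(5, 1)
kde = KernelDensity(bandwidth=0.5).fit(X)
assert np.allclose(kde.score(Y), kde.score_samples(Y).sum())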
Example 22
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1)) 
Example 23
def test_kde_algorithm_metric_choice(algorithm, metric):
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)    # 2 features required for haversine dist.
    Y = rng.randn(10, 2)

    if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
        assert_raises(ValueError, KernelDensity,
                      algorithm=algorithm, metric=metric)
    else:
        kde = KernelDensity(algorithm=algorithm, metric=metric)
        kde.fit(X)
        y_dens = kde.score_samples(Y)
        assert_equal(y_dens.shape, Y.shape[:1]) 
Example 24
def _fit(self, X):
        self.estimator_   = KernelDensity(
            algorithm     = self.algorithm,
            atol          = self.atol,
            bandwidth     = self.bandwidth,
            breadth_first = self.breadth_first,
            kernel        = self.kernel,
            leaf_size     = self.leaf_size,
            metric        = self.metric,
            rtol          = self.rtol,
            metric_params = self.metric_params
        ).fit(X)

        return self 
Example 25
def setup(self):
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)

        height, width = self.shape
        self.uniform_density = -np.log(width*height)

        self.kde_constant = np.log(1-self.regularization)
        self.uniform_constant = np.log(self.regularization) 
Example 26
def setup(self):
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)

        self.kde_constant = np.log(1-self.regularization)
        self.uniform_constant = np.log(self.regularization) 
Example 27
def setup(self):
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth) 
Example 28
def __init__(self, stimuli, fixations, bandwidth, eps = 1e-20, **kwargs):
        super(CrossvalidatedBaselineModel, self).__init__(**kwargs)
        self.stimuli = stimuli
        self.fixations = fixations
        self.bandwidth = bandwidth
        self.eps = eps
        self.xs, self.ys = normalize_fixations(stimuli, fixations)
        #self.kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(np.vstack([self.xs, self.ys]).T)
        self.shape_cache = {} 
Example 29
def display(x, color, list_save=None):
    kde = KernelDensity(kernel='gaussian', bandwidth=.005).fit(x.data.cpu().numpy())
    dens = np.exp(kde.score_samples(t_plot))
    dens[0] = 0
    dens[-1] = 0
    plt.fill(t_plot, dens, color=color)
    if list_save is not None:
        list_save.append(dens.ravel())  # We'll save a csv at the end
Example 30
def optimize_kde(X):
    # use grid search cross-validation to optimize the bandwidth
    params = {'bandwidth': np.logspace(-3, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params, n_jobs=8, cv=5, verbose=1)
    grid.fit(X)
    
    print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
    
    # use the best estimator to compute the kernel density estimate
    kde = grid.best_estimator_
    
    return kde
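GridSearchCV scores each candidate bandwidth with KernelDensity.score (the total held-out log-likelihood), averaged across the 5 folds, and best_estimator_ is refit on all of X. A hedged usage sketch (the function itself assumes GridSearchCV and KernelDensity are already imported in its module):

import numpy as np

X = np.random.randn(1000, 1)
kde = optimize_kde(X)
new_samples = kde.sample(10, random_state=0)   # draw from the fitted density
log_dens = kde.score_samples(X[:5])            # log-density at the first 5 points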