Python source code examples: sklearn.base.clone()

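sklearn.base.clone() constructs a new estimator with the same constructor parameters as the original but none of its fitted state, which is why the examples below call it whenever a fresh, unfitted copy is needed. A minimal sketch of the behavior (the estimator choice is illustrative):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

original = LogisticRegression(C=0.5).fit([[0.], [1.]], [0, 1])
copy = clone(original)

assert copy.get_params() == original.get_params()  # same hyperparameters
assert not hasattr(copy, 'coef_')                  # but no fitted state
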
Example 1
def _check_behavior_2d(clf):
    # 1d case
    X = np.array([[0], [0], [0], [0]])  # ignored
    y = np.array([1, 2, 1, 1])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape)

    # 2d case
    y = np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]])
    est = clone(clf)
    est.fit(X, y)
    y_pred = est.predict(X)
    assert_equal(y.shape, y_pred.shape) 
Example 2
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        groups = kwargs.pop('groups')

        # test_score, n_samples, parameters
        out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params=fit_params,
                return_train_score=False,
                return_n_test_samples=True,
                return_times=False,
                return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv.split(X, y, groups))

        # test_score, n_samples, _, parameters
        return [(mod[0], mod[1], None, mod[2]) for mod in out] 
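
In Examples 2 and 23, wrapping clone(base_estimator) inside the delayed() call gives each (parameter, split) task its own unfitted estimator, so parallel workers never mutate shared fitted state. A reduced sketch of the pattern (the data, grid, and helper here are illustrative):

from joblib import Parallel, delayed
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

X = np.random.RandomState(0).rand(20, 3)
y = np.arange(20.0)

def fit_one(est, train):
    # each task fits its own private copy
    return est.fit(X[train], y[train]).coef_

coefs = Parallel(n_jobs=2)(
    delayed(fit_one)(clone(Ridge(alpha=a)), train)
    for a in (0.1, 1.0)
    for train, _ in KFold(n_splits=3).split(X))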
Example 3
def _make_estimator(self, append=True, random_state=None):
        """Make and configure a copy of the `base_estimator_` attribute.

        Warning: This method should be used to properly instantiate new
        sub-estimators.
        """

        # TODO: add a check for estimator_param
        estimator = clone(self.base_estimator_)
        estimator.set_params(**self.estimator_params)

        if random_state is not None:
            _set_random_states(estimator, random_state)

        if append:
            self.estimators_.append(estimator)

        return estimator 
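
The clone-then-set_params sequence in Example 3 is how ensemble code derives each sub-estimator from a stored template. The same idea outside a class, as a hedged sketch (the estimator and parameter values are illustrative):

from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier

template = DecisionTreeClassifier(max_depth=3)
sub_estimators = []
for seed in range(5):
    est = clone(template)              # fresh, unfitted copy of the template
    est.set_params(random_state=seed)  # per-copy configuration
    sub_estimators.append(est)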
Example 4
def check_cross_val_predict_binary(est, X, y, method):
    """Helper for tests of cross_val_predict with binary classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    if y.ndim == 1:
        exp_shape = (len(X),) if method == 'decision_function' else (len(X), 2)
    else:
        exp_shape = y.shape
    expected_predictions = np.zeros(exp_shape)
    for train, test in cv.split(X, y):
        est = clone(est).fit(X[train], y[train])
        expected_predictions[test] = getattr(est, method)(X[test])

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example 5
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {'decision_function': float_min,
                      'predict_log_proba': float_min,
                      'predict_proba': 0}
    expected_predictions = np.full((len(X), len(set(y))),
                                   default_values[method],
                                   dtype=np.float64)
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype('str')]:
        assert_allclose(cross_val_predict(est, X, tg, method=method, cv=cv),
                        expected_predictions) 
Example 6
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_) 
Example 7
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that fit_predict on a pipeline yields the same results as
    # applying the transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred) 
Example 8
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order='random', random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert_not_equal(list(chain_random.order_), list(range(4)))
        assert_equal(len(chain_random.order_), 4)
        assert_equal(len(set(chain_random.order_)), 4)
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_,
                              chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_) 
Example 9
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25 
Example 10
def test_classifier_results():
    """tests if classifier results match target"""
    alpha = .1
    n_features = 20
    n_samples = 10
    tol = .01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12) 
Example 11
def test_weighted_vs_repeated():
    # a sample weight of N should yield the same result as an N-fold
    # repetition of the sample
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)
    estimators = [KMeans(init="k-means++", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init="random", n_clusters=n_clusters,
                         random_state=42),
                  KMeans(init=centers.copy(), n_clusters=n_clusters,
                         random_state=42),
                  MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                  random_state=42)]
    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        assert_almost_equal(v_measure_score(est_repeated.labels_,
                                            repeated_labels), 1.0)
        if not isinstance(estimator, MiniBatchKMeans):
            assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                                _sort_centers(est_repeated.cluster_centers_)) 
Example 12
def test_nmf_sparse_input():
    # Test that sparse matrices are accepted as input
    from scipy.sparse import csc_matrix

    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('cd', 'mu'):
        est1 = NMF(solver=solver, n_components=5, init='random',
                   random_state=0, tol=1e-2)
        est2 = clone(est1)

        W1 = est1.fit_transform(A)
        W2 = est2.fit_transform(A_sparse)
        H1 = est1.components_
        H2 = est2.components_

        assert_array_almost_equal(W1, W2)
        assert_array_almost_equal(H1, H2)
Example 13
def apply_gridsearch(self, model):
        """
        Apply grid search to the ML algorithm over the specified parameters.

        Returns the updated best score and parameters.
        """
        # check if a custom evaluation function is specified
        if callable(self.params_cv['scoring']):
            scoring = make_scorer(self.params_cv['scoring'],
                                  greater_is_better=self._greater_is_better)
        else:
            scoring = self.params_cv['scoring']

        gsearch = GridSearchCV(estimator=model, param_grid=self.get_params_tune(),
                               scoring=scoring, iid=self.params_cv['iid'],
                               cv=self.params_cv['cv_folds'],
                               n_jobs=self.params_cv['n_jobs'])
        gsearch.fit(self.X, self.y)

        # update the best model if best_score_ improves on it
        if (gsearch.best_score_ * self._score_mult) > (self.best_score * self._score_mult):
            self.best_model = clone(gsearch.best_estimator_)
            self.best_score = gsearch.best_score_

        # update tuned parameters with the optimal values
        for key, value in gsearch.best_params_.items():
            self._params[key] = value
        self._temp_score = gsearch.best_score_
        return self
Example 14
def _clone_and_score_clusterer(clf, X, n_clusters):
    """Clones and scores clusterer instance.

    Args:
        clf: Clusterer instance that implements ``fit``, ``fit_predict``, and
            ``score`` methods, and an ``n_clusters`` hyperparameter.
            e.g. :class:`sklearn.cluster.KMeans` instance

        X (array-like, shape (n_samples, n_features)):
            Data to cluster, where n_samples is the number of samples and
            n_features is the number of features.

        n_clusters (int): Number of clusters

    Returns:
        score: Score of clusters

        time: Number of seconds it took to fit cluster
    """
    start = time.time()
    clf = clone(clf)
    setattr(clf, 'n_clusters', n_clusters)
    return clf.fit(X).score(X), time.time() - start 
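
A helper like Example 14 is typically called once per candidate cluster count, e.g. to draw an elbow plot. A hedged usage sketch, assuming the function above is in scope (the data and range are illustrative):

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(100, 2)
results = [_clone_and_score_clusterer(KMeans(random_state=0), X, k)
           for k in range(2, 8)]
scores = [score for score, _ in results]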
Example 15
def fit(self, X, y=None, **fit_params):

        if not isinstance(X, pd.DataFrame):
            raise ValueError('X is not a pandas.DataFrame')

        self.models_ = {}

        columns = self._get_fit_columns(X)

        for key in X[self.by].unique():

            # Copy the model
            model = clone(self.base_model)

            # Select the rows that will be fitted
            mask = (X[self.by] == key).tolist()
            rows = X.index[mask]

            # Fit the model
            model.fit(X.loc[rows, columns], y[mask], **fit_params)

            # Save the model
            self.models_[key] = model

        return self 
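
Example 15 fits one clone of base_model per distinct value in the column named by self.by. The matching predict method is not shown; a hedged sketch of what it could look like (purely illustrative, not part of the original source):

def predict(self, X):
        # route each row to the model fitted for its group key
        columns = self._get_fit_columns(X)
        y_pred = pd.Series(index=X.index, dtype=float)
        for key, model in self.models_.items():
            mask = (X[self.by] == key).tolist()
            rows = X.index[mask]
            y_pred[rows] = model.predict(X.loc[rows, columns])
        return y_pred.values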
Example 16
def net_fit(self, net_cls, module_cls, dummy_callback, data):
        # Careful, don't call additional fits or set_params on this,
        # since that would have side effects on other tests.
        X, y = data

        # We need a new instance of the net and cannot reuse the net
        # fixture, because otherwise fixture net and net_fit refer to
        # the same object; also, we cannot clone(net) because this
        # will result in the dummy_callback not being the mock anymore
        net = net_cls(
            module_cls,
            callbacks=[('dummy', dummy_callback)],
            max_epochs=10,
            lr=0.1,
        )
        return net.fit(X, y) 
Example 17
def test_changing_model_reinitializes_optimizer(self, net, data):
        # The idea is that we change the model using `set_params` to
        # add parameters. Since the optimizer depends on the model
        # parameters it needs to be reinitialized.
        X, y = data

        net.set_params(module__nonlin=nn.ReLU())
        net.fit(X, y)

        net.set_params(module__nonlin=nn.PReLU())
        assert isinstance(net.module_.nonlin, nn.PReLU)
        d1 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # make sure that we do not initialize again by making sure that
        # the network is initialized and by using partial_fit.
        assert net.initialized_
        net.partial_fit(X, y)
        d2 = net.module_.nonlin.weight.data.clone().cpu().numpy()

        # all newly introduced parameters should have been trained (changed)
        # by the optimizer after 10 epochs.
        assert (abs(d2 - d1) > 1e-05).all() 
Example 18
def fit(self, X, y):
        """Fit estimator on parameterized data.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features+len(params))]:
            The samples, concatenated with the corresponding parameter values.

        * `y` [array-like, shape=(n_samples,)]:
            The output values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        self.stacker_ = ParameterStacker(self.params)

        # XXX: this assumes that X is extended with parameters
        self.n_features_ = X.shape[1] - len(self.params)
        self.estimator_ = clone(self.base_estimator).fit(X, y)

        return self 
Example 19
def oob_dropcol_importances(rf, X_train, y_train):
    """
    Compute drop-column feature importances for scikit-learn.

    Given a RandomForestClassifier or RandomForestRegressor in rf
    and training X and y data, return a data frame with columns
    Feature and Importance sorted in reverse order by importance.

    A clone of rf is trained once to get the baseline score and then
    again, once per feature to compute the drop in out of bag (OOB)
    score.

    return: A data frame with Feature, Importance columns

    SAMPLE CODE

    rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, oob_score=True)
    X_train, y_train = ..., ...
    rf.fit(X_train, y_train)
    imp = oob_dropcol_importances(rf, X_train, y_train)
    """
    rf_ = clone(rf)
    rf_.random_state = 999
    rf_.fit(X_train, y_train)
    baseline = rf_.oob_score_
    imp = []
    for col in X_train.columns:
        X = X_train.drop(col, axis=1)
        rf_ = clone(rf)
        rf_.random_state = 999
        rf_.fit(X, y_train)
        o = rf_.oob_score_
        imp.append(baseline - o)
    imp = np.array(imp)
    I = pd.DataFrame(data={'Feature':X_train.columns, 'Importance':imp})
    I = I.set_index('Feature')
    I = I.sort_values('Importance', ascending=False)
    return I 
Example 20
def clone(self):
        return SklearnProbaAdapter(clone(self._model)) 
Example 21
def __init__(self, estimator, k_features, scoring=accuracy_score,
             test_size=0.25, random_state=1):
        self.scoring = scoring
        self.estimator = clone(estimator)
        self.k_features = k_features
        self.test_size = test_size
        self.random_state = random_state 
Example 22
def fit(self, X, y):
        """ Fit classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Matrix of training samples.

        y : array-like, shape = [n_samples]
            Vector of target class labels.

        Returns
        -------
        self : object

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError("vote must be 'probability' or 'classlabel'"
                             "; got (vote=%r)"
                             % self.vote)

        if self.weights and len(self.weights) != len(self.classifiers):
            raise ValueError('Number of classifiers and weights must be equal'
                             '; got %d weights, %d classifiers'
                             % (len(self.weights), len(self.classifiers)))

        # Use LabelEncoder to ensure class labels start with 0, which
        # is important for np.argmax call in self.predict
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self 
Example 23
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        # test_score, n_samples, score_time, parameters
        return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params, return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv) 
Example 24
def _clone_h2o_obj(estimator, ignore=False, **kwargs):
    # do initial clone
    est = clone(estimator)

    # set kwargs:
    if kwargs:
        for k, v in six.iteritems(kwargs):
            setattr(est, k, v)

    # check on h2o estimator
    if isinstance(estimator, H2OPipeline):
        # the last step from the original estimator
        e = estimator.steps[-1][1]
        if isinstance(e, H2OEstimator):
            last_step = est.steps[-1][1]

            # so it's the last step
            for k, v in six.iteritems(e._parms):
                k, v = _kv_str(k, v)

                # if (not k in PARM_IGNORE) and (not v is None):
                #   e._parms[k] = v
                last_step._parms[k] = v

    # otherwise it's a BaseH2OFunctionWrapper
    return est 
Example 25
def _new_base_estimator(est, clonable_kwargs):
    """When the grid searches are pickled, the estimator
    has to be dropped out. When we load it back in, we have
    to reinstate a new one; since the fit is predicated on
    being able to clone a base estimator, we have to have
    an estimator to clone and fit.

    Parameters
    ----------

    est : str
        The type of model to build

    clonable_kwargs : dict
        The parameters to set on the newly instantiated estimator

    Returns
    -------

    estimator : H2OEstimator
        The cloned base estimator
    """
    est_map = {
        'dl':   H2ODeepLearningEstimator,
        'gbm':  H2OGradientBoostingEstimator,
        'glm':  H2OGeneralizedLinearEstimator,
        # 'glrm': H2OGeneralizedLowRankEstimator,
        # 'km'  : H2OKMeansEstimator,
        'nb':   H2ONaiveBayesEstimator,
        'rf':   H2ORandomForestEstimator
    }

    estimator = est_map[est]()  # initialize the new ones
    for k, v in six.iteritems(clonable_kwargs):
        k, v = _kv_str(k, v)
        estimator._parms[k] = v

    return estimator 
Example 26
def test_clonable(est):
    # fit it, then clone it
    est.fit(y)
    est2 = clone(est)
    assert isinstance(est2, est.__class__)
    assert est is not est2 
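
Example 26 passes for any estimator that follows the scikit-learn contract: every constructor argument is stored verbatim as an attribute, so get_params()/set_params() round-trip and clone() can rebuild the object. A minimal clonable estimator, as an illustrative sketch:

from sklearn.base import BaseEstimator, clone

class Shift(BaseEstimator):
    def __init__(self, offset=0.0):
        self.offset = offset  # stored verbatim; this is what clone() copies

    def fit(self, X, y=None):
        self.mean_ = X.mean() + self.offset  # fitted state, trailing underscore
        return self

est = Shift(offset=1.5)
assert clone(est).offset == est.offset
assert clone(est) is not est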
Example 27
def build_ensemble(cls, **kwargs):
    """Build ML-Ensemble"""
    ens = cls(**kwargs)

    use = ["ExtraTrees", "RandomForest",
           "LogisticRegression-SAG", "MLP-adam"]

    meta = RandomForestClassifier(n_estimators=100,
                                  random_state=0,
                                  n_jobs=-1)
    base_learners = list()
    for est_name, est in ESTIMATORS.items():
        e = clone(est)
        if est_name not in use:
            continue
        elif est_name == "MLP-adam":
            e.verbose = False
        try:
            e.set_params(**{'n_jobs': 1})
        except ValueError:
            pass

        base_learners.append((est_name, e))
    ens.add(base_learners, proba=True, shuffle=True, random_state=1)
    ens.add_meta(meta, shuffle=True, random_state=2)
    return ens 
Example 28
def _pre_train(self, y):
        self.cv = check_cv(self.cv, y)
        self.n_splits = self.cv.get_n_splits()
        self.transformers = [clone(self.base_transformer) for _ in range(self.n_splits + 1)]