当前位置：源码 > Python源码 >

Python源码示例：sklearn.base.ClassifierMixin()

示例1

def test_base_chain_fit_and_predict():
    # Fit base chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    chains = [RegressorChain(Ridge()),
              ClassifierChain(LogisticRegression())]
    for chain in chains:
        chain.fit(X, Y)
        Y_pred = chain.predict(X)
        assert_equal(Y_pred.shape, Y.shape)
        assert_equal([c.coef_.size for c in chain.estimators_],
                     list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    Y_prob = chains[1].predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert isinstance(chains[1], ClassifierMixin)

示例2

def __init__(self, classifier=None, predictors="all"):
        """Create an instance of the MissingnessClassifier.

        The MissingnessClassifier inherits from sklearn BaseEstimator and
        ClassifierMixin. This inheritence and this class' implementation
        ensure that the MissingnessClassifier is a valid classifier that will
        work in an sklearn pipeline.

        Args:
            classifier (classifier, optional): valid classifier from sklearn.
                If None, default is xgboost. Note that classifier must
                conform to sklearn style. This means it must implement the
                `predict_proba` method and act as a porper classifier.
            predictors (str, iter, dict, optiona): defaults to all, i.e.
                use all predictors. If all, every column will be used for
                every class prediction. If a list, subset of columns used for
                all predictions. If a dict, specify which columns to use as
                predictors for each imputation. Columns not specified in dict
                will receive `all` by default.
        """
        self.classifier = classifier
        self.predictors = predictors

示例3

def test_template_1():
    """Assert that TPOT template option generates pipeline when each step is a type of operator."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)

示例4

def test_template_2():
    """Assert that TPOT template option generates pipeline when each step is operator type with a duplicate main type."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 4
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[3][1].__class__, ClassifierMixin)

示例5

def test_template_3():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""

    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)

示例6

def test_template_4():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""

    tpot_obj = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict = 'TPOT light',
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert not (tpot_obj._start_datetime is None)

    sklearn_pipeline = tpot_obj.fitted_pipeline_
    operator_count = tpot_obj._operator_count(tpot_obj._optimized_pipeline)
    assert operator_count == 3
    assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
    assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
    assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
    assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)

示例7

def _get_child_predict(self, clf, X, index=None):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            if self.save_stage0 and index is not None:
                proba = util.saving_predict_proba(clf, X, index)
            else:
                proba = clf.predict_proba(X)
            return proba[:, 1:]
        elif hasattr(clf, 'predict'):
            predict_result = clf.predict(X)
            if isinstance(clf, ClassifierMixin):
                lb = LabelBinarizer()
                lb.fit(predict_result)
                return lb.fit_transform(predict_result)
            else:
                return predict_result.reshape((predict_result.size, 1))
        else:
            return clf.fit_transform(X)

示例8

def test_classifier_chain_fit_and_predict_with_logistic_regression():
    # Fit classifier chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred = classifier_chain.predict(X)
    assert_equal(Y_pred.shape, Y.shape)

    Y_prob = classifier_chain.predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert_equal([c.coef_.size for c in classifier_chain.estimators_],
                 list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    assert isinstance(classifier_chain, ClassifierMixin)

示例9

def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(gamma='scale', probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    # check that an error is raised and indicative if sample_weight is not
    # supported.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(ValueError, match=msg):
        eclf3.fit(X, y, sample_weight)

    # check that _parallel_fit_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')
    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        clf.fit(X, y, sample_weight=sample_weight)

示例10

def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))

示例11

def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test

示例12

def _exposed_methods_mapping(self) -> Dict[str, str]:
        ret = {
            'predict': 'predict'
        }
        if isinstance(self.model, ClassifierMixin):
            ret['predict_proba'] = 'predict_proba'
        return ret

示例13

def transform(self, X):
        X = check_array(X)
        X_transformed = np.copy(X)
        # add class probabilities as a synthetic feature
        if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
            X_transformed = np.hstack((self.estimator.predict_proba(X), X))

        # add class prodiction as a synthetic feature
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))

        return X_transformed

示例14

def transform(self, X):
        """Transform data by adding two synthetic feature(s).

        Parameters
        ----------
        X: numpy ndarray, {n_samples, n_components}
            New data, where n_samples is the number of samples and n_components is the number of components.

        Returns
        -------
        X_transformed: array-like, shape (n_samples, n_features + 1) or (n_samples, n_features + 1 + n_classes) for classifier with predict_proba attribute
            The transformed feature set.
        """
        X = check_array(X)
        X_transformed = np.copy(X)
        # add class probabilities as a synthetic feature
        if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
            y_pred_proba = self.estimator.predict_proba(X)
            # check all values that should be not infinity or not NAN
            if np.all(np.isfinite(y_pred_proba)):
                X_transformed = np.hstack((y_pred_proba, X))

        # add class prediction as a synthetic feature
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))

        return X_transformed

示例15

def score(self, X, y):
        """Force use of accuracy score since we don't inherit
           from ClassifierMixin"""

        from sklearn.metrics import accuracy_score
        return accuracy_score(y, self.predict(X))

示例16

def verify(self, X, predict_params = {}, predict_proba_params = {}, precision = 1e-13, zeroThreshold = 1e-13):
		active_fields = _get_column_names(X)
		if self.active_fields is None or active_fields is None:
			raise ValueError("Cannot perform model validation with anonymous data")
		if self.active_fields.tolist() != active_fields.tolist():
			raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
		active_values = _get_values(X)
		y = self.predict(X, **predict_params)
		target_values = _get_values(y)
		estimator = self._final_estimator
		if isinstance(estimator, BaseEstimator):
			if isinstance(estimator, RegressorMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif isinstance(estimator, ClassifierMixin):
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				if hasattr(estimator, "predict_proba"):
					try:
						y_proba = self.predict_proba(X, **predict_proba_params)
						self.verification.probability_values = _get_values(y_proba)
					except AttributeError:
						pass
		# elif isinstance(estimator, H2OEstimator):
		elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
			if estimator._estimator_type == "regressor":
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
			elif estimator._estimator_type == "classifier":
				probability_values = target_values[:, 1:]
				target_values = target_values[:, 0]
				self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
				self.verification.probability_values = probability_values

示例17

def plot_graphviz_tree(self, **kwargs):
        """
        被装饰器entry_wrapper(support=(EMLFitType.E_FIT_CLF, EMLFitType.E_FIT_REG))装饰，
        即支持有监督学习回归和分类，绘制决策树或者core基于树的分类回归算法的决策示意图绘制，查看
        学习器本身hasattr(fiter, 'tree_')是否有tree_属性，如果没有使用决策树替换

        :param kwargs:  外部可以传递x, y, 通过
                                x = kwargs.pop('x', self.x)
                                y = kwargs.pop('y', self.y)
                        装饰器使用的fiter_type，
                        eg:
                            ttn_abu = AbuML.create_test_more_fiter()
                            ttn_abu.plot_graphviz_tree(fiter_type=ml.EMLFitType.E_FIT_CLF)
        """
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        fiter = self.get_fiter()

        if not hasattr(fiter, 'tree_'):
            self.log_func('{} not hasattr tree_, use decision tree replace'.format(
                fiter.__class__.__name__))

            if isinstance(fiter, ClassifierMixin):
                # FIXME 最好不要使用ClassifierMixin判定学习器类型，因为限定了sklearn
                fiter = self.estimator.decision_tree_classifier(assign=False)
            elif isinstance(fiter, RegressorMixin):
                # # FIXME 最好不要使用RegressorMixin, AbuMLCreater中引用了hmmlearn，xgboost等第三方库
                fiter = self.estimator.decision_tree_regressor(assign=False)
            else:
                fiter = self.estimator.decision_tree_classifier(assign=False)
        # 这里需要将self.df.columns做为名字传入
        return ABuMLExecute.graphviz_tree(fiter, self.df.columns, x, y)

示例18

def _scoring_grid(estimator, scoring):
    """
    只针对有监督学习过滤无监督学习，对scoring未赋予的情况根据
    学习器分类器使用accuracy进行度量，回归器使用可释方差值explained_variance_score，
    使用make_scorer对函数进行score封装

    :param estimator: 学习器对象
    :param scoring: 度量使用的方法，未赋予的情况根据
                    学习器分类器使用accuracy进行度量，回归器使用explained_variance_score进行度量
    :return: scoring
    """

    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO 无监督学习的scoring度量以及GridSearchCV
        return None

    if scoring is None:
        if isinstance(estimator, ClassifierMixin):
            # 分类器使用accuracy
            return 'accuracy'
        elif isinstance(estimator, RegressorMixin):
            # 回归器使用可释方差值explained_variance_score，使用make_scorer对函数进行score封装
            """
                make_scorer中通过greater_is_better对返回值进行正负分配
                eg: sign = 1 if greater_is_better else -1
            """
            return make_scorer(explained_variance_score, greater_is_better=True)
        return None
    return scoring

示例19

def __init__(self, metric='riemann', tsupdate=False,
                 clf=LogisticRegression()):
        """Init."""
        self.metric = metric
        self.tsupdate = tsupdate
        self.clf = clf

        if not isinstance(clf, ClassifierMixin):
            raise TypeError('clf must be a ClassifierMixin')

        TangentSpace(metric=self.metric, tsupdate=self.tsupdate)

示例20

def _get_blend_init(self, y_train, clf):
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            width = self.n_classes_ - 1
        elif hasattr(clf, 'predict') and isinstance(clf, ClassifierMixin):
            width = self.n_classes_
        elif hasattr(clf, 'predict'):
            width = 1
        elif hasattr(clf, 'n_components'):
            width = clf.n_components
        else:
            raise Exception('Unimplemented for {0}'.format(type(clf)))
        return np.zeros((y_train.size, width))

示例21

def __init__(self, estimator, context):
        """
        :param estimator: Estimator to convert
        :type estimator: BaseEstimator
        :param context: context to work with
        :type context: TransformationContext
        """
        super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION)
        assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models'
        for f in context.schemas[Schema.OUTPUT]:
            assert isinstance(f, CategoricalFeature), 'Only categorical outputs are supported for classification task'

        # create hidden variables for each categorical output
        internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
        self.context.schemas[Schema.INTERNAL] = internal_schema

示例22

def __init__(self, estimator, context, mode):
        super(DecisionTreeConverter, self).__init__(estimator, context, mode)

        assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
        assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'
        if mode == ModelMode.CLASSIFICATION:
            if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature):
                self.prediction_output = self.OUTPUT_LABEL
            else:
                self.prediction_output = self.OUTPUT_PROBABILITY
            assert isinstance(self.estimator, ClassifierMixin), \
                'Only a classifier can be serialized in classification mode'
        if mode == ModelMode.REGRESSION:
            assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
                'Only a numeric feature can be an output of regression'
            assert isinstance(self.estimator, RegressorMixin), \
                'Only a regressor can be serialized in regression mode'
        assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
            'Tree outputs {} results while the schema specifies {} output fields'.format(
                estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))

        # create hidden variables for each categorical output
        # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
        # TODO: converter and subclasses for classifier and regression converters
        internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
        self.context.schemas[Schema.INTERNAL] = internal_schema

示例23

def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert_true('sample_weight' in sample_weight)

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))

示例24

def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv=prefit`, the classifier must have been fit already on data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
            `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_dith_width` [boolean, optional]
            If True use equal probability variable length bins, if
            `method` is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - integer, to specify the number of folds.
            - An object to be used as a cross-validation generator.
            - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has been
            fitted already and all data is used for calibration. If `cv=1`,
            the training data is used for both training and calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv

示例25

def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(np.float)

            if len(label_encoder.classes_) != 2:
                raise ValueError

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)
            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)

示例26

def evaluate(self, point):
        """
        Fits model using the particular setting of hyperparameters and
        evaluates the model validation data.

        Parameters
        ----------
        * `point`: dict
            A mapping of parameter names to the corresponding values

        Returns
        -------
        * `score`: float
            Score (more is better!) for some specific point
        """
        X_train, y_train, X_test, y_test = (
            self.X_train, self.y_train, self.X_test, self.y_test)

        # apply transformation to model parameters, for example exp transformation
        point_mapped = {}
        for param, val in point.items():
            point_mapped[param] = self.space[param][1](val)

        model_instance = self.model(**point_mapped)

        if 'random_state' in model_instance.get_params():
            model_instance.set_params(random_state=self.random_state)

        min_obj_val = -5.0

        # Infeasible parameters are expected to raise an exception, thus the try
        # catch below, infeasible parameters yield assumed smallest objective.
        try:
            model_instance.fit(X_train, y_train)
            if isinstance(model_instance, RegressorMixin): # r^2 metric
                y_predicted = model_instance.predict(X_test)
                score = r2_score(y_test, y_predicted)
            elif isinstance(model_instance, ClassifierMixin): # log loss
                y_predicted = model_instance.predict_proba(X_test)
                score = -log_loss(y_test, y_predicted) # in the context of this function, the higher score is better
            # avoid any kind of singularitites, eg probability being zero, and thus breaking the log_loss
            if math.isnan(score):
                score = min_obj_val
            score = max(score, min_obj_val) # this is necessary to avoid -inf or NaN
        except BaseException as ex:
            score = min_obj_val # on error: return assumed smallest value of objective function

        return score

# this is necessary to generate table for README in the end

示例27

def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))

Python源码示例：sklearn.base.ClassifierMixin()

微信关注