Python source code examples: sklearn.base.ClassifierMixin()
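Before the examples, a minimal sketch of what ClassifierMixin contributes: inheriting from it alongside BaseEstimator marks an estimator as a classifier (so isinstance checks like those below succeed) and supplies a default score method returning mean accuracy. The MajorityClassifier name here is hypothetical, not part of scikit-learn.

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class MajorityClassifier(BaseEstimator, ClassifierMixin):
    """Toy classifier that always predicts the most frequent training label."""
    def fit(self, X, y):
        values, counts = np.unique(y, return_counts=True)
        self.majority_ = values[np.argmax(counts)]
        return self

    def predict(self, X):
        # one prediction per sample, all equal to the stored majority label
        return np.full(len(X), self.majority_)

# score() comes from ClassifierMixin (mean accuracy):
# MajorityClassifier().fit(X, y).score(X, y)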
Example 1
def test_base_chain_fit_and_predict():
    # Fit base chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    chains = [RegressorChain(Ridge()),
              ClassifierChain(LogisticRegression())]
    for chain in chains:
        chain.fit(X, Y)
        Y_pred = chain.predict(X)
        assert_equal(Y_pred.shape, Y.shape)
        assert_equal([c.coef_.size for c in chain.estimators_],
                     list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    Y_prob = chains[1].predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert isinstance(chains[1], ClassifierMixin)
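The coef_.size assertion holds because a chain fits its i-th estimator on X augmented with the i previously predicted labels, so the coefficient vectors grow by one feature per position, from X.shape[1] up to X.shape[1] + Y.shape[1] - 1.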
Example 2
def __init__(self, classifier=None, predictors="all"):
    """Create an instance of the MissingnessClassifier.

    The MissingnessClassifier inherits from sklearn BaseEstimator and
    ClassifierMixin. This inheritance and this class' implementation
    ensure that the MissingnessClassifier is a valid classifier that will
    work in an sklearn pipeline.

    Args:
        classifier (classifier, optional): valid classifier from sklearn.
            If None, default is xgboost. Note that classifier must
            conform to sklearn style. This means it must implement the
            `predict_proba` method and act as a proper classifier.
        predictors (str, iter, dict, optional): defaults to all, i.e.
            use all predictors. If all, every column will be used for
            every class prediction. If a list, subset of columns used for
            all predictions. If a dict, specify which columns to use as
            predictors for each imputation. Columns not specified in dict
            will receive `all` by default.
    """
    self.classifier = classifier
    self.predictors = predictors
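Based only on the signature documented above, instantiation might look like the following sketch; the import path and column names are assumptions rather than something shown in this example.

from sklearn.linear_model import LogisticRegression
# assumed import path for the class documented above:
from autoimpute.imputations import MissingnessClassifier

mc = MissingnessClassifier(
    classifier=LogisticRegression(),           # any sklearn-style classifier with predict_proba
    predictors={"age": ["income", "weight"]},  # hypothetical columns; unlisted columns default to "all"
)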
Example 3
def test_template_1():
    """Assert that TPOT template option generates pipeline when each step is a type of operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example 4
def test_template_2():
    """Assert that TPOT template option generates pipeline when each step is operator type with a duplicate main type."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 4
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[3][1].__class__, ClassifierMixin)
Example 5
def test_template_3():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example 6
def test_template_4():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict='TPOT light',
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert not (tpot_obj._start_datetime is None)

    sklearn_pipeline = tpot_obj.fitted_pipeline_
    operator_count = tpot_obj._operator_count(tpot_obj._optimized_pipeline)
    assert operator_count == 3

    assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
    assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
    assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
    assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example 7
def _get_child_predict(self, clf, X, index=None):
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        if self.save_stage0 and index is not None:
            proba = util.saving_predict_proba(clf, X, index)
        else:
            proba = clf.predict_proba(X)
        return proba[:, 1:]
    elif hasattr(clf, 'predict'):
        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            lb = LabelBinarizer()
            lb.fit(predict_result)
            return lb.fit_transform(predict_result)
        else:
            return predict_result.reshape((predict_result.size, 1))
    else:
        return clf.fit_transform(X)
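Returning proba[:, 1:] drops the first predict_proba column: each row sums to one, so that column is redundant, and omitting it avoids perfectly collinear stacked features.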
Example 8
def test_classifier_chain_fit_and_predict_with_logistic_regression():
    # Fit classifier chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred = classifier_chain.predict(X)
    assert_equal(Y_pred.shape, Y.shape)

    Y_prob = classifier_chain.predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert_equal([c.coef_.size for c in classifier_chain.estimators_],
                 list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    assert isinstance(classifier_chain, ClassifierMixin)
Example 9
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(gamma='scale', probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    # check that an error is raised and indicative if sample_weight is not
    # supported.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ('Underlying estimator KNeighborsClassifier does not support '
           'sample weights.')
    with pytest.raises(ValueError, match=msg):
        eclf3.fit(X, y, sample_weight)

    # check that _parallel_fit_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
        def fit(self, X, y, sample_weight):
            raise TypeError('Error unrelated to sample_weight.')
    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        clf.fit(X, y, sample_weight=sample_weight)
Example 10
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))
Example 11
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
Example 12
def _exposed_methods_mapping(self) -> Dict[str, str]:
    ret = {
        'predict': 'predict'
    }
    if isinstance(self.model, ClassifierMixin):
        ret['predict_proba'] = 'predict_proba'
    return ret
Example 13
def transform(self, X):
    X = check_array(X)
    X_transformed = np.copy(X)
    # add class probabilities as a synthetic feature
    if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
        X_transformed = np.hstack((self.estimator.predict_proba(X), X))
    # add class prediction as a synthetic feature
    X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))
    return X_transformed
Example 14
def transform(self, X):
    """Transform data by adding two synthetic feature(s).

    Parameters
    ----------
    X: numpy ndarray, {n_samples, n_components}
        New data, where n_samples is the number of samples and n_components is the number of components.

    Returns
    -------
    X_transformed: array-like, shape (n_samples, n_features + 1) or (n_samples, n_features + 1 + n_classes) for classifier with predict_proba attribute
        The transformed feature set.
    """
    X = check_array(X)
    X_transformed = np.copy(X)
    # add class probabilities as a synthetic feature
    if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'):
        y_pred_proba = self.estimator.predict_proba(X)
        # check that all predicted probabilities are finite (no inf or NaN)
        if np.all(np.isfinite(y_pred_proba)):
            X_transformed = np.hstack((y_pred_proba, X))
    # add class prediction as a synthetic feature
    X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))
    return X_transformed
Example 15
def score(self, X, y):
    """Force use of accuracy score since we don't inherit
    from ClassifierMixin"""
    from sklearn.metrics import accuracy_score
    return accuracy_score(y, self.predict(X))
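This hand-rolled score is exactly the behavior ClassifierMixin provides by default (mean accuracy via accuracy_score); a class that deliberately avoids the mixin, as here, has to reimplement it.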
Example 16
def verify(self, X, predict_params={}, predict_proba_params={}, precision=1e-13, zeroThreshold=1e-13):
    active_fields = _get_column_names(X)
    if self.active_fields is None or active_fields is None:
        raise ValueError("Cannot perform model validation with anonymous data")
    if self.active_fields.tolist() != active_fields.tolist():
        raise ValueError("The columns between training data {} and verification data {} do not match".format(self.active_fields, active_fields))
    active_values = _get_values(X)
    y = self.predict(X, **predict_params)
    target_values = _get_values(y)
    estimator = self._final_estimator
    if isinstance(estimator, BaseEstimator):
        if isinstance(estimator, RegressorMixin):
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
        elif isinstance(estimator, ClassifierMixin):
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
            if hasattr(estimator, "predict_proba"):
                try:
                    y_proba = self.predict_proba(X, **predict_proba_params)
                    self.verification.probability_values = _get_values(y_proba)
                except AttributeError:
                    pass
    # elif isinstance(estimator, H2OEstimator):
    elif hasattr(estimator, "_estimator_type") and hasattr(estimator, "download_mojo"):
        if estimator._estimator_type == "regressor":
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
        elif estimator._estimator_type == "classifier":
            probability_values = target_values[:, 1:]
            target_values = target_values[:, 0]
            self.verification = _Verification(active_values, target_values, precision, zeroThreshold)
            self.verification.probability_values = probability_values
Example 17
def plot_graphviz_tree(self, **kwargs):
    """
    Decorated by entry_wrapper(support=(EMLFitType.E_FIT_CLF, EMLFitType.E_FIT_REG)),
    i.e. it supports supervised classification and regression. Draws the decision
    diagram for a decision tree or other tree-based classification/regression
    learners. Checks whether the learner itself has a tree_ attribute
    (hasattr(fiter, 'tree_')); if not, a decision tree is substituted.

    :param kwargs: x and y can be passed in externally, via
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        fiter_type is used by the decorator,
        e.g.:
            ttn_abu = AbuML.create_test_more_fiter()
            ttn_abu.plot_graphviz_tree(fiter_type=ml.EMLFitType.E_FIT_CLF)
    """
    x = kwargs.pop('x', self.x)
    y = kwargs.pop('y', self.y)
    fiter = self.get_fiter()
    if not hasattr(fiter, 'tree_'):
        self.log_func('{} not hasattr tree_, use decision tree replace'.format(
            fiter.__class__.__name__))
        if isinstance(fiter, ClassifierMixin):
            # FIXME: better not to detect the learner type via ClassifierMixin, since that ties us to sklearn
            fiter = self.estimator.decision_tree_classifier(assign=False)
        elif isinstance(fiter, RegressorMixin):
            # FIXME: better not to rely on RegressorMixin either; AbuMLCreater also references third-party libraries such as hmmlearn and xgboost
            fiter = self.estimator.decision_tree_regressor(assign=False)
        else:
            fiter = self.estimator.decision_tree_classifier(assign=False)
    # self.df.columns must be passed in here as the feature names
    return ABuMLExecute.graphviz_tree(fiter, self.df.columns, x, y)
Example 18
def _scoring_grid(estimator, scoring):
    """
    Supervised learning only; unsupervised learners are filtered out. If scoring
    is not given, classifiers are measured with accuracy and regressors with the
    explained variance score explained_variance_score, wrapped via make_scorer.

    :param estimator: the learner object
    :param scoring: the scoring method to use; if not given, classifiers are
        measured with accuracy and regressors with explained_variance_score
    :return: scoring
    """
    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO: scoring metrics and GridSearchCV for unsupervised learning
        return None

    if scoring is None:
        if isinstance(estimator, ClassifierMixin):
            # classifiers use accuracy
            return 'accuracy'
        elif isinstance(estimator, RegressorMixin):
            # regressors use explained_variance_score, wrapped with make_scorer
            """
            make_scorer assigns the sign of the return value via greater_is_better,
            e.g. sign = 1 if greater_is_better else -1
            """
            return make_scorer(explained_variance_score, greater_is_better=True)
        return None
    return scoring
Example 19
def __init__(self, metric='riemann', tsupdate=False,
             clf=LogisticRegression()):
    """Init."""
    self.metric = metric
    self.tsupdate = tsupdate
    self.clf = clf

    if not isinstance(clf, ClassifierMixin):
        raise TypeError('clf must be a ClassifierMixin')

    TangentSpace(metric=self.metric, tsupdate=self.tsupdate)
Example 20
def _get_blend_init(self, y_train, clf):
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        width = self.n_classes_ - 1
    elif hasattr(clf, 'predict') and isinstance(clf, ClassifierMixin):
        width = self.n_classes_
    elif hasattr(clf, 'predict'):
        width = 1
    elif hasattr(clf, 'n_components'):
        width = clf.n_components
    else:
        raise Exception('Unimplemented for {0}'.format(type(clf)))
    return np.zeros((y_train.size, width))
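The widths mirror example 7's _get_child_predict: probabilistic classifiers contribute n_classes_ - 1 columns (one predict_proba column is dropped), other classifiers get n_classes_ columns to hold label-binarized predictions, plain predictors one column, and transformers n_components columns.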
Example 21
def __init__(self, estimator, context):
    """
    :param estimator: Estimator to convert
    :type estimator: BaseEstimator
    :param context: context to work with
    :type context: TransformationContext
    """
    super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION)
    assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models'
    for f in context.schemas[Schema.OUTPUT]:
        assert isinstance(f, CategoricalFeature), 'Only categorical outputs are supported for classification task'
    # create hidden variables for each categorical output
    internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
    self.context.schemas[Schema.INTERNAL] = internal_schema
Example 22
def __init__(self, estimator, context, mode):
    super(DecisionTreeConverter, self).__init__(estimator, context, mode)
    assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
    assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'
    if mode == ModelMode.CLASSIFICATION:
        if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature):
            self.prediction_output = self.OUTPUT_LABEL
        else:
            self.prediction_output = self.OUTPUT_PROBABILITY
        assert isinstance(self.estimator, ClassifierMixin), \
            'Only a classifier can be serialized in classification mode'
    if mode == ModelMode.REGRESSION:
        assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
            'Only a numeric feature can be an output of regression'
        assert isinstance(self.estimator, RegressorMixin), \
            'Only a regressor can be serialized in regression mode'
    assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
        'Tree outputs {} results while the schema specifies {} output fields'.format(
            estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))
    # create hidden variables for each categorical output
    # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
    # TODO: converter and subclasses for classifier and regression converters
    internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
    self.context.schemas[Schema.INTERNAL] = internal_schema
Example 23
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert_true('sample_weight' in sample_weight)

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))
Example 24
def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv=prefit`, the classifier must have been fit already on data.

    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
        `"sigmoid"`.

    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.

    * `interpolation` [string, optional]:
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.

    * `variable_width` [boolean, optional]:
        If True use equal probability variable length bins, if
        `method` is `"histogram"`.

    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.

        If `"prefit"` is passed, it is assumed that base_estimator has been
        fitted already and all data is used for calibration. If `cv=1`,
        the training data is used for both training and calibration.
    """
    self.base_estimator = base_estimator
    self.method = method
    self.bins = bins
    self.interpolation = interpolation
    self.variable_width = variable_width
    self.cv = cv
Example 25
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(np.float64)

            if len(label_encoder.classes_) != 2:
                raise ValueError

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)

            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
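A brief usage sketch for the wrapper above; the regressor choice and dataset are placeholders, not taken from the source.

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_classification(n_samples=200, random_state=0)
clf = as_classifier(GradientBoostingRegressor())
clf.fit(X, y)
proba = clf.predict_proba(X)   # regressor output clipped to [0, 1], two columns
labels = clf.predict(X)        # thresholded at 0.5, mapped back to classes_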
Example 26
def evaluate(self, point):
    """
    Fits model using the particular setting of hyperparameters and
    evaluates the model validation data.

    Parameters
    ----------
    * `point`: dict
        A mapping of parameter names to the corresponding values

    Returns
    -------
    * `score`: float
        Score (more is better!) for some specific point
    """
    X_train, y_train, X_test, y_test = (
        self.X_train, self.y_train, self.X_test, self.y_test)

    # apply transformation to model parameters, for example exp transformation
    point_mapped = {}
    for param, val in point.items():
        point_mapped[param] = self.space[param][1](val)

    model_instance = self.model(**point_mapped)

    if 'random_state' in model_instance.get_params():
        model_instance.set_params(random_state=self.random_state)

    min_obj_val = -5.0

    # Infeasible parameters are expected to raise an exception, thus the try
    # catch below, infeasible parameters yield assumed smallest objective.
    try:
        model_instance.fit(X_train, y_train)
        if isinstance(model_instance, RegressorMixin):  # r^2 metric
            y_predicted = model_instance.predict(X_test)
            score = r2_score(y_test, y_predicted)
        elif isinstance(model_instance, ClassifierMixin):  # log loss
            y_predicted = model_instance.predict_proba(X_test)
            score = -log_loss(y_test, y_predicted)  # in the context of this function, the higher score is better

        # avoid any kind of singularities, e.g. probability being zero, and thus breaking the log_loss
        if math.isnan(score):
            score = min_obj_val
        score = max(score, min_obj_val)  # this is necessary to avoid -inf or NaN
    except BaseException as ex:
        score = min_obj_val  # on error: return assumed smallest value of objective function

    return score

# this is necessary to generate table for README in the end
Example 27
def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))
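To make the traversal concrete, a small sketch; the pipeline here is illustrative, not from the source.

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
for coor, model, vs in enumerate_pipeline_models(pipe):
    # yields (0,) Pipeline, then (0, 0) StandardScaler and (0, 1) LogisticRegression;
    # the leaf estimators hit the TransformerMixin/ClassifierMixin branches above
    print(coor, type(model).__name__, vs)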