Python源码示例:sklearn.pipeline.make_union()
示例1
def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
trans = make_union(
SimpleImputer(missing_values=missing_values, strategy='most_frequent'),
MissingIndicator(missing_values=missing_values)
)
X_trans = trans.fit_transform(X)
assert_array_equal(X_trans, X_trans_exp)
示例2
def test_make_union():
pca = PCA(svd_solver='full')
mock = Transf()
fu = make_union(pca, mock)
names, transformers = zip(*fu.transformer_list)
assert_equal(names, ("pca", "transf"))
assert_equal(transformers, (pca, mock))
示例3
def test_make_union_kwargs():
pca = PCA(svd_solver='full')
mock = Transf()
fu = make_union(pca, mock, n_jobs=3)
assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list)
assert_equal(3, fu.n_jobs)
# invalid keyword parameters should raise an error message
assert_raise_message(
TypeError,
'Unknown keyword arguments: "transformer_weights"',
make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1}
)
示例4
def main():
vectorizer = make_union(
on_field('name', Tfidf(max_features=100000, token_pattern='\w+')),
on_field('text', Tfidf(max_features=100000, token_pattern='\w+', ngram_range=(1, 2))),
on_field(['shipping', 'item_condition_id'],
FunctionTransformer(to_records, validate=False), DictVectorizer()),
n_jobs=4)
y_scaler = StandardScaler()
with timer('process train'):
train = pd.read_table('../input/train.tsv')
train = train[train['price'] > 0].reset_index(drop=True)
cv = KFold(n_splits=20, shuffle=True, random_state=42)
train_ids, valid_ids = next(cv.split(train))
train, valid = train.iloc[train_ids], train.iloc[valid_ids]
y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1)))
X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32)
print(f'X_train: {X_train.shape} of {X_train.dtype}')
del train
with timer('process valid'):
X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32)
with ThreadPool(processes=4) as pool:
Xb_train, Xb_valid = [x.astype(np.bool).astype(np.float32) for x in [X_train, X_valid]]
xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2
y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0)
y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred))))
示例5
def __init__(self, training_values=None, training_targets=None):
self.vectorizer = make_union(TfidfVectorizer(), PostTransformer())
# Set using parameter_search. TODO: review after updating
# corpus.
self.classifier = svm.LinearSVC(C=1, loss='squared_hinge', multi_class='ovr', class_weight='balanced', tol=1e-6)
if training_values is not None and training_targets is not None:
self.fit(training_values, training_targets)
示例6
def test_objectmapper(self):
df = pdml.ModelFrame([])
self.assertIs(df.pipeline.Pipeline, pipeline.Pipeline)
self.assertIs(df.pipeline.FeatureUnion, pipeline.FeatureUnion)
self.assertIs(df.pipeline.make_pipeline, pipeline.make_pipeline)
self.assertIs(df.pipeline.make_union, pipeline.make_union)
示例7
def test_make_union():
pca = PCA(svd_solver='full')
mock = Transf()
fu = make_union(pca, mock)
names, transformers = zip(*fu.transformer_list)
assert_equal(names, ("pca", "transf"))
assert_equal(transformers, (pca, mock))
示例8
def test_make_union_kwargs():
pca = PCA(svd_solver='full')
mock = Transf()
fu = make_union(pca, mock, n_jobs=3)
assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list)
assert_equal(3, fu.n_jobs)
# invalid keyword parameters should raise an error message
assert_raise_message(
TypeError,
'Unknown keyword arguments: "transformer_weights"',
make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1}
)