Python source code examples: sklearn.grid_search.GridSearchCV()

Example 1
def grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__stop_words=[None, "english"],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print(clf)

    return clf 
Example 2
def __grid_search_model(clf_factory, X, Y):
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
                      vect__min_df=[1, 2],
                      vect__smooth_idf=[False, True],
                      vect__use_idf=[False, True],
                      vect__sublinear_tf=[False, True],
                      vect__binary=[False, True],
                      clf__alpha=[0, 0.01, 0.05, 0.1, 0.5, 1],
                      )

    grid_search = GridSearchCV(clf_factory(),
                               param_grid=param_grid,
                               cv=cv,
                               score_func=f1_score,
                               verbose=10)
    grid_search.fit(X, Y)
    clf = grid_search.best_estimator_
    print(clf)

    return clf 
Example 3
def nestedCrossValidation(X, y, cvFolds, estimator):  
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state = 30)
    cv_j=0
    param_grid = {'alpha': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10,
                            100, 1000, 1e4, 1e5, 1e6, 1e7, 1e9]}
    r2 = np.zeros((cvFolds,1))   
    for train_index, test_index in kf:
        train_X = X[train_index,:]
        test_X = X[test_index,:]
        train_y = y[train_index]
        test_y = y[test_index]
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0, cv=cvFolds, scoring='mean_squared_error')
        grid.fit(train_X,train_y)
        y_true, y_pred = test_y,grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred) 
        cv_j = cv_j + 1 
    return r2
    
Example 4
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_features': ['sqrt', 'log2', None],
                             'max_depth': range(2,1000),
                             }
                            ]


        reg = GridSearchCV(DecisionTreeRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example 5
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'alpha': np.logspace(-5,5)
                             }
                            ]


        reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example 6
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true) 
Example 7
def parameterChoosing(self):
        #Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestRegressor(n_estimators=30), tuned_parameters, cv=5, scoring='mean_squared_error')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print mean_squared_error(y_true, y_pred) 
Example 8
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example 9
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5,5)},
                             {'penalty': ['l2'],
                              'C': np.logspace(-5,5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example 10
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example 11
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(2,60),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(DecisionTreeClassifier(max_depth=5), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred) 
Example 12
def test_same_result(self):
        X, y, Z = self.make_classification(2, 40000, nonnegative=True)

        parameters = {'alpha': [0.1, 1, 10]}
        fit_params = {'classes': np.unique(y)}

        local_estimator = MultinomialNB()
        local_grid = GridSearchCV(estimator=local_estimator,
                                  param_grid=parameters)

        estimator = SparkMultinomialNB()
        grid = SparkGridSearchCV(estimator=estimator,
                                 param_grid=parameters,
                                 fit_params=fit_params)

        local_grid.fit(X, y)
        grid.fit(Z)

        locscores = [r.mean_validation_score for r in local_grid.grid_scores_]
        scores = [r.mean_validation_score for r in grid.grid_scores_]

        assert_array_almost_equal(locscores, scores, decimal=2) 
Example 13
def compute_svm_score_nestedCV(K, y, n_folds,
                               scoring=balanced_accuracy_scoring,
                               random_state=None,
                               param_grid=[{'C': np.logspace(-5, 5, 25)}]):
    """Compute cross-validated score of SVM using precomputed kernel.
    """
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        clf.fit(K[train, :][:, train], y_train)
        # print clf.best_params_
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean() 
Example 14
def test_cv_pipeline(self):
        pipeline = SKL_Pipeline([
            ('vect', SKL_HashingVectorizer(n_features=20)),
            ('tfidf', SKL_TfidfTransformer(use_idf=False)),
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha']) 
Example 15
def fit(self, X, y, featurename=[]):
        self.dim_ = X.shape[1]
        self.setfeaturename(featurename)
        self.setdefaultpred(y)
        param_grid = {"max_depth": self.max_depth_, "min_samples_leaf": self.min_samples_leaf_}
        if self.modeltype_ == 'regression':
            mdl = tree.DecisionTreeRegressor()
        elif self.modeltype_ == 'classification':
            mdl = tree.DecisionTreeClassifier()
        grid_search = GridSearchCV(mdl, param_grid=param_grid, cv=self.cv_)
        grid_search.fit(X, y)
        mdl = grid_search.best_estimator_
        self.__parseTree(mdl)
        self.weight_ = np.ones(len(self.rule_)) 
Example 16
def main():

    import sys
    import os.path as osp
    import pickle

    import numpy as np
    from sklearn import grid_search, metrics
    from sklearn.ensemble import RandomForestClassifier

    data_dir = sys.argv[1]

    # load_list is a project-local helper (not shown here)
    fet_list = load_list(osp.join(data_dir, 'c3d.list'))
    pos_list = load_list(osp.join(data_dir, 'pos.urls'))

    features = np.load(osp.join(data_dir, 'c3d.npy'))
    fet_set = set(fet_list)

    pos_idx = [fet_list.index(i) for i in pos_list if i in fet_set]

    y = np.zeros(features.shape[0])
    y[pos_idx] = 1

    print('n_pos', np.sum(y), 'n_neg', np.sum(1 - y))

    params = {'n_estimators': [2, 4, 5, 6, 8, 10, 30]}
    #params = {'n_estimators': [50, 70, 100, 120, 150, 200]}
    # score by the F1 of the negative class (pos_label=0)
    clf = grid_search.GridSearchCV(
        RandomForestClassifier(n_estimators=2, n_jobs=4), params,
        scoring=metrics.make_scorer(lambda yt, yp: metrics.f1_score(yt, yp, pos_label=0)),
        cv=5)
    clf.fit(features, y)
    print(clf.best_score_)
    print(clf.best_estimator_)
    with open(osp.join(data_dir, 'c3d-models-rfc.pkl'), 'wb') as f:
        pickle.dump(clf.best_estimator_, f)
Example 17
def testIrisDNN(self):
    if HAS_SKLEARN:
      random.seed(42)
      iris = datasets.load_iris()
      feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
      classifier = learn.DNNClassifier(
          feature_columns=feature_columns, hidden_units=[10, 20, 10],
          n_classes=3)
      grid_search = GridSearchCV(classifier,
                                 {'hidden_units': [[5, 5], [10, 10]]},
                                 scoring='accuracy',
                                 fit_params={'steps': [50]})
      grid_search.fit(iris.data, iris.target)
      score = accuracy_score(iris.target, grid_search.predict(iris.data))
      self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score)) 
Example 18
def plot_kernel_density(col, verbose=True): 
    """Plots kernel density function of column

    From: 
    https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/

    Parameters
    ----------
    col : np.ndarray
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    # TODO: address being passed an entire matrix
    # TODO respect missing_val
    # TODO what does n do?
    col = utils.check_col(col)
    x_grid = np.linspace(min(col), max(col), 1000)

    grid = GridSearchCV(KernelDensity(), {'bandwidth': np.linspace(0.1,1.0,30)}, cv=20) # 20-fold cross-validation
    grid.fit(col[:, None])

    kde = grid.best_estimator_
    pdf = np.exp(kde.score_samples(x_grid[:, None]))

    fig, ax = plt.subplots()
    #fig = plt.figure()
    ax.plot(x_grid, pdf, linewidth=3, alpha=0.5, label='bw=%.2f' % kde.bandwidth)
    ax.hist(col, 30, fc='gray', histtype='stepfilled', alpha=0.3, density=True)  # 'normed' was removed in matplotlib 3
    ax.legend(loc='upper left')
    ax.set_xlim(min(col), max(col))
    if verbose:
        plt.show()
    return fig 
Example 19
def runGridSearch(self, model):
        logging.debug("run grid search on model: {}".format(model.__class__.__name__))
        logging.debug("cross validation strategy: {}".format(model.holdout_split))
        logging.debug("used features: {}".format(model.usedFeatures))
        logging.debug("tuned parameters: {}".format(model.getTunedParamterOptions()))
        
        features,labels,cv = model.getFeaturesLabel()
        # do grid search
        if self.do_random_gridsearch:
            estimator = RandomizedSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv, n_jobs=self.n_jobs,
                                           scoring=mean_absolute_percentage_error_scoring, verbose=500,
                                           n_iter=self.n_iter_randomsearch)
        else:
            estimator = GridSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv, n_jobs=self.n_jobs,
                                     fit_params=model.get_fit_params(),
                                     scoring=mean_absolute_percentage_error_scoring, verbose=500)
        estimator.fit(features, labels)
        model.clf = estimator.best_estimator_
        model.save_final_model = True
        model.save_model()
        
#         model.dispFeatureImportance()
        logging.debug('estimator parameters: {}'.format(estimator.get_params()))
        logging.debug('Best parameters: {}'.format(estimator.best_params_))
        logging.debug('Best Scores: {}'.format(-estimator.best_score_))
        logging.debug('Score grid: {}'.format(estimator.grid_scores_ ))
        for i in estimator.grid_scores_ :
            logging.debug('parameters: {}'.format(i.parameters ))
            logging.debug('mean_validation_score: {}'.format(np.absolute(i.mean_validation_score)))
            logging.debug('cv_validation_scores: {}'.format(np.absolute(i.cv_validation_scores) ))

        
        
        return 
Example 20
def GBR_stacking(y_train, X_train, X_test):
    param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                  'max_depth': [2, 3, 4, 5],  # [2, 3, 4, 6],
                  'min_samples_leaf': [1, 2, 3],  # ,5, 7],
                  'max_features': [1.0, 0.5, 0.3, 0.1]}

    est = en.GradientBoostingRegressor(loss='ls', n_estimators=100)
    clf = GridSearchCV(est, param_grid, n_jobs=3, verbose=1, cv=20, scoring=spearman_scoring).fit(X_train, y_train.flatten())
    # clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test) 
Example 21
def SVM_stacking(y_train, X_train, X_test):
    parameters = {'kernel': ('linear', 'rbf'), 'C': np.linspace(1, 10, 10), 'gamma': np.linspace(1e-3, 1., 10)}
    svr = svm.SVR()
    clf = GridSearchCV(svr, parameters, n_jobs=3, verbose=1, cv=10, scoring=spearman_scoring)
    clf.fit(X_train, y_train.flatten())
    return clf.predict(X_test) 
Example 22
def shrunk_cov_score(X):
    shrinkages = np.logspace(-2, 0, 30)
    cv = GridSearchCV(ShrunkCovariance(), {'shrinkage': shrinkages})
    return np.mean(cross_val_score(cv.fit(X).best_estimator_, X)) 
Example 23
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search depending on spec. random or grid

    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param str eval_metric: evaluation metric
    :param int nb_iter: for random number of tries
    :param str name_clf: name of classif.
    :param obj clf_pipeline: object
    :param obj cross_val: obj specific CV for fix train-test
    :param int nb_workers: number jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search 
Example 24
def perform_class(X, y, iterations=1):
    scores = []
    for i in range(iterations):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42 + i)  # vary the split per iteration
        parameters = {'C':[0.01, 0.1, 1, 10, 100]}
        clf_acc = GridSearchCV(svm.LinearSVC(), parameters, n_jobs=3, cv=3, refit=True, scoring = 'accuracy')
        clf_acc.fit(X_train, y_train)
        scores.append([metrics.accuracy_score(y_test, clf_acc.predict(X_test)), metrics.f1_score(y_test, clf_acc.predict(X_test),average='micro')])
    acc = np.mean([x[0] for x in scores]), np.std([x[0] for x in scores])
    mif = np.mean([x[1] for x in scores]), np.std([x[1] for x in scores])
    return acc, mif 
Example 25
def __init__(self, mode='adaboost'):

        if mode == 'adaboost':
            clf = GradientBoostingRegressor(
                learning_rate=1,
                n_estimators=1000,
                max_depth=3,
                random_state=0)
        elif mode == 'randomforest':
            clf = RandomForestRegressor(
                n_estimators=10,
                max_depth=None,
                n_jobs=-1)
        elif mode == 'SVM':
            clf = SVC(C=10.0,
                      kernel='linear')
        elif mode == 'vjcascade':
            clf = vjcascade(n_stage=30,
                            n_esti=1,
                            l_rate=1)
        elif mode == 'gridSearch':
            param_grid = [
                {'max_depth': [1, 2, 3], 'loss': ['ls', 'lad']},
            ]
            gbr = GradientBoostingRegressor()
            clf = grid_search.GridSearchCV(gbr, param_grid, n_jobs=-1)
        else:
            raise Exception('no mode named: ' + mode + ' found!')

        self.classifier = clf
        self.mode = mode
Example 26
def test_GridGlobalParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L(self.__output__)], n_iter=1),
                    param_grid={'learning_rate': [0.01, 0.001]})
        clf.fit(self.a_in, self.a_out) 
Example 27
def test_GridLayerParams(self):
        clf = GridSearchCV(
                    self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
                    param_grid={'hidden0__units': [4, 8, 12]})
        clf.fit(self.a_in, self.a_out) 
Example 28
def __init__(self, *models, **kwargs):
        """ Initializes the grid search

            :param list models: List of models to use. Each one should be a tuple
             with a model instance or class and a dictionary for the search space.
            :param kwargs: additional initialization arguments
             for `sklearn.grid_search.GridSearchCV`
        """
        self.models = list(filter(None, models))  # materialize: fit() iterates this once per training set
        kwargs['refit'] = True
        self.kwargs = kwargs 
Example 29
def fit(self, training_sets):
        """ Searches for the best estimator and its arguments as well as the best
            training set amongst those specified.

            :param generator training_sets: Training sets to use. Should be a sequence
             of tuples (metadata, extractor) where extractor yields the training set x
             and the correct answer y for each chunk, and metadata contains additional
             data that will be returned back
            :return: the metadata of the training set which yielded the best score,
             the best score obtained by the model, parameters of the model and
             fitted model itself
            :rtype: tuple
        """
        best_training, best_score, best_params, best_model = None, None, None, None
        for i, (metadata, extractor) in enumerate(training_sets):
            for model, grid in self.models:
                assert isclass(model)

                x, y = extractor.get_features(refit=True)

                grid['model_cls'] = [model]
                grid['selector_column'] = [None, extractor.lu_column()]

                search = GridSearchCV(
                    FeatureSelectedClassifier(model), param_grid=grid, **self.kwargs
                )
                search.fit(x, y)

                score, params, model = search.best_score_, search.best_params_, search.best_estimator_
                logger.debug('%s with parameters %s and training meta %s has score %s',
                             type(model), params, metadata, score)
                if best_score is None or score > best_score:
                    best_training, best_score, best_params, best_model = (x, y, metadata), score, params, model

        return best_training, best_score, best_params, best_model

