Python source code examples: sklearn.utils.validation.check_X_y()

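The following examples show how `check_X_y` is used in practice. As a quick orientation (a minimal sketch, not taken from any of the quoted projects): `check_X_y` converts `X` and `y` to validated arrays and raises when their sample counts disagree.

from sklearn.utils.validation import check_X_y

# check_X_y converts array-likes to numpy arrays, enforces a 2-D X and,
# by default, a 1-D y, and checks that both have the same number of samples.
X, y = check_X_y([[1, 2], [3, 4], [5, 6]], [0, 1, 0])
print(X.shape, y.shape)  # (3, 2) (3,)

# Inconsistent lengths raise a ValueError:
try:
    check_X_y([[1, 2], [3, 4]], [0, 1, 0])
except ValueError as exc:
    print(exc)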
Example 1
def fit(self, X, y):
        X, y = check_X_y(X, y,
                         accept_sparse=("csr", "csc", "coo"),
                         accept_large_sparse=True,
                         multi_output=True,
                         y_numeric=True)
        if sp.issparse(X):
            if X.getformat() == "coo":
                if X.row.dtype == "int64" or X.col.dtype == "int64":
                    raise ValueError(
                        "Estimator doesn't support 64-bit indices")
            elif X.getformat() in ["csc", "csr"]:
                if X.indices.dtype == "int64" or X.indptr.dtype == "int64":
                    raise ValueError(
                        "Estimator doesn't support 64-bit indices")

        return self 
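Example 1 accepts large sparse input at validation time and then rejects 64-bit index dtypes itself. As a minimal sketch (assuming scipy is installed; this is not from the quoted project), an input that would trip that check can be built like so:

import numpy as np
import scipy.sparse as sp

# Build a small CSR matrix and force its index arrays to int64, as happens
# naturally for matrices with more than 2**31 - 1 stored elements.
X = sp.random(10, 5, density=0.5, format="csr", random_state=0)
X.indices = X.indices.astype(np.int64)
X.indptr = X.indptr.astype(np.int64)
print(X.indices.dtype, X.indptr.dtype)  # int64 int64

# Passed to the fit() above, this X would get through check_X_y (because
# accept_large_sparse=True) and then raise:
# ValueError: Estimator doesn't support 64-bit indices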
Example 2
def fit(self, x, y):
        check_classification_targets(y)
#        x, y = check_X_y(x, y, y_numeric=True)
        x, y = check_X_y(x, y)
        x_p, x_u = x[y == +1, :], x[y == 0, :]
        n_p, n_u = x_p.shape[0], x_u.shape[0]

        if self.basis == 'gauss':
            b = np.minimum(n_u, self.n_basis)
            center_index = np.random.permutation(n_u)[:b]
            self._x_c = x_u[center_index, :]
        elif self.basis == 'lm':
            b = x_p.shape[1] + 1
        else:
            raise ValueError('Invalid basis type: {}.'.format(self.basis))

        k_p, k_u = self._ker(x_p), self._ker(x_u)

        H = k_u.T.dot(k_u)/n_u
        h = 2*self.prior*np.mean(k_p, axis=0) - np.mean(k_u, axis=0)
        R = self.lam*np.eye(b)
        self.coef_ = sp.linalg.solve(H + R, h)

        return self 
Example 3
def fit(self, x, y):
        check_classification_targets(y)
        x, y = check_X_y(x, y)
        x_s, x_u = x[y == +1, :], x[y == 0, :]
        n_s, n_u = len(x_s), len(x_u)

        p_p = self.prior
        p_n = 1 - self.prior
        p_s = p_p ** 2 + p_n ** 2
        k_s = self._basis(x_s)
        k_u = self._basis(x_u)
        d = k_u.shape[1]

        """
        Note that `2 *` is needed for `b` while this coefficient does not seem
        appear in the original paper at a glance.
        This is because `k_s.T.mean` takes mean over `2 * n_s` entries,
        while the division is taken with `n_s` in the original paper.
        """
        A = (p_p - p_n) / n_u * (k_u.T.dot(k_u) + 2 * self.lam * n_u * np.eye(d))
        b = 2 * p_s * k_s.T.mean(axis=1) - k_u.T.mean(axis=1)
        self.coef_ = np.linalg.solve(A, b)

        return self 
Example 4
def fit(self, X, y):
        # Check that X and y have correct shape
        # if isinstance(y, (pd.DataFrame, pd.Series)):
        #     y = y.values
        X, y = check_X_y(X, y, accept_sparse=True)

        def pr(X, y_i, y):
            p = X[y == y_i].sum(0)
            return (p+1) / ((y == y_i).sum()+1)

        self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
        X_nb = X.multiply(self._r)
        self._clf = LogisticRegression(
            C=self.C,
            dual=self.dual,
            n_jobs=self.n_jobs
        ).fit(X_nb, y)
        return self 
Example 5
def fit(self, X, y):
        # Check that X and y have correct shape
        y = y.values
        X, y = check_X_y(X, y, accept_sparse=True)

        def pr(X, y_i, y):
            p = X[y == y_i].sum(0)
            return (p+1) / ((y == y_i).sum()+1)

        self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
        X_nb = X.multiply(self._r)
        self._clf = LogisticRegression(
            C=self.C,
            dual=self.dual,
            n_jobs=self.n_jobs
        ).fit(X_nb, y)
        return self 
Example 6
def fit(self, x, y):

        x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)  # boilerplate

        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            x, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X
        )

        fh, vf, ve, sigma = jmap(
            y, x, self.ae0, self.be0, self.af0, self.bf0, max_iter=self.max_iter, tol=self.tol
        )
        self.X_offset_ = X_offset
        self.X_scale_ = X_scale

        self.sigma_ = sigma
        self.ve_ = ve
        self.vf_ = vf
        self.coef_ = fh
        self.alpha_ = 1.0 / np.mean(ve)
        self.lambda_ = 1.0 / np.mean(vf)
        self.std_intercept_, self.std_coef_ = scale_sigma(self, X_offset, X_scale)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self 
Example 7
def fit(self, x, y, sample_weight=None):
        x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)

        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            x,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
        )

        if sample_weight is not None:
            x, y = _rescale_data(x, y, sample_weight)

        self.coef_ = sparse_group_lasso(
            x, y, self.alpha, self.rho, self.groups, max_iter=self.max_iter, rtol=self.tol
        )

        self._set_intercept(X_offset, y_offset, X_scale)
        return self 
Example 8
def fit(self, x_, y, sample_weight=None):
        n_samples, n_features = x_.shape

        X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)

        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,  # use the validated X returned by check_X_y, not the raw x_
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=None,
        )

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            x, y = _rescale_data(x, y, sample_weight)

        coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
        self.intercept_ = intercept
        self.coef_ = coefs
        self._set_intercept(X_offset, y_offset, X_scale)
        return self 
Example 9
def validate_inputs(self, X, y):
        # Things we don't want to allow until we've tested them:
        # - Sparse inputs
        # - Multiclass outputs (e.g., more than 2 classes in `y`)
        # - Non-finite inputs
        # - Complex inputs

        X, y = check_X_y(X, y, accept_sparse=False, allow_nd=False)

        assert_all_finite(X)
        assert_all_finite(y)

        if type_of_target(y) != 'binary':
            raise ValueError("Non-binary targets not supported")

        if np.any(np.iscomplex(X)) or np.any(np.iscomplex(y)):
            raise ValueError("Complex data not supported")
        if np.issubdtype(X.dtype, np.object_) or np.issubdtype(y.dtype, np.object_):
            try:
                X = X.astype(float)
                y = y.astype(int)
            except (TypeError, ValueError):
                raise ValueError("argument must be a string.* number")

        return (X, y) 
Example 10
def __init__(self, X, y, criterion, min_samples_split, max_depth,
                 n_val_sample, random_state):
        # make sure max_depth > 1
        if max_depth < 2:
            raise ValueError("max depth must be > 1")

        # check the input arrays, and if it's classification validate the
        # target values in y
        X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
        if is_classifier(self):
            check_classification_targets(y)

        # hyperparameters, stored so we can later inspect attributes of the model
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_val_sample = n_val_sample
        self.random_state = random_state

        # create the splitting class
        random_state = check_random_state(random_state)
        self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

        # grow the tree depth first
        self.tree = self._find_next_split(X, y, 0) 
Example 11
def _check_X_y(self, X, y):

        # helpful error message for sklearn < 1.17
        is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2

        if is_2d or type_of_target(y) != 'binary':
            raise TypeError("Only binary targets supported. For training "
                            "multiclass or multilabel models, you may use the "
                            "OneVsRest or OneVsAll metaestimators in "
                            "scikit-learn.")

        X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                         multi_output=False)

        self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
        y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
        return X, y 
Example 12
def fit(self, X, y):
        """A reference implementation of a fitting function.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y, accept_sparse=True)
        self.is_fitted_ = True
        # `fit` should always return `self`
        return self 
Example 13
def fit(self, X, y):
        """A reference implementation of a fitting function for a classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values. An array of int.

        Returns
        -------
        self : object
            Returns self.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        self.X_ = X
        self.y_ = y
        # Return the classifier
        return self 
Example 14
def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Data used to fit the model.

        y : array of shape (n_samples)
            class labels of each example in X.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y)
        super(Oracle, self).fit(X, y)
        return self 
Example 15
def fit(self, X, Y):
        """Fit the model according to the given training data.
        
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Matrix of the examples, where
            n_samples is the number of samples and
            n_features is the number of features
        
        Y : array-like, shape = [n_samples]
            array of the labels relative to X
        
        Returns
        -------
        self : object
            Returns self
        """
        X, Y = validation.check_X_y(X, Y, dtype=np.float64, order='C', accept_sparse='csr')
        #check_consistent_length(X,Y)
        check_classification_targets(Y)

        self.classes_ = np.unique(Y)
        if len(self.classes_) < 2:
            raise ValueError("The number of classes has to be at least 2; got %d"
                             % len(self.classes_))

        if len(self.classes_) == 2:
            self.multiclass_ = False
            return self._fit(X, Y)
        else:
            self.multiclass_ = True
            if self.multiclass_strategy == 'ovo':
                return self._one_vs_one(X, Y)
            else:
                return self._one_vs_rest(X, Y)
Example 16
def fit(self, X, Y):
        """ Train eigenpro classification model

        Parameters
        ----------
        X : {float, array}, shape = [n_samples, n_raw_feature]
            The raw input feature matrix.

        Y : {float, array}, shape =[n_samples]
            The labels corresponding to the features of X.

        Returns
        -------
        self : returns an instance of self.
       """
        X, Y = check_X_y(
            X,
            Y,
            dtype=np.float32,
            force_all_finite=True,
            multi_output=False,
            ensure_min_samples=3,
        )
        check_classification_targets(Y)
        self.classes_ = np.unique(Y)

        loc = {}
        for ind, label in enumerate(self.classes_):
            loc[label] = ind

        class_matrix = np.zeros((Y.shape[0], self.classes_.shape[0]))

        for ind, label in enumerate(Y):
            class_matrix[ind, loc[label]] = 1
        self._raw_fit(X, class_matrix)
        return self 
Example 17
def check_estimation_input(X, y, is_classification=False):
    """Check input arrays.

    This function is adapted from sklearn.utils.validation.

    Parameters
    ----------
    X : nd-array or list
        Input data.
    y : nd-array, list
        Labels.
    is_classification : boolean (default=`False`)
        Whether the data is used for classification or regression tasks.

    Returns
    -------
    X : object
        The converted and validated `X`.
    y : object
        The converted and validated `y`.

    """
    if is_classification:
        X, y = check_X_y(X, y)
    else:
        X, y = check_X_y(X, y, dtype=np.float64)

    # TODO accept_sparse="csc"
    X = check_array(X, ensure_2d=True, dtype=np.float64)
    y = check_array(y, ensure_2d=False, dtype=None)

    if is_classification:
        check_classification_targets(y)

    y = np.atleast_1d(y)

    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))

    return X, y 
Example 18
def check_X_y(X, y, *args, **kwargs):
        # No-op stub: same signature as the real check_X_y, but skips all
        # validation and returns the inputs unchanged.
        return X, y
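A pass-through stub like this is typically used in test suites or monkeypatches to skip validation for inputs that are already known to be clean.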
Example 19
def fit(self, X, y):
        """Fit coefficient vector."""
        X, y = check_X_y(X, y, dtype='float64')
        y = np.asarray(y, dtype=X.dtype)

        sol = np.linalg.lstsq(X, y, rcond=None)

        self.coef_ = sol[0] + self.offset
        self.resid_ = sol[1]

        return self 
Example 20
def fit(self, X, y):
        """Fit one model per label."""
        X, y = check_X_y(X, y, accept_sparse=False)

        self.labels_ = np.unique(y)

        models = []
        for label in self.labels_:
            labels = y == label
            models.append(OLS().fit(X, labels))

        self._models_ = models
        self.coef_ = np.vstack([m.coef_ for m in self._models_])

        return self 
Example 21
def fit(self, X, y, *args, **kwargs):
        X, y = check_X_y(X, y)
        return super(Tmp, self).fit(X, y, *args, **kwargs) 
Example 22
def fit_transform(self, X, y, *args, **kwargs):
        X, y = check_X_y(X, y)
        return super(Tmp, self).fit_transform(X, y, *args, **kwargs) 
Example 23
def fit_transform(self, X, y, *args, **kwargs):
        X, y = check_X_y(X, y)
        y = np.asarray(y, X.dtype)
        return super(Tmp, self).fit_transform(X, y, *args, **kwargs) 
Example 24
def fit(self, X, y, *args, **kwargs):
        X, y = check_X_y(X, y)
        return super(Tmp, self).fit(X, y, *args, **kwargs) 
Example 25
def fit_transform(self, X, y, *args, **kwargs):
        X, y = check_X_y(X, y)
        return super(Tmp, self).fit_transform(X, y, *args, **kwargs) 
Example 26
def setUp(self):
        # Define data file and read X and y
        # Generate some data if the source data is missing
        this_directory = path.abspath(path.dirname(__file__))
        mat_file = 'cardio.mat'
        try:
            mat = loadmat(path.join(*[this_directory, 'data', mat_file]))
        except (TypeError, IOError):
            print('{data_file} does not exist. Use generated data'.format(
                data_file=mat_file))
            X, y = generate_data(train_only=True)  # generate data instead
        else:
            X = mat['X']
            y = mat['y'].ravel()
            X, y = check_X_y(X, y)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.4, random_state=42)

        self.detector_list = [LOF(), LOF()]
        self.clf = LSCP(self.detector_list)
        self.clf.fit(self.X_train)
        self.roc_floor = 0.6 
Example 27
def setUp(self):
        # Define data file and read X and y
        # Generate some data if the source data is missing
        this_directory = path.abspath(path.dirname(__file__))
        mat_file = 'pima.mat'
        try:
            mat = loadmat(path.join(*[this_directory, 'data', mat_file]))
        except (TypeError, IOError):
            print('{data_file} does not exist. Use generated data'.format(
                data_file=mat_file))
            X, y = generate_data(train_only=True)  # generate data instead
        else:
            X = mat['X']
            y = mat['y'].ravel()
            X, y = check_X_y(X, y)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.4, random_state=42)

        self.clf = XGBOD(random_state=42)
        self.clf.fit(self.X_train, self.y_train)

        self.roc_floor = 0.75 
Example 28
def fit(self, X, y=None):
        X, y = check_X_y(X, y)
        return self 
Example 29
def fit(self, X, y=None):
        self.wrong_attribute = 0
        X, y = check_X_y(X, y)
        return self 
Example 30
def fit(self, X, y=None):
        self.wrong_attribute = 1
        X, y = check_X_y(X, y)
        return self