Python源码示例:sklearn.utils.validation.check_X_y()
示例1
def fit(self, X, y):
    """Validate the training data and reject sparse input with int64 indices.

    ``X`` may be dense or a CSR/CSC/COO sparse matrix; ``y`` must be numeric
    and may have several outputs. Nothing is learned here.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If a sparse ``X`` stores any of its index arrays as ``int64``.
    """
    X, y = check_X_y(
        X,
        y,
        accept_sparse=("csr", "csc", "coo"),
        accept_large_sparse=True,
        multi_output=True,
        y_numeric=True,
    )
    if not sp.issparse(X):
        return self

    # Pick the index arrays that belong to the matrix's storage format.
    fmt = X.getformat()
    if fmt == "coo":
        index_arrays = (X.row, X.col)
    elif fmt in ["csc", "csr"]:
        index_arrays = (X.indices, X.indptr)
    else:
        index_arrays = ()

    if any(arr.dtype == "int64" for arr in index_arrays):
        raise ValueError("Estimator doesn't support 64-bit indices")
    return self
示例2
def fit(self, x, y):
    """Fit ``coef_`` from the rows labelled ``1`` and the rows labelled ``0``.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Training samples.
    y : array-like, shape (n_samples,)
        Labels; rows with ``y == 1`` and rows with ``y == 0`` are used.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If ``self.basis`` is neither ``'gauss'`` nor ``'lm'``.
    """
    check_classification_targets(y)
    x, y = check_X_y(x, y)
    x_p, x_u = x[y == +1, :], x[y == 0, :]
    n_u = x_u.shape[0]

    if self.basis == 'gauss':
        # Use at most ``n_basis`` randomly chosen ``y == 0`` rows as centers.
        b = np.minimum(n_u, self.n_basis)
        center_index = np.random.permutation(n_u)[:b]
        self._x_c = x_u[center_index, :]
    elif self.basis == 'lm':
        b = x_p.shape[1] + 1
    else:
        # The original formatted the undefined name ``basis`` here, which
        # raised NameError instead of the intended ValueError.
        raise ValueError('Invalid basis type: {}.'.format(self.basis))

    k_p, k_u = self._ker(x_p), self._ker(x_u)
    H = k_u.T.dot(k_u) / n_u
    h = 2 * self.prior * np.mean(k_p, axis=0) - np.mean(k_u, axis=0)
    R = self.lam * np.eye(b)
    # Regularised linear system (H + R) coef = h.
    self.coef_ = sp.linalg.solve(H + R, h)
    return self
示例3
def fit(self, x, y):
    """Fit ``coef_`` by solving a regularised linear system.

    Rows with ``y == 1`` and rows with ``y == 0`` are mapped through
    ``self._basis`` and combined with ``self.prior`` and ``self.lam``.

    Returns
    -------
    self : object
        Returns self.
    """
    check_classification_targets(y)
    x, y = check_X_y(x, y)

    rows_s = x[y == +1, :]
    rows_u = x[y == 0, :]
    n_u = len(rows_u)

    prior_pos = self.prior
    prior_neg = 1 - self.prior
    prior_s = prior_pos ** 2 + prior_neg ** 2

    feats_s = self._basis(rows_s)
    feats_u = self._basis(rows_u)
    dim = feats_u.shape[1]

    # Note that `2 *` is needed for `b` while this coefficient does not seem
    # to appear in the original paper at a glance.
    # This is because `k_s.T.mean` takes the mean over `2 * n_s` entries,
    # while the division is taken with `n_s` in the original paper.
    gram_u = feats_u.T.dot(feats_u)
    A = (prior_pos - prior_neg) / n_u * (gram_u + 2 * self.lam * n_u * np.eye(dim))
    b = 2 * prior_s * feats_s.T.mean(axis=1) - feats_u.T.mean(axis=1)

    self.coef_ = np.linalg.solve(A, b)
    return self
示例4
def fit(self, X, y):
    """Scale ``X`` by a log-count ratio and fit a logistic regression on it.

    The ratio compares the smoothed column sums of the ``y == 1`` rows with
    those of the ``y == 0`` rows. It is stored in ``self._r``; the fitted
    classifier is stored in ``self._clf``.

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, accept_sparse=True)

    def pr(X, y_i, y):
        # Add-one smoothed column sums for the rows whose label is ``y_i``.
        mask = y == y_i
        return (X[mask].sum(0) + 1) / (mask.sum() + 1)

    log_ratio = np.log(pr(X, 1, y) / pr(X, 0, y))
    self._r = sparse.csr_matrix(log_ratio)

    scaled = X.multiply(self._r)
    classifier = LogisticRegression(
        C=self.C,
        dual=self.dual,
        n_jobs=self.n_jobs
    )
    self._clf = classifier.fit(scaled, y)
    return self
示例5
def fit(self, X, y):
    """Scale ``X`` by a log-count ratio and fit a logistic regression on it.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Training samples.
    y : array-like or pandas object
        Labels; the ratio is computed from rows with ``y == 1`` and ``y == 0``.

    Returns
    -------
    self : object
        Returns self.
    """
    # Unwrap pandas containers when present. The original did ``y.values``
    # unconditionally, which raised AttributeError for ndarray or list targets.
    y = getattr(y, "values", y)
    # Check that X and y have correct shape
    X, y = check_X_y(X, y, accept_sparse=True)

    def pr(X, y_i, y):
        # Add-one smoothed column sums for the rows whose label is ``y_i``.
        p = X[y == y_i].sum(0)
        return (p + 1) / ((y == y_i).sum() + 1)

    self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
    X_nb = X.multiply(self._r)
    self._clf = LogisticRegression(
        C=self.C,
        dual=self.dual,
        n_jobs=self.n_jobs
    ).fit(X_nb, y)
    return self
示例6
def fit(self, x, y):
    """Validate and preprocess the data, run ``jmap`` and store its outputs.

    Returns
    -------
    self : object
        Returns self.
    """
    # Dense input only, numeric single-output target.
    x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x,
        y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
    )

    estimate = jmap(
        y,
        x,
        self.ae0,
        self.be0,
        self.af0,
        self.bf0,
        max_iter=self.max_iter,
        tol=self.tol,
    )
    coef_est, vf_est, ve_est, sigma_est = estimate

    self.X_offset_ = X_offset
    self.X_scale_ = X_scale
    self.sigma_ = sigma_est
    self.ve_ = ve_est
    self.vf_ = vf_est
    self.coef_ = coef_est
    self.alpha_ = 1.0 / np.mean(ve_est)
    self.lambda_ = 1.0 / np.mean(vf_est)

    # scale_sigma receives ``self``, so it runs after the attributes above
    # have been assigned.
    scaled = scale_sigma(self, X_offset, X_scale)
    self.std_intercept_, self.std_coef_ = scaled
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
示例7
def fit(self, x, y, sample_weight=None):
    """Fit ``coef_`` with ``sparse_group_lasso`` on validated, preprocessed data.

    When ``sample_weight`` is given, the data is additionally passed through
    ``_rescale_data`` before solving.

    Returns
    -------
    self : object
        Returns self.
    """
    x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)

    preprocess_options = dict(
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
    )
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x, y, **preprocess_options
    )

    if sample_weight is not None:
        x, y = _rescale_data(x, y, sample_weight)

    self.coef_ = sparse_group_lasso(
        x,
        y,
        self.alpha,
        self.rho,
        self.groups,
        max_iter=self.max_iter,
        rtol=self.tol,
    )
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
示例8
def fit(self, x_, y, sample_weight=None):
    """Fit coefficients with ``fit_with_noise`` on validated, preprocessed data.

    Parameters
    ----------
    x_ : array-like, shape (n_samples, n_features)
        Training samples (dense only).
    y : array-like, shape (n_samples,)
        Numeric, single-output target.
    sample_weight : array-like or None
        Optional weights, applied through ``_rescale_data``.

    Returns
    -------
    self : object
        Returns self.
    """
    # The original unpacked ``x_.shape`` before validation (unused, and a
    # crash for list input) and then preprocessed the raw ``x_`` rather than
    # the validated array returned by check_X_y.
    X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X,
        y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        # NOTE(review): the weights are deliberately not forwarded here in the
        # original; confirm whether centering should be weighted as well.
        sample_weight=None,
    )
    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        x, y = _rescale_data(x, y, sample_weight)
    coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
    self.intercept_ = intercept
    self.coef_ = coefs
    # NOTE(review): _set_intercept presumably recomputes ``intercept_`` from
    # the offsets; verify that overriding the value set above is intended.
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
示例9
def validate_inputs(self, X, y):
    """Validate ``X`` and ``y`` and return them as arrays.

    Things we don't want to allow until we've tested them:
    - Sparse inputs
    - Multiclass outputs (e.g., more than 2 classes in `y`)
    - Non-finite inputs
    - Complex inputs

    Returns
    -------
    (X, y) : tuple
        The validated arrays; object-dtype arrays are cast to float / int.

    Raises
    ------
    ValueError
        For non-binary targets, complex data, or object data that cannot be
        converted to numbers.
    """
    X, y = check_X_y(X, y, accept_sparse=False, allow_nd=False)
    # assert_all_finite validates a single array per call. The original
    # ``assert_all_finite(X, y)`` passed ``y`` as the second positional
    # argument, which is not an array to validate, so ``y`` was never checked.
    assert_all_finite(X)
    assert_all_finite(y)
    if type_of_target(y) != 'binary':
        raise ValueError("Non-binary targets not supported")
    if np.any(np.iscomplex(X)) or np.any(np.iscomplex(y)):
        raise ValueError("Complex data not supported")
    if np.issubdtype(X.dtype, np.object_) or np.issubdtype(y.dtype, np.object_):
        try:
            X = X.astype(float)
            y = y.astype(int)
        except (TypeError, ValueError):
            # NOTE(review): the message looks like a regex pattern that some
            # caller or test matches against; kept verbatim.
            raise ValueError("argument must be a string.* number")
    return (X, y)
示例10
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    """Validate the inputs, store the hyper-parameters and grow the tree.

    Raises
    ------
    ValueError
        If ``max_depth`` is smaller than 2.
    """
    # A tree needs a depth of at least 2.
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # Validate a copy of the arrays without forcing a dtype; classifiers
    # additionally have their target values checked.
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # Keep the hyper-parameters so the model can be inspected later.
    self.min_samples_split = min_samples_split
    self.max_depth = max_depth
    self.n_val_sample = n_val_sample
    self.random_state = random_state

    # Build the splitter from a resolved random state.
    rng = check_random_state(random_state)
    self.splitter = RandomSplitter(rng, criterion, n_val_sample)

    # Grow the tree depth first, starting from depth 0.
    self.tree = self._find_next_split(X, y, 0)
示例11
def _check_X_y(self, X, y):
    """Validate ``X``/``y`` for binary targets and encode ``y`` as -1/+1 doubles.

    The fitted ``LabelBinarizer`` is kept in ``self.label_binarizer_``.

    Raises
    ------
    TypeError
        If ``y`` has two or more columns or is not a binary target.
    """
    # Fail early with a helpful message on older scikit-learn releases.
    shape = getattr(y, 'shape', ())
    has_multiple_columns = len(shape) > 1 and shape[1] >= 2
    if has_multiple_columns or type_of_target(y) != 'binary':
        raise TypeError("Only binary targets supported. For training "
                        "multiclass or multilabel models, you may use the "
                        "OneVsRest or OneVsAll metaestimators in "
                        "scikit-learn.")

    X, Y = check_X_y(
        X,
        y,
        dtype=np.double,
        accept_sparse='csc',
        multi_output=False,
    )

    binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
    self.label_binarizer_ = binarizer
    encoded = binarizer.fit_transform(Y)
    y = encoded.ravel().astype(np.double)
    return X, y
示例12
def fit(self, X, y):
    """Reference ``fit``: validate the data and mark the estimator as fitted.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training samples.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        Targets (class labels for classification, real values for
        regression).

    Returns
    -------
    self : object
        The estimator itself.
    """
    X, y = check_X_y(
        X,
        y,
        accept_sparse=True,
    )
    self.is_fitted_ = True
    # By convention `fit` hands back the estimator.
    return self
示例13
def fit(self, X, y):
    """Reference classifier ``fit``: validate and memorise the training data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training samples.
    y : array-like, shape (n_samples,)
        Integer class labels.

    Returns
    -------
    self : object
        The estimator itself.
    """
    # Validate both arrays together.
    checked_X, checked_y = check_X_y(X, y)

    # Remember the set of labels seen during fit, then the data itself.
    self.classes_ = unique_labels(checked_y)
    self.X_ = checked_X
    self.y_ = checked_y
    return self
示例14
def fit(self, X, y):
    """Validate the training data and delegate to the parent class's ``fit``.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Training samples.
    y : array of shape (n_samples)
        Class label of each sample in X.

    Returns
    -------
    self : object
        The estimator itself.
    """
    checked_X, checked_y = check_X_y(X, y)
    parent = super(Oracle, self)
    parent.fit(checked_X, checked_y)
    return self
示例15
def fit(self, X, Y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Matrix of the examples, where n_samples is the number of samples
        and n_features is the number of features.
    Y : array-like, shape = [n_samples]
        Array of the labels relative to X.

    Returns
    -------
    The result of ``_fit`` (two classes), ``_one_vs_one`` (more classes with
    ``multiclass_strategy == 'ovo'``) or ``_one_vs_rest`` (otherwise);
    presumably ``self`` in every case.

    Raises
    ------
    ValueError
        If fewer than two distinct classes are present in ``Y``.
    """
    X, Y = validation.check_X_y(X, Y, dtype=np.float64, order='C', accept_sparse='csr')
    check_classification_targets(Y)
    self.classes_ = np.unique(Y)

    n_classes = len(self.classes_)
    if n_classes < 2:
        # The original passed the count as a second exception argument, so
        # the error rendered as a tuple, and it said "almost" for "at least".
        raise ValueError(
            "The number of classes has to be at least 2; got {}".format(n_classes))

    self.multiclass_ = n_classes > 2
    if not self.multiclass_:
        return self._fit(X, Y)
    if self.multiclass_strategy == 'ovo':
        return self._one_vs_one(X, Y)
    return self._one_vs_rest(X, Y)
示例16
def fit(self, X, Y):
    """Train the eigenpro classification model.

    The labels are one-hot encoded against ``self.classes_`` and the
    resulting matrix is passed to ``_raw_fit``.

    Parameters
    ----------
    X : {float, array}, shape = [n_samples, n_raw_feature]
        The raw input feature matrix.
    Y : {float, array}, shape = [n_samples]
        The labels corresponding to the rows of X.

    Returns
    -------
    self : returns an instance of self.
    """
    X, Y = check_X_y(
        X,
        Y,
        dtype=np.float32,
        force_all_finite=True,
        multi_output=False,
        ensure_min_samples=3,
    )
    check_classification_targets(Y)
    self.classes_ = np.unique(Y)

    # Column index of every class label.
    column_of = {label: col for col, label in enumerate(self.classes_)}

    # One row per sample with a single 1 in the column of its label.
    n_samples = Y.shape[0]
    n_classes = self.classes_.shape[0]
    class_matrix = np.zeros((n_samples, n_classes))
    for row, label in enumerate(Y):
        class_matrix[row, column_of[label]] = 1

    self._raw_fit(X, class_matrix)
    return self
示例17
def check_estimation_input(X, y, is_classification=False):
    """Check input arrays.

    This function is adapted from sklearn.utils.validation.

    Parameters
    ----------
    X : nd-array or list
        Input data.
    y : nd-array, list
        Labels.
    is_classification : boolean (default=`False`)
        Whether the data is used for classification or regression tasks.

    Returns
    -------
    X : object
        The converted and validated `X` (float64, 2-D).
    y : object
        The converted and validated `y`, reshaped to a column when 1-D.
    """
    # Regression input is forced to float64 during the joint check;
    # classification input keeps check_X_y's default dtype handling.
    joint_options = {} if is_classification else {"dtype": np.float64}
    X, y = check_X_y(X, y, **joint_options)

    # TODO accept_sparse="csc"
    X = check_array(X, ensure_2d=True, dtype=np.float64)
    y = check_array(y, ensure_2d=False, dtype=None)

    if is_classification:
        check_classification_targets(y)

    y = np.atleast_1d(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    return X, y
示例18
def check_X_y(X, y, *args, **kwargs):
    """Pass-through stand-in: return ``X`` and ``y`` untouched.

    Any extra positional or keyword arguments are accepted and ignored.
    """
    unchanged = (X, y)
    return unchanged
示例19
def fit(self, X, y):
    """Fit the coefficient vector by least squares.

    ``coef_`` is the least-squares solution plus ``self.offset``;
    ``resid_`` is the second element returned by ``np.linalg.lstsq``.

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, dtype='float64')
    targets = np.asarray(y, dtype=X.dtype)

    solution = np.linalg.lstsq(X, targets, rcond=None)
    coefficients, residuals = solution[0], solution[1]

    self.coef_ = coefficients + self.offset
    self.resid_ = residuals
    return self
示例20
def fit(self, X, y):
    """Fit one ``OLS`` model per distinct label.

    Each model is trained on the boolean indicator ``y == label``; the
    per-model coefficients are stacked row-wise into ``coef_``.

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, accept_sparse=False)
    self.labels_ = np.unique(y)

    self._models_ = [OLS().fit(X, y == label) for label in self.labels_]
    per_label_coefs = [model.coef_ for model in self._models_]
    self.coef_ = np.vstack(per_label_coefs)
    return self
示例21
def fit(self, X, y, *args, **kwargs):
    """Validate ``X``/``y`` and forward everything to the parent ``fit``."""
    checked_X, checked_y = check_X_y(X, y)
    parent = super(Tmp, self)
    return parent.fit(checked_X, checked_y, *args, **kwargs)
示例22
def fit_transform(self, X, y, *args, **kwargs):
    """Validate ``X``/``y`` and forward everything to the parent ``fit_transform``."""
    checked_X, checked_y = check_X_y(X, y)
    parent = super(Tmp, self)
    return parent.fit_transform(checked_X, checked_y, *args, **kwargs)
示例23
def fit_transform(self, X, y, *args, **kwargs):
    """Validate the data, cast ``y`` to ``X``'s dtype and call the parent.

    Extra positional and keyword arguments are forwarded unchanged to the
    parent class's ``fit_transform``.
    """
    checked_X, checked_y = check_X_y(X, y)
    # Make the target share the feature matrix's dtype.
    cast_y = np.asarray(checked_y, dtype=checked_X.dtype)
    parent = super(Tmp, self)
    return parent.fit_transform(checked_X, cast_y, *args, **kwargs)
示例24
def fit(self, X, y, *args, **kwargs):
    """Run ``check_X_y`` on the data, then delegate to the parent ``fit``.

    Extra positional and keyword arguments are passed through untouched.
    """
    validated = check_X_y(X, y)
    X, y = validated
    return super(Tmp, self).fit(X, y, *args, **kwargs)
示例25
def fit_transform(self, X, y, *args, **kwargs):
    """Run ``check_X_y`` on the data, then delegate to the parent ``fit_transform``.

    Extra positional and keyword arguments are passed through untouched.
    """
    validated = check_X_y(X, y)
    X, y = validated
    return super(Tmp, self).fit_transform(X, y, *args, **kwargs)
示例26
def setUp(self):
    """Load ``cardio.mat`` (or generate data) and fit an LSCP detector.

    Populates the train/test splits, ``detector_list``, the fitted ``clf``
    and ``roc_floor`` on the test case.
    """
    # The data file is expected under ``data/`` next to this test module.
    this_directory = path.abspath(path.dirname(__file__))
    mat_file = 'cardio.mat'
    try:
        mat = loadmat(path.join(this_directory, 'data', mat_file))
    except (TypeError, IOError):
        # Fall back to generated data when the file cannot be loaded.
        print('{data_file} does not exist. Use generated data'.format(
            data_file=mat_file))
        X, y = generate_data(train_only=True)
    else:
        X = mat['X']
        y = mat['y'].ravel()
        X, y = check_X_y(X, y)

    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        X, y, test_size=0.4, random_state=42)

    self.detector_list = [LOF(), LOF()]
    self.clf = LSCP(self.detector_list)
    # Fitted on the features only; no labels are passed.
    self.clf.fit(self.X_train)
    self.roc_floor = 0.6
示例27
def setUp(self):
    """Load ``pima.mat`` (or generate data) and fit an XGBOD detector.

    Populates the train/test splits, the fitted ``clf`` and ``roc_floor``
    on the test case.
    """
    # The data file is expected under ``data/`` next to this test module.
    this_directory = path.abspath(path.dirname(__file__))
    mat_file = 'pima.mat'
    try:
        mat = loadmat(path.join(this_directory, 'data', mat_file))
    except (TypeError, IOError):
        # Fall back to generated data when the file cannot be loaded.
        print('{data_file} does not exist. Use generated data'.format(
            data_file=mat_file))
        X, y = generate_data(train_only=True)
    else:
        X = mat['X']
        y = mat['y'].ravel()
        X, y = check_X_y(X, y)

    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        X, y, test_size=0.4, random_state=42)

    self.clf = XGBOD(random_state=42)
    self.clf.fit(self.X_train, self.y_train)
    self.roc_floor = 0.75
示例28
def fit(self, X, y=None):
    """Validate ``X`` and ``y`` with ``check_X_y`` and return the estimator."""
    validated = check_X_y(X, y)
    X, y = validated
    return self
示例29
def fit(self, X, y=None):
    """Set ``wrong_attribute`` to 0, validate the data and return the estimator."""
    # The attribute is assigned before validation, so it is set even when
    # check_X_y raises.
    setattr(self, "wrong_attribute", 0)
    validated = check_X_y(X, y)
    X, y = validated
    return self
示例30
def fit(self, X, y=None):
    """Set ``wrong_attribute`` to 1, validate the data and return the estimator."""
    # The attribute is assigned before validation, so it is set even when
    # check_X_y raises.
    setattr(self, "wrong_attribute", 1)
    validated = check_X_y(X, y)
    X, y = validated
    return self