Python源码示例:sklearn.utils.validation.FLOAT_DTYPES
示例1
def fit(self, X, y=None):
    """
    Fit the underlying UMAP model on X and store the decision offset.

    :param X: array-like training data; validated by ``check_array`` with a float dtype.
    :param y: passed through to ``umap.UMAP.fit``; kept for pipeline support.
    :return: Returns an instance of self.
    :raises ValueError: if ``n_components`` is below two or ``threshold`` is falsy.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

    if self.n_components < 2:
        raise ValueError("Number of components must be at least two.")
    if not self.threshold:
        raise ValueError("The `threshold` value cannot be `None`.")

    # Collect the hyper-parameters first so the constructor call stays short.
    umap_params = {
        "n_components": self.n_components,
        "n_neighbors": self.n_neighbors,
        "min_dist": self.min_dist,
        "metric": self.metric,
        "random_state": self.random_state,
    }
    self.umap_ = umap.UMAP(**umap_params)
    self.umap_.fit(X, y)

    # The offset is simply the negated threshold.
    self.offset_ = -self.threshold
    return self
示例2
def predict_proba(self, X: np.array):
    """
    Return normalised per-class likelihoods for every row of X.

    For each class, the per-column mixture models stored in ``gmms_`` score
    their own column of X; the per-column log scores are summed, exponentiated
    and normalised so that every row sums to one.

    :param X: array-like with the same number of columns as seen during fit.
    :return: array of shape (X.shape[0], len(self.classes_)); columns follow
        the order of ``self.classes_``.
    :raises ValueError: if the number of columns differs from ``num_fit_cols_``.
    """
    check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    if self.num_fit_cols_ != X.shape[1]:
        raise ValueError(
            f"number of columns {X.shape[1]} does not match fit size {self.num_fit_cols_}"
        )

    log_probs = np.zeros((X.shape[0], len(self.classes_)))
    for label, column_models in self.gmms_.items():
        # flatnonzero yields a 1-D index array, so taking element 0 avoids the
        # deprecated conversion of a 2-D array to a Python int.
        class_idx = np.flatnonzero(self.classes_ == label)[0]
        log_probs[:, class_idx] = np.sum(
            [
                model.score_samples(np.expand_dims(X[:, col], 1))
                for col, model in enumerate(column_models)
            ],
            axis=0,
        )

    # Shift by the row maximum before exponentiating: the normalised result is
    # mathematically unchanged, but very negative log scores no longer
    # underflow to an all-zero row (which would produce 0/0 = NaN).
    log_probs -= log_probs.max(axis=1, keepdims=True)
    likelihood = np.exp(log_probs)
    return likelihood / likelihood.sum(axis=1, keepdims=True)
示例3
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True,
                     **unused_args):
    """Validate X and y and, when fitting an intercept, clip and centre them.

    When ``fit_intercept`` is true, X and y are clipped to their bounds and
    centred with ``mean`` (each call gets its own fresh ``BudgetAccountant()``).
    Otherwise the offsets are zeros and the data is returned uncentred.

    Returns the tuple ``(X, y, X_offset, y_offset, X_scale)``; ``X_scale`` is
    always an array of ones with one entry per column of X.
    """
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    n_features = X.shape[1]
    X_scale = np.ones(n_features, dtype=X.dtype)

    if not fit_intercept:
        # No centring requested: offsets are zero with a shape matching y.
        X_offset = np.zeros(n_features, dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    n_targets = y.shape[1] if y.ndim > 1 else 1
    bounds_X = check_bounds(bounds_X, n_features)
    bounds_y = check_bounds(bounds_y, n_targets)

    X = clip_to_bounds(X, bounds_X)
    y = clip_to_bounds(y, bounds_y)

    X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
    X -= X_offset  # in place, as in the original implementation

    y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale
# noinspection PyPep8Naming,PyAttributeOutsideInit
示例4
def transform(self, X=None, copy=True, is_query=False):
    """
    Return the stored document matrix, or validate and weight a query matrix.

    Parameters
    ----------
    X : array-like or sparse matrix, [n_samples, n_features]
        document-term query matrix; only read when ``is_query`` is True
    copy : boolean, optional (default=True)
        forwarded to ``check_array`` when a query is validated
    is_query : boolean (default=False)
        whether to transform a query or the documents database

    Returns
    -------
    vectors : sparse matrix, [n_samples, n_features]
        ``self._doc_matrix`` when ``is_query`` is False, otherwise the query
        as a CSR matrix (scaled column-wise by the idf weights if ``use_idf``).

    Raises
    ------
    ValueError
        if the query does not have as many columns as ``self._doc_matrix``.
    """
    if not is_query:
        return self._doc_matrix

    X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy)
    if not sp.issparse(X):
        X = sp.csr_matrix(X, dtype=np.float64)

    n_features = X.shape[1]
    expected_n_features = self._doc_matrix.shape[1]
    if n_features != expected_n_features:
        raise ValueError(
            "Input has n_features=%d while the model"
            " has been trained with n_features=%d"
            % (n_features, expected_n_features)
        )

    if self.use_idf:
        check_is_fitted(self, "_idf_diag", "idf vector is not fitted")
        # Scale every column by its idf weight without densifying the query:
        # a sparse element-wise multiply broadcasts the 1-D weight vector
        # across rows, exactly like the dense product it replaces.
        X = sp.csr_matrix(X.multiply(self._idf_diag.diagonal()))
    return X
示例5
def _validate_input(self, X):
    """Validate the imputation strategy and the input data.

    Checks that ``self.strategy`` is supported, runs ``check_array`` with a
    dtype that depends on the strategy, and rejects arrays whose dtype kind
    is not integer, unsigned, float or object.

    Returns the validated array. Raises ``ValueError`` for an unknown
    strategy, for non-numeric data with a numeric strategy, or for an
    unsupported dtype.
    """
    allowed_strategies = ["mean", "median", "most_frequent", "constant"]
    if self.strategy not in allowed_strategies:
        raise ValueError("Can only use these strategies: {0} "
                         " got strategy={1}".format(allowed_strategies,
                                                    self.strategy))

    # Numeric strategies need floats; the others keep the input dtype.
    if self.strategy in ("most_frequent", "constant"):
        dtype = None
    else:
        dtype = FLOAT_DTYPES

    # NaN may only be present when NaN itself is the missing-value marker.
    force_all_finite = not is_scalar_nan(self.missing_values)  # False ~ "allow-nan"

    try:
        X = check_array(X, accept_sparse='csc', dtype=dtype,
                        force_all_finite=force_all_finite, copy=self.copy)
    except ValueError as ve:
        if "could not convert" not in str(ve):
            raise
        # X is still the caller's raw input here and may not expose a
        # ``dtype`` (e.g. a list or a DataFrame); fall back to the type name
        # instead of masking the real problem with an AttributeError.
        kind = getattr(getattr(X, "dtype", None), "kind", type(X).__name__)
        raise ValueError("Cannot use {0} strategy with non-numeric "
                         "data. Received datatype :{1}."
                         "".format(self.strategy, kind)) from ve

    _check_inputs_dtype(X, self.missing_values)
    if X.dtype.kind not in ("i", "u", "f", "O"):
        raise ValueError("_SimpleImputer does not support data with dtype "
                         "{0}. Please provide either a numeric array (with"
                         " a floating point or integer dtype) or "
                         "categorical data represented either as an array "
                         "with integer dtype or an array of string values "
                         "with an object dtype.".format(X.dtype))
    return X
示例6
def fit(self, X, y=None):
    """Fit RobustStandardScaler to X.

    Sparse input is standardised without subtracting the mean. For dense
    input the fitted means are zeroed for every column whose fraction of
    non-zero entries is 0.3 or lower.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data to standardize.
    y : ignored

    Returns
    -------
    self : RobustStandardScaler
    """
    X = check_array(
        X,
        accept_sparse=("csr", "csc"),
        estimator=self,
        dtype=FLOAT_DTYPES,
        force_all_finite="allow-nan",
    )

    # Centring is only enabled for dense input.
    centre = not issparse(X)
    self.scaler_ = StandardScaler(with_mean=centre, with_std=True, copy=self.copy)
    self.scaler_.fit(X)

    if self.scaler_.with_mean:
        nonzero_fraction = np.count_nonzero(X, axis=0) / X.shape[0]
        keep_mean = np.where(nonzero_fraction > 0.3, 1, 0)
        self.scaler_.mean_ = self.scaler_.mean_ * keep_mean

    return self
示例7
def fit(self, X: np.ndarray, y: np.ndarray):
    """
    Fit one kernel density model and one log prior per class.

    :param X: array-like training data, validated together with y by ``check_X_y``
    :param y: array-like class labels, one per row of X
    :return: Returns an instance of self
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

    self.classes_ = unique_labels(y)
    self.models_ = {}
    self.priors_logp_ = {}

    n_total = len(X)
    for target_label in self.classes_:
        class_rows = X[y == target_label]

        # Joint distribution of the features for this class.
        density = KernelDensity(
            bandwidth=self.bandwidth,
            kernel=self.kernel,
            algorithm=self.algorithm,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breath_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        )
        self.models_[target_label] = density.fit(class_rows)

        # Log of the class frequency in the training data.
        self.priors_logp_[target_label] = np.log(len(class_rows) / n_total)

    return self
示例8
def predict_proba(self, X):
    """
    Probability estimates.

    The returned estimates for all classes are in the same order found in the `.classes_` attribute.

    :param X: array-like of shape (n_samples, n_features)
    :return: array-like of shape (n_samples, n_classes)
        Returns the probability of the sample for each class in the model,
        where classes are ordered as they are in self.classes_.
    """
    check_is_fitted(self)
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)

    log_prior = np.array(
        [self.priors_logp_[target_label] for target_label in self.classes_]
    )
    log_likelihood = np.array(
        [
            self.models_[target_label].score_samples(X)
            for target_label in self.classes_
        ]
    ).T

    # Work with the unnormalised log posterior and shift each row by its
    # maximum before exponentiating. The normalised posterior is unchanged,
    # but rows with very negative log densities no longer underflow to all
    # zeros (which would give 0/0 = NaN).
    log_joint = log_likelihood + log_prior
    log_joint -= log_joint.max(axis=1, keepdims=True)
    joint = np.exp(log_joint)
    evidence = joint.sum(axis=1, keepdims=True)
    return joint / evidence
示例9
def predict(self, X):
    """
    Predict class labels for samples in X.

    The label returned for a row is the entry of ``classes_`` with the
    highest value in ``predict_proba``.

    :param X: array_like, shape (n_samples, n_features)
    :return: array, shape (n_samples)
    """
    check_is_fitted(self)
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    probabilities = self.predict_proba(X)
    best_class_idx = np.argmax(probabilities, 1)
    return self.classes_[best_class_idx]
示例10
def score_samples(self, X):
    """Return the negated ``score_samples`` output of the fitted ``gmm_`` for X."""
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmm_", "likelihood_threshold_"])

    # Promote a 1-D input to a single-column 2-D array.
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    scores = self.gmm_.score_samples(X)
    return -scores
示例11
def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
    """
    Fit one Gaussian mixture per class found in y.

    :param X: array-like training data, validated together with y by ``check_X_y``.
    :param y: array-like class labels, one per row of X.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)

    # Every per-class mixture is built from the same hyper-parameters.
    mixture_params = {
        "n_components": self.n_components,
        "covariance_type": self.covariance_type,
        "tol": self.tol,
        "reg_covar": self.reg_covar,
        "max_iter": self.max_iter,
        "n_init": self.n_init,
        "init_params": self.init_params,
        "weights_init": self.weights_init,
        "means_init": self.means_init,
        "precisions_init": self.precisions_init,
        "random_state": self.random_state,
        "warm_start": self.warm_start,
        "verbose": self.verbose,
        "verbose_interval": self.verbose_interval,
    }

    for label in self.classes_:
        in_class = y == label
        self.gmms_[label] = GaussianMixture(**mixture_params).fit(X[in_class], y[in_class])

    return self
示例12
def predict_proba(self, X):
    """
    Return normalised per-class likelihoods for every row of X.

    Each class mixture in ``gmms_`` scores X; the log scores are exponentiated
    and normalised so that every row sums to one. Columns follow the order of
    ``self.classes_``.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmms_", "classes_"])

    log_likelihood = np.zeros((X.shape[0], self.classes_.shape[0]))
    for idx, c in enumerate(self.classes_):
        log_likelihood[:, idx] = self.gmms_[c].score_samples(X)

    # Shift each row by its maximum before exponentiating: the normalised
    # result is unchanged, but very negative log scores no longer underflow
    # to an all-zero row (0/0 = NaN). The exponential is also computed once.
    log_likelihood -= log_likelihood.max(axis=1, keepdims=True)
    likelihood = np.exp(log_likelihood)
    return likelihood / likelihood.sum(axis=1, keepdims=True)
示例13
def score_samples(self, X):
    """Return the ``score_samples`` output of the fitted ``gmm_`` multiplied by -1."""
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmm_", "likelihood_threshold_"])

    # Promote a 1-D input to a single-column 2-D array.
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    raw_scores = self.gmm_.score_samples(X)
    return raw_scores * -1
示例14
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
    """
    Fit one Bayesian Gaussian mixture per class found in y.

    :param X: array-like training data, validated together with y by ``check_X_y``.
    :param y: array-like class labels, one per row of X.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)

    # Every per-class mixture is built from the same hyper-parameters.
    mixture_params = {
        "n_components": self.n_components,
        "covariance_type": self.covariance_type,
        "tol": self.tol,
        "reg_covar": self.reg_covar,
        "max_iter": self.max_iter,
        "n_init": self.n_init,
        "init_params": self.init_params,
        "weight_concentration_prior_type": self.weight_concentration_prior_type,
        "weight_concentration_prior": self.weight_concentration_prior,
        "mean_precision_prior": self.mean_precision_prior,
        "mean_prior": self.mean_prior,
        "degrees_of_freedom_prior": self.degrees_of_freedom_prior,
        "covariance_prior": self.covariance_prior,
        "random_state": self.random_state,
        "warm_start": self.warm_start,
        "verbose": self.verbose,
        "verbose_interval": self.verbose_interval,
    }

    for label in self.classes_:
        in_class = y == label
        self.gmms_[label] = BayesianGaussianMixture(**mixture_params).fit(X[in_class], y[in_class])

    return self
示例15
def predict(self, X):
    """Return, for each row of X, the entry of ``classes_`` with the highest ``predict_proba`` value."""
    check_is_fitted(self, ["gmms_", "classes_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    probabilities = self.predict_proba(X)
    winners = probabilities.argmax(axis=1)
    return self.classes_[winners]
示例16
def predict_proba(self, X):
    """
    Return normalised per-class likelihoods for every row of X.

    Each class mixture in ``gmms_`` scores X; the log scores are exponentiated
    and normalised so that every row sums to one. Columns follow the order of
    ``self.classes_``.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmms_", "classes_"])

    log_scores = np.zeros((X.shape[0], self.classes_.shape[0]))
    for idx, c in enumerate(self.classes_):
        log_scores[:, idx] = self.gmms_[c].score_samples(X)

    # Subtract the row-wise maximum before exponentiating so that very
    # negative log scores do not underflow to an all-zero row (0/0 = NaN).
    # The normalised output is mathematically the same, and the exponential
    # is evaluated only once.
    log_scores -= log_scores.max(axis=1, keepdims=True)
    scores = np.exp(log_scores)
    return scores / scores.sum(axis=1, keepdims=True)
示例17
def transform(self, X):
    """
    Validate X and project it with the fitted ``pca_`` model.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["pca_", "offset_"])
    projected = self.pca_.transform(X)
    return projected
示例18
def predict(self, X):
    """
    Predict if a point is an outlier.

    A row is flagged as an outlier when ``self.difference(X)`` exceeds
    ``self.threshold`` for that row.

    :param X: array-like data to score; validated by ``check_array``.
    :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["pca_", "offset_"])
    result = np.ones(X.shape[0])
    result[self.difference(X) > self.threshold] = -1
    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy releases; the builtin ``int`` gives the same dtype.
    return result.astype(int)
示例19
def transform(self, X):
    """
    Validate X and embed it with the fitted ``umap_`` model.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["umap_", "offset_"])
    embedded = self.umap_.transform(X)
    return embedded
示例20
def predict(self, X):
    """
    Predict if a point is an outlier.

    A row is flagged as an outlier when ``self.difference(X)`` exceeds
    ``self.threshold`` for that row.

    :param X: array-like data to score; validated by ``check_array``.
    :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["umap_", "offset_"])
    result = np.ones(X.shape[0])
    result[self.difference(X) > self.threshold] = -1
    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy releases; the builtin ``int`` gives the same dtype.
    return result.astype(int)
示例21
def transform(self, X):
    """
    Cap the column(s) of ``X`` at the fitted quantile limits.

    :type X: pandas.DataFrame or numpy.ndarray
    :param X: The column(s) for which the capping limit(s) will be applied.

    :rtype: numpy.ndarray
    :returns: ``X`` values with capped limits.

    :raises:
        ``ValueError`` if the number of columns from ``X`` differs from the
        number of columns when fitting
    """
    check_is_fitted(self, "quantiles_")
    X = check_array(
        X,
        copy=self.copy,
        force_all_finite=False,
        dtype=FLOAT_DTYPES,
        estimator=self,
    )

    if X.shape[1] != self.n_columns_:
        raise ValueError(
            "X must have the same number of columns in fit and transform"
        )

    if self.discard_infs:
        # Replace +inf and -inf with NaN in place.
        X[np.isinf(X)] = np.nan

    # Row 0 of ``quantiles_`` holds the lower limits, row 1 the upper ones.
    lower_limits = self.quantiles_[0, :]
    upper_limits = self.quantiles_[1, :]
    capped = np.minimum(X, upper_limits)
    capped = np.maximum(capped, lower_limits)
    return capped
示例22
def fit(self, X, y):
    """Run the parent ``fit``, then record the number of columns of X in ``dim_``."""
    super().fit(X, y)
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    n_columns = X.shape[1]
    self.dim_ = n_columns
    return self
示例23
def transform_train(self, X):
    """Return X plus zero-mean normal noise with standard deviation ``self.noise``."""
    rng = check_random_state(self.random_state)
    check_is_fitted(self, ["dim_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    jitter = rng.normal(0, self.noise, size=X.shape)
    return X + jitter
示例24
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
    """
    Fit one Gaussian mixture per class and per column of X.

    :param X: array-like training data, validated together with y by ``check_X_y``.
    :param y: array-like class labels, one per row of X.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    self.num_fit_cols_ = X.shape[1]

    # Every mixture is built from the same hyper-parameters.
    mixture_params = {
        "n_components": self.n_components,
        "covariance_type": self.covariance_type,
        "tol": self.tol,
        "reg_covar": self.reg_covar,
        "max_iter": self.max_iter,
        "n_init": self.n_init,
        "init_params": self.init_params,
        "weights_init": self.weights_init,
        "means_init": self.means_init,
        "precisions_init": self.precisions_init,
        "random_state": self.random_state,
        "warm_start": self.warm_start,
    }

    for label in self.classes_:
        in_class = y == label
        class_x, class_y = X[in_class], y[in_class]

        # One independent single-column mixture per feature.
        column_models = []
        for col in range(X.shape[1]):
            single_column = class_x[:, col].reshape(-1, 1)
            column_models.append(GaussianMixture(**mixture_params).fit(single_column, class_y))
        self.gmms_[label] = column_models

    return self
示例25
def predict(self, X):
    """Return, for each row of X, the entry of ``classes_`` with the highest ``predict_proba`` value."""
    check_is_fitted(self, ["gmms_", "classes_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    class_scores = self.predict_proba(X)
    top_idx = class_scores.argmax(axis=1)
    return self.classes_[top_idx]
示例26
def fit(self, X: np.array, y: np.array) -> "BayesianGaussianMixtureNB":
    """
    Fit one Bayesian Gaussian mixture per class and per column of X.

    :param X: array-like training data, validated together with y by ``check_X_y``.
    :param y: array-like class labels, one per row of X.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)

    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    self.num_fit_cols_ = X.shape[1]

    # Every mixture is built from the same hyper-parameters.
    mixture_params = {
        "n_components": self.n_components,
        "covariance_type": self.covariance_type,
        "tol": self.tol,
        "reg_covar": self.reg_covar,
        "max_iter": self.max_iter,
        "n_init": self.n_init,
        "init_params": self.init_params,
        "weight_concentration_prior_type": self.weight_concentration_prior_type,
        "weight_concentration_prior": self.weight_concentration_prior,
        "mean_precision_prior": self.mean_precision_prior,
        "mean_prior": self.mean_prior,
        "degrees_of_freedom_prior": self.degrees_of_freedom_prior,
        "covariance_prior": self.covariance_prior,
        "random_state": self.random_state,
        "warm_start": self.warm_start,
        "verbose": self.verbose,
        "verbose_interval": self.verbose_interval,
    }

    for label in self.classes_:
        in_class = y == label
        class_x, class_y = X[in_class], y[in_class]

        # One independent single-column mixture per feature.
        column_models = []
        for col in range(X.shape[1]):
            single_column = class_x[:, col].reshape(-1, 1)
            column_models.append(BayesianGaussianMixture(**mixture_params).fit(single_column, class_y))
        self.gmms_[label] = column_models

    return self
示例27
def predict(self, X):
    """Return, for each row of X, the entry of ``classes_`` with the highest ``predict_proba`` value."""
    check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    class_scores = self.predict_proba(X)
    most_likely = class_scores.argmax(axis=1)
    return self.classes_[most_likely]
示例28
def _check_test_data(self, X):
    """Validate X and check that it has as many columns as ``cluster_centers_``.

    Returns the validated array; raises ``ValueError`` on a column-count
    mismatch.
    """
    # NOTE(review): ``warn_on_dtype`` may not be accepted by recent
    # scikit-learn releases - confirm against the pinned version.
    X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES,
                    warn_on_dtype=True)

    _, n_features = X.shape
    expected_n_features = self.cluster_centers_.shape[1]
    if n_features != expected_n_features:
        message = ("Incorrect number of features. "
                   "Got %d features, expected %d" % (
                       n_features, expected_n_features))
        raise ValueError(message)

    return X
示例29
def _initial_imputation(self, X):
    """Perform initial imputation for input X.

    The initial imputer is created and fitted on the first call (when
    ``initial_imputer_`` is None) and only applied on later calls. Columns
    whose fitted statistic is NaN are dropped from ``Xt`` and from the mask.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input data, where "n_samples" is the number of samples and
        "n_features" is the number of features.

    Returns
    -------
    Xt : ndarray, shape (n_samples, n_features)
        Input data restricted to the columns with a valid statistic.

    X_filled : ndarray, shape (n_samples, n_features)
        Input data with the most recent imputations.

    mask_missing_values : ndarray, shape (n_samples, n_features)
        Input data's missing indicator matrix, restricted to the same
        columns as ``Xt``.
    """
    # TODO: change False to "allow-nan"
    # NaN is tolerated only when NaN itself marks the missing values.
    force_all_finite = not is_scalar_nan(self.missing_values)

    X = check_array(X, dtype=FLOAT_DTYPES, order="F",
                    force_all_finite=force_all_finite)
    _check_inputs_dtype(X, self.missing_values)

    missing_mask = _get_mask(X, self.missing_values)

    if self.initial_imputer_ is not None:
        X_filled = self.initial_imputer_.transform(X)
    else:
        self.initial_imputer_ = _SimpleImputer(
            missing_values=self.missing_values,
            strategy=self.initial_strategy)
        X_filled = self.initial_imputer_.fit_transform(X)

    # Keep only the columns for which the imputer produced a statistic.
    statistics = self.initial_imputer_.statistics_
    valid_columns = np.flatnonzero(~np.isnan(statistics))
    Xt = X[:, valid_columns]
    missing_mask = missing_mask[:, valid_columns]

    return Xt, X_filled, missing_mask