Python源码示例:sklearn.utils.validation.check_array()
示例1
def _joint_log_likelihood(self, X):
    """Return the joint log-likelihood of every sample for every class.

    ``X`` is validated (CSR sparse input is accepted), passed through
    ``self._transform_data`` and multiplied by the transpose of
    ``self.feature_log_prob_``; ``self.class_log_prior_`` is then added.

    Raises
    ------
    ValueError
        If the transformed input does not have as many features as
        ``self.feature_log_prob_`` has columns.
    """
    check_is_fitted(self, "classes_")
    transformed = self._transform_data(check_array(X, accept_sparse='csr'))

    _, expected_features = self.feature_log_prob_.shape
    _, received_features = transformed.shape
    if received_features != expected_features:
        raise ValueError(
            "Expected input with %d features, got %d instead" %
            (expected_features, received_features))

    # see chapter 4.1 of http://www.cs.columbia.edu/~mcollins/em.pdf
    # implementation as in Formula 4.
    scores = safe_sparse_dot(transformed, self.feature_log_prob_.T)
    scores += self.class_log_prior_
    return scores
示例2
def from_array(X, column_names=None):
    """Convert an array into an H2OFrame by way of a pandas DataFrame.

    The array is validated with ``check_array`` (non-finite values are
    allowed), turned into a ``pandas.DataFrame`` and handed to
    ``from_pandas``.

    Parameters
    ----------
    X : ndarray
        The array to convert.

    column_names : list, tuple (default=None)
        The names to use for the columns.

    Returns
    -------
    The value returned by ``from_pandas`` (documented upstream as an
    H2OFrame).
    """
    validated = check_array(X, force_all_finite=False)
    frame = pd.DataFrame.from_records(data=validated, columns=column_names)
    return from_pandas(frame)
示例3
def _diff_inv_matrix(x, lag, differences, xi):
    """Apply ``_diff_inv_vector`` to every column of the 2-d array ``x``.

    Parameters
    ----------
    x : 2-d array
        Input whose columns are processed one at a time.
    lag, differences : int
        Forwarded to ``_diff_inv_vector``; their product is the number of
        rows added to the output and the required row count of ``xi``.
    xi : array-like or None
        Initial values, one column per column of ``x``. ``None`` means all
        zeros.

    Returns
    -------
    ndarray of shape ``(x.shape[0] + lag * differences, x.shape[1])``.

    Raises
    ------
    IndexError
        If ``xi`` is given but is not of shape
        ``(lag * differences, x.shape[1])``.
    """
    n_rows, n_cols = x.shape
    n_initial = lag * differences
    result = np.zeros((n_rows + n_initial, n_cols), dtype=DTYPE)

    # todo: R checks this. do we need to?
    if n_cols < 1:
        return result

    # R: if(missing(xi)) xi <- matrix(0.0, lag*differences, m)
    if xi is None:
        xi = np.zeros((n_initial, n_cols), dtype=DTYPE)
    else:
        xi = check_array(
            xi, dtype=DTYPE, copy=False, force_all_finite=False,
            ensure_2d=True)
        if xi.shape != (n_initial, n_cols):
            raise IndexError('"xi" does not have the right shape')

    # TODO: can we vectorize?
    for col in range(n_cols):
        result[:, col] = _diff_inv_vector(
            x[:, col], lag, differences, xi[:, col])
    return result
示例4
def _seasonal_prediction_with_confidence(arima_res, start, end, exog, alpha,
                                         **kwargs):
    """Return a prediction and its confidence intervals for a SARIMAX fit.

    ``arima_res.get_prediction`` is called directly and both the predicted
    mean and the confidence interval are unpacked from its result, since
    they are not returned together out of the box.

    Returns
    -------
    tuple
        ``(predicted_mean, conf_int)``; the first element is validated
        with ``check_endog`` and the second with ``check_array``, both
        without a forced copy and with the input dtype preserved.

    Notes
    -----
    For internal use only.
    """
    prediction = arima_res.get_prediction(
        start=start, end=end, exog=exog, **kwargs)

    forecast = prediction.predicted_mean
    intervals = prediction.conf_int(alpha=alpha)

    checked_forecast = check_endog(forecast, dtype=None, copy=False)
    checked_intervals = check_array(intervals, copy=False, dtype=None)
    return checked_forecast, checked_intervals
示例5
def fit(self, X, y_orig):
    """Fit the model by delegating to ``self.build_with_ga``.

    Every entry of ``self.aggregation_rules`` that is not already an
    ``AggregationRuleFactory`` is wrapped in a
    ``DummyAggregationRuleFactory``. The unique class labels and the
    number of features are then recorded on the estimator.

    Parameters
    ----------
    X : array-like
        Training samples; validated with ``check_array``.
    y_orig : array-like
        Class label of each sample.

    Returns
    -------
    self

    Raises
    ------
    Exception
        If any class label is NaN.
    """
    self.aggregation_rules__ = [
        rule if isinstance(rule, AggregationRuleFactory)
        else DummyAggregationRuleFactory(rule)
        for rule in self.aggregation_rules
    ]

    X = check_array(X)

    self.classes_ = np.unique(y_orig)
    self.m = X.shape[1]

    # ``np.nan in array`` compares with ``==`` and NaN is never equal to
    # itself, so that membership test cannot detect NaN labels. NaN is the
    # only label value that differs from itself, which is what we test.
    if any(label != label for label in self.classes_):
        raise Exception("nan not supported for class values")

    self.build_with_ga(X, y_orig)
    return self
示例6
def predict(self, X):
    """Predict outputs for the given examples.

    Parameters
    ----------
    X : array or matrix
        The examples to predict; validated with ``check_array``.

    Returns
    -------
    The value returned by ``_predict`` for the stored prototypes,
    aggregation and classes.

    Raises
    ------
    Exception
        If ``self.protos_`` is ``None`` (no fit has been performed).
    """
    prototypes = self.protos_
    if prototypes is None:
        raise Exception("Prototypes not initialized. Perform a fit first.")

    samples = check_array(X)
    return _predict(prototypes, self.aggregation, self.classes_, samples)
示例7
def predict(self, X):
    """Predict outputs for the given examples.

    Parameters
    ----------
    X : array or matrix
        The examples to predict; validated with ``check_array``.

    Returns
    -------
    The value returned by ``_predict_multi`` for the stored prototypes,
    aggregation, classes and ``self.n_features``.

    Raises
    ------
    Exception
        If ``self.protos_`` is ``None`` (no fit has been performed).
    """
    prototypes = self.protos_
    if prototypes is None:
        raise Exception("Prototypes not initialized. Perform a fit first.")

    samples = check_array(X)
    return _predict_multi(
        prototypes, self.aggregation, self.classes_, samples,
        self.n_features)
示例8
def fit(self, X, y):
    """Build one model row per class from the training data.

    Parameters
    ----------
    X : array-like
        Training samples; validated with ``check_array``.
    y : array-like
        Class label of each sample. Plain Python sequences are accepted.

    Returns
    -------
    self
        With ``classes_``, ``distance_`` and ``models_`` set.
    """
    X = check_array(X)

    # Coerce the labels to an ndarray: comparing a plain list against a
    # scalar yields a single ``False`` rather than an element-wise mask,
    # which would silently mis-index ``X`` below.
    y = np.asarray(y)
    self.classes_ = np.unique(y)

    # construct distance measure
    self.distance_ = self.df(X)

    # build models: one row per class, one column per feature
    models = np.zeros((len(self.classes_), X.shape[1]))
    for c_idx, c_value in enumerate(self.classes_):
        models[c_idx, :] = self.build_for_class(X[y == c_value])

    self.models_ = models
    return self
示例9
def fit(self, X, y):
    """Learn per-class membership functions and an aggregation.

    Parameters
    ----------
    X : array-like
        Training samples; validated with ``check_array``.
    y : array-like
        Class labels; replaced internally by their indices into
        ``classes_``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the label ``"?"`` occurs among the classes.
    """
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)
    if "?" in tuple(self.classes_):
        raise ValueError("nan not supported for class values")

    # build membership functions for each feature for each class
    prototypes = []
    for class_idx in range(len(self.classes_)):
        class_mask = y == class_idx
        prototypes.append(
            build_memberships(X, class_mask, self.membership_factory))
    self.protos_ = prototypes

    # build aggregation
    self.aggregation_ = self.aggregation_factory(
        self.protos_, X, y, self.classes_)
    return self
示例10
def fit(self, X, y):
    """Learn prototypes and an aggregation per class, plus a selector.

    Parameters
    ----------
    X : array-like
        Training samples; validated with ``check_array``.
    y : array-like
        Class labels; replaced internally by their indices into
        ``classes_``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the label ``"?"`` occurs among the classes.
    """
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)
    if "?" in tuple(self.classes_):
        raise ValueError("nan not supported for class values")

    # build membership functions for each feature for each class
    learned = []
    for class_idx in range(len(self.classes_)):
        learned.append(
            learn_class(X, y, y == class_idx, self.membership_factory,
                        self.aggregation_factory))

    logger.info("learned %s" % (str(learned),))

    # each learned entry is indexed as (prototypes, aggregation)
    self.protos_ = [entry[0] for entry in learned]
    self.aggregations_ = [entry[1] for entry in learned]

    self.selection_method_ = self.selection_factory(X, y)
    return self
示例11
def fit(self, X, y):
    """Build one pattern tree per class.

    Parameters
    ----------
    X : array-like
        Training samples; validated with ``check_array``.
    y : array-like
        Class labels; replaced internally by their indices into
        ``classes_``.

    Returns
    -------
    self
        With ``classes_`` and ``trees_`` (class index -> tree root) set.

    Raises
    ------
    Exception
        If any class label is NaN.
    """
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)

    # ``np.nan in array`` compares with ``==`` and NaN is never equal to
    # itself, so that membership test cannot detect NaN labels. NaN is the
    # only label value that differs from itself, which is what we test.
    if any(label != label for label in self.classes_):
        raise Exception("nan not supported for class values")

    self.trees_ = {}

    # build membership functions, feature by feature
    P = []
    for feature_idx, feature in enumerate(X.T):
        P.extend(self.fuzzifier(feature_idx, feature))

    # build the pattern tree for each class
    for class_idx in range(len(self.classes_)):
        # 1.0 where the sample belongs to this class, 0.0 elsewhere
        class_vector = np.zeros(len(y))
        class_vector[y == class_idx] = 1.0
        # every class gets its own copy of the membership-function list
        self.trees_[class_idx] = self.build_for_class(
            X, y, class_vector, list(P))

    return self
示例12
def predict(self, X):
    """Predict the class of each sample in X.

    Each class's tree is evaluated on ``X`` and the class with the
    largest output is chosen for every sample.

    Parameters
    ----------
    X : Array-like of shape [n_samples, n_features]
        The input to classify.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted classes.

    Raises
    ------
    Exception
        If ``self.trees_`` is ``None``.
    """
    X = check_array(X)

    if self.trees_ is None:
        raise Exception("Pattern trees not initialized. Perform a fit first.")

    n_classes = len(self.classes_)
    scores = np.zeros((X.shape[0], n_classes))
    for class_idx in range(n_classes):
        scores[:, class_idx] = self.trees_[class_idx](X)

    # predict the maximum value
    best = np.argmax(scores, -1)
    return self.classes_.take(best)
示例13
def decision_function(self, X):
    """Return the raw anomaly score of X from the fitted detector.

    The score is whatever ``self.detector_.mahalanobis`` returns for the
    validated input.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
    samples = check_array(X)

    # Compute the Mahalanobis distance of the samples
    distances = self.detector_.mahalanobis(samples)
    return distances
示例14
def transform(self, X):
    """Fill the NaN entries of X using the fitted estimators.

    ``X`` is copied as float64. With more than one fitted estimator,
    estimator ``i`` predicts the missing entries of column ``i`` from the
    remaining columns of the initially-imputed data. With a single
    estimator, missing entries are taken from its
    ``inverse_transform(transform(...))`` reconstruction.

    Returns
    -------
    ndarray
        The copy of ``X`` with its NaN positions overwritten.

    Raises
    ------
    ValueError
        If ``X`` does not have as many columns as ``self.statistics_``.
    """
    check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])
    X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)

    if X.shape[1] != self.statistics_.shape[1]:
        raise ValueError("X has %d features per sample, expected %d"
                         % (X.shape[1], self.statistics_.shape[1]))

    missing = np.isnan(X)
    imputed = self.initial_imputer.transform(X)

    if len(self.estimators_) <= 1:
        # single estimator: reconstruct and copy over the missing cells
        estimator_ = self.estimators_[0]
        X[missing] = estimator_.inverse_transform(estimator_.transform(imputed))[missing]
        return X

    for col, estimator_ in enumerate(self.estimators_):
        other_columns = np.delete(imputed, col, 1)
        rows_missing = missing[:, col]
        unknown_rows = other_columns[rows_missing]
        # nothing to predict when this column has no missing entries
        if len(unknown_rows) > 0:
            X[rows_missing, col] = estimator_.predict(unknown_rows)
    return X
示例15
def predict(self, X):
    """Apply the learned event segmentation to a new dataset.

    Alternative to calling ``find_events`` directly, provided to comply
    with the sklearn Classifier interface.

    Parameters
    ----------
    X: timepoint by voxel ndarray
        fMRI data to segment based on previously-learned event patterns

    Returns
    -------
    Event label for each timepoint (index of the largest entry in each
    row of the segmentation returned by ``find_events``).
    """
    check_is_fitted(self, ["event_pat_", "event_var_"])
    data = check_array(X)

    # find_events returns a pair; only the segmentation is needed here
    segmentation = self.find_events(data)[0]
    return np.argmax(segmentation, axis=1)
示例16
def fit(self, X, y=None):
    """Run the parent ``fit`` and record which columns are nonnegative.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and
        two-dimensional.

    y : ignored

    Returns
    -------
    self : LogExtremeValueTransformer
    """
    super().fit(X)
    X = check_array(X)

    # indices of the columns whose every entry is >= 0
    nonnegative = []
    for col in range(self.n_input_features_):
        if np.all(X[:, col] >= 0):
            nonnegative.append(col)
    self.nonnegative_cols_ = nonnegative
    return self
示例17
def fit(self, X, y=None):
    """Run the parent ``fit`` and fit a ``QuantileTransformer`` on X.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and
        two-dimensional.

    y : ignored

    Returns
    -------
    self : QuantileExtremeValueTransformer
    """
    super().fit(X)
    validated = check_array(X)

    # fixed random_state keeps the fitted quantiles reproducible
    quantiles = QuantileTransformer(random_state=0, copy=True)
    self.quantile_transformer_ = quantiles
    quantiles.fit(validated)
    return self
示例18
def fit(self, X, y=None):
    """Flag the columns of X that are single-valued or entirely NaN.

    Parameters
    ----------
    X : array of shape [n_samples, n_features]
        Input samples from which to check uniqueness. Non-finite values
        are allowed.

    y : ignored

    Returns
    -------
    self
        With ``n_input_features_`` and the boolean mask
        ``cols_to_transform_`` set.
    """
    X = check_array(X, force_all_finite=False)
    _, self.n_input_features_ = X.shape

    all_nan_cols = np.all(np.isnan(X), axis=0)

    # True for every column holding exactly one distinct value
    single_valued_cols = np.array([
        np.unique(X[:, col]).size == 1
        for col in range(self.n_input_features_)
    ])

    self.cols_to_transform_ = np.logical_or(single_valued_cols, all_nan_cols)
    return self
示例19
def transform(self, X=None, copy=True, is_query=False):
    """Return the stored document matrix, or a transformed query matrix.

    Parameters
    ----------
    X : sparse matrix, [n_samples, n_features]
        Document-term query matrix. Only used when ``is_query`` is True.

    copy : boolean, optional (default=True)
        Forwarded to ``check_array`` when validating ``X``.

    is_query : boolean (default=False)
        Whether to transform a query (True) or return the documents
        database (False).

    Returns
    -------
    vectors : sparse matrix, [n_samples, n_features]
        ``self._doc_matrix`` when ``is_query`` is False; otherwise ``X`` as
        a sparse matrix, scaled column-wise by the diagonal of
        ``self._idf_diag`` when ``self.use_idf`` is set.

    Raises
    ------
    ValueError
        If the query's feature count differs from that of
        ``self._doc_matrix``.
    """
    if not is_query:
        return self._doc_matrix

    X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy)
    if not sp.issparse(X):
        X = sp.csr_matrix(X, dtype=np.float64)

    n_samples, n_features = X.shape
    expected_n_features = self._doc_matrix.shape[1]
    if n_features != expected_n_features:
        raise ValueError(
            "Input has n_features=%d while the model"
            " has been trained with n_features=%d"
            % (n_features, expected_n_features)
        )

    if self.use_idf:
        # ``msg`` is keyword-only in current scikit-learn releases
        check_is_fitted(self, "_idf_diag", msg="idf vector is not fitted")
        # Scale each column by its idf weight while staying sparse; going
        # through ``toarray()`` would materialise the full dense matrix.
        idf_weights = self._idf_diag.diagonal()
        X = sp.csr_matrix(X.multiply(idf_weights))

    return X
示例20
def predict_moments(self, X):
    """
    Full predictive distribution from Bayesian linear regression.

    Parameters
    ----------
    X : ndarray
        (N*,d) array query input dataset (N* samples, d dimensions).

    Returns
    -------
    Ey : ndarray
        The expected value of y* for the query inputs, X* of shape (N*,).
    Vy : ndarray
        The expected variance of y* for the query inputs, X* of shape
        (N*,).
    """
    check_is_fitted(self, ['var_', 'regularizer_', 'weights_',
                           'covariance_', 'hypers_'])
    X = check_array(X)

    basis_args = atleast_list(self.hypers_)
    design = self.basis.transform(X, *basis_args)

    expected = design.dot(self.weights_)
    # row-wise sum of (design @ covariance) * design, then the noise term
    projected = design.dot(self.covariance_)
    latent_variance = (projected * design).sum(axis=1)
    return expected, latent_variance + self.var_
示例21
def check_endog(y, dtype=DTYPE, copy=True, force_all_finite=False):
    """Validate ``y`` with ``check_array`` and flatten it with ``column_or_1d``.

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        The 1d endogenous array.

    dtype : string, type or None (default=DTYPE)
        Data type of result, forwarded to ``check_array``. If None, the
        dtype of the input is preserved. If "numeric", dtype is preserved
        unless array.dtype is object.

    copy : bool, optional (default=True)
        Whether a forced copy will be triggered. If copy=False, a copy might
        still be triggered by a conversion.

    force_all_finite : bool, optional (default=False)
        Whether to raise an error on np.inf and np.nan in an array. The
        possibilities are:

        - True: Force all values of array to be finite.
        - False: accept both np.inf and np.nan in array.

    Returns
    -------
    y : np.ndarray, shape=(n_samples,)
        A 1d numpy ndarray
    """
    validated = check_array(
        y,
        ensure_2d=False,
        force_all_finite=force_all_finite,
        copy=copy,
        dtype=dtype,
    )
    return column_or_1d(validated)  # type: np.ndarray
示例22
def check_estimation_input(X, y, is_classification=False):
    """Check input arrays.

    This function is adapted from sklearn.utils.validation.

    Parameters
    ----------
    X : nd-array or list
        Input data.
    y : nd-array, list
        Labels.
    is_classification : boolean (default=`False`)
        Whether the data is used for classification or regression tasks.
        For regression the first validation pass forces float64.

    Returns
    -------
    X : object
        The converted and validated `X`.
    y : object
        The converted and validated `y`; 1-d labels are reshaped into a
        single column.
    """
    # only the regression path forces float64 in the joint check
    joint_options = {} if is_classification else {"dtype": np.float64}
    X, y = check_X_y(X, y, **joint_options)

    # TODO accept_sparse="csc"
    X = check_array(X, ensure_2d=True, dtype=np.float64)
    y = check_array(y, ensure_2d=False, dtype=None)

    if is_classification:
        check_classification_targets(y)

    y = np.atleast_1d(y)
    return X, (np.reshape(y, (-1, 1)) if y.ndim == 1 else y)
示例23
def predict(self, X, y=None):
    """Predict output of data X.

    Parameters
    ----------
    X : array-like matrix of shape = [n_samples, n_features]
        The prediction input samples.
    y : None, optional
        Ignored.

    Returns
    -------
    y_pred : np.ndarray
        One ``calc_estimation_output(..., mode="bmu")`` result per row
        of `X`.

    Examples
    --------
    Fit the SOM on your data `X, y`:

    >>> import susi
    >>> som = susi.SOMClassifier()
    >>> som.fit(X, y)
    >>> y_pred = som.predict(X)

    """
    # Check that fit has been called
    check_is_fitted(self, ['X_', 'y_'])

    # Input validation
    X = check_array(X, dtype=np.float64)

    # one estimate per datapoint, with a progress bar
    progress = tqdm(X, desc="predict", **self.tqdm_params_)
    estimates = [
        self.calc_estimation_output(datapoint, mode="bmu")
        for datapoint in progress
    ]
    return np.array(estimates)
示例24
def fit_transform(self, X, y=None):
    """Fit to the input data and transform it.

    Parameters
    ----------
    X : array-like matrix of shape = [n_samples, n_features]
        The training and prediction input samples.
    y : array-like matrix of shape = [n_samples, 1]
        The labels (ground truth) of the input samples

    Returns
    -------
    The value returned by ``self.transform(X, y)`` (documented upstream
    as an np.array of (int, int) BMU tuples, one per datapoint).

    Examples
    --------
    Load the SOM, fit it to your input data `X` and transform your input
    data with:

    >>> import susi
    >>> som = susi.SOMClassifier()
    >>> tuples = som.fit_transform(X, y)

    """
    self.fit(X, y)
    # store the validated float64 input on the estimator after fitting
    validated = check_array(X, dtype=np.float64)
    self.X_ = validated
    return self.transform(X, y)
示例25
def fit(self, X, y=None):
    """Fit unsupervised SOM to input data.

    Seeds NumPy's global random generator with ``self.random_state``,
    stores the validated input and unit sample weights, then runs
    ``train_unsupervised_som``.

    Parameters
    ----------
    X : array-like matrix of shape = [n_samples, n_features]
        The training input samples.
    y : None
        Not used in this class.

    Returns
    -------
    self : object

    Examples
    --------
    Load the SOM and fit it to your input data `X` with:

    >>> import susi
    >>> som = susi.SOMClustering()
    >>> som.fit(X)

    """
    np.random.seed(seed=self.random_state)

    # TODO accept_sparse
    self.X_ = check_array(X, dtype=np.float64)

    # one weight of 1.0 per sample, stored as a column
    n_samples = len(self.X_)
    self.sample_weights_ = np.full(shape=(n_samples, 1), fill_value=1.)

    self.train_unsupervised_som()
    self.fitted_ = True
    return self
示例26
def transform(self, X, y=None):
    """Transform input data.

    The validated input is stored on ``self.X_`` and passed to
    ``self.get_bmus``.

    Parameters
    ----------
    X : array-like matrix of shape = [n_samples, n_features]
        The prediction input samples.
    y : None, optional
        Ignored.

    Returns
    -------
    np.array
        Array built from the result of ``get_bmus`` (documented upstream
        as (int, int) BMU tuples, one per datapoint).

    Examples
    --------
    Load the SOM, fit it to your input data `X` and transform your input
    data with:

    >>> import susi
    >>> som = susi.SOMClustering()
    >>> som.fit(X)
    >>> X_transformed = som.transform(X)

    """
    # NOTE: the fitted state is not verified here.
    validated = check_array(X, dtype=np.float64)
    self.X_ = validated
    bmus = self.get_bmus(validated)
    return np.array(bmus)
示例27
def check_array(X, *args, **kwargs):
    """Pass-through stand-in: return ``X`` unchanged.

    Any additional positional or keyword arguments are accepted and
    ignored; no validation or conversion is performed.
    """
    unchanged = X
    return unchanged
示例28
def predict(self, X):
    """Predict with fitted weights.

    Returns the dot product of the validated dense input with the
    transpose of ``self.coef_``.

    Raises
    ------
    NotFittedError
        If the estimator has no ``coef_`` attribute.
    """
    fitted = hasattr(self, 'coef_')
    if not fitted:
        raise NotFittedError("Estimator not fitted. Call 'fit' first.")

    samples = check_array(X, accept_sparse=False)
    weights = self.coef_.T
    return np.dot(samples, weights)
示例29
def predict_proba(self, X):
    """Get probability predictions.

    The logistic function is applied to the raw prediction of every model
    in ``self._models_``; the results are stacked with one column per
    model.

    Raises
    ------
    NotFittedError
        If the estimator has no ``_models_`` attribute.
    """
    fitted = hasattr(self, '_models_')
    if not fitted:
        raise NotFittedError("Estimator not fitted. Call 'fit' first.")

    X = check_array(X, accept_sparse=False)

    # logistic function of each model's raw output
    per_model = [
        1 / (1 + np.exp(- model.predict(X)))
        for model in self._models_
    ]
    stacked = np.vstack(per_model)
    return stacked.T
示例30
def predict(self, X):
    """Get label predictions.

    For each sample, picks the entry of ``self.labels_`` at the position
    of the largest value in that sample's ``predict_proba`` row.

    Raises
    ------
    NotFittedError
        If the estimator has no ``_models_`` attribute.
    """
    fitted = hasattr(self, '_models_')
    if not fitted:
        raise NotFittedError("Estimator not fitted. Call 'fit' first.")

    X = check_array(X, accept_sparse=False)
    probabilities = self.predict_proba(X)

    n_samples = X.shape[0]
    labels = np.zeros(n_samples)
    for row in range(n_samples):
        winner = probabilities[row].argmax()
        labels[row] = self.labels_[winner]
    return labels