Python源码示例:sklearn.utils.validation.check_array()

示例1
def _joint_log_likelihood(self, X):
        """Compute the joint log-likelihood of every sample for every class.

        The input is validated (CSR sparse matrices are accepted), passed
        through ``self._transform_data`` and multiplied with the transposed
        ``feature_log_prob_``; ``class_log_prior_`` is then added in place.

        Parameters
        ----------
        X : array-like or CSR sparse matrix
            Samples to score.

        Returns
        -------
        The product returned by ``safe_sparse_dot`` with the class
        log-priors added to it.

        Raises
        ------
        ValueError
            If the transformed input and ``feature_log_prob_`` disagree on
            the number of features.
        """
        check_is_fitted(self, "classes_")

        validated = check_array(X, accept_sparse='csr')
        transformed = self._transform_data(validated)

        _, n_expected = self.feature_log_prob_.shape
        _, n_received = transformed.shape
        if n_received != n_expected:
            raise ValueError(
                "Expected input with %d features, got %d instead" %
                (n_expected, n_received))

        # Implementation follows Formula 4 in chapter 4.1 of
        # http://www.cs.columbia.edu/~mcollins/em.pdf
        scores = safe_sparse_dot(transformed, self.feature_log_prob_.T)
        scores += self.class_log_prior_
        return scores
示例2
def from_array(X, column_names=None):
    """Convert an array into a frame by way of a pandas DataFrame.

    The input is validated with ``check_array`` (non-finite values are
    tolerated), loaded into a ``pandas.DataFrame`` through
    ``DataFrame.from_records`` and then handed to ``from_pandas``.

    Parameters
    ----------

    X : ndarray
        The array (or 2d array-like) to convert.

    column_names : list, tuple (default=None)
        Column labels to give to the intermediate DataFrame.

    Returns
    -------

    Whatever ``from_pandas`` produces for the intermediate DataFrame
    (an H2OFrame built with default arguments, per the original docs).
    """
    validated = check_array(X, force_all_finite=False)
    records_frame = pd.DataFrame.from_records(
        data=validated, columns=column_names)
    return from_pandas(records_frame)
示例3
def _diff_inv_matrix(x, lag, differences, xi):
    """Apply ``_diff_inv_vector`` to each column of a 2-d array.

    Parameters
    ----------
    x : 2-d array
        Input whose columns are processed independently.
    lag, differences : int
        Forwarded to ``_diff_inv_vector``; their product is the number of
        extra leading rows in the output.
    xi : array-like or None
        Initial values, expected to have shape ``(lag * differences, m)``
        where ``m`` is the number of columns in ``x``. Zeros are used when
        it is None.

    Returns
    -------
    Array of shape ``(n + lag * differences, m)`` with dtype ``DTYPE``.

    Raises
    ------
    IndexError
        If ``xi`` is given but does not have the expected shape.
    """
    n_rows, n_cols = x.shape
    n_initial = lag * differences
    result = np.zeros((n_rows + n_initial, n_cols), dtype=DTYPE)

    # todo: R checks for at least one column. do we need to?
    if n_cols < 1:
        return result

    if xi is None:
        # R: if(missing(xi)) xi <- matrix(0.0, lag*differences, m)
        xi = np.zeros((n_initial, n_cols), dtype=DTYPE)
    else:
        xi = check_array(
            xi, dtype=DTYPE, copy=False, force_all_finite=False,
            ensure_2d=True)
        if xi.shape != (n_initial, n_cols):
            raise IndexError('"xi" does not have the right shape')

    # TODO: can we vectorize?
    for col in range(n_cols):
        result[:, col] = _diff_inv_vector(
            x[:, col], lag, differences, xi[:, col])

    return result
示例4
def _seasonal_prediction_with_confidence(arima_res, start, end, exog, alpha,
                                         **kwargs):
    """Predict with a SARIMAX result and return the confidence interval too.

    SARIMAX offers no direct way to obtain both pieces at once, so this
    helper runs ``get_prediction`` itself and unpacks the predicted mean and
    the confidence interval from the returned object.

    Parameters
    ----------
    arima_res : object exposing ``get_prediction``
        The fitted results wrapper.
    start, end, exog
        Forwarded to ``get_prediction`` unchanged, as are ``**kwargs``.
    alpha : float
        Forwarded to ``conf_int``.

    Returns
    -------
    tuple
        ``(predicted_mean, conf_int)``, validated through ``check_endog``
        and ``check_array`` respectively without forcing a copy or a dtype.

    Notes
    -----
    For internal use only.
    """
    prediction = arima_res.get_prediction(
        start=start, end=end, exog=exog, **kwargs)

    mean_forecast = prediction.predicted_mean
    interval = prediction.conf_int(alpha=alpha)

    checked_mean = check_endog(mean_forecast, dtype=None, copy=False)
    checked_interval = check_array(interval, copy=False, dtype=None)
    return checked_mean, checked_interval
示例5
def fit(self, X, y_orig):
        """Fit the estimator on training data.

        Wraps every configured aggregation rule in a factory where needed,
        validates ``X``, records the class labels and the number of features,
        and delegates the actual model construction to ``build_with_ga``.

        Parameters
        ----------
        X : array-like, 2-d
            Training samples; validated with ``check_array``.
        y_orig : array-like
            Class label of each sample.

        Returns
        -------
        self

        Raises
        ------
        Exception
            If any class label is NaN.
        """

        def as_factory(r):
            # plain rules are wrapped so every entry exposes the factory API
            return r if isinstance(r, AggregationRuleFactory) else DummyAggregationRuleFactory(r)

        self.aggregation_rules__ = [as_factory(r) for r in self.aggregation_rules]

        X = check_array(X)

        self.classes_ = np.unique(y_orig)
        self.m = X.shape[1]

        # NaN never compares equal to itself, so a membership test such as
        # ``np.nan in self.classes_`` cannot find it. A label that differs
        # from itself is NaN; this works for numeric and string labels.
        if any(label != label for label in self.classes_):
            raise Exception("nan not supported for class values")

        self.build_with_ga(X, y_orig)

        return self
示例6
def predict(self, X):
        """Predict outputs for the given examples.

        Parameters
        ----------
        X : array or matrix
            The examples to predict; validated with ``check_array``.

        Returns
        -------
        y_pred
            The value returned by ``_predict`` for the fitted prototypes,
            the aggregation, the known classes and the validated input
            (one predicted value per row, per the original docs).

        Raises
        ------
        Exception
            If ``self.protos_`` is None, i.e. no fit has been performed.
        """
        if self.protos_ is None:
            raise Exception("Prototypes not initialized. Perform a fit first.")

        samples = check_array(X)
        y_pred = _predict(self.protos_, self.aggregation, self.classes_, samples)
        return y_pred
示例7
def predict(self, X):
        """Predict outputs for the given examples.

        Parameters
        ----------
        X : array or matrix
            The examples to predict; validated with ``check_array``.

        Returns
        -------
        y_pred
            The value returned by ``_predict_multi`` for the fitted
            prototypes, the aggregation, the known classes, the validated
            input and ``self.n_features`` (one predicted value per row, per
            the original docs).

        Raises
        ------
        Exception
            If ``self.protos_`` is None, i.e. no fit has been performed.
        """
        if self.protos_ is None:
            raise Exception("Prototypes not initialized. Perform a fit first.")

        samples = check_array(X)
        y_pred = _predict_multi(
            self.protos_, self.aggregation, self.classes_, samples,
            self.n_features)
        return y_pred
示例8
def fit(self, X, y):
        """Fit one model row per class.

        Validates ``X``, records the distinct class labels, constructs the
        distance measure from ``self.df`` and stores, for every class, the
        result of ``build_for_class`` on the samples carrying that label.

        Parameters
        ----------
        X : array-like, 2-d
            Training samples; validated with ``check_array``.
        y : array-like
            Class label of each sample.

        Returns
        -------
        self
        """
        X = check_array(X)

        # Comparing a plain list with ``==`` yields a single bool rather than
        # an element-wise mask, which would silently select no rows below.
        y = np.asarray(y)

        self.classes_ = np.unique(y)

        # construct distance measure
        self.distance_ = self.df(X)

        # build models: one row per class, one column per feature
        models = np.zeros((len(self.classes_), X.shape[1]))
        for c_idx, c_value in enumerate(self.classes_):
            models[c_idx, :] = self.build_for_class(X[y == c_value])

        self.models_ = models

        return self
示例9
def fit(self, X, y):
        """Learn per-class membership functions and the aggregation.

        Parameters
        ----------
        X : array-like, 2-d
            Training samples; validated with ``check_array``.
        y : array-like
            Class label of each sample. Internally replaced by the index of
            each label in ``self.classes_``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the string ``"?"`` occurs among the class labels.
        """
        X = check_array(X)

        class_labels, class_indices = np.unique(y, return_inverse=True)
        self.classes_ = class_labels

        if "?" in tuple(class_labels):
            raise ValueError("nan not supported for class values")

        # one set of membership functions per class, built from a boolean
        # mask that marks the samples belonging to that class
        self.protos_ = [
            build_memberships(X, class_indices == class_idx, self.membership_factory)
            for class_idx in range(len(class_labels))
        ]

        # the aggregation is built from the prototypes and the encoded labels
        self.aggregation_ = self.aggregation_factory(
            self.protos_, X, class_indices, self.classes_)

        return self
示例10
def fit(self, X, y):
        """Learn prototypes and an aggregation for every class.

        For each class ``learn_class`` is called with the samples, the
        encoded labels and a mask selecting that class; the first element of
        each result is stored in ``protos_`` and the second in
        ``aggregations_``. A selection method is built afterwards.

        Parameters
        ----------
        X : array-like, 2-d
            Training samples; validated with ``check_array``.
        y : array-like
            Class label of each sample. Internally replaced by the index of
            each label in ``self.classes_``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the string ``"?"`` occurs among the class labels.
        """
        X = check_array(X)

        class_labels, class_indices = np.unique(y, return_inverse=True)
        self.classes_ = class_labels

        if "?" in tuple(class_labels):
            raise ValueError("nan not supported for class values")

        # learn each class against a mask marking its own samples
        per_class = [
            learn_class(X, class_indices, class_indices == class_idx,
                        self.membership_factory, self.aggregation_factory)
            for class_idx in range(len(class_labels))
        ]

        logger.info("learned %s" % (str(per_class),))

        self.protos_ = [result[0] for result in per_class]
        self.aggregations_ = [result[1] for result in per_class]
        self.selection_method_ = self.selection_factory(X, class_indices)

        return self
示例11
def fit(self, X, y):
        """Build one tree per class from the training data.

        Every feature column is passed through ``self.fuzzifier`` and the
        results are pooled; ``build_for_class`` is then called once per class
        with a 0/1 vector marking the samples of that class and a fresh copy
        of the pooled list. The results are stored in ``self.trees_`` keyed
        by class index.

        Parameters
        ----------
        X : array-like, 2-d
            Training samples; validated with ``check_array``.
        y : array-like
            Class label of each sample. Internally replaced by the index of
            each label in ``self.classes_``.

        Returns
        -------
        self

        Raises
        ------
        Exception
            If any class label is NaN.
        """
        X = check_array(X)

        self.classes_, y = np.unique(y, return_inverse=True)

        # NaN never compares equal to itself, so a membership test such as
        # ``np.nan in self.classes_`` cannot find it. A label that differs
        # from itself is NaN; this works for numeric and string labels.
        if any(label != label for label in self.classes_):
            raise Exception("nan not supported for class values")

        self.trees_ = {}

        # build membership functions
        P = []
        for feature_idx, feature in enumerate(X.T):
            P.extend(self.fuzzifier(feature_idx, feature))

        # build the pattern tree for each class
        for class_idx in range(len(self.classes_)):
            # 1.0 where the sample belongs to this class, 0.0 elsewhere
            class_vector = np.zeros(len(y))
            class_vector[y == class_idx] = 1.0
            # every class receives its own copy of the pooled list
            self.trees_[class_idx] = self.build_for_class(
                X, y, class_vector, list(P))

        return self
示例12
def predict(self, X):
        """Predict the class of each sample in X.

        Each fitted tree is evaluated on the validated input and, per sample,
        the class whose tree produced the largest value is returned.

        Parameters
        ----------
        X : Array-like of shape [n_samples, n_features]
            The input to classify.

        Returns
        -------
        y : array of shape = [n_samples]
            The predicted classes, taken from ``self.classes_``.

        Raises
        ------
        Exception
            If ``self.trees_`` is None, i.e. no fit has been performed.
        """
        samples = check_array(X)

        if self.trees_ is None:
            raise Exception("Pattern trees not initialized. Perform a fit first.")

        n_classes = len(self.classes_)
        scores = np.zeros((samples.shape[0], n_classes))
        for class_idx in range(n_classes):
            # trees are keyed by class index and called directly on the data
            scores[:, class_idx] = self.trees_[class_idx](samples)

        # pick, for every row, the class with the highest score
        winners = np.argmax(scores, -1)
        return self.classes_.take(winners)
示例13
def decision_function(self, X):
        """Return the raw anomaly score of X from the fitted detector.

        The score is whatever ``self.detector_.mahalanobis`` returns for the
        validated input. Per the original documentation, outliers receive
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples; validated with ``check_array``.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        required = ['decision_scores_', 'threshold_', 'labels_']
        check_is_fitted(self, required)

        samples = check_array(X)

        # Compute the Mahalanobis distance of the samples
        anomaly_scores = self.detector_.mahalanobis(samples)
        return anomaly_scores
示例14
def transform(self, X):
        """Fill the NaN entries of X using the fitted estimators.

        A float64 copy of X is made, and its NaN positions are overwritten.
        With several estimators, estimator ``i`` predicts column ``i`` from
        the remaining columns of the initially imputed data. With a single
        estimator, the missing entries are taken from
        ``inverse_transform(transform(...))`` of the initially imputed data.

        Parameters
        ----------
        X : array-like, 2-d
            Data to impute; NaN and infinite values are allowed.

        Returns
        -------
        The float64 copy of X with its NaN entries replaced.

        Raises
        ------
        ValueError
            If X does not have as many columns as ``self.statistics_``.
        """
        check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])
        X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)
        if X.shape[1] != self.statistics_.shape[1]:
            raise ValueError("X has %d features per sample, expected %d"
                             % (X.shape[1], self.statistics_.shape[1]))

        missing = np.isnan(X)
        filled = self.initial_imputer.transform(X)

        if len(self.estimators_) <= 1:
            # single estimator: take the missing entries from its
            # reconstruction of the initially imputed data
            model = self.estimators_[0]
            reconstructed = model.inverse_transform(model.transform(filled))
            X[missing] = reconstructed[missing]
            return X

        for col, model in enumerate(self.estimators_):
            col_missing = missing[:, col]
            # predictors are all other columns, restricted to the rows in
            # which this column is missing
            rows_to_predict = np.delete(filled, col, 1)[col_missing]
            if len(rows_to_predict) > 0:
                X[col_missing, col] = model.predict(rows_to_predict)

        return X
示例15
def predict(self, X):
        """Apply the learned event segmentation to a new dataset.

        Alternative entry point for segmenting new data after ``fit()`` has
        learned a sequence of events, provided to comply with the sklearn
        Classifier interface.

        Parameters
        ----------
        X: timepoint by voxel ndarray
            fMRI data to segment based on previously-learned event patterns

        Returns
        -------
        Event label for each timepoint: the index of the largest entry in
        each row of the first value returned by ``find_events``.
        """
        check_is_fitted(self, ["event_pat_", "event_var_"])
        data = check_array(X)

        # the second return value of find_events is not needed here
        segments, _ = self.find_events(data)
        return np.argmax(segments, axis=1)
示例16
def fit(self, X, y=None):
        """Run the parent fit, then record which columns are nonnegative.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data array to transform. Must be numeric, non-sparse, and two-dimensional.
        y : ignored

        Returns
        -------
        self : LogExtremeValueTransformer
            With ``nonnegative_cols_`` set to the indices of the columns
            whose values are all ``>= 0``.
        """
        super().fit(X)
        data = check_array(X)

        # n_input_features_ is not assigned here; presumably set by the
        # parent fit (to be confirmed against the base class)
        column_indices = range(self.n_input_features_)
        self.nonnegative_cols_ = [
            col for col in column_indices if np.all(data[:, col] >= 0)
        ]
        return self
示例17
def fit(self, X, y=None):
        """Run the parent fit, then fit a ``QuantileTransformer`` on X.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            The data array to transform. Must be numeric, non-sparse, and two-dimensional.
        y : ignored

        Returns
        -------
        self : QuantileExtremeValueTransformer
            With ``quantile_transformer_`` fitted on the validated data.
        """
        super().fit(X)
        data = check_array(X)

        # fixed seed so that repeated fits give the same transformer
        self.quantile_transformer_ = QuantileTransformer(copy=True, random_state=0)
        self.quantile_transformer_.fit(data)
        return self
示例18
def fit(self, X, y=None):
        """Find the columns of X that hold a single value or only NaN.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            Input samples from which to check uniqueness. NaN and infinite
            values are allowed.
        y : ignored

        Returns
        -------
        self
            With ``n_input_features_`` set to the number of columns and
            ``cols_to_transform_`` set to a boolean mask over the columns.
        """
        data = check_array(X, force_all_finite=False)
        _, n_cols = data.shape
        self.n_input_features_ = n_cols

        # a column qualifies when np.unique finds exactly one value in it...
        single_valued = np.array(
            [np.unique(data[:, col]).size == 1 for col in range(n_cols)]
        )
        # ...or when every entry in it is NaN
        entirely_nan = np.all(np.isnan(data), axis=0)

        self.cols_to_transform_ = np.logical_or(single_valued, entirely_nan)
        return self
示例19
def transform(self, X=None, copy=True, is_query=False):
        """Return the stored document matrix, or transform a query matrix.

        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
            document-term query matrix; only used when ``is_query`` is True
        copy : boolean, optional (default=True)
            forwarded to ``check_array``
        is_query : boolean (default=False)
            whether to transform a query or return the documents database

        Returns
        -------
        vectors : sparse matrix, [n_samples, n_features]
            ``self._doc_matrix`` when ``is_query`` is False; otherwise the
            query as a CSR matrix, scaled by the diagonal of ``_idf_diag``
            when ``use_idf`` is set.

        Raises
        ------
        ValueError
            If the query does not have as many features as the stored
            document matrix.
        """
        if not is_query:
            return self._doc_matrix

        query = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy)
        if not sp.issparse(query):
            query = sp.csr_matrix(query, dtype=np.float64)

        n_samples, n_features = query.shape
        expected_n_features = self._doc_matrix.shape[1]

        if n_features != expected_n_features:
            raise ValueError(
                "Input has n_features=%d while the model"
                " has been trained with n_features=%d"
                % (n_features, expected_n_features)
            )

        if not self.use_idf:
            return query

        check_is_fitted(self, "_idf_diag", "idf vector is not fitted")
        # scale every column by its idf weight (done on a dense copy)
        weights = self._idf_diag.diagonal()
        return sp.csr_matrix(query.toarray() * weights)
示例20
def predict_moments(self, X):
        """
        Full predictive distribution from Bayesian linear regression.

        Parameters
        ----------
        X : ndarray
            (N*,d) array query input dataset (N* samples, d dimensions).

        Returns
        -------
        Ey : ndarray
            The expected value of y* for the query inputs, X* of shape (N*,).
        Vy : ndarray
            The expected variance of y* for the query inputs, X* of shape
            (N*,): the variance under the weight covariance plus ``var_``.
        """
        fitted_attributes = ['var_', 'regularizer_', 'weights_',
                             'covariance_', 'hypers_']
        check_is_fitted(self, fitted_attributes)
        queries = check_array(X)

        # basis expansion of the queries under the learned hyperparameters
        hypers = atleast_list(self.hypers_)
        design = self.basis.transform(queries, *hypers)

        expected = design.dot(self.weights_)
        # row-wise sum of (design . covariance) * design
        latent_variance = (design.dot(self.covariance_) * design).sum(axis=1)

        return expected, latent_variance + self.var_
示例21
def check_endog(y, dtype=DTYPE, copy=True, force_all_finite=False):
    """Validate a 1d endogenous array.

    Thin wrapper that runs sklearn's ``check_array`` (without requiring two
    dimensions) followed by ``column_or_1d``.

    Parameters
    ----------
    y : array-like, shape=(n_samples,)
        The 1d endogenous array.

    dtype : string, type or None (default=DTYPE)
        Data type of result. If None, the dtype of the input is preserved.
        If "numeric", dtype is preserved unless array.dtype is object.

    copy : bool, optional (default=True)
        Whether a forced copy will be triggered. If copy=False, a copy might
        still be triggered by a conversion.

    force_all_finite : bool, optional (default=False)
        Whether to raise an error on np.inf and np.nan in an array. The
        possibilities are:

        - True: Force all values of array to be finite.
        - False: accept both np.inf and np.nan in array.

    Returns
    -------
    y : np.ndarray, shape=(n_samples,)
        A 1d numpy ndarray
    """
    validated = check_array(
        y, ensure_2d=False, force_all_finite=force_all_finite,
        copy=copy, dtype=dtype)
    flattened = column_or_1d(validated)  # type: np.ndarray
    return flattened
示例22
def check_estimation_input(X, y, is_classification=False):
    """Validate the input arrays of an estimator.

    This function is adapted from sklearn.utils.validation.

    Parameters
    ----------
    X : nd-array or list
        Input data.
    y : nd-array, list
        Labels.
    is_classification : boolean (default=`False`)
        Whether the data is used for classification or regression tasks.
        For classification the targets are additionally checked with
        ``check_classification_targets``.

    Returns
    -------
    X : object
        The converted and validated `X` (float64, 2-d).
    y : object
        The converted and validated `y`; a 1-d `y` is returned as a
        column of shape (n, 1).

    """
    # regression inputs are converted to float64 in the joint check already
    joint_check_options = {} if is_classification else {"dtype": np.float64}
    X, y = check_X_y(X, y, **joint_check_options)

    # TODO accept_sparse="csc"
    X = check_array(X, ensure_2d=True, dtype=np.float64)
    y = check_array(y, ensure_2d=False, dtype=None)

    if is_classification:
        check_classification_targets(y)

    targets = np.atleast_1d(y)
    if targets.ndim == 1:
        # turn a flat label vector into a single column
        targets = np.reshape(targets, (-1, 1))

    return X, targets
示例23
def predict(self, X, y=None):
        """Predict output of data X.

        Every row of the validated input is passed to
        ``calc_estimation_output`` in "bmu" mode, with a progress bar
        configured by ``self.tqdm_params_``.

        Parameters
        ----------
        X : array-like matrix of shape = [n_samples, n_features]
            The prediction input samples.
        y : None, optional
            Ignored.

        Returns
        -------
        y_pred : np.array
            The predicted values, one entry per input row.

        Examples
        --------
        Fit the SOM on your data `X, y`:

        >>> import susi
        >>> som = susi.SOMClassifier()
        >>> som.fit(X, y)
        >>> y_pred = som.predict(X)

        """
        # Make sure fit has been called
        check_is_fitted(self, ['X_', 'y_'])

        # Input validation
        samples = check_array(X, dtype=np.float64)

        progress = tqdm(samples, desc="predict", **self.tqdm_params_)
        estimates = [
            self.calc_estimation_output(datapoint, mode="bmu")
            for datapoint in progress
        ]
        return np.array(estimates)
示例24
def fit_transform(self, X, y=None):
        """Fit to the input data and transform it.

        Calls ``fit``, stores the validated float64 input in ``self.X_`` and
        returns the result of ``transform`` on the original arguments.

        Parameters
        ----------
        X : array-like matrix of shape = [n_samples, n_features]
            The training and prediction input samples.
        y : array-like matrix of shape = [n_samples, 1]
            The labels (ground truth) of the input samples

        Returns
        -------
        np.array of tuples (int, int)
            Predictions including the BMUs of each datapoint

        Examples
        --------
        Load the SOM, fit it to your input data `X` and transform your input
        data with:

        >>> import susi
        >>> som = susi.SOMClassifier()
        >>> tuples = som.fit_transform(X, y)

        """
        self.fit(X, y)

        validated = check_array(X, dtype=np.float64)
        self.X_ = validated

        transformed = self.transform(X, y)
        return transformed
示例25
def fit(self, X, y=None):
        """Fit unsupervised SOM to input data.

        Seeds NumPy's global random state with ``self.random_state``, stores
        the validated float64 input in ``self.X_``, gives every sample a
        weight of 1 and runs ``train_unsupervised_som``.

        Parameters
        ----------
        X : array-like matrix of shape = [n_samples, n_features]
            The training input samples.
        y : None
            Not used in this class.

        Returns
        -------
        self : object

        Examples
        --------
        Load the SOM and fit it to your input data `X` with:

        >>> import susi
        >>> som = susi.SOMClustering()
        >>> som.fit(X)

        """
        np.random.seed(seed=self.random_state)

        # TODO accept_sparse
        self.X_ = check_array(X, dtype=np.float64)

        # one unit weight per sample, kept as a column
        n_samples = len(self.X_)
        self.sample_weights_ = np.full((n_samples, 1), 1.)

        self.train_unsupervised_som()
        self.fitted_ = True

        return self
示例26
def transform(self, X, y=None):
        """Transform input data.

        The validated float64 input replaces ``self.X_`` and is passed to
        ``get_bmus``; the result is returned as an array.

        Parameters
        ----------
        X : array-like matrix of shape = [n_samples, n_features]
            The prediction input samples.
        y : None, optional
            Ignored.

        Returns
        -------
        np.array of tuples (int, int)
            Predictions including the BMUs of each datapoint

        Examples
        --------
        Load the SOM, fit it to your input data `X` and transform your input
        data with:

        >>> import susi
        >>> som = susi.SOMClustering()
        >>> som.fit(X)
        >>> X_transformed = som.transform(X)

        """
        # NOTE: the fitted state is not verified here.
        self.X_ = check_array(X, dtype=np.float64)
        bmus = self.get_bmus(self.X_)
        return np.array(bmus)
示例27
def check_array(X, *args, **kwargs):
        """Pass-through stand-in for a validating ``check_array``.

        Accepts any positional and keyword options, ignores them all and
        returns ``X`` itself, without copying or converting it.
        """
        unchanged = X
        return unchanged
示例28
def predict(self, X):
        """Predict with fitted weights.

        Returns the dot product of the validated dense input with the
        transposed ``coef_``.

        Raises
        ------
        NotFittedError
            If the estimator has no ``coef_`` attribute yet.
        """
        fitted = hasattr(self, 'coef_')
        if not fitted:
            raise NotFittedError("Estimator not fitted. Call 'fit' first.")

        samples = check_array(X, accept_sparse=False)
        weights = self.coef_.T
        return np.dot(samples, weights)
示例29
def predict_proba(self, X):
        """Get probability predictions.

        The output of each model in ``_models_`` is squashed with the
        logistic function; the results are stacked so that each model
        contributes one column.

        Raises
        ------
        NotFittedError
            If the estimator has no ``_models_`` attribute yet.
        """
        fitted = hasattr(self, '_models_')
        if not fitted:
            raise NotFittedError("Estimator not fitted. Call 'fit' first.")

        samples = check_array(X, accept_sparse=False)

        # logistic sigmoid of every model's raw prediction
        per_model = [
            1 / (1 + np.exp(- model.predict(samples)))
            for model in self._models_
        ]

        return np.vstack(per_model).T
示例30
def predict(self, X):
        """Get label predictions.

        For every sample, the entry of ``labels_`` at the position of the
        largest value from ``predict_proba`` is returned.

        Raises
        ------
        NotFittedError
            If the estimator has no ``_models_`` attribute yet.
        """
        fitted = hasattr(self, '_models_')
        if not fitted:
            raise NotFittedError("Estimator not fitted. Call 'fit' first.")

        samples = check_array(X, accept_sparse=False)
        probabilities = self.predict_proba(samples)

        n_samples = samples.shape[0]
        predicted = np.zeros(n_samples)
        for row in range(n_samples):
            # position of the most probable entry selects the label
            best = probabilities[row].argmax()
            predicted[row] = self.labels_[best]

        return predicted