Python source code examples: sklearn.datasets.load_boston()
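Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the examples below only run against older scikit-learn releases. A minimal sketch of the commonly suggested replacements, assuming scikit-learn >= 1.2 (and, for fetch_openml, network access):

# Hedged sketch: alternatives to the removed load_boston()
# (assumes scikit-learn >= 1.2; fetch_openml needs network access).
from sklearn.datasets import fetch_california_housing, fetch_openml

# Recommended replacement for regression demos: California housing.
X, y = fetch_california_housing(return_X_y=True)

# The original Boston data can still be fetched from OpenML if needed.
boston = fetch_openml(name="boston", version=1, as_frame=True)
X_boston, y_boston = boston.data, boston.target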

Example 1
def setUp(self):
        self.X, self.y = load_boston(return_X_y=True)
        self.regressor_settings = [
            'sklearn_random_forest_regressor',
            'sklearn_extra_trees_regressor',
            'sklearn_bagging_regressor',
            'sklearn_GP_regressor',
            'sklearn_ridge_regressor',
            'sklearn_lasso_regressor',
            'sklearn_kernel_ridge_regressor',
            'sklearn_knn_regressor',
            'sklearn_svr_regressor',
            'sklearn_decision_tree_regressor',
            'sklearn_linear_regression',
            'sklearn_adaboost_regressor',
            'xgboost_regressor',
        ] 
Example 2
def test_onehot():
    data = load_boston()
    X, y = data['data'], data['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=333)
    train = pd.DataFrame(X_train)
    test = pd.DataFrame(X_test)

    t_train, t_test = onehot_features(train.copy(deep=True), test.copy(deep=True), [8, 1, 12], full=False,
                                      dummy_na=True)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 441

    t_train, t_test = onehot_features(train.copy(deep=True), test.copy(deep=True), [8, 1, 12], full=True,
                                      dummy_na=False)
    assert t_train.shape[1] == t_test.shape[1]
    assert t_train.shape[1] == 500 
Example 3
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:",boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                mutation_rate=0.2, crossover_rate=0.8,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'epsilon_lexicase', tourn_size = 2,
                random_state=0, verbosity=0,
                disable_update_check=False, fit_choice = 'mse')

    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:",learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:],boston.target[300:])
    print("train score:",score,"test score:",test_score,
    "test r2:",r2_score(boston.target[300:],yhat_test))
    assert yhat_test.shape == boston.target[300:].shape 
Example 4
def test_few_with_parents_weight():
    """test_few.py: few performs without error with parent pressure for selection"""
    np.random.seed(1006987)
    boston = load_boston()
    d = np.column_stack((boston.data,boston.target))
    np.random.shuffle(d)
    features = d[:,0:-1]
    target = d[:,-1]

    print("feature shape:",boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                mutation_rate=1, crossover_rate=1,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'tournament', fit_choice = 'r2',tourn_size = 2, random_state=0, verbosity=0,
                disable_update_check=False, weight_parents=True)

    learner.fit(features[:300], target[:300])
    few_score = learner.score(features[:300], target[:300])
    test_score = learner.score(features[300:],target[300:])

    print("few score:",few_score)
    print("few test score:",test_score) 
Example 5
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)

    mapper = _BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)

    # Init gradients and hessians to those of the least squares loss
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)

    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        actual_n_bins=mapper.actual_n_bins_)
    grower.grow()

    predictor = grower.make_predictor(bin_thresholds=mapper.bin_thresholds_)

    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70 
Example 6
def test_score_sample_weight():

    rng = np.random.RandomState(0)

    # test both ClassifierMixin and RegressorMixin
    estimators = [DecisionTreeClassifier(max_depth=2),
                  DecisionTreeRegressor(max_depth=2)]
    sets = [datasets.load_iris(),
            datasets.load_boston()]

    for est, ds in zip(estimators, sets):
        est.fit(ds.data, ds.target)
        # generate random sample weights
        sample_weight = rng.randint(1, 10, size=len(ds.target))
        # check that the score with and without sample weights are different
        assert_not_equal(est.score(ds.data, ds.target),
                         est.score(ds.data, ds.target,
                                   sample_weight=sample_weight),
                         msg="Unweighted and weighted scores "
                             "are unexpectedly equal") 
Example 7
def test_warm_start_convergence_with_regularizer_decrement():
    boston = load_boston()
    X, y = boston.data, boston.target

    # Train a model to converge on a lightly regularized problem
    final_alpha = 1e-5
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)

    # Fitting a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier, so it should converge faster
    # in general.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_)

    # Refit on the original, less regularized version of the problem, but
    # start from the solution of the highly regularized variant, which is a
    # better starting point. This should also converge faster than the
    # original model that starts from zero.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert_greater(low_reg_model.n_iter_, warm_low_reg_model.n_iter_) 
Example 8
def test_keras_02(self):
        boston = load_boston()
        data = pd.DataFrame(boston.data)
        features = list(boston.feature_names)
        target = 'PRICE'
        data.columns = features
        data['PRICE'] = boston.target
        x_train, x_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.20, random_state=42)
        model = Sequential()
        model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
        model.add(Dense(23))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(x_train, y_train, epochs=1000, verbose=0)
        pmmlObj = KerasToPmml(model)
        pmmlObj.export(open('sequentialModel.pmml', 'w'), 0)
        reconPmmlObj = ny.parse('sequentialModel.pmml', True)
        self.assertEqual(os.path.isfile("sequentialModel.pmml"),True)
        self.assertEqual(len(model.layers), len(reconPmmlObj.DeepNetwork[0].NetworkLayer)-1) 
Example 9
def test_boston(self):
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = StandardScaler().fit(scikit_data.data)

        spec = converter.convert(
            scikit_model, scikit_data.feature_names, "out"
        ).get_spec()

        input_data = [
            dict(zip(scikit_data.feature_names, row)) for row in scikit_data.data
        ]

        output_data = [{"out": row} for row in scikit_model.transform(scikit_data.data)]

        metrics = evaluate_transformer(spec, input_data, output_data)

        assert metrics["num_errors"] == 0 
Example 10
def setUpClass(self):
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier

        # Load data and train model
        import numpy as np

        scikit_data = load_boston()
        self.X = scikit_data.data.astype("f").astype(
            "d"
        )  ## scikit-learn downcasts data
        t = scikit_data.target
        num_classes = 3
        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = "target" 
Example 11
def test_boston_OHE(self):
        data = load_boston()

        for categorical_features in [[3], [8], [3, 8], [8, 3]]:
            model = OneHotEncoder(
                categorical_features=categorical_features, sparse=False
            )
            model.fit(data.data, data.target)

            # Convert the model
            spec = sklearn.convert(model, data.feature_names, "out").get_spec()

            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
            output_data = [{"out": row} for row in model.transform(data.data)]

            result = evaluate_transformer(spec, input_data, output_data)

            assert result["num_errors"] == 0

Example 12
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_SKLEARN:
            return
        if not _HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(scikit_data["target"], scikit_data["data"].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.NU_SVR
        param.kernel_type = svmutil.LINEAR
        param.eps = 1

        self.libsvm_model = svmutil.svm_train(prob, param) 
Example 13
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Imputer(strategy="most_frequent", axis=0)
        scikit_data["data"][1, 8] = np.NaN

        input_data = scikit_data["data"][:, 8].reshape(-1, 1)
        scikit_model.fit(input_data, scikit_data["target"])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model 
Example 14
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_SKLEARN:
            return
        if not _HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(scikit_data["target"], scikit_data["data"].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.EPSILON_SVR
        param.kernel_type = svmutil.LINEAR
        param.eps = 1

        self.libsvm_model = svmutil.svm_train(prob, param) 
Example 15
def test_input_names(self):
        data = load_boston()
        df = pd.DataFrame({"input": data["data"].tolist()})
        df["input"] = df["input"].apply(np.array)

        # Default values
        spec = libsvm.convert(self.libsvm_model)
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _, _) = svmutil.svm_predict(
                data["target"], data["data"].tolist(), self.libsvm_model
            )
            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics["max_error"], 0)

        # One extra parameter. This is legal/possible.
        num_inputs = len(data["data"][0])
        spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

        # Not enough input names.
        input_names = ["this", "is", "not", "enought", "names"]
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_names=input_names)
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_length=num_inputs - 1) 
Example 16
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier
        import numpy as np

        scikit_data = load_boston()
        scikit_model = RandomForestClassifier(random_state=1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model 
Example 17
def test_boston_OHE_plus_normalizer(self):

        data = load_boston()

        pl = Pipeline(
            [
                ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
                ("Scaler", StandardScaler()),
            ]
        )

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, "out")

        if _is_macos() and _macos_version() >= (10, 13):
            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
            output_data = [{"out": row} for row in pl.transform(data.data)]

            result = evaluate_transformer(spec, input_data, output_data)
            assert result["num_errors"] == 0 
Example 18
def test_boston_OHE_plus_trees(self):

        data = load_boston()

        pl = Pipeline(
            [
                ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
                ("Trees", GradientBoostingRegressor(random_state=1)),
            ]
        )

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, "target")

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(data.data, columns=data.feature_names)
            df["prediction"] = pl.predict(data.data)

            # Evaluate it
            result = evaluate_regressor(spec, df, "target", verbose=False)

            assert result["max_error"] < 0.0001 
Example 19
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """

        if not _HAS_SKLEARN:
            return

        scikit_data = load_boston()
        feature_names = scikit_data.feature_names

        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data["data"], scikit_data["target"])
        scikit_spec = converter.convert(
            scikit_model, feature_names, "target"
        ).get_spec()

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
        self.scikit_spec = scikit_spec 
Example 20
def setUpClass(self):
        if not _HAS_XGBOOST:
            return
        if not _HAS_SKLEARN:
            return

        # Load data and train model
        scikit_data = load_boston()
        self.X = scikit_data.data.astype("f").astype("d")
        self.dtrain = xgboost.DMatrix(
            scikit_data.data,
            label=scikit_data.target,
            feature_names=scikit_data.feature_names,
        )
        self.feature_names = scikit_data.feature_names
        self.output_name = "target" 
Example 21
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_XGBOOST:
            return
        if not _HAS_SKLEARN:
            return

        scikit_data = load_boston()
        dtrain = xgboost.DMatrix(
            scikit_data.data,
            label=scikit_data.target,
            feature_names=scikit_data.feature_names,
        )
        xgb_model = xgboost.train({}, dtrain, 1)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.xgb_model = xgb_model
        self.feature_names = self.scikit_data.feature_names 
Example 22
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state=1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model 
Example 23
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        dtrain = xgboost.DMatrix(
            scikit_data.data, label=target, feature_names=scikit_data.feature_names
        )
        self.xgb_model = xgboost.train({}, dtrain)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.n_classes = len(np.unique(self.target)) 
Example 24
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.preprocessing import MultiLabelBinarizer
        import numpy as np

        scikit_data = load_boston()
        scikit_model = DecisionTreeClassifier(random_state=1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model 
Example 25
def setUpClass(self):
        from sklearn.datasets import load_boston

        # Load data and train model
        import numpy as np

        scikit_data = load_boston()
        num_classes = 3
        self.X = scikit_data.data.astype("f").astype(
            "d"
        )  ## scikit-learn downcasts data
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = "target" 
Example 26
def setUpClass(self):
        from sklearn.datasets import load_boston
        import numpy as np

        # Load data and train model
        scikit_data = load_boston()
        num_classes = 3
        self.X = scikit_data.data.astype("f").astype(
            "d"
        )  ## scikit-learn downcasts data
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = "target" 
Example 27
def create_boston_data():
    # Import Boston housing dataset
    boston = load_boston()
    # Split data into train and test
    x_train, x_test, y_train, y_validation = train_test_split(
        boston.data, boston.target, test_size=0.2, random_state=7
    )
    return x_train, x_test, y_train, y_validation, boston.feature_names 
Example 28
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties of the dataset
            used to verify the estimator and metric generators.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits for
            cross-validation purposes on ``X`` and ``y``.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
            splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
        except Exception as e:
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'mnist':
        X, y = datasets.load_digits(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y)
    elif data_type == 'boston':
        X, y = datasets.load_boston(return_X_y=True)
        splits = model_selection.KFold(n_splits=2, random_state=8).split(X)
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        splits = model_selection.KFold(n_splits=2, random_state=8).split(X)
    else:
        raise exceptions.UserError('Unknown dataset type {}'.format(dataset_properties['type']))
    return X, y, splits 
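Since the docstring above spells out the inputs and outputs, a short usage sketch may help. This is hypothetical and assumes the module-level imports used in the body (datasets, model_selection, exceptions) are in scope, plus a scikit-learn old enough to provide load_boston:

# Hypothetical usage of get_sample_dataset; the 'type' key selects one of
# the branches handled above ('boston' yields a 2-fold KFold iterator).
X, y, splits = get_sample_dataset({'type': 'boston'})
for train_idx, test_idx in splits:
    print(train_idx.shape, test_idx.shape)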
Example 29
def setUp(self):
        self.X, self.y = load_boston(return_X_y=True)
        self.transformer_settings = [
            'identity_transformer'
        ] 
Example 30
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)

    mapper = BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)
    X_test_binned = mapper.transform(X_test)

    # Init gradients and hessians to those of the least squares loss
    gradients = -y_train.astype(np.float32)
    hessians = np.ones(1, dtype=np.float32)

    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        n_bins_per_feature=mapper.n_bins_per_feature_)
    grower.grow()

    predictor = grower.make_predictor(
        numerical_thresholds=mapper.numerical_thresholds_)

    assert r2_score(y_train, predictor.predict_binned(X_train_binned)) > 0.85
    assert r2_score(y_test, predictor.predict_binned(X_test_binned)) > 0.70

    assert_allclose(predictor.predict(X_train),
                    predictor.predict_binned(X_train_binned))

    assert_allclose(predictor.predict(X_test),
                    predictor.predict_binned(X_test_binned))

    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70