Python源码示例:sklearn.ensemble.RandomForestRegressor()

示例1
def __init__(self, model_type='classifier', feature_type='fingerprints',
                 n_estimators=100, n_ensemble=5):
        """Create an ensemble of `n_ensemble` random forest models.

        Parameters
        ----------
        model_type : str
            Either 'classifier' (members are built with RFC) or 'regressor'
            (members are built with RFR). Any other value raises ValueError.
        feature_type : str
            Stored on the instance; when it equals 'descriptors' a
            descriptor calculator and a per-member mean placeholder are
            also created.
        n_estimators : int
            Number of trees handed to every ensemble member.
        n_ensemble : int
            Number of independent forests kept in `self.model`.
        """
        super(RandomForestQSAR, self).__init__()
        self.n_estimators = n_estimators
        self.n_ensemble = n_ensemble
        self.model = []
        self.model_type = model_type
        if self.model_type not in ('classifier', 'regressor'):
            raise ValueError('invalid value for argument')
        # Both variants are constructed identically; only the class differs.
        forest_cls = RFC if self.model_type == 'classifier' else RFR
        self.model.extend(forest_cls(n_estimators=n_estimators)
                          for _ in range(n_ensemble))
        self.feature_type = feature_type
        if self.feature_type == 'descriptors':
            self.calc = Calculator(descriptors, ignore_3D=True)
            # One slot per ensemble member, initialised to zero.
            self.desc_mean = [0] * self.n_ensemble
示例2
def test_sklearn_regression_overfit(self):
    """Check that a sklearn random forest can overfit a tiny regression set.

    A 10-sample, 3-feature, single-task random dataset is generated with a
    fixed seed, the wrapped model is fitted and saved, and the R^2 score on
    the training data itself must exceed 0.7.
    """
    n_samples, n_features, n_tasks = 10, 3, 1

    # Seeded random data: features first, then targets, so the draws are
    # reproducible from run to run.
    np.random.seed(123)
    sample_ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    targets = np.random.rand(n_samples, n_tasks)
    weights = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(features, targets, weights, sample_ids)

    # Wrap a default random forest, then fit and persist it.
    model = dc.models.SklearnModel(RandomForestRegressor())
    model.fit(dataset)
    model.save()

    # Score on the training data itself.
    regression_metric = dc.metrics.Metric(dc.metrics.r2_score)
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] > .7
示例3
def get_regressor_fitted(file_path,
                         X_train,
                         X_test,
                         y_train,
                         y_test):
    """Return a fitted random forest regressor, reusing a stored one if present.

    If `file_path` exists the model is loaded from it; otherwise a 50-tree
    forest is fitted on `X_train` / `y_train`, stored at `file_path`, and
    returned.

    `X_test` and `y_test` are accepted for interface compatibility but are
    not used here.

    Raises
    ------
    EOFError
        Re-raised (after printing the offending path) when the stored model
        file cannot be read completely.
    """
    if os.path.exists(file_path):
        try:
            return load_sklearn_model(file_path)
        except EOFError:
            # Show which stored file is truncated before propagating.
            print(file_path)
            raise

    # The criterion and max_features are left at their defaults on purpose:
    # the previous explicit values ("mse" and "auto") were just the defaults
    # for a regressor, and those spellings no longer exist in newer
    # scikit-learn. The defaults give the same model on old and new versions.
    regressor = RandomForestRegressor(n_estimators=50,
                                      n_jobs=get_threads_number())

    regressor_fitted = regressor.fit(X_train, y_train)

    store_sklearn_model(file_path, regressor_fitted)
    return regressor_fitted
示例4
def Train(data, treecount, tezh, yanzhgdata):
    """Fit a forest on `data` and report training and validation MSE.

    Both `data` and `yanzhgdata` are 2-D arrays whose last column is the
    target and whose remaining columns are the features.

    Parameters
    ----------
    data : training array.
    treecount : passed to the model as `n_estimators`.
    tezh : passed to the model as `max_features`.
    yanzhgdata : validation array.

    Returns
    -------
    (train_mse, add_mse) : the two errors, which are also printed.
    """
    train_features, train_target = data[:, :-1], data[:, -1]
    model = RF(n_estimators=treecount, max_features=tezh)
    model.fit(train_features, train_target)

    # MSE of the predictions on the training data.
    train_mse = mse(train_target, model.predict(train_features))

    # MSE of the predictions on the validation data.
    valid_features, valid_target = yanzhgdata[:, :-1], yanzhgdata[:, -1]
    add_mse = mse(valid_target, model.predict(valid_features))

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数 
示例5
def build_ensemble(**kwargs):
    """Build a SuperLearner with three preprocessing pipelines.

    `kwargs` are forwarded unchanged to the SuperLearner constructor. Each
    preprocessing case is paired with its own list of base estimators, and a
    gradient boosting regressor is added as the meta learner.
    """
    standard, min_max, raw = ('Standard Scaling',
                              'Min Max Scaling',
                              'No Preprocessing')

    ens = SuperLearner(**kwargs)

    # Transformers applied before the estimators of the same key.
    preprocessing_cases = {
        standard: [StandardScaler()],
        min_max: [MinMaxScaler()],
        raw: [],
    }

    # Base estimators, keyed by the preprocessing case they run under.
    estimator_cases = {
        standard: [ElasticNet(), Lasso(), KNeighborsRegressor()],
        min_max: [SVR()],
        raw: [RandomForestRegressor(random_state=SEED),
              GradientBoostingRegressor()],
    }

    ens.add(estimator_cases, preprocessing_cases)
    ens.add(GradientBoostingRegressor(), meta=True)
    return ens
示例6
def regression_rf(x,y):
    '''
        Fit a random forest regressor on x and y and return it
    '''
    # fixed hyper-parameters, including the seed, so repeated runs agree
    forest_params = dict(
        min_samples_split=80,
        random_state=666,
        max_depth=5,
        n_estimators=10,
    )
    random_forest = en.RandomForestRegressor(**forest_params)

    # train on the supplied data and hand the fitted object back
    random_forest.fit(x, y)
    return random_forest

# the file name of the dataset 
示例7
def test_single_condition():
    """Assemble a two-tree forest fitted on two points and compare the AST.

    The expected expression is
    ``(1.0 + (1.0 if feature[0] <= 1.5 else 2.0)) * 0.5``.
    """
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    estimator.fit([[1], [2]], [1, 2])

    actual = assemblers.RandomForestModelAssembler(estimator).assemble()

    # The only branching term: a single split on feature 0 at 1.5.
    split_term = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(0),
            ast.NumVal(1.5),
            ast.CompOpType.LTE),
        ast.NumVal(1.0),
        ast.NumVal(2.0))
    # Constant term plus split term, then scaled by 0.5.
    summed_terms = ast.BinNumExpr(
        ast.NumVal(1.0),
        split_term,
        ast.BinNumOpType.ADD)
    expected = ast.BinNumExpr(
        summed_terms,
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
示例8
def generate_regression_data_and_models():
    """Create a synthetic regression frame and two models fitted on it.

    1000 rows are drawn with independent normal features ``A ~ N(0, 1)``,
    ``B ~ N(0, 3)`` and ``C ~ N(12, 4)``; the target is their sum.

    Returns
    -------
    tuple
        ``(df, column_names, target_name, reg1, reg2)`` where ``reg1`` is a
        fitted decision tree regressor and ``reg2`` a fitted random forest
        regressor.
    """
    column_names = ["A", "B", "C"]
    target_name = "target"

    # Collect plain dict rows and build the frame once. DataFrame.append no
    # longer exists in pandas 2.x, and appending row by row copied the whole
    # frame on every iteration.
    rows = []
    for _ in range(1000):
        # Draw order (a, b, c) is kept so seeded runs produce the same data.
        a = np.random.normal(0, 1)
        b = np.random.normal(0, 3)
        c = np.random.normal(12, 4)
        rows.append({"A": a, "B": b, "C": c, target_name: a + b + c})
    df = pd.DataFrame(rows, columns=column_names + [target_name])

    reg1 = tree.DecisionTreeRegressor()
    reg2 = ensemble.RandomForestRegressor()
    X = df[column_names]
    reg1.fit(X, df[target_name])
    reg2.fit(X, df[target_name])
    return df, column_names, target_name, reg1, reg2
示例9
def fit(self, X, y):
        """
        Train a new random forest regressor on inputs `X` and targets `y`.

        The training data, the number of rows in `X` and the fitted model
        are all kept on the instance. The model is built from `self.params`.

        Parameters
        ----------
        X : array-like
            Input values.
        y: array-like
            Target values.
        """
        self.X, self.y = X, y
        self.n = self.X.shape[0]
        forest = RandomForestRegressor(**self.params)
        self.model = forest
        self.model.fit(X, y)
示例10
def test_regression(self):
        """Predict single- and multi-target regressions over the raster stack."""
        metals = ["zinc", "cadmium", "copper", "lead"]

        training_pt = gpd.read_file(ms.meuse)
        training = self.stack_meuse.extract_vector(gdf=training_pt)
        # Attach the measured values to the extracted raster samples.
        for metal in metals:
            training[metal] = training_pt[metal]
        training = training.dropna()

        regr = RandomForestRegressor(n_estimators=50)
        X = training.loc[:, self.stack_meuse.names]

        # single target regression -> one output band
        regr.fit(X, training["zinc"])
        single_regr = self.stack_meuse.predict(regr)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression -> one output band per target
        regr.fit(X, training.loc[:, metals])
        multi_regr = self.stack_meuse.predict(regr)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
示例11
def fit(self, losses, configs=None):
        """Fit the random forest on lagged windows of the learning curves.

        Each curve in `losses` is differenced with `self.apply_differencing`.
        Every window of `self.order` consecutive differenced values,
        concatenated with that curve's config, becomes one training row; the
        value following the window is its target.

        Parameters
        ----------
        losses : sequence of learning curves.
        configs : optional sequence with one config vector per curve. When
            omitted, an empty config is used for every curve.
        """
        if configs is None:
            # One empty config per curve. This previously used len(times),
            # a name that is not defined here, so the default always raised
            # NameError.
            configs = [[] for _ in range(len(losses))]

        # convert learning curves into X and y data
        X = []
        y = []

        for curve, config in zip(losses, configs):
            diffed = self.apply_differencing(curve)

            for i in range(self.order, len(diffed)):
                X.append(np.hstack([diffed[i-self.order:i], config]))
                y.append(diffed[i])

        self.X = np.array(X)
        self.y = np.array(y)

        self.rfr = rfr().fit(self.X, self.y)
示例12
def extend_partial(self, obs_losses, num_steps, config=None):
        """Extrapolate an observed learning curve by `num_steps` values.

        The observed losses are differenced, then the fitted forest is
        applied one step at a time: each prediction uses the last
        `self.order` differenced values plus `config`, and is appended to
        the series before the next step. The last `num_steps` values are
        passed to `self.invert_differencing` together with the observations.

        Parameters
        ----------
        obs_losses : observed part of the curve.
        num_steps : number of future values to predict.
        config : optional config vector appended to every input window.
        """
        # TODO: add variance predictions
        config = [] if config is None else config

        series = self.apply_differencing(obs_losses)

        for _ in range(num_steps):
            window = np.hstack([series[-self.order:], config])
            next_value = self.rfr.predict([window])
            series = np.hstack([series, next_value])

        forecast = series[-num_steps:]
        return self.invert_differencing(obs_losses, forecast)
示例13
def test_random_forest_regressor(self):
        """Compare scikit-learn and converted-model predictions per dtype.

        For every dtype in `self.number_data_type` the data is cast, a
        forest is set up through `self._sklearn_setup`, the spec is checked,
        and the first sample's prediction from both models is compared. A
        RuntimeError during the comparison is reported as an unsupported
        dtype instead of failing the test.
        """
        for dtype in self.number_data_type.keys():
            scikit_model = RandomForestRegressor(random_state=1)
            data = self.scikit_data["data"].astype(dtype)
            target = self.scikit_data["target"].astype(dtype)
            scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
            test_data = data[0].reshape(1, -1)
            self._check_tree_model(spec, "multiArrayType", "doubleType", 1)
            coreml_model = create_model(spec)
            try:
                # Predict once per model and reuse the values. They were
                # previously recomputed for every assertion and again for
                # the failure message.
                sklearn_pred = scikit_model.predict(test_data)[0]
                coreml_pred = coreml_model.predict({"data": test_data})["target"]
                self.assertEqual(sklearn_pred.dtype, type(coreml_pred))
                self.assertAlmostEqual(
                    sklearn_pred,
                    coreml_pred,
                    msg="{} != {} for Dtype: {}".format(
                        sklearn_pred, coreml_pred, dtype
                    ),
                )
            except RuntimeError:
                print("{} not supported. ".format(dtype))
示例14
def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Fit a scikit-learn forest with the given parameters, convert it, and
        (on macOS 10.13 or newer only) evaluate the converted spec against
        the scikit-learn predictions.
        """
        scikit_model = RandomForestRegressor(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        # The evaluation step only runs on macOS 10.13+; elsewhere the test
        # stops after a successful conversion.
        if not (_is_macos() and _macos_version() >= (10, 13)):
            return

        # Reference predictions from scikit-learn, next to the inputs.
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = scikit_model.predict(self.X)

        metrics = evaluate_regressor(spec, df, verbose=False)
        self._check_metrics(metrics, scikit_params)
示例15
def test_smoke_regression_methods(regression_test_data, n_jobs):
    """Smoke-test fit, predict and score of a stacked regressor.

    Also checks that `predict_proba` raises AttributeError on the stacked
    regressor.
    """
    features = regression_test_data['x']
    targets = regression_test_data['y']

    rng = np.random.RandomState(17)
    forest = RandomForestRegressor(random_state=rng, n_estimators=10)
    est_list = [
        ('lr', LinearRegression()),
        ('rf', forest),
        ('nnls', NonNegativeLinearRegression()),
    ]

    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(features, targets)
    sm.predict(features)
    sm.score(features, targets)

    with pytest.raises(AttributeError):
        sm.predict_proba(features)
示例16
def test_smoke_multiout_regression_methods(n_jobs):
    """Smoke-test a stacked regressor on a generated two-target problem.

    Fits, predicts and scores on 100 samples with 10 features, then checks
    that `predict_proba` raises AttributeError.
    """
    problem = dict(random_state=7, n_samples=100, n_features=10,
                   n_informative=4, n_targets=2)
    X, y = make_regression(**problem)

    rng = np.random.RandomState(17)
    forest = RandomForestRegressor(random_state=rng, n_estimators=10)
    est_list = [
        ('lr', LinearRegression()),
        ('rf', forest),
        ('metalr', LinearRegression()),
    ]

    sm = StackedRegressor(est_list, n_jobs=n_jobs)
    sm.fit(X, y)
    sm.predict(X)
    sm.score(X, y)

    with pytest.raises(AttributeError):
        sm.predict_proba(X)
示例17
def test_integration_regression(regression_test_data, n_jobs):
    """Check that stacking beats each individual base estimator.

    A stacked regressor (forest + linear regression, combined by a
    non-negative linear meta estimator) and the two stand-alone base models
    are each measured with `fit_predict_measure_reg`; the stacked error must
    be the smallest and below 1.5.
    """
    split = (regression_test_data['x'], regression_test_data['y'],
             regression_test_data['xtest'], regression_test_data['ytest'])

    stacked_members = [
        ('rf', RandomForestRegressor(random_state=7, n_estimators=10)),
        ('lr', LinearRegression()),
        ('metalr', NonNegativeLinearRegression()),
    ]
    sr = StackedRegressor(stacked_members, n_jobs=n_jobs)
    rf = RandomForestRegressor(random_state=7, n_estimators=10)
    lr = LinearRegression()

    sr_mse = fit_predict_measure_reg(sr, *split)
    rf_mse = fit_predict_measure_reg(rf, *split)
    lr_mse = fit_predict_measure_reg(lr, *split)

    # Stacked regressor should perform better than its base estimators on
    # this data.
    assert sr_mse < rf_mse
    assert sr_mse < lr_mse
    assert sr_mse < 1.5    # Sanity check
示例18
def __init__(self, params):
        """Configure a warm-started random forest regressor from `params`.

        Step and early-stopping settings come from `regression_additional`;
        the forest hyper-parameters come from `params`, with fallbacks.
        """
        super(RandomForestRegressorAlgorithm, self).__init__(params)
        logger.debug("RandomForestRegressorAlgorithm.__init__")

        self.library_version = sklearn.__version__
        self.trees_in_step = regression_additional.get("trees_in_step", 5)
        self.max_steps = regression_additional.get("max_steps", 3)
        self.early_stopping_rounds = regression_additional.get(
            "early_stopping_rounds", 50
        )

        # NOTE(review): the "mse" fallback is the pre-1.0 scikit-learn name
        # of the squared-error criterion -- confirm against the pinned
        # scikit-learn version.
        forest_options = {
            "n_estimators": self.trees_in_step,
            "criterion": params.get("criterion", "mse"),
            "max_features": params.get("max_features", 0.8),
            "min_samples_split": params.get("min_samples_split", 4),
            "warm_start": True,
            "n_jobs": -1,
            "random_state": params.get("seed", 1),
        }
        self.model = RandomForestRegressor(**forest_options)
示例19
def __init__(self, text, scores):
        """Store the data, pick a forest model and fit the features.

        A random forest regressor is used by default. When the number of
        distinct values in `scores` is at most `self.classification_max`, a
        random forest classifier with the same settings replaces it.
        """
        self.text = text
        self.scores = scores
        self.feature_generator = FeatureGenerator()

        # Both model kinds share exactly the same hyper-parameters.
        forest_options = dict(
            n_estimators=100,
            min_samples_split=4,
            min_samples_leaf=3,
            random_state=1,
        )
        self.classifier = RandomForestRegressor(**forest_options)

        few_distinct_scores = len(set(scores)) <= self.classification_max
        if few_distinct_scores:
            self.classifier = RandomForestClassifier(**forest_options)

        self.fit_feats()
        self.fit_done = False
示例20
def train_model(self, train_file_path, model_path):
        """Train a random forest regressor and pickle it with its scaler.

        The training data is loaded with `self.load_file`, scaled with a
        MaxAbsScaler, converted to a dense array and used to fit a forest of
        `self.n_estimators` trees. The model is pickled to `model_path` and
        the scaler to the same path with '.pkl' replaced by '.scaler.pkl'.

        Returns
        -------
        The fitted regressor.
        """
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        scaler = preprocessing.MaxAbsScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_scaled.toarray(), Y_train)

        print("==> Save the model ...")
        # Context managers make sure both files are flushed and closed even
        # if pickling fails.
        with open(model_path, 'wb') as model_file:
            pickle.dump(clf, model_file)

        # NOTE(review): if model_path contains no '.pkl', scaler_path equals
        # model_path and the scaler overwrites the model -- confirm callers
        # always pass a '.pkl' path.
        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        with open(scaler_path, 'wb') as scaler_file:
            pickle.dump(scaler, scaler_file)
        return clf
示例21
def fit_ensemble(x,y):
    """Fit and return the blending model for the configured fit type.

    For binary classification an elastic-net SGD classifier with log loss is
    used; for every other fit type a cross-validated LassoLars model with
    normalisation is used.

    Parameters
    ----------
    x : training inputs for the blender.
    y : training targets for the blender.
    """
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    # The former `if 1: ... else:` wrapper had an unreachable else branch;
    # only the live branch is kept.
    # NOTE(review): loss="log" and normalize=True are spellings from older
    # scikit-learn releases -- confirm against the pinned version.
    if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
        blend = SGDClassifier(loss="log", penalty="elasticnet")
    else:
        blend = LassoLarsCV(normalize=True)
    blend.fit(x, y)
    return blend
示例22
def run_sklearn():
  """Train every configured scikit-learn model and print the elapsed time.

  Each (name, estimator) pair is handed to
  `jhkaggle.train_sklearn.TrainSKLearn` and run in turn.
  """
  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  alg_list = [
      ['lreg',LinearRegression()],
      ['rforest',RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
      # NOTE(review): this is a classifier in a list of regressors --
      # confirm whether a regressor was intended.
      ['extree',ExtraTreesClassifier(n_estimators = 1000,max_depth=2)],
      # base_estimator=None was only restating the default, and newer
      # scikit-learn no longer accepts that keyword.
      ['adaboost',AdaBoostRegressor(n_estimators=600, learning_rate=1.0)],
      ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
  ]

  start_time = time.time()
  for name,alg in alg_list:
      train = jhkaggle.train_sklearn.TrainSKLearn("1",name,alg,False)
      train.run()
      # Drop the reference so the finished run can be freed before the
      # next one starts.
      train = None
  elapsed_time = time.time() - start_time
  print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
示例23
def fit_and_trade(data, cols, split, threshold):
    '''
    Fit a random forest on the first `split` rows and backtest on the rest.

    Rows whose `width` is not positive are dropped first. The features are
    the columns named in `cols` and the target is the `mid30` column. The
    held-out rows, the fitted regressor and `threshold` are handed to
    `trade`.
    '''
    usable = data[data.width > 0]
    X, y = usable[cols], usable.mid30

    # Positional split: rows before `split` train, the rest are traded on.
    X_train, X_test = X.iloc[:split], X.iloc[split:]
    y_train, y_test = y.iloc[:split], y.iloc[split:]

    forest_options = dict(n_estimators=100,
                          min_samples_leaf=500,
                          random_state=42,
                          n_jobs=-1)
    regressor = RandomForestRegressor(**forest_options)
    regressor.fit(X_train.values, y_train.values)
    trade(X_test.values, y_test.values, regressor, threshold)
示例24
def create_sklearn_random_forest_regressor(X, y):
    """Fit a depth-4 random forest regressor (seed 777) on X, y and return the result of `fit`."""
    forest = ensemble.RandomForestRegressor(max_depth=4, random_state=777)
    return forest.fit(X, y)
示例25
def fit(self, X, y):
		"""Store the data, then build and fit the three regressors.

		The XGBoost, random forest and SVR models are each created from
		their own parameter dict on the instance.
		"""
		self.X, self.y = X, y
		self.opt_XGBoost_reg = xgb.XGBRegressor(**self.opt_xgb_params)
		self.opt_forest_reg = RandomForestRegressor(**self.opt_rf_params)
		self.opt_svm_reg = SVR(**self.opt_svm_params)
		# Fit the models in the order they were created.
		for regressor in (self.opt_XGBoost_reg,
				self.opt_forest_reg,
				self.opt_svm_reg):
			regressor.fit(self.X, self.y)
示例26
def test_missforest_numerical_single():
    """Check MissForest against a hand-built forest for one missing value.

    The first column of row 3 is missing. A random forest with the same
    `n_estimators` and `random_state` as the imputer is fitted on the
    complete rows and its prediction for the incomplete row is the expected
    imputed value. The column means recorded by the imputer must equal the
    NaN-ignoring means of the input.
    """
    # Test imputation with default parameter values

    # Test with a single missing value
    df = np.array([
        [1,      0,      0,      1],
        [2,      1,      2,      2],
        [3,      2,      3,      2],
        [np.nan, 4,      5,      5],
        [6,      7,      6,      7],
        [8,      8,      8,      8],
        [16,     15,     18,    19],
    ])
    statistics_mean = np.nanmean(df, axis=0)

    y = df[:, 0]
    X = df[:, 1:]
    good_rows = np.where(~np.isnan(y))[0]
    bad_rows = np.where(np.isnan(y))[0]

    rf = RandomForestRegressor(n_estimators=10, random_state=1337)
    rf.fit(X=X[good_rows], y=y[good_rows])
    # predict() returns a 1-element array here. Take the scalar so the
    # literal below is a regular 7x4 table; a nested array inside the row is
    # rejected or deprecated by newer NumPy releases.
    pred_val = rf.predict(X[bad_rows])[0]

    df_imputed = np.array([
        [1,         0,      0,      1],
        [2,         1,      2,      2],
        [3,         2,      3,      2],
        [pred_val,  4,      5,      5],
        [6,         7,      6,      7],
        [8,         8,      8,      8],
        [16,        15,     18,    19],
    ])

    imputer = MissForest(n_estimators=10, random_state=1337)
    assert_array_equal(imputer.fit_transform(df), df_imputed)
    assert_array_equal(imputer.statistics_.get('col_means'), statistics_mean)
示例27
def model_builder(model_dir):
  """Return a SklearnModel wrapping a 500-tree random forest regressor.

  `model_dir` is passed through to SklearnModel unchanged.
  """
  return SklearnModel(RandomForestRegressor(n_estimators=500), model_dir)
示例28
def task_model_builder(model_dir):
  """Return a SklearnModel wrapping a 100-tree random forest regressor.

  Each split considers a third of `num_features` (truncated to an int),
  needs at least 5 samples, and training uses all available cores.
  """
  forest_options = dict(
      n_estimators=100,
      max_features=int(num_features/3),
      min_samples_split=5,
      n_jobs=-1,
  )
  forest = RandomForestRegressor(**forest_options)
  return dc.models.SklearnModel(forest, model_dir)
示例29
def task_model_builder(model_dir):
  """Build the per-task model: a random forest wrapped in a SklearnModel.

  The forest has 100 trees, considers int(num_features / 3) features per
  split, requires 5 samples to split a node and runs with n_jobs=-1.
  """
  features_per_split = int(num_features / 3)
  return dc.models.SklearnModel(
      RandomForestRegressor(
          n_estimators=100,
          max_features=features_per_split,
          min_samples_split=5,
          n_jobs=-1),
      model_dir)
示例30
def task_model_builder(model_dir):
  """Create the SklearnModel used for a single task.

  The wrapped estimator is a 100-tree random forest regressor limited to
  int(num_features/3) features per split, with min_samples_split=5 and
  n_jobs=-1. `model_dir` is forwarded to SklearnModel.
  """
  max_split_features = int(num_features/3)
  estimator = RandomForestRegressor(n_estimators=100,
                                    max_features=max_split_features,
                                    min_samples_split=5,
                                    n_jobs=-1)
  return dc.models.SklearnModel(estimator, model_dir)