Python source code examples: sklearn.ensemble.AdaBoostRegressor()
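
Before the project excerpts below, here is a minimal self-contained sketch of the AdaBoostRegressor API itself; the synthetic dataset and hyperparameter values are illustrative only, not taken from any of the examples:

from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Synthetic regression data stands in for a real dataset
X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# A shallow decision tree is the conventional weak learner for AdaBoost
reg = AdaBoostRegressor(DecisionTreeRegressor(max_depth=3),
                        n_estimators=100, learning_rate=0.8, random_state=0)
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))  # R^2 on the held-out split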

Example 1
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error as mse  # mse() as used below
from sklearn.tree import DecisionTreeRegressor

def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=censhu),
                              n_estimators=modelcount, learning_rate=0.8)

    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the MSE
    train_mse = mse(data[:, -1], train_out)

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the chosen combination
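
A hedged usage sketch for Train; the random arrays below are placeholders that only illustrate the layout the function expects (features in every column but the last, the target in the last column):

import numpy as np

rng = np.random.RandomState(0)
train_data = rng.randn(100, 6)   # hypothetical training array: 5 features + target
valid_data = rng.randn(30, 6)    # hypothetical validation array, same layout
train_mse, valid_mse = Train(train_data, modelcount=50, censhu=4, yanzhgdata=valid_data)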
Example 2
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target) 
Example 3
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator.

    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(X.shape[0])

    boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    boost.fit(X, y_regr)
    assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_)) 
Example 4
def test_multidimensional_X():
    """
    Check that the AdaBoost estimators can work with an n-dimensional
    data matrix
    """

    from sklearn.dummy import DummyClassifier, DummyRegressor

    rng = np.random.RandomState(0)

    X = rng.randn(50, 3, 3)
    yc = rng.choice([0, 1], 50)
    yr = rng.randn(50)

    boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    boost.fit(X, yc)
    boost.predict(X)
    boost.predict_proba(X)

    boost = AdaBoostRegressor(DummyRegressor())
    boost.fit(X, yr)
    boost.predict(X) 
Example 5
def run_sklearn():
  n_trees = 100
  n_folds = 3

  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  alg_list = [
      ['lreg', LinearRegression()],
      ['rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
      ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)],
      ['adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)],
      ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)]
  ]

  start_time = time.time()
  for name,alg in alg_list:
      train = jhkaggle.train_sklearn.TrainSKLearn("1",name,alg,False)
      train.run()
      train = None
  elapsed_time = time.time() - start_time
  print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time))) 
Example 6
def sample_1031_4():
    """
    10.3.1_4 Zhu Laosan predicts stock prices with regression: forecasting prices with the ensemble learners AdaBoost and RandomForest
    :return:
    """
    train_x, train_y_regress, train_y_classification, pig_three_feature, \
    test_x, test_y_regress, test_y_classification, kl_another_word_feature_test = sample_1031_1()

    # AdaBoost
    from sklearn.ensemble import AdaBoostRegressor

    estimator = AdaBoostRegressor(n_estimators=100)
    regress_process(estimator, train_x, train_y_regress, test_x,
                    test_y_regress)
    plt.show()
    # RandomForest
    from sklearn.ensemble import RandomForestRegressor

    estimator = RandomForestRegressor(n_estimators=100)
    regress_process(estimator, train_x, train_y_regress, test_x, test_y_regress)
    plt.show() 
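
regress_process comes from the book's own utilities and is not shown here; a hypothetical stand-in with the same call shape might look like this (the name regress_process_sketch, the metric, and the plotting details are all assumptions):

import matplotlib.pyplot as plt
from sklearn import metrics

def regress_process_sketch(estimator, train_x, train_y, test_x, test_y):
    # Fit on the training split and predict the held-out split
    estimator.fit(train_x, train_y)
    pred = estimator.predict(test_x)
    print('MSE: {:.3f}'.format(metrics.mean_squared_error(test_y, pred)))
    # Overlay actual vs. predicted for a visual check; the caller
    # invokes plt.show() afterwards, as in the example above
    plt.plot(test_y, label='actual')
    plt.plot(pred, label='predicted')
    plt.legend(loc='best')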
Example 7
def Adaboost_First(self, data, max_depth=5, n_estimators=320):
        model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=n_estimators, learning_rate=0.8)
        model.fit(data['train'][:, :-1], data['train'][:, -1])
        # Note: validation-set results and prediction-set results are stored differently
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the prediction set
        prer = model.predict(data['predict'][:, :-1])
        # Store them
        self.yanzhneg_pr.append(yanre)
        self.predi.append(prer)
        # Compute the training, validation, and prediction errors separately
        # After each fold, compute the errors on the training, validation, and prediction data
        xx = self.RMSE(xul, data['train'][:, -1])
        yy = self.RMSE(yanre, data['test'][:, -1])
        pp = self.RMSE(prer, data['predict'][:, -1])
        # Store the errors
        self.error_dict['AdaBoost'] = [xx, yy, pp]
        # Ground-truth outputs of the validation set
        self.yanzhneg_real = data['test'][:, -1]

        # Ground-truth outputs of the prediction set
        self.preal = data['predict'][:, -1]
        return print('AdaBoost in layer 1 finished')

    # GBDT 
Example 8
def recspre(exstr, predata, datadict, zhe, count=100):
    tree, te = exstr.split('-')
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=int(te)),
                              n_estimators=int(tree), learning_rate=0.8)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # For ease of display, select 100 samples to show
    zongleng = np.arange(len(yucede))
    randomnum = np.random.choice(zongleng, count, replace=False)

    yucede_se = list(np.array(yucede)[randomnum])

    yuce_re = list(np.array(predata[:, -1])[randomnum])

    # Comparison plot
    plt.figure(figsize=(17, 9))
    plt.subplot(2, 1, 1)
    plt.plot(list(range(len(yucede_se))), yucede_se, 'r--', label='Predicted', lw=2)
    plt.scatter(list(range(len(yuce_re))), yuce_re, c='b', marker='.', label='Actual', lw=2)
    plt.xlim(-1, count + 1)
    plt.legend()
    plt.title('Predicted vs. actual values [max trees: %d]' % int(tree))

    plt.subplot(2, 1, 2)
    plt.plot(list(range(len(yucede_se))), np.array(yuce_re) - np.array(yucede_se), 'k--', marker='s', label='Actual - Predicted', lw=2)
    plt.legend()
    plt.title('Error between predicted and actual values')

    plt.savefig(r'C:\Users\GWT9\Desktop\duibi.jpg')
    return 'Prediction vs. actual comparison finished'

# Main function
Example 9
def test_regression_toy():
    # Check regression on a toy dataset.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(X, y_regr)
    assert_array_equal(clf.predict(T), y_t_regr) 
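
For context, these toy tests depend on module-level fixtures from scikit-learn's test_weight_boosting.py, which look approximately like this:

# Toy sample with a simple two-valued regression target
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y_regr = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]   # held-out points
y_t_regr = [-1, 1, 1]            # expected predictions on T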
Example 10
def test_boston():
    # Check consistency on the Boston house prices dataset.
    reg = AdaBoostRegressor(random_state=0)
    reg.fit(boston.data, boston.target)
    score = reg.score(boston.data, boston.target)
    assert score > 0.85

    # Check we used multiple estimators
    assert len(reg.estimators_) > 1
    # Check for distinct random states (see issue #7408)
    assert_equal(len(set(est.random_state for est in reg.estimators_)),
                 len(reg.estimators_)) 
Example 11
def test_pickle():
    # Check picklability.
    import pickle

    # Adaboost classifier
    for alg in ['SAMME', 'SAMME.R']:
        obj = AdaBoostClassifier(algorithm=alg)
        obj.fit(iris.data, iris.target)
        score = obj.score(iris.data, iris.target)
        s = pickle.dumps(obj)

        obj2 = pickle.loads(s)
        assert_equal(type(obj2), obj.__class__)
        score2 = obj2.score(iris.data, iris.target)
        assert_equal(score, score2)

    # Adaboost regressor
    obj = AdaBoostRegressor(random_state=0)
    obj.fit(boston.data, boston.target)
    score = obj.score(boston.data, boston.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert_equal(type(obj2), obj.__class__)
    score2 = obj2.score(boston.data, boston.target)
    assert_equal(score, score2) 
Example 12
def test_sample_weight_missing():
    from sklearn.cluster import KMeans

    clf = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, clf.fit, X, y_regr)

    clf = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, clf.fit, X, y_regr) 
Example 13
def setClf(self):
        # min_samples_split = 3
        self.clf = AdaBoostRegressor()
        return 
Example 14
def __init__(self, options):
        self.handle_options(options)
        params = options.get('params', {})
        out_params = convert_params(
            params,
            strs=['loss', 'max_features'],
            floats=['learning_rate'],
            ints=['n_estimators'],
        )

        self.estimator = _AdaBoostRegressor(**out_params) 
Example 15
def test_reg_engineer(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate problem with `BayesianOptPro` specifically - same configuration is fine with all
    other `OptPro`s"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go() 
Example 16
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer`, except `Integer` dimension added to show that everything is
    fine now. Problem limited to not only `BayesianOptPro`, but also exclusively `Categorical`
    search spaces"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(n_estimators=Integer(10, 40)),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go() 
Example 17
def test_reg_engineer_categorical(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that `BayesianOptPro` breaks with multiple `Categorical`s when `FeatureEngineer`
    is included in the dimensions"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(loss=Categorical(["linear", "square", "exponential"])),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go() 
Example 18
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that all
    `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(
            loss=Categorical(["linear", "square", "exponential"]), n_estimators=Integer(10, 40)
        ),
        feature_engineer=FeatureEngineer([Categorical([standard_scale, min_max_scale, normalize])]),
    )
    opt.go() 
Example 19
def test_reg_categorical_ok(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that all `OptPro`s are fine with exclusively-`Categorical` space that doesn't
    include `FeatureEngineer`"""
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=dict(loss=Categorical(["linear", "square", "exponential"])),
    )
    opt.go() 
Example 20
def test_reg_integer_ok(env_boston_regression, hh_assets, opt_pro):
    opt = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    opt.forge_experiment(
        model_initializer=AdaBoostRegressor, model_init_params=dict(n_estimators=Integer(10, 40))
    )
    opt.go() 
Example 21
def exp_lambda_cb(lambda_cbs):
    """Return a `CVExperiment` with `lambda_cbs` as `callbacks`

    Parameters
    ----------
    lambda_cbs: `LambdaCallback`, list of `LambdaCallback`, or None
        LambdaCallback values passed to the `CVExperiment`'s `callbacks` kwarg"""
    return CVExperiment(AdaBoostRegressor, callbacks=lambda_cbs)


##################################################
# Dummy LambdaCallbacks
################################################## 
Example 22
def get_ensemble_models():
    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3, random_state=42,
        n_jobs=int(0.8*n_cores))
    bag = BaggingRegressor(n_estimators=51, random_state=42, n_jobs=int(0.8*n_cores))
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42, n_jobs=int(0.8*n_cores))
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)
    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging',
                            'Extra Trees', 'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list 
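
A hedged usage sketch for the returned lists; X and y stand in for a real feature matrix and target vector, and the scoring choice is illustrative:

from sklearn.model_selection import cross_val_score

models, names = get_ensemble_models()
for model, name in zip(models, names):
    # Negated because scikit-learn maximizes scores; lower MSE is better
    scores = cross_val_score(model, X, y, cv=3, scoring='neg_mean_squared_error')
    print(name, -scores.mean())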
Example 23
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.ensemble.AdaBoostClassifier,
                      ensemble.AdaBoostClassifier)
        self.assertIs(df.ensemble.AdaBoostRegressor,
                      ensemble.AdaBoostRegressor)
        self.assertIs(df.ensemble.BaggingClassifier,
                      ensemble.BaggingClassifier)
        self.assertIs(df.ensemble.BaggingRegressor,
                      ensemble.BaggingRegressor)
        self.assertIs(df.ensemble.ExtraTreesClassifier,
                      ensemble.ExtraTreesClassifier)
        self.assertIs(df.ensemble.ExtraTreesRegressor,
                      ensemble.ExtraTreesRegressor)

        self.assertIs(df.ensemble.GradientBoostingClassifier,
                      ensemble.GradientBoostingClassifier)
        self.assertIs(df.ensemble.GradientBoostingRegressor,
                      ensemble.GradientBoostingRegressor)

        self.assertIs(df.ensemble.IsolationForest,
                      ensemble.IsolationForest)

        self.assertIs(df.ensemble.RandomForestClassifier,
                      ensemble.RandomForestClassifier)
        self.assertIs(df.ensemble.RandomTreesEmbedding,
                      ensemble.RandomTreesEmbedding)
        self.assertIs(df.ensemble.RandomForestRegressor,
                      ensemble.RandomForestRegressor)

        self.assertIs(df.ensemble.VotingClassifier,
                      ensemble.VotingClassifier) 
Example 24
def adaboost_regressor(self, assign=True, base_estimator=DecisionTreeRegressor(), **kwargs):
        """
        Supervised-learning regressor. Instantiates AdaBoostRegressor; by default uses:
            AdaBoostRegressor(base_estimator=base_estimator, n_estimators=100, random_state=1)

        Keyword arguments are passed straight through to AdaBoostRegressor via **kwargs, i.e.:
            AdaBoostRegressor(**kwargs)

        :param base_estimator: defaults to DecisionTreeRegressor()
        :param assign: whether to keep the instantiated AdaBoostRegressor object (self.reg = reg), default True
        :param kwargs: with keyword arguments, initialize as: AdaBoostRegressor(**kwargs)
                       without them, initialize as: AdaBoostRegressor(n_estimators=100, random_state=1)

        :return: the instantiated AdaBoostRegressor object
        """
        if kwargs is not None and len(kwargs) > 0:
            if 'base_estimator' not in kwargs:
                kwargs['base_estimator'] = base_estimator
            reg = AdaBoostRegressor(**kwargs)
        else:
            reg = AdaBoostRegressor(base_estimator=base_estimator, n_estimators=100, random_state=1)

        if assign:
            self.reg = reg

        return reg 
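
A hedged usage sketch, assuming ml is an instance of the wrapper class that defines adaboost_regressor:

# Keyword arguments pass straight through; base_estimator falls back to the default tree
reg = ml.adaboost_regressor(n_estimators=200, learning_rate=0.5, random_state=1)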
Example 25
def adaboost_regressor_best(self, x, y, param_grid=None, assign=True, n_jobs=-1, show=True):
        """
        Search for the optimal parameters for the AdaBoostRegressor constructor

        In the higher-level AbuML, the adaboost_regressor_best function is called directly with AbuML's x and y data
        eg:
            calling adaboost_regressor_best without a param_grid argument:

            from abupy import AbuML, ml
            ttn_abu = AbuML.create_test_more_fiter()
            ttn_abu.adaboost_regressor_best()

            calling adaboost_regressor_best with a param_grid argument:

            param_grid = {'learning_rate': np.arange(0.2, 1.2, 0.2), 'n_estimators': np.arange(10, 100, 10)}
            ttn_abu.adaboost_regressor_best(param_grid=param_grid, n_jobs=-1)

            out: AdaBoostRegressor(learning_rate=0.8, n_estimators=40)


        :param x: training-set x matrix, a numpy matrix
        :param y: training-set y sequence, a numpy sequence
        :param param_grid: dict of keyword parameters to search over,
                        eg: param_grid = {'learning_rate': np.arange(0.2, 1.2, 0.2),
                                         'n_estimators': np.arange(10, 100, 10)}
        :param assign: whether to keep the learner instantiated with the optimal parameters, default True
        :param n_jobs: number of parallel jobs, default -1, i.e. one process per CPU core
        :param show: whether to visualize the results of the optimal-parameter search
        :return: AdaBoostRegressor object constructed with the optimal parameters
        """
        return self._estimators_prarms_best(self.adaboost_regressor, x, y, param_grid, assign, n_jobs, show) 
Example 26
def __init__(self, isTrain):
        super(RegressionAdaBoost, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng) 