Python源码示例:sklearn.ensemble.VotingClassifier()

示例1
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.predict, X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r) 
示例2
def test_parallel_fit():
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=1).fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=2).fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) 
示例3
def __init__(self, num_features, **kwargs):
        super(VotingClassifier, self).__init__()

        kwargs = {**constants.VOTING_CLASSIFIER_PARAMS, **kwargs}

        voting = kwargs.pop('voting')

        self.num_features = num_features

        estimators = []
        for clf in constants.CLASSIFIERS_FOR_ENSEMBLE:
            model = utils.init_model(clf, num_features=num_features, **kwargs)

            estimators.append((clf, model.kernel))

        # use as kernel the VotingClassifier coming from sklearn
        self.kernel = SKVotingClassifier(
            estimators=estimators, voting=voting, n_jobs=None
        ) 
示例4
def __init__(self, api, lobes=False):
        """
        lobes = a dict of classifiers to use in the VotingClassifier
            defaults to RandomForestClassifier and DecisionTreeClassifier
        """
        self.api = api
        if not lobes:
            lobes = {'rf': RandomForestClassifier(n_estimators=7,
                                                  random_state=666),
                     'dt': DecisionTreeClassifier()
                     }
        self.lobe = VotingClassifier(
            estimators=[(lobe, lobes[lobe]) for lobe in lobes],
            voting='hard',
            n_jobs=-1)
        self._trained = False
        self.split = splitTrainTestData
        self.prep = prepDataframe 
示例5
def test_voting_hard_binary(self):
        model = VotingClassifier(
            voting="hard",
            flatten_transform=False,
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression(fit_intercept=False)),
            ],
        )
        # predict_proba is not defined when voting is hard.
        dump_binary_classification(
            model,
            suffix="Hard",
            comparable_outputs=[0],
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.5.0')",
            target_opset=TARGET_OPSET
        ) 
示例6
def test_voting_hard_binary_weights(self):
        model = VotingClassifier(
            voting="hard",
            flatten_transform=False,
            weights=numpy.array([1000, 1]),
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression(fit_intercept=False)),
            ],
        )
        # predict_proba is not defined when voting is hard.
        dump_binary_classification(
            model,
            suffix="WeightsHard",
            comparable_outputs=[0],
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.5.0')",
            target_opset=TARGET_OPSET
        ) 
示例7
def test_voting_soft_binary(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression(fit_intercept=False)),
            ],
        )
        dump_binary_classification(
            model,
            suffix="Soft",
            comparable_outputs=[0, 1],
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例8
def test_voting_soft_binary_weighted(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            weights=numpy.array([1.8, 0.2]),
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression(fit_intercept=False)),
            ],
        )
        dump_binary_classification(
            model,
            suffix="WeightedSoft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例9
def test_voting_hard_multi(self):
        # predict_proba is not defined when voting is hard.
        model = VotingClassifier(
            voting="hard",
            flatten_transform=False,
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", DecisionTreeClassifier()),
            ],
        )
        dump_multiple_classification(
            model,
            suffix="Hard",
            comparable_outputs=[0],
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.5.0')",
            target_opset=TARGET_OPSET
        ) 
示例10
def test_voting_soft_multi(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression()),
            ],
        )
        dump_multiple_classification(
            model,
            suffix="Soft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例11
def test_voting_soft_multi_string(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression()),
            ],
        )
        dump_multiple_classification(
            model, label_string=True,
            suffix="Soft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例12
def test_voting_soft_multi_weighted(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            weights=numpy.array([1.8, 0.2]),
            estimators=[
                ("lr", LogisticRegression()),
                ("lr2", LogisticRegression()),
            ],
        )
        dump_multiple_classification(
            model,
            suffix="WeightedSoft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例13
def test_voting_soft_multi_weighted4(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            weights=numpy.array([2.7, 0.3, 0.5, 0.5]),
            estimators=[
                ("lr", LogisticRegression()),
                ("lra", LogisticRegression()),
                ("lrb", LogisticRegression()),
                ("lr2", LogisticRegression()),
            ],
        )
        dump_multiple_classification(
            model,
            suffix="Weighted4Soft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例14
def test_voting_soft_multi_weighted42(self):
        model = VotingClassifier(
            voting="soft",
            flatten_transform=False,
            weights=numpy.array([27, 0.3, 0.5, 0.5]),
            estimators=[
                ("lr", LogisticRegression()),
                ("lra", LogisticRegression()),
                ("lrb", LogisticRegression()),
                ("lr2", LogisticRegression()),
            ],
        )
        dump_multiple_classification(
            model,
            suffix="Weighted42Soft",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
            target_opset=TARGET_OPSET
        ) 
示例15
def test_parallel_fit():
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=1).fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=2).fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) 
示例16
def test_set_params():
    """set_params should be able to set estimators"""
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    eclf1.fit(X, y)
    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    eclf2.set_params(nb=clf2).fit(X, y)
    assert_false(hasattr(eclf2, 'nb'))

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())

    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)

    assert_true(eclf1.estimators[0][1].get_params()['C'] == 10.0)
    assert_true(eclf2.estimators[1][1].get_params()['max_depth'] == 5)
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C']) 
示例17
def test_estimator_init():
    eclf = VotingClassifier(estimators=[])
    msg = ('Invalid `estimators` attribute, `estimators` should be'
           ' a list of (string, estimator) tuples')
    assert_raise_message(AttributeError, msg, eclf.fit, X, y)

    clf = LogisticRegression(random_state=1)

    eclf = VotingClassifier(estimators=[('lr', clf)], voting='error')
    msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2])
    msg = ('Number of `estimators` and weights must be equal'
           '; got 2 weights, 1 estimators')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)],
                            weights=[1, 2])
    msg = "Names provided are not unique: ['lr', 'lr']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr__', clf)])
    msg = "Estimator names must not contain __: got ['lr__']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('estimators', clf)])
    msg = "Estimator names conflict with constructor arguments: ['estimators']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y) 
示例18
def test_predictproba_hardvoting():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='hard')
    msg = "predict_proba is not available when voting='hard'"
    assert_raise_message(AttributeError, msg, eclf.predict_proba, X) 
示例19
def test_majority_label_iris():
    """Check classification by majority label on dataset iris."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
                ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                voting='hard')
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.95, decimal=2) 
示例20
def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    clf1 = LogisticRegression(random_state=123, multi_class='ovr',
                              solver='liblinear')
    clf2 = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)],
                            voting='hard')
    assert_equal(clf1.fit(X, y).predict(X)[73], 2)
    assert_equal(clf2.fit(X, y).predict(X)[73], 1)
    assert_equal(eclf.fit(X, y).predict(X)[73], 1) 
示例21
def test_predict_on_toy_problem():
    """Manually check predicted class labels for toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()

    X = np.array([[-1.1, -1.5],
                  [-1.2, -1.4],
                  [-3.4, -2.2],
                  [1.1, 1.2],
                  [2.1, 1.4],
                  [3.1, 2.3]])

    y = np.array([1, 1, 1, 2, 2, 2])

    assert_equal(all(clf1.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2]))
    assert_equal(all(clf2.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2]))
    assert_equal(all(clf3.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2]))

    eclf = VotingClassifier(estimators=[
                            ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                            voting='hard',
                            weights=[1, 1, 1])
    assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2]))

    eclf = VotingClassifier(estimators=[
                            ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                            voting='soft',
                            weights=[1, 1, 1])
    assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) 
示例22
def test_multilabel():
    """Check if error is raised for multilabel classification."""
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          random_state=123)
    clf = OneVsRestClassifier(SVC(kernel='linear'))

    eclf = VotingClassifier(estimators=[('ovr', clf)], voting='hard')

    try:
        eclf.fit(X, y)
    except NotImplementedError:
        return 
示例23
def test_gridsearch():
    """Check GridSearch support."""
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
                ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                voting='soft')

    params = {'lr__C': [1.0, 100.0],
              'voting': ['soft', 'hard'],
              'weights': [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]]}

    grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5)
    grid.fit(iris.data, iris.target) 
示例24
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),))) 
示例25
def test_set_params():
    """set_params should be able to set estimators"""
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    assert 'lr' in eclf1.named_estimators
    assert eclf1.named_estimators.lr is eclf1.estimators[0][1]
    assert eclf1.named_estimators.lr is eclf1.named_estimators['lr']
    eclf1.fit(X, y)
    assert 'lr' in eclf1.named_estimators_
    assert eclf1.named_estimators_.lr is eclf1.estimators_[0]
    assert eclf1.named_estimators_.lr is eclf1.named_estimators_['lr']

    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    eclf2.set_params(nb=clf2).fit(X, y)
    assert not hasattr(eclf2, 'nb')

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())

    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)

    assert eclf1.estimators[0][1].get_params()['C'] == 10.0
    assert eclf2.estimators[1][1].get_params()['max_depth'] == 5
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C']) 
示例26
def test_estimator_weights_format():
    # Test estimator weights inputs as list and array
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    eclf1 = VotingClassifier(estimators=[
                ('lr', clf1), ('rf', clf2)],
                weights=[1, 2],
                voting='soft')
    eclf2 = VotingClassifier(estimators=[
                ('lr', clf1), ('rf', clf2)],
                weights=np.array((1, 2)),
                voting='soft')
    eclf1.fit(X, y)
    eclf2.fit(X, y)
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) 
示例27
def test_transform():
    """Check transform method of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft').fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        flatten_transform=True).fit(X, y)
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        flatten_transform=False).fit(X, y)

    assert_array_equal(eclf1.transform(X).shape, (4, 6))
    assert_array_equal(eclf2.transform(X).shape, (4, 6))
    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
    assert_array_almost_equal(eclf1.transform(X),
                              eclf2.transform(X))
    assert_array_almost_equal(
            eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)),
            eclf2.transform(X)
    ) 
示例28
def log(msg):
  print("[%s] %s" % (time.asctime(), msg))

#-------------------------------------------------------------------------------
# The original VotingClassifier class uses np.bincount() with an array and
# annoyingly it will fail with a message like "cannot cast float64 to int64".
# 
示例29
def vote(self):
    log("Loading data...")
    self.X, self.y = self.load_data()
    estimators = []
    names = []
    for classifier, name, arg in ML_CLASSIFIERS:
      clf = classifier(arg)
      log("Creating model %s..." % (classifier.__name__))
      estimators.append([classifier.__name__, clf])
      names.append(name)

    log("Fitting data with VotingClassifier('hard')")
    self.clf = CPigaiosVotingClassifier(estimators=estimators, voting='hard', n_jobs=-1)
    self.clf.fit(self.X, self.y)

    log("Predicting...")
    self.predict()
    log("Saving model...")
    joblib.dump(self.clf, "clf.pkl")

    for clf, label in zip(estimators, names):
      try:
        scores = cross_val_score(clf, self.X, self.y, cv=5, scoring='accuracy')
        print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
      except:
        print("Error with", clf, ":", sys.exc_info()[1]) 
示例30
def getEstimator(scorer_type):
    if scorer_type == 'grad_boost':
        clf = GradientBoostingClassifier(n_estimators=200, random_state=14128, verbose=True)

    if scorer_type == 'svm1': # stochastic gradient decent classifier
        clf = svm.SVC(gamma=0.001, C=100., verbose=True)

    if scorer_type == 'logistic_regression' :
        clf = logistic.LogisticRegression()

    if scorer_type == 'svm3':
        clf = svm.SVC(kernel='poly', C=1.0, probability=True, class_weight='unbalanced')

    if scorer_type == "bayes":
        clf = naive_bayes.GaussianNB()

    if scorer_type == 'voting_hard_svm_gradboost_logistic':
        svm2 = svm.SVC(kernel='linear', C=1.0, probability=True, class_weight='balanced', verbose=True)
        log_reg = logistic.LogisticRegression()
        gradboost = GradientBoostingClassifier(n_estimators=200, random_state=14128, verbose=True)

        clf = VotingClassifier(estimators=[  # ('gb', gb),
            ('svm', svm2),
            ('grad_boost', gradboost),
            ('logisitc_regression', log_reg)
        ],  n_jobs=1,
            voting='hard')

    if scorer_type == 'voting_hard_bayes_gradboost':
        bayes = naive_bayes.GaussianNB()
        gradboost = GradientBoostingClassifier(n_estimators=200, random_state=14128, verbose=True)

        clf = VotingClassifier(estimators=[  # ('gb', gb),
            ('bayes', bayes),
            ('grad_boost', gradboost),
        ],  n_jobs=1,
            voting='hard')

    return clf