Python源码示例:sklearn.metrics.explained_variance_score()

示例1
def test_metrics_from_list():
    """
    Verify that ``ModelBuilder.metrics_from_list`` resolves metric callables.

    With no argument the call is expected to return the four default
    regression metrics; with a list of dotted names it is expected to return
    the matching attributes of ``metrics`` in the order given.
    """
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    requested_names = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    expected_specifics = [metrics.adjusted_mutual_info_score, metrics.r2_score]
    assert ModelBuilder.metrics_from_list(requested_names) == expected_specifics
示例2
def test_regression_metrics_at_limits():
    """Check regression metrics on degenerate inputs and invalid MSLE targets."""
    # Every error metric is asserted to be zero when the single prediction
    # equals the single target.
    zero_error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for error_metric in zero_error_metrics:
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    # The two score metrics are asserted to be 1 on a perfect prediction.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # MSLE must raise when a negative value appears in either argument.
    msle_message = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    negative_cases = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for y_true, y_pred in negative_cases:
        assert_raises_regex(ValueError, msle_message,
                            mean_squared_log_error, y_true, y_pred)
示例3
def test_regression_custom_weights():
    """Check multi-output metrics when explicit per-output weights are given."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]
    output_weights = [0.4, 0.6]

    # (metric, expected weighted value, decimals to compare)
    checks = (
        (mean_squared_error, 0.39, 2),
        (mean_absolute_error, 0.475, 3),
        (r2_score, 0.94, 2),
        (explained_variance_score, 0.94, 2),
    )
    # Evaluate every metric first, then compare, so all metrics are computed
    # before the first assertion can fail.
    weighted_values = [
        metric(y_true, y_pred, multioutput=output_weights)
        for metric, _, _ in checks
    ]
    for value, (_, expected, places) in zip(weighted_values, checks):
        assert_almost_equal(value, expected, decimal=places)

    # Handling msle separately as it does not accept negative inputs.
    log_weights = [0.3, 0.7]
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=log_weights)
    # Reference value: plain MSE computed on log(1 + y).
    msle_via_mse = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                      multioutput=log_weights)
    assert_almost_equal(msle, msle_via_mse, decimal=2)
示例4
def test_cross_val_score_with_score_func_regression():
    """Check ``cross_val_score`` on Ridge with default, named and custom scoring."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    ridge = Ridge()
    # The default score, "r2" and explained variance are all asserted against
    # the same per-fold values.
    expected_fold_scores = [0.94, 0.97, 0.97, 0.99, 0.92]

    # Default score of the Ridge regression estimator
    default_scores = cross_val_score(ridge, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_fold_scores, 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(ridge, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, expected_fold_scores, 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(ridge, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance, supplied as a scorer built from the metric function
    ev_scorer = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(ridge, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev_scores, expected_fold_scores, 2)
示例5
def eva_regress(y_true, y_pred):
    """Evaluation
    Print evaluation metrics for the predicted result.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    mape = MAPE(y_true, y_pred)
    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    mean_abs_err = metrics.mean_absolute_error(y_true, y_pred)
    mean_sq_err = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)

    # (format template, value) pairs, printed one per line in this order.
    report = (
        ('explained_variance_score:%f', explained_variance),
        ('mape:%f%%', mape),
        ('mae:%f', mean_abs_err),
        ('mse:%f', mean_sq_err),
        ('rmse:%f', math.sqrt(mean_sq_err)),
        ('r2:%f', r2),
    )
    for template, value in report:
        print(template % value)
示例6
def eval_metrics_on(predictions, labels):
    '''
    Compute regression scores, assuming continuous-valued float labels.

    If the first entry of `labels` has length 2, `labels` is treated as a
    list of data/labels pairs and the second item of every pair is
    concatenated into one array. Only column 0 of `predictions` is scored.

    Returns the scores as a dictionary with the keys:

        r2, mean_abs_error, mse, rmse, median_absolute_error, explained_variance_score
    '''
    if len(labels[0]) == 2:  # labels is list of data/labels pairs
        labels = np.concatenate([pair[1] for pair in labels])
    predictions = predictions[:, 0]

    residuals = predictions - labels
    mse = (residuals ** 2).mean()
    return {
        'r2': metrics.r2_score(labels, predictions),
        'mean_abs_error': np.abs(residuals).mean(),
        'mse': mse,
        'rmse': np.sqrt(mse),
        # robust to outliers
        'median_absolute_error': metrics.median_absolute_error(labels, predictions),
        # best score = 1, lower is worse
        'explained_variance_score': metrics.explained_variance_score(labels, predictions),
    }
示例7
def test(self):
        """
        Evaluate the regressor on ``self.dataset`` and write to the log:
            1- coefficient r2
            2- explained variance
            3- mean absolute error
            4- mean squared error

        Features are read from each precedent's 'facts_vector' (indexed by
        ``self.important_facts_index``) and targets from its 'outcomes_vector'
        (indexed by ``self.outcome_index``).

        :return: None
        """
        feature_rows = [precedent['facts_vector'][self.important_facts_index]
                        for precedent in self.dataset]
        y_pred = self.model.predict(np.array(feature_rows))
        target_values = [precedent['outcomes_vector'][self.outcome_index]
                         for precedent in self.dataset]
        y_true = np.array(target_values)

        # (message template, metric value) pairs, logged in this order.
        report = (
            ('R2: {0:.2f}', metrics.r2_score(y_true, y_pred)),
            ('Explained Variance: {0:.2f}',
             metrics.explained_variance_score(y_true, y_pred)),
            ('Mean Absolute Error: {0:.2f}',
             metrics.mean_absolute_error(y_true, y_pred)),
            ('Mean Squared Error: {0:.2f}',
             metrics.mean_squared_error(y_true, y_pred)),
        )
        for template, value in report:
            Log.write(template.format(value))
示例8
def test_explained_variance_score(self):
        """
        Compare the module's ``explained_variance_score`` with sklearn's.

        Two call styles are checked: column names plus ``castable``, and
        column objects plus ``id_vars``. The test is skipped when sklearn
        cannot be imported.
        """
        try:
            from sklearn.metrics import explained_variance_score as skevs
        except ImportError:
            # Only a missing sklearn should skip the test; a bare ``except``
            # would also hide KeyboardInterrupt and unrelated failures.
            self.skipTest("sklearn is not found in the libraries")

        # Column names resolved against a single table.
        skevs_score1 = skevs(self.local_reg1.target, self.local_reg1.p_target)
        dlpyevs_score1 = explained_variance_score('target', 'p_target', castable=self.reg_table1)

        self.assertAlmostEqual(skevs_score1, dlpyevs_score1)

        # Columns taken from two different tables, joined through 'id1'.
        skevs_score2 = skevs(self.local_reg1.target, self.local_reg2.p_target)
        dlpyevs_score2 = explained_variance_score(self.reg_table1.target, self.reg_table2.p_target,
                                                  id_vars='id1')

        self.assertAlmostEqual(skevs_score2, dlpyevs_score2)
示例9
def grid_search_init_n_components(estimator, x, y, n_components_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    Wrap a grid search for the best value of the 'n_components' keyword
    argument. Serves as the callback function for
    AbuMLCreater._estimators_prarms_best; see
    AbuMLCreater._estimators_prarms_best() for details.

    :param estimator: estimator (learner) object
    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param n_components_range: default None; when None the range used is:
            n_components_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, i.e. as
                   many processes as there are CPUs
    :param scoring: metric used on the test split, default None; when None,
                    classifiers are scored with accuracy and regressors with
                    explained_variance_score, wrapped into a scorer with
                    make_scorer
    :param show: whether to visualise the result
    :return: eg: (0.82154882154882158, {'n_components': 10})
    """
    if n_components_range is None:
        # Candidates start at 2 and stop before max(10, n_features - 1).
        upper_bound = np.maximum(10, int(x.shape[1]) - 1)
        n_components_range = np.arange(2, upper_bound, 1)

    search_options = dict(cv=cv, n_jobs=n_jobs, scoring=scoring, show=show)
    return grid_search_init_kwargs(estimator, x, y, 'n_components',
                                   n_components_range, **search_options)
示例10
def eva_regress(y_true, y_pred):
    """Evaluation
    Print evaluation metrics for the predicted result.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    mape = MAPE(y_true, y_pred)
    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    mean_abs_err = metrics.mean_absolute_error(y_true, y_pred)
    mean_sq_err = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)

    # (format template, value) pairs, printed one per line in this order.
    report = (
        ('explained_variance_score:%f', explained_variance),
        ('mape:%f%%', mape),
        ('mae:%f', mean_abs_err),
        ('mse:%f', mean_sq_err),
        ('rmse:%f', math.sqrt(mean_sq_err)),
        ('r2:%f', r2),
    )
    for template, value in report:
        print(template % value)
示例11
def test_regression_custom_weights():
    """Check multi-output metrics when explicit per-output weights are given."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]
    output_weights = [0.4, 0.6]

    # (metric, expected weighted value, decimals to compare)
    checks = (
        (mean_squared_error, 0.39, 2),
        (mean_absolute_error, 0.475, 3),
        (r2_score, 0.94, 2),
        (explained_variance_score, 0.94, 2),
    )
    # Evaluate every metric first, then compare, so all metrics are computed
    # before the first assertion can fail.
    weighted_values = [
        metric(y_true, y_pred, multioutput=output_weights)
        for metric, _, _ in checks
    ]
    for value, (_, expected, places) in zip(weighted_values, checks):
        assert_almost_equal(value, expected, decimal=places)

    # Handling msle separately as it does not accept negative inputs.
    log_weights = [0.3, 0.7]
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=log_weights)
    # Reference value: plain MSE computed on log(1 + y).
    msle_via_mse = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                      multioutput=log_weights)
    assert_almost_equal(msle, msle_via_mse, decimal=2)
示例12
def test_cross_val_score_with_score_func_regression():
    """Check ``cross_val_score`` on Ridge with default, named and custom scoring."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    ridge = Ridge()
    # The default score, "r2" and explained variance are all asserted against
    # the same per-fold values.
    expected_fold_scores = [0.94, 0.97, 0.97, 0.99, 0.92]

    # Default score of the Ridge regression estimator
    default_scores = cross_val_score(ridge, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_fold_scores, 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(ridge, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, expected_fold_scores, 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(ridge, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance, supplied as a scorer built from the metric function
    ev_scorer = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(ridge, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev_scores, expected_fold_scores, 2)
示例13
def test_multi(self):
    """
    Run basenji_sad_multi.py and compare its HDF5 output with the saved
    reference file.

    Datasets whose key ends in '_pct' are skipped. Byte-string datasets must
    match exactly; all other datasets must be close element-wise and reach an
    explained variance score above 0.999 against the reference.
    """
    if os.path.isdir('sad/testm'):
      shutil.rmtree('sad/testm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testm -q "" -p 4'

    cmd = 'basenji_sad_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    # Context managers close both files even when an assertion below fails;
    # the explicit close() calls used before were skipped on failure.
    with h5py.File('sad/saved/sad.h5', 'r') as saved_h5, \
         h5py.File('sad/testm/sad.h5', 'r') as this_h5:
      saved_keys = sorted(saved_h5.keys())
      this_keys = sorted(this_h5.keys())
      assert(len(saved_keys) == len(this_keys))
      assert(saved_keys == this_keys)

      for key in saved_h5:
        if key[-4:] == '_pct':
          continue

        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          # Byte-string data has no numeric tolerance: require equality.
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=1e-1, rtol=5e-2)
          evs = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert(evs > 0.999)

    # Reached only on success; a failing run leaves its output in place.
    shutil.rmtree('sad/testm')
示例14
def test_multi(self):
    """
    Run basenji_sad_ref_multi.py and compare its HDF5 output with the saved
    reference file.

    Datasets whose key ends in '_pct' are skipped. Byte-string datasets must
    match exactly; all other datasets must be close element-wise and reach an
    explained variance score above 0.999 against the reference.
    """
    if os.path.isdir('sad/testrm'):
      shutil.rmtree('sad/testrm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testrm -q "" -p 4'
    cmd = 'basenji_sad_ref_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    # Context managers close both files even when an assertion below fails;
    # the explicit close() calls used before were skipped on failure.
    with h5py.File('sad/saved/sadr.h5', 'r') as saved_h5, \
         h5py.File('sad/testrm/sad.h5', 'r') as this_h5:
      saved_keys = sorted(saved_h5.keys())
      this_keys = sorted(this_h5.keys())
      assert(len(saved_keys) == len(this_keys))
      assert(saved_keys == this_keys)

      for key in saved_h5:
        if key[-4:] == '_pct':
          continue

        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          # One equality check is enough; assert_array_equal also reports
          # which elements differ, unlike a bare `(a == b).all()` assert.
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=2e-1, rtol=2e-1)
          evs = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert(evs > 0.999)

    # Reached only on success; a failing run leaves its output in place.
    shutil.rmtree('sad/testrm')
示例15
def _calculate_optimal_reconstruction_orders(self, timeseries, components):
        '''Compute the optimal component ordering for reconstructing each
        timeseries, and the explained variance of every ordered component.

        The ordering comes from ``optimal_component_ordering``; according to
        the original description it ranks the components by how much variance
        they explain for each timeseries in the original data.

        `timeseries` is indexed as ``[:, ts_idx]`` and `components` as
        ``[ts_idx, :, :]``.

        Returns the integer ordering array and a float array of the same
        shape holding the explained variance score of each ordered component.
        '''
        orders = optimal_component_ordering(timeseries, components).astype(int)

        explained = np.zeros_like(orders).astype(float)
        for series_idx in range(timeseries.shape[1]):
            target = timeseries[:, series_idx]
            # Reorder this series' component columns by their ranking.
            ranked = components[series_idx, :, :][:, orders[:, series_idx]]
            # NOTE(review): a cumulative-sum variant
            # (np.cumsum(ranked, axis=1)) was left disabled here, so each
            # component column is scored on its own.

            explained[:, series_idx] = np.apply_along_axis(
                lambda column: explained_variance_score(target, column),
                0,
                ranked
            )

        return orders, explained
示例16
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    """
    Check that the built machine's cross-validation scores contain an entry
    for every requested metric, or for the four default metrics when
    `metrics_` is empty or None.

    The name looked up is the last dotted component of the metric path with
    underscores replaced by hyphens.
    """
    default_metric_paths = [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config["metrics"] = metrics_

    machine = Machine(
        name="model-name",
        dataset=get_random_data(),
        model={
            "sklearn.multioutput.MultiOutputRegressor": {
                "estimator": "sklearn.linear_model.LinearRegression"
            }
        },
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    recorded_scores = machine.metadata.build_metadata.model.cross_validation.scores
    for metric_path in metrics_ or default_metric_paths:
        score_name = metric_path.split(".")[-1].replace("_", "-")
        assert score_name in recorded_scores
示例17
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between auto encoder's input vs output

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            Sample weights, forwarded to ``explained_variance_score``. With
            the default ``None`` every sample is weighted equally.

        Returns
        -------
        score: float
            Returns the explained variance score

        Raises
        ------
        NotFittedError
            If the instance has no ``model`` attribute yet.
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.model.predict(X)

        # sample_weight used to be accepted but silently ignored; pass it on
        # so that weighted scoring actually takes effect.
        return explained_variance_score(y, out, sample_weight=sample_weight)
示例18
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between 1 step forecasted input and true
        input at next time step (note: for LSTM X is offset by `lookback_window`).

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model.
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            Sample weights, one per row of ``y``. They are trimmed to the
            last ``len(out)`` entries, the same way ``y`` is, and forwarded to
            ``explained_variance_score``. With the default ``None`` every
            sample is weighted equally.

        Returns
        -------
        score: float
            Returns the explained variance score.

        Raises
        ------
        NotFittedError
            If the instance has no ``model`` attribute yet.
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.predict(X)
        n_scored = len(out)

        # Limit the samples to match the offset caused by the LSTM lookback
        # window, ie, if look back window is 5, 'out' will be 5 rows less than
        # X by now.
        y_aligned = y[-n_scored:]
        # sample_weight used to be ignored; align it with the trimmed targets
        # before passing it on.
        weights = None if sample_weight is None else sample_weight[-n_scored:]

        return explained_variance_score(y_aligned, out, sample_weight=weights)
示例19
def test_regression_metrics(n_samples=50):
    """Check regression metrics when every prediction is off by exactly one."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    # With a constant offset of one, each of these errors is asserted to be 1.
    unit_error_metrics = (
        mean_squared_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for error_metric in unit_error_metrics:
        assert_almost_equal(error_metric(y_true, y_pred), 1.)

    # MSLE is compared with MSE computed on log(1 + y).
    log_true = np.log(1 + y_true)
    log_pred = np.log(1 + y_pred)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(log_true, log_pred))

    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
示例20
def compute(labels, pred_scores):
        """Return ``explained_variance_score(labels, pred_scores)``."""
        score = explained_variance_score(labels, pred_scores)
        return score
示例21
def _score_explained_variance(self):
        """Return the explained variance score of ``self._y_predicted``
        against ``self._y_test``."""
        y_true, y_pred = self._y_test, self._y_predicted
        return explained_variance_score(y_true, y_pred)
示例22
def test_explained_variance_score(self):
        """The ``df.metrics`` accessor must return the same value as calling
        ``metrics.explained_variance_score`` on the target and prediction."""
        reference = metrics.explained_variance_score(self.target, self.pred)
        from_accessor = self.df.metrics.explained_variance_score()
        self.assertEqual(from_accessor, reference)
示例23
def _scoring_grid(estimator, scoring):
    """
    Resolve the scoring to use for a supervised estimator; unsupervised
    estimators are filtered out. When `scoring` is not given, classifiers are
    scored with accuracy and regressors with explained_variance_score, which
    is wrapped into a scorer with make_scorer.

    :param estimator: estimator (learner) object
    :param scoring: scoring to use; when None it is chosen from the estimator
                    type: accuracy for classifiers, explained_variance_score
                    for regressors
    :return: scoring, or None when the estimator is neither a classifier nor
             a regressor
    """
    is_classifier = isinstance(estimator, ClassifierMixin)
    is_regressor = isinstance(estimator, RegressorMixin)

    if not (is_classifier or is_regressor):
        logging.info('only support supervised learning')
        # TODO: scoring and GridSearchCV support for unsupervised learning
        return None

    # An explicitly supplied scoring always wins.
    if scoring is not None:
        return scoring

    if is_classifier:
        # Classifiers use accuracy.
        return 'accuracy'

    # Only regressors reach this point. make_scorer assigns the sign of the
    # returned value through greater_is_better,
    # eg: sign = 1 if greater_is_better else -1
    return make_scorer(explained_variance_score, greater_is_better=True)
示例24
def grid_search_init_n_estimators(estimator, x, y, n_estimators_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    Wrap a grid search for the best value of the 'n_estimators' keyword
    argument. Serves as the callback function for
    AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.random_forest_classifier_best()

    eg:
        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.estimator.random_forest_classifier()
        ml.grid_search_init_n_estimators(ttn_abu.estimator.clf, ttn_abu.x, ttn_abu.y)

    :param estimator: estimator (learner) object
    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param n_estimators_range: default None; when None the range used is
                               n_estimators_range = np.arange(50, 500, 10)
    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, i.e. as
                   many processes as there are CPUs
    :param scoring: metric used on the test split, default None; when None,
                    classifiers are scored with accuracy and regressors with
                    explained_variance_score, wrapped into a scorer with
                    make_scorer
    :param show: whether to visualise the result
    :return: eg: (0.82154882154882158, {'n_estimators': 310})
    """
    candidates = n_estimators_range
    if candidates is None:
        candidates = np.arange(50, 500, 10)

    search_options = dict(cv=cv, n_jobs=n_jobs, scoring=scoring, show=show)
    return grid_search_init_kwargs(estimator, x, y, 'n_estimators',
                                   candidates, **search_options)
示例25
def grid_search_init_max_depth(estimator, x, y, max_depth_range=None, cv=10, n_jobs=-1,
                               scoring=None, show=True):
    """
    Wrap a grid search for the best value of the 'max_depth' keyword
    argument. Serves as the callback function for
    AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.decision_tree_classifier_best()

    :param estimator: estimator (learner) object
    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param max_depth_range: default None; when None the range used is:
            max_depth_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, i.e. as
                   many processes as there are CPUs
    :param scoring: metric used on the test split, default None; when None,
                    classifiers are scored with accuracy and regressors with
                    explained_variance_score, wrapped into a scorer with
                    make_scorer
    :param show: whether to visualise the result
    :return: eg: (0.82154882154882158, {'max_depth': 3})
    """
    if max_depth_range is None:
        # Candidates start at 2 and stop before max(10, n_features - 1).
        upper_bound = np.maximum(10, int(x.shape[1]) - 1)
        max_depth_range = np.arange(2, upper_bound, 1)

    search_options = dict(cv=cv, n_jobs=n_jobs, scoring=scoring, show=show)
    return grid_search_init_kwargs(estimator, x, y, 'max_depth',
                                   max_depth_range, **search_options)
示例26
def grid_search_init_n_neighbors(estimator, x, y, n_neighbors_range=None, cv=10, n_jobs=-1,
                                 scoring=None, show=True):
    """
    Wrap a grid search for the best value of the 'n_neighbors' keyword
    argument. Serves as the callback function for
    AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.knn_classifier_best()

    :param estimator: estimator (learner) object
    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param n_neighbors_range: default None; when None the range used is:
            n_neighbors_range = np.arange(1, np.minimum(26, int(x.shape[0] / 3)), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, i.e. as
                   many processes as there are CPUs
    :param scoring: metric used on the test split, default None; when None,
                    classifiers are scored with accuracy and regressors with
                    explained_variance_score, wrapped into a scorer with
                    make_scorer
    :param show: whether to visualise the result
    :return: eg: (0.82154882154882158, {'n_neighbors': 10})
    """
    if n_neighbors_range is None:
        # Number of voting neighbours: from 1 up to (but excluding)
        # min(26, one third of the number of samples).
        upper_bound = np.minimum(26, int(x.shape[0] / 3))
        n_neighbors_range = np.arange(1, upper_bound, 1)

    search_options = dict(cv=cv, n_jobs=n_jobs, scoring=scoring, show=show)
    return grid_search_init_kwargs(estimator, x, y, 'n_neighbors',
                                   n_neighbors_range, **search_options)
示例27
def plot_learning_curve(model, err_func=explained_variance_score, N=300, n_runs=10, n_sizes=50, ylim=None):
    """
    Plot the mean training and validation score of `model` against the
    training-set size.

    The data comes from the names `X` and `y`, which are not parameters and
    must already be defined in the enclosing scope. For each of `n_runs` runs
    and each of `n_sizes` integer sizes spread between 5 and `N`, the data is
    split with ``train_test_split``, the model is refitted, and `err_func` is
    evaluated on both parts. Scores are averaged over the runs.

    :param model: estimator exposing ``fit`` and ``predict``
    :param err_func: callable ``err_func(y_true, y_pred)``; its ``__name__``
                     labels the y axis
    :param N: largest training-set size; also sets the x-axis limit
    :param n_runs: number of differently seeded splits per size
    :param n_sizes: number of training-set sizes to evaluate
    :param ylim: optional y-axis limits, applied only when truthy
    """
    sizes = np.linspace(5, N, n_sizes).astype(int)
    score_shape = (n_runs, n_sizes)
    train_err = np.zeros(score_shape)
    validation_err = np.zeros(score_shape)

    for run in range(n_runs):
        for col, train_size in enumerate(sizes):
            # The run index doubles as the random seed of the split.
            xtrain, xtest, ytrain, ytest = train_test_split(
                X, y, train_size=train_size, random_state=run)
            # Train on `train_size` points selected by the split.
            model.fit(xtrain, ytrain)
            validation_err[run, col] = err_func(ytest, model.predict(xtest))
            train_err[run, col] = err_func(ytrain, model.predict(xtrain))

    curves = ((validation_err, 'validation'), (train_err, 'training'))
    for scores, curve_label in curves:
        plt.plot(sizes, scores.mean(axis=0), lw=2, label=curve_label)

    plt.xlabel('traning set size')
    plt.ylabel(err_func.__name__.replace('_', ' '))

    plt.grid(True)

    plt.legend(loc=0)

    plt.xlim(0, N-1)

    if ylim:
        plt.ylim(ylim)
示例28
def test_regression_metrics(n_samples=50):
    """Check regression metrics when every prediction is off by exactly one."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    # With a constant offset of one, each of these errors is asserted to be 1.
    unit_error_metrics = (
        mean_squared_error,
        mean_absolute_error,
        median_absolute_error,
    )
    for error_metric in unit_error_metrics:
        assert_almost_equal(error_metric(y_true, y_pred), 1.)

    # MSLE is compared with MSE computed on log(1 + y).
    log_true = np.log(1 + y_true)
    log_pred = np.log(1 + y_pred)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(log_true, log_pred))

    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
示例29
def test_regression_metrics_at_limits():
    """Check regression metrics on degenerate inputs and an invalid MSLE target."""
    # Every error metric is asserted to be zero when the single prediction
    # equals the single target.
    zero_error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
    )
    for error_metric in zero_error_metrics:
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    # The two score metrics are asserted to be 1 on a perfect prediction.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # MSLE must raise on negative values.
    msle_message = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    assert_raises_regex(ValueError, msle_message,
                        mean_squared_log_error, [-1.], [-1.])
示例30
def test_regression_multioutput_array():
    """Check the per-output ('raw_values') results of the regression metrics."""
    raw = 'raw_values'

    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse_raw = mean_squared_error(y_true, y_pred, multioutput=raw)
    mae_raw = mean_absolute_error(y_true, y_pred, multioutput=raw)
    r2_raw = r2_score(y_true, y_pred, multioutput=raw)
    evs_raw = explained_variance_score(y_true, y_pred, multioutput=raw)

    assert_array_almost_equal(mse_raw, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae_raw, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(r2_raw, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs_raw, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]]*4
    y_pred = [[1, 1]]*4
    mse_raw = mean_squared_error(y_true, y_pred, multioutput=raw)
    mae_raw = mean_absolute_error(y_true, y_pred, multioutput=raw)
    r2_raw = r2_score(y_true, y_pred, multioutput=raw)
    assert_array_almost_equal(mse_raw, [1., 1.], decimal=2)
    assert_array_almost_equal(mae_raw, [1., 1.], decimal=2)
    assert_array_almost_equal(r2_raw, [0., 0.], decimal=2)

    # The uniform average must equal the mean of the raw per-output values.
    y_true = [[0, -1], [0, 1]]
    y_pred = [[2, 2], [1, 1]]
    r2_raw = r2_score(y_true, y_pred, multioutput=raw)
    assert_array_almost_equal(r2_raw, [0, -3.5], decimal=2)
    assert_equal(np.mean(r2_raw),
                 r2_score(y_true, y_pred, multioutput='uniform_average'))
    evs_raw = explained_variance_score(y_true, y_pred, multioutput=raw)
    assert_array_almost_equal(evs_raw, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator is
    # zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2_raw = r2_score(y_true, y_pred, multioutput=raw)
    assert_array_almost_equal(r2_raw, [1., -3.], decimal=2)
    assert_equal(np.mean(r2_raw),
                 r2_score(y_true, y_pred, multioutput='uniform_average'))
    evs_raw = explained_variance_score(y_true, y_pred, multioutput=raw)
    assert_array_almost_equal(evs_raw, [1., -3.], decimal=2)
    assert_equal(np.mean(evs_raw), explained_variance_score(y_true, y_pred))

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle_raw = mean_squared_log_error(y_true, y_pred, multioutput=raw)
    # Reference value: plain MSE computed on log(1 + y).
    msle_via_mse = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                      multioutput=raw)
    assert_array_almost_equal(msle_raw, msle_via_mse, decimal=2)