Python源码示例:sklearn.metrics.mean_absolute_error()

示例1
def proxy_a_distance(source_X, target_X):
    """
    Estimate the Proxy-A-Distance between a source and a target representation.

    A linear SVM is fitted to tell source rows (label 0) from target rows
    (label 1). Its error on that same training data is turned into a
    distance with ``2 * (1 - 2 * error)``.
    """
    n_source_rows = np.shape(source_X)[0]
    n_target_rows = np.shape(target_X)[0]

    # Domain labels: 0 for every source row, 1 for every target row.
    domain_labels = np.hstack((np.zeros(n_source_rows, dtype=int),
                               np.ones(n_target_rows, dtype=int)))
    stacked_features = np.vstack((source_X, target_X))

    domain_classifier = svm.LinearSVC(random_state=0)
    domain_classifier.fit(stacked_features, domain_labels)

    # The error is measured on the data the classifier was trained on.
    training_error = metrics.mean_absolute_error(
        domain_labels, domain_classifier.predict(stacked_features))
    return 2 * (1 - 2 * training_error)
示例2
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Score a fitted estimator's predictions on a test set.

    Args:
        trained_sklearn_estimator: an estimator whose ``predict`` method is called on ``x_test``
        x_test: the test features handed to ``predict``
        y_test: the test targets the predictions are compared against

    Returns:
        dict: the keys ``'mean_squared_error'`` and ``'mean_absolute_error'`` mapped to their scores
    """
    y_predicted = trained_sklearn_estimator.predict(x_test)

    return {
        'mean_squared_error': skmetrics.mean_squared_error(y_test, y_predicted),
        'mean_absolute_error': skmetrics.mean_absolute_error(y_test, y_predicted),
    }
示例3
def score_regression(y, y_hat, report=True):
    """
    Compute R2, RMSE and MAE for a set of predictions.

    :param y: true target values
    :param y_hat: predicted values
    :param report: when truthy, print the formatted report
    :return: tuple ``(mae, report_string)``
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "".join([
        "---Regression Score--- \n",
        "R2 = " + str(r2) + "\n",
        "RMSE = " + str(rmse) + "\n",
        "MAE = " + str(mae) + "\n",
    ])

    if report:
        print(report_string)

    return mae, report_string
示例4
def test_metrics_from_list():
    """
    metrics_from_list yields the default metric functions when called with
    no arguments, and resolves dotted metric names to the matching functions.
    """
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    requested_names = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    expected_specifics = [metrics.adjusted_mutual_info_score, metrics.r2_score]
    assert ModelBuilder.metrics_from_list(requested_names) == expected_specifics
示例5
def test_experiment_cat_custom_eval(tmpdir_name):
    """Run a 'cat' experiment with mean_absolute_error as eval_func and check the last reported metric."""
    features, target = make_regression_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                          random_state=0, id_column='user_id')

    X_train, X_test, y_train, _ = train_test_split(features, target, test_size=0.5, random_state=0)

    cat_params = {
        'max_depth': 8,
        'num_boost_round': 100,
        'eval_metric': 'MAE'
    }

    result = run_experiment(cat_params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type='cat', eval_func=mean_absolute_error)

    # The last entry of result.metrics must equal the MAE of the out-of-fold predictions.
    oof_mae = mean_absolute_error(y_train, result.oof_prediction)
    assert oof_mae == result.metrics[-1]
    _check_file_exists(tmpdir_name)
示例6
def test_multioutput_regression():
    """Check the regression metrics on a small multi-output example whose entries are all 0 or 1."""
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    # mean_absolute_error and mean_squared_error share this value because
    # it is a binary problem.
    expected_mean_error = (1. / 3 + 2. / 3 + 2. / 3) / 4.

    assert_almost_equal(mean_squared_error(y_true, y_pred), expected_mean_error)

    assert_almost_equal(mean_squared_log_error(y_true, y_pred), 0.200, decimal=2)

    assert_almost_equal(mean_absolute_error(y_true, y_pred), expected_mean_error)

    variance_weighted_r2 = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(variance_weighted_r2, 1. - 5. / 2)
    uniform_average_r2 = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(uniform_average_r2, -.875)
示例7
def test_regression_metrics_at_limits():
    """Check the metrics on trivial inputs and that MSLE rejects negative values."""
    # Error metrics are zero when the single prediction matches the target.
    zero_error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for metric in zero_error_metrics:
        assert_almost_equal(metric([0.], [0.]), 0.00, 2)

    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # A negative value in either argument must raise.
    msle_message = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    negative_cases = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for first_arg, second_arg in negative_cases:
        assert_raises_regex(ValueError, msle_message,
                            mean_squared_log_error, first_arg, second_arg)
示例8
def test_regression_custom_weights():
    """Check the metrics when ``multioutput`` is given as a list of per-output weights."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    weighted_mse = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    weighted_mae = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    weighted_r2 = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    weighted_evs = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(weighted_mse, 0.39, decimal=2)
    assert_almost_equal(weighted_mae, 0.475, decimal=3)
    assert_almost_equal(weighted_r2, 0.94, decimal=2)
    assert_almost_equal(weighted_evs, 0.94, decimal=2)

    # msle gets its own non-negative data because it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    weighted_msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    # Reference value: plain MSE on log(1 + value) with the same weights.
    log_space_mse = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                       multioutput=[0.3, 0.7])
    assert_almost_equal(weighted_msle, log_space_mse, decimal=2)
示例9
def test_trigonometric():
    """Check that adding trig functions to the function set works and changes the result"""

    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    default_model = SymbolicRegressor(population_size=100, generations=2,
                                      random_state=0)
    default_model.fit(train_X, train_y)
    default_mae = mean_absolute_error(default_model.predict(test_X), test_y)

    trig_model = SymbolicRegressor(population_size=100, generations=2,
                                   function_set=['add', 'sub', 'mul', 'div',
                                                 'sin', 'cos', 'tan'],
                                   random_state=0)
    trig_model.fit(train_X, train_y)
    trig_mae = mean_absolute_error(trig_model.predict(test_X), test_y)

    assert abs(default_mae - trig_mae) > 0.01
示例10
def test_subsample():
    """Check that max_samples subsampling works and changes the result"""

    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    full_sample_model = SymbolicRegressor(population_size=100, generations=2,
                                          max_samples=1.0, random_state=0)
    full_sample_model.fit(train_X, train_y)
    full_sample_mae = mean_absolute_error(full_sample_model.predict(test_X), test_y)

    half_sample_model = SymbolicRegressor(population_size=100, generations=2,
                                          max_samples=0.5, random_state=0)
    half_sample_model.fit(train_X, train_y)
    half_sample_mae = mean_absolute_error(half_sample_model.predict(test_X), test_y)

    assert abs(full_sample_mae - half_sample_mae) > 0.01
示例11
def test_parsimony_coefficient():
    """Check that a fixed and an 'auto' parsimony coefficient both work and give different results"""

    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    fixed_model = SymbolicRegressor(population_size=100, generations=2,
                                    parsimony_coefficient=0.001, random_state=0)
    fixed_model.fit(train_X, train_y)
    fixed_mae = mean_absolute_error(fixed_model.predict(test_X), test_y)

    auto_model = SymbolicRegressor(population_size=100, generations=2,
                                   parsimony_coefficient='auto', random_state=0)
    auto_model.fit(train_X, train_y)
    auto_mae = mean_absolute_error(auto_model.predict(test_X), test_y)

    assert abs(fixed_mae - auto_mae) > 0.01
示例12
def pla(data, period=15):
    """
    Score an order-1 piecewise-polynomial approximation of ``data``.

    An interpolating spline (``s=0``) is fitted to ``data`` over its sample
    indices. A piecewise polynomial is then built from the spline's value
    and first two derivatives at the unique knots, sampled at
    ``int(len(data) / period)`` evenly spaced positions, and the samples are
    passed to ``interp1`` together with the original indices.

    :param data: sequence of samples
    :param period: number of original samples per approximation point
    :return: square root of the mean absolute error between the ``interp1``
        output and ``data`` (note: sqrt of MAE, not MAE itself)
    """
    n_points = int(len(data) / period)
    orig_x = range(0, len(data))
    tck = splrep(orig_x, data, s=0)
    # NOTE(review): the last sample position is len(data), one past the
    # final index of orig_x -- confirm this is intended.
    test_xs = np.linspace(0, len(data), n_points)
    xi = np.unique(tck[0])
    # range() rather than xrange() so the block also runs on Python 3.
    yi = [[splev(x, tck, der=j) for j in range(3)] for x in xi]
    # NOTE(review): PiecewisePolynomial appears to be gone from recent SciPy
    # releases -- confirm against the SciPy version this project pins.
    P = interpolate.PiecewisePolynomial(xi, yi, orders=1)
    test_ys = P(test_xs)
    inter_y = interp1(test_xs, test_ys, orig_x)

    mae = sqrt(mean_absolute_error(inter_y, data))
    return mae

#def paa(data, period=15): 
示例13
def paa(data, period=15):
    """
    Score a piecewise aggregate approximation (PAA) of ``data``.

    ``data`` is cut down to a whole number of ``period``-long segments, each
    segment is replaced by its mean, and that step-wise series is compared
    with the truncated original.

    :param data: sequence of samples
    :param period: number of samples per segment
    :return: square root of the mean absolute error between the
        approximation and the truncated data (note: sqrt of MAE, not MAE)
    :raises ZeroDivisionError: when ``len(data) < period`` (zero segments)
    """
    numCoeff = int(len(data) / period)
    # Drop the tail so the data splits into numCoeff equal segments.
    data = data[:numCoeff * period]
    origData = deepcopy(data)
    segLen = len(data) // numCoeff
    segments = np.reshape(data, (numCoeff, segLen))
    # Vectorised per-segment mean. The previous map() call returns a lazy
    # iterator on Python 3, which numpy cannot tile.
    avg = np.mean(segments, axis=1)
    # Repeat each segment mean segLen times: same layout as tiling the means
    # row-wise and flattening in column-major order.
    approx = np.repeat(avg, segLen)
    mae = sqrt(mean_absolute_error(approx, origData))
    return mae
示例14
def pla(data, period=15):
    """
    Score an order-1 piecewise-polynomial approximation of ``data``.

    An interpolating spline (``s=0``) is fitted to ``data`` over its sample
    indices. A piecewise polynomial is then built from the spline's value
    and first two derivatives at the unique knots, sampled at
    ``int(len(data) / period)`` evenly spaced positions, and the samples are
    passed to ``interp1`` together with the original indices.

    :param data: sequence of samples
    :param period: number of original samples per approximation point
    :return: square root of the mean absolute error between the ``interp1``
        output and ``data`` (note: sqrt of MAE, not MAE itself)
    """
    n_points = int(len(data) / period)
    orig_x = range(0, len(data))
    tck = splrep(orig_x, data, s=0)
    # NOTE(review): the last sample position is len(data), one past the
    # final index of orig_x -- confirm this is intended.
    test_xs = np.linspace(0, len(data), n_points)
    xi = np.unique(tck[0])
    # range() rather than xrange() so the block also runs on Python 3.
    yi = [[splev(x, tck, der=j) for j in range(3)] for x in xi]
    # NOTE(review): PiecewisePolynomial appears to be gone from recent SciPy
    # releases -- confirm against the SciPy version this project pins.
    P = interpolate.PiecewisePolynomial(xi, yi, orders=1)
    test_ys = P(test_xs)
    inter_y = interp1(test_xs, test_ys, orig_x)

    mae = sqrt(mean_absolute_error(inter_y, data))
    return mae

#def paa(data, period=15): 
示例15
def paa(data, period=15):
    """
    Score a piecewise aggregate approximation (PAA) of ``data``.

    ``data`` is cut down to a whole number of ``period``-long segments, each
    segment is replaced by its mean, and that step-wise series is compared
    with the truncated original.

    :param data: sequence of samples
    :param period: number of samples per segment
    :return: square root of the mean absolute error between the
        approximation and the truncated data (note: sqrt of MAE, not MAE)
    :raises ZeroDivisionError: when ``len(data) < period`` (zero segments)
    """
    numCoeff = int(len(data) / period)
    # Drop the tail so the data splits into numCoeff equal segments.
    data = data[:numCoeff * period]
    origData = deepcopy(data)
    segLen = len(data) // numCoeff
    segments = np.reshape(data, (numCoeff, segLen))
    # Vectorised per-segment mean. The previous map() call returns a lazy
    # iterator on Python 3, which numpy cannot tile.
    avg = np.mean(segments, axis=1)
    # Repeat each segment mean segLen times: same layout as tiling the means
    # row-wise and flattening in column-major order.
    approx = np.repeat(avg, segLen)
    mae = sqrt(mean_absolute_error(approx, origData))
    return mae
示例16
def eva_regress(y_true, y_pred):
    """Evaluation
    Print regression metrics for the predicted result.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """

    mape_value = MAPE(y_true, y_pred)
    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    mae_value = metrics.mean_absolute_error(y_true, y_pred)
    mse_value = metrics.mean_squared_error(y_true, y_pred)
    r2_value = metrics.r2_score(y_true, y_pred)

    print('explained_variance_score:%f' % explained_variance)
    print('mape:%f%%' % mape_value)
    print('mae:%f' % mae_value)
    print('mse:%f' % mse_value)
    # rmse is derived from the mse computed above.
    print('rmse:%f' % math.sqrt(mse_value))
    print('r2:%f' % r2_value)
示例17
def macro_mae(y_test, y_pred, classes):
    """
    Macro-averaged MAE.

    Categories are mapped to integer class ids, the MAE is computed
    separately over the samples of each true class, and the per-class
    values are averaged with equal weight. A Counter of the predicted
    class ids is printed along the way.
    """
    category_to_class = {
        category: int(label)
        for label, category in get_labels_to_categories_map(classes).items()
    }
    true_ids = [category_to_class[item] for item in y_test]
    predicted_ids = [category_to_class[item] for item in y_pred]

    print(Counter(predicted_ids))

    per_class_mae = {}
    for class_id in set(true_ids):
        # Keep only the (true, predicted) pairs whose true class is class_id.
        pairs = [(t, p) for t, p in zip(true_ids, predicted_ids) if t == class_id]
        per_class_mae[class_id] = mean_absolute_error(
            [t for t, _ in pairs], [p for _, p in pairs])

    return numpy.mean(list(per_class_mae.values()))
示例18
def test_automl():
    """Check that tuned AutoLGB and AutoXGB models beat uniform random predictions on MAE."""
    X, y = make_regression(n_samples=N_OBS,
                           n_features=N_FEATURE,
                           n_informative=N_IMP_FEATURE,
                           random_state=RANDOM_SEED)
    X = pd.DataFrame(X, columns=['x{}'.format(i) for i in range(X.shape[1])])
    y = pd.Series(y)
    # logging treats its first argument as a %-format string, so the shapes
    # are passed as format arguments rather than as the message itself.
    logging.info('%s %s', X.shape, y.shape)

    X_trn, X_tst, y_trn, y_tst = train_test_split(X, y, test_size=.2, random_state=RANDOM_SEED)

    model = AutoLGB(objective='regression', metric='l1')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    # Baseline: uniform random values spanning the training target range.
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (LGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r)

    model = AutoXGB(objective='reg:linear', metric='rmse')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (XGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r)
示例19
def proxy_a_distance(source_X, target_X):
    """
    Estimate the Proxy-A-Distance between a source and a target representation.

    A linear SVM is fitted to tell source rows (label 0) from target rows
    (label 1). Its error on that same training data is turned into a
    distance with ``2 * (1 - 2 * error)``.
    """
    source_count = np.shape(source_X)[0]
    target_count = np.shape(target_X)[0]

    features = np.vstack((source_X, target_X))
    source_labels = np.zeros(source_count, dtype=int)
    target_labels = np.ones(target_count, dtype=int)
    labels = np.hstack((source_labels, target_labels))

    classifier = svm.LinearSVC(random_state=0)
    classifier.fit(features, labels)

    # The error is measured on the data the classifier was trained on.
    predicted_labels = classifier.predict(features)
    error = metrics.mean_absolute_error(labels, predicted_labels)
    return 2 * (1 - 2 * error)
示例20
def mae(self, labels, x, x_lens, y, y_lens):
        """ Mean absolute error (MAE) between ``labels`` and the model's
        ``predict_proba`` output on the prepared examples.
        """
        prepared = self.prepare_examples(x, x_lens, y, y_lens)
        return mean_absolute_error(labels, self.model.predict_proba(prepared))
示例21
def mae_score(y_true, y_pred):
  """Returns the mean absolute error between y_true and y_pred."""
  score = mean_absolute_error(y_true, y_pred)
  return score
示例22
def mae_score(y_true, y_pred):
  """Returns the mean absolute error between y_true and y_pred."""
  score = mean_absolute_error(y_true, y_pred)
  return score
示例23
def evaluate(self, data, label):
        """Score the model's first prediction output against ``label``.

        Returns the MAE alone when the module-level ``do_reg`` flag is
        truthy, otherwise the tuple ``(accuracy, MAE)``.
        """
        predicted = self.model.predict(data, verbose=1)[0]
        if do_reg:
            return mean_absolute_error(label, predicted)
        # Accuracy compares the argmax along axis 1 of labels and predictions.
        accuracy = accuracy_score(label.argmax(1), predicted.argmax(1))
        return (accuracy, mean_absolute_error(label, predicted))
    
    
#%% 
示例24
def test_apply():
    """Check pipeline.apply with an axis-0 mean, both executed directly and validated over 10 folds."""
    applied = pipeline.apply(lambda x: np.mean(x, axis=0))
    executed = applied.execute()
    assert executed.shape[0] == dataset.X_test.shape[0]

    applied = pipeline.apply(lambda x: np.mean(x, axis=0))
    fold_results = applied.validate(scorer=mean_absolute_error, k=10)
    assert len(fold_results) == 10
示例25
def test_optimizer():
    """Check that SLSQP minimisation over two models returns two values that sum to 1."""
    optimizer = Optimizer([model, model_2], scorer=mean_absolute_error)
    solution = optimizer.minimize('SLSQP')
    assert solution.shape[0] == 2
    assert_almost_equal(solution.sum(), 1.0, decimal=5)
示例26
def test_report_score():
    """Call report_score with a metric function and with None; both must run without raising."""
    for metric in (mean_absolute_error, None):
        report_score(np.array([1, 2, 3]), metric)
示例27
def cross_val_score(estimator, y, exogenous=None, scoring=None, cv=None,
                    verbose=0, error_score=np.nan):
    """Cross-validate an estimator and return only its test scores

    Every argument is forwarded unchanged to ``cross_validate``; the
    ``'test_score'`` entry of its result is returned.

    Parameters
    ----------
    estimator : estimator
        An estimator object that implements the ``fit`` method

    y : array-like or iterable, shape=(n_samples,)
        The time-series array.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d array of exogenous variables.

    scoring : str or callable, optional (default=None)
        The scoring metric to use. A callable must have the signature
        ``metric(true, predicted)``. Valid string scoring metrics include:

        - 'smape'
        - 'mean_absolute_error'
        - 'mean_squared_error'

    cv : BaseTSCrossValidator or None, optional (default=None)
        An instance of cross-validation. If None, a RollingForecastCV is used

    verbose : integer, optional
        The verbosity level.

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        With 'raise', the error is raised. With a numeric value,
        ModelFitWarning is raised. This parameter does not affect the refit
        step, which will always raise the error.
    """
    return cross_validate(
        estimator=estimator,
        y=y,
        exogenous=exogenous,
        scoring=scoring,
        cv=cv,
        verbose=verbose,
        error_score=error_score,
    )['test_score']
示例28
def test_isoup_tree_mean(test_path):
    """Train an iSOUPTreeRegressor (leaf_prediction='mean') on a generated stream
    and compare its periodic predictions, MAE and info string with stored values."""
    max_samples = 2000
    wait_samples = 200

    stream = RegressionGenerator(n_samples=2000, n_features=20,
                                 n_informative=15, random_state=1,
                                 n_targets=3)
    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    n_rows = int(max_samples / wait_samples)
    y_pred = np.zeros((n_rows, 3))
    y_true = np.zeros((n_rows, 3))

    for seen in range(max_samples):
        X, y = stream.next_sample()
        # Every wait_samples samples (but not at sample 0), record a
        # prediction before the learner trains on that sample.
        if seen != 0 and seen % wait_samples == 0:
            row = int(seen / wait_samples)
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    expected_predictions = np.load(
        os.path.join(test_path,
                     'expected_preds_multi_target_regression_mean.npy'))
    assert np.allclose(y_pred, expected_predictions)

    assert np.isclose(mean_absolute_error(y_true, y_pred), 191.2823924547882)

    expected_info = (
        "iSOUPTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=None, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse all whitespace in the reported info to single spaces before comparing.
    info = " ".join(token.strip() for token in learner.get_info().split())
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
示例29
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    """Build a machine and check that every expected metric appears in its cross-validation scores.

    When ``metrics_`` is falsy, the four default sklearn regression metrics
    are expected instead.
    """
    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config["metrics"] = metrics_

    machine = Machine(
        name="model-name",
        dataset=get_random_data(),
        model={
            "sklearn.multioutput.MultiOutputRegressor": {
                "estimator": "sklearn.linear_model.LinearRegression"
            }
        },
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    if metrics_:
        expected_metrics = metrics_
    else:
        expected_metrics = [
            "sklearn.metrics.explained_variance_score",
            "sklearn.metrics.r2_score",
            "sklearn.metrics.mean_squared_error",
            "sklearn.metrics.mean_absolute_error",
        ]

    scores = machine.metadata.build_metadata.model.cross_validation.scores
    # Score keys use the metric's short name with dashes instead of underscores.
    expected_keys = [name.split(".")[-1].replace("_", "-") for name in expected_metrics]
    assert all(key in scores for key in expected_keys)
示例30
def test_regression_metrics(n_samples=50):
    """Check the regression metrics when every prediction is exactly one above its target."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)

    # MSLE must match a plain MSE taken on log(1 + value).
    msle = mean_squared_log_error(y_true, y_pred)
    log_space_mse = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred))
    assert_almost_equal(msle, log_space_mse)

    for unit_error_metric in (mean_absolute_error, median_absolute_error, max_error):
        assert_almost_equal(unit_error_metric(y_true, y_pred), 1.)

    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)