Python source examples: sklearn.metrics.r2_score()
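sklearn.metrics.r2_score computes the coefficient of determination, R^2 = 1 - sum_i (y_i - yhat_i)^2 / sum_i (y_i - ybar)^2, where ybar is the mean of the true targets: a perfect prediction scores 1.0, predicting the mean scores 0.0, and worse-than-mean predictions go negative. A minimal sketch of the basic call, before the real-world examples below:

from sklearn.metrics import r2_score

y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]
print(r2_score(y_true, y_pred))  # about 0.9486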

Example 1
def fit_model(self, data, cross_val_data, cross_val_labels):
    eval_metrics = []
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] +
                                  cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        fp_train = get_fp(train_sm)
        fp_test = get_fp(test_sm)
        self.model[i].fit(fp_train, train_labels.ravel())
        predicted = self.model[i].predict(fp_test)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'
    return eval_metrics, metrics_type
Example 2
def crossValidation(X, y, cvFolds, estimator):
    """Cross-validate `estimator` and return the R squared for each fold.

    Parameters: 'X' the predictors, 'y' the target, 'cvFolds' the number of
    folds, 'estimator' the machine learning algorithm.
    """
    r2 = np.zeros((cvFolds, 1))
    # sklearn >= 0.18 KFold API; the original used KFold(len(X), n_folds=cvFolds, ...)
    kf = KFold(n_splits=cvFolds, shuffle=True, random_state=30)
    cv_j = 0
    for train_index, test_index in kf.split(X):
        train_X = X[train_index, :]
        test_X = X[test_index, :]
        train_y = y[train_index]
        test_y = y[test_index]
        estimator.fit(train_X, train_y)  # the original called `est.fit`, an undefined name
        y_true, y_pred = test_y, estimator.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred)
        cv_j = cv_j + 1
    return r2
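A minimal usage sketch for crossValidation (the imports plus the Ridge estimator and synthetic data are illustrative assumptions, not part of the original snippet):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold

X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
fold_r2 = crossValidation(X, y, cvFolds=5, estimator=Ridge())
print(fold_r2.ravel())  # one R^2 value per fold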
Example 3
def r2(self, log=False, pseudocount=1, clip=None):
    """ Compute target R2 vector. """
    r2_vec = np.zeros(self.num_targets)

    for ti in range(self.num_targets):
        if self.targets_na is not None:
            preds_ti = self.preds[~self.targets_na, ti].astype('float64')
            targets_ti = self.targets[~self.targets_na, ti].astype('float64')
        else:
            preds_ti = self.preds[:, :, ti].flatten().astype('float64')
            targets_ti = self.targets[:, :, ti].flatten().astype('float64')

        if clip is not None:
            preds_ti = np.clip(preds_ti, 0, clip)
            targets_ti = np.clip(targets_ti, 0, clip)

        if log:
            preds_ti = np.log2(preds_ti + pseudocount)
            targets_ti = np.log2(targets_ti + pseudocount)

        r2_vec[ti] = metrics.r2_score(targets_ti, preds_ti)

    return r2_vec
Example 4
def score_regression(y, y_hat, report=True):
    """
    Create regression score
    :param y: ground-truth target values
    :param y_hat: predicted values
    :param report: if True, print the report string to stdout
    :return: the MAE and the formatted report string
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string 
Example 5
def r2_score_vec(y_true,y_pred):
    """ returns non-aggregate version of r2 score.

    based on r2_score() function from sklearn (http://sklearn.org)
    """

    numerator = (y_true - y_pred) ** 2
    denominator = (y_true - np.average(y_true)) ** 2

    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    output_scores = np.ones([y_true.shape[0]])
    output_scores[valid_score] = 1 - (numerator[valid_score] /
                                      denominator[valid_score])
    # arbitrarily set to zero to avoid -inf scores; having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.

    return output_scores 
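A quick sketch of the per-element behaviour, assuming numpy array inputs; an element whose squared error is zero keeps the initial score of 1:

import numpy as np

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
print(r2_score_vec(y_true, y_pred))
# -> approximately [-15.  0.978  1.  0.941], one score per element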
Example 6
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:",boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                mutation_rate=0.2, crossover_rate=0.8,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'epsilon_lexicase', tourn_size = 2,
                random_state=0, verbosity=0,
                disable_update_check=False, fit_choice = 'mse')

    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:",learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:],boston.target[300:])
    print("train score:",score,"test score:",test_score,
    "test r2:",r2_score(boston.target[300:],yhat_test))
    assert yhat_test.shape == boston.target[300:].shape 
Example 7
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]

    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score] 
Example 8
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)

    mapper = _BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)

    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)

    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        actual_n_bins=mapper.actual_n_bins_)
    grower.grow()

    predictor = grower.make_predictor(bin_thresholds=mapper.bin_thresholds_)

    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70 
Example 9
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal here because
    # the targets are binary, so every error is 0 or 1 and |e| equals e**2.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875) 
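The two multioutput assertions can be verified from the per-output scores: with multioutput='raw_values' the four columns score [-3.5, -2.0, 1.0, 1.0]; their plain mean is -0.875 (the 'uniform_average' result), and weighting by each column's variance, where the constant last column gets zero weight, gives 1 - 5/2 = -1.5:

raw = r2_score(y_true, y_pred, multioutput='raw_values')
print(raw)         # [-3.5 -2.   1.   1. ]
print(raw.mean())  # -0.875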
Example 10
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.]) 
Example 11
def compute_perf_metrics(self, per_task=False):
    """Returns the R-squared metrics for each task or averaged over tasks based on the accumulated values

    Args:
        per_task (bool): True if calculating per-task metrics, False otherwise.

    Returns:
        A tuple (r2_score, std):
            r2_score (np.array): An array of scores for each task, if per_task is True.
            Otherwise, it is a float containing the average R^2 score over tasks.

            std: Always None for this class.
    """
    r2_scores = self.perf_metrics[0]
    if per_task or self.num_tasks == 1:
        return (r2_scores, None)
    else:
        return (r2_scores.mean(), None)
Example 12
def score(self, X, y):
    """Returns the coefficient of determination R^2 of the fitted linear
    regression model, computed on the given features matrix and labels.

    Parameters
    ----------
    X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
        Features matrix.

    y : `np.ndarray`, shape = (n_samples,)
        Labels vector.

    Returns
    -------
    score : `float`
        R^2 of self.predict(X) against y
    """
    from sklearn.metrics import r2_score
    return r2_score(y, self.predict(X))
Example 13
def neural_regression(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    reg = neural_network(X_train.shape[1])
    reg.fit(X_train, Y_train,
            epochs=nb_epoch,  # Keras 1.x spelled this argument `nb_epoch`; Keras 2+ uses `epochs`
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    pred = reg.predict(X_test)
    pred = np.reshape(pred, pred.shape[0])
    r2 = r2_score(Y_test, pred)

    return r2
Example 14
def nestedCrossValidation(X, y, cvFolds, estimator):
    # sklearn >= 0.18 KFold API; the original used KFold(len(X), n_folds=cvFolds, ...)
    kf = KFold(n_splits=cvFolds, shuffle=True, random_state=30)
    cv_j = 0
    param_grid = {'alpha': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100,
                            1e3, 1e4, 1e5, 1e6, 1e7, 1e9]}
    r2 = np.zeros((cvFolds, 1))
    for train_index, test_index in kf.split(X):
        train_X = X[train_index, :]
        test_X = X[test_index, :]
        train_y = y[train_index]
        test_y = y[test_index]
        # 'mean_squared_error' was renamed 'neg_mean_squared_error' in sklearn 0.18
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0,
                            cv=cvFolds, scoring='neg_mean_squared_error')
        grid.fit(train_X, train_y)
        y_true, y_pred = test_y, grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred)
        cv_j = cv_j + 1
    return r2
    
Example 15
def __init__(
        self,
        wrapped: BaseStep = None,
        test_size: float = 0.2,
        scoring_function=r2_score,
        run_validation_split_in_test_mode=True,
        cache_folder_when_no_handle=None
):
    """
    :param wrapped: wrapped step
    :param test_size: ratio for test size between 0 and 1
    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    """
    BaseCrossValidationWrapper.__init__(
        self, wrapped=wrapped, cache_folder_when_no_handle=cache_folder_when_no_handle)

    self.run_validation_split_in_test_mode = run_validation_split_in_test_mode
    self.test_size = test_size
    self.scoring_function = scoring_function
Example 16
def __init__(
        self,
        scoring_function=r2_score,
        k_fold=3,
        joiner=NumpyConcatenateOuterBatch(),
        cache_folder_when_no_handle=None,
        split_data_container_during_fit=True,
        predict_after_fit=True
):
    self.k_fold = k_fold
    BaseCrossValidationWrapper.__init__(
        self,
        scoring_function=scoring_function,
        joiner=joiner,
        cache_folder_when_no_handle=cache_folder_when_no_handle,
        split_data_container_during_fit=split_data_container_during_fit,
        predict_after_fit=predict_after_fit
    )
Example 17
def eva_regress(y_true, y_pred):
    """Evaluation
    evaluate the predicted resul.

    # Arguments
        y_true: List/ndarray, ture data.
        y_pred: List/ndarray, predicted data.
    """

    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    print('explained_variance_score:%f' % vs)
    print('mape:%f%%' % mape)
    print('mae:%f' % mae)
    print('mse:%f' % mse)
    print('rmse:%f' % math.sqrt(mse))
    print('r2:%f' % r2) 
Example 18
def test_ensemble_model():
    X = np.vstack((np.arange(30, 10, -2, dtype='float64'),
                   np.arange(100, 90, -1, dtype='float64'))).T

    Y = np.arange(10, dtype='float64')

    rf = regressors.randomforest(random_state=42)
    nn = regressors.neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((rf, nn))

    # we do not need to fit the underlying models; they are fitted when we fit the ensemble
    ensemble.fit(X, Y)

    pred = ensemble.predict(X)
    mean_pred = np.vstack((rf.predict(X), nn.predict(X))).mean(axis=0)
    assert_array_almost_equal(pred, mean_pred)
    assert_almost_equal(ensemble.score(X, Y), r2_score(Y, pred))

    # ensemble of a single model should behave exactly like this model
    nn = neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((nn,))
    ensemble.fit(X, Y)
    assert_array_almost_equal(ensemble.predict(X), nn.predict(X))
    assert_almost_equal(ensemble.score(X, Y), nn.score(X, Y)) 
Example 19
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' ---------\n')
    predicted_values = trained_model.predict(X_test)
    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))
    plt.scatter(y_test, predicted_values, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt.title(trained_model_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}$')
    plt.savefig('%s.png' %trained_model_name, bbox_inches='tight')
    print("---------------------------------------\n") 
Example 20
def plot(results, ax):
    df = results[["size", "time"]]

    df.plot.scatter(x='size', y='time', c='LightBlue', ax=ax, marker=".")

    # linear regression
    x = df["size"].values.reshape((-1, 1))
    y = df["time"].values
    linear = LinearRegression().fit(x, y)
    y_pred = linear.predict(x)

    mse = mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    ax.plot(x, y_pred, color='DarkBlue', linewidth=2)

    ax.set_title(
        "Linear regression between size and time "
        f"\n$mse={mse:.3f}$ - $R^2={r2:.3f}$")
    ax.set_xlabel("Size")
    ax.set_ylabel("Seconds")

    return ax
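A hypothetical invocation of plot (the synthetic results DataFrame is an assumption; it also presumes the snippet's module imports LinearRegression, mean_squared_error and r2_score):

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

rng = np.random.default_rng(0)
results = pd.DataFrame({"size": np.arange(100.0),
                        "time": 0.01 * np.arange(100.0) + 0.05 * rng.random(100)})
fig, ax = plt.subplots()
plot(results, ax)
plt.show()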
Example 21
def fit_model(self, data):
    eval_metrics = []
    if self.feature_type == 'fingerprints':
        fps = get_fp(data.smiles)
    elif self.feature_type == 'descriptors':
        fps, _, _ = get_desc(data.smiles, self.calc)
    if self.model_type == 'classifier':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.binary_labels)
    elif self.model_type == 'regressor':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.property)
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] + cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        if self.feature_type == 'descriptors':
            train_sm, desc_mean = normalize_desc(train_sm)
            self.desc_mean[i] = desc_mean
            test_sm, _ = normalize_desc(test_sm, desc_mean)
        self.model[i].fit(train_sm, train_labels.ravel())
        predicted = self.model[i].predict(test_sm)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'

    return eval_metrics, metrics_type
Example 22
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)

    mapper = BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)
    X_test_binned = mapper.transform(X_test)

    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(np.float32)
    hessians = np.ones(1, dtype=np.float32)

    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        n_bins_per_feature=mapper.n_bins_per_feature_)
    grower.grow()

    predictor = grower.make_predictor(
        numerical_thresholds=mapper.numerical_thresholds_)

    assert r2_score(y_train, predictor.predict_binned(X_train_binned)) > 0.85
    assert r2_score(y_test, predictor.predict_binned(X_test_binned)) > 0.70

    assert_allclose(predictor.predict(X_train),
                    predictor.predict_binned(X_train_binned))

    assert_allclose(predictor.predict(X_test),
                    predictor.predict_binned(X_test_binned))

    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70 
Example 23
def oob_regression_r2_score(rf, X_train, y_train):
    """
    Compute out-of-bag (OOB) R^2 for a scikit-learn random forest
    regressor. We learned the guts of scikit's RF from the BSD licensed
    code:

    https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/ensemble/forest.py#L702
    """
    X = X_train.values if isinstance(X_train, pd.DataFrame) else X_train
    y = y_train.values if isinstance(y_train, pd.Series) else y_train

    n_samples = len(X)
    predictions = np.zeros(n_samples)
    n_predictions = np.zeros(n_samples)
    for tree in rf.estimators_:
        unsampled_indices = _generate_unsampled_indices(tree.random_state, n_samples)
        tree_preds = tree.predict(X[unsampled_indices, :])
        predictions[unsampled_indices] += tree_preds
        n_predictions[unsampled_indices] += 1

    if (n_predictions == 0).any():
        warnings.warn("Too few trees; some variables do not have OOB scores.")
        n_predictions[n_predictions == 0] = 1

    predictions /= n_predictions

    oob_score = r2_score(y, predictions)
    return oob_score 
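A hypothetical usage sketch; the synthetic data and forest settings are assumptions, and the snippet itself presumes imports of numpy, pandas, warnings, r2_score and the private sklearn helper _generate_unsampled_indices (whose signature has changed across sklearn versions, so this follows the two-argument form used above):

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=500, n_features=8, noise=5.0, random_state=0)
rf = RandomForestRegressor(n_estimators=100, oob_score=True, random_state=0)
rf.fit(X, y)
print(oob_regression_r2_score(rf, X, y))  # should be close to rf.oob_score_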
Example 24
def main():
    """Run the demo."""
    # Training batches
    Xr, Yr, Xs, Ys = get_data()
    N = Yr.shape[0]

    my_feature_columns = []
    for key in Xr.keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Build 2 hidden layer DNN with 10, 10 units respectively.
    estimator = tf.estimator.Estimator(
        model_fn=my_model,
        model_dir="./sarcos/",
        params={"N": N,
                "feature_columns": my_feature_columns})

    input_fn = train_input_fn(Xr, Yr)
    eval_fn = test_input_fn(Xs, Ys)
    predict_fn = predict_input_fn(Xs)

    steps = EPOCHS_PER_EVAL * (N // BATCH_SIZE)

    for i in range(NEPOCHS // EPOCHS_PER_EVAL):
        # Train the Model.
        estimator.train(input_fn=input_fn, steps=steps)
        # Evaluate the model.
        eval_result = estimator.evaluate(input_fn=eval_fn)

        # Use the predict interface to do alternative R2 calculation
        d = estimator.predict(input_fn=predict_fn,
                              yield_single_examples=False)
        Yt = []
        print("\n\nEval Result: {}".format(eval_result))
        for di in d:
            Yt.append(di["predictions"])
        Yt = np.concatenate(Yt, axis=0)
        print("R2: {}\n\n".format(r2_score(Ys, Yt))) 
Example 25
def get_diversity(self, X):
    """compute mean diversity of individual outputs"""
    # diversity in terms of zero-clamped R^2 correlation between outputs
    feature_correlations = np.zeros(X.shape[0] - 1)
    for i in np.arange(1, X.shape[0] - 1):
        feature_correlations[i] = max(0.0, r2_score(X[0], X[i]))
    self.diversity.append(1 - np.mean(feature_correlations))
Example 26
def deterministic_crowding(self, parents, offspring, X_parents, X_offspring):
    """Deterministic crowding implementation (for non-steady state).

    Offspring compete against the parent they are most similar to, here
    defined as the parent they are most correlated with. The offspring only
    replace their parent if they are more fit.
    """
    # get children locations produced from crossover
    cross_children = [i for i, o in enumerate(offspring) if len(o.parentid) > 1]
    # order offspring so that they are lined up with their most similar parent
    for c1, c2 in zip(cross_children[::2], cross_children[1::2]):
        # get parent locations
        p_loc = [j for j, p in enumerate(parents) if p.id in offspring[c1].parentid]
        if len(p_loc) != 2:
            continue
        # if each child is more correlated with its non-root parent
        if (r2_score(X_parents[p_loc[0]], X_offspring[c1]) +
                r2_score(X_parents[p_loc[1]], X_offspring[c2]) <
                r2_score(X_parents[p_loc[0]], X_offspring[c2]) +
                r2_score(X_parents[p_loc[1]], X_offspring[c1])):
            # swap offspring
            offspring[c1], offspring[c2] = offspring[c2], offspring[c1]

    survivors = []
    survivor_index = []

    for i, (p, o) in enumerate(zip(parents, offspring)):
        if p.fitness >= o.fitness:
            survivors.append(copy.deepcopy(p))
            survivor_index.append(i)
        else:
            survivors.append(copy.deepcopy(o))
            survivor_index.append(i + len(parents))

    # return survivors along with their indices
    return survivors, survivor_index
Example 27
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    assert all(
        metric.split(".")[-1].replace("_", "-")
        in machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics
    ) 
Example 28
def metrics_from_list(metric_list: Optional[List[str]] = None) -> List[Callable]:
    """
    Given a list of metric function paths, e.g. sklearn.metrics.r2_score, or
    simple function names which are expected to be in the ``sklearn.metrics``
    module, this will return a list of those loaded functions.

    Parameters
    ----------
    metric_list: Optional[List[str]]
        List of function paths to use as metrics for the model. Defaults to
        those specified in :class:`gordo.workflow.config_components.NormalizedConfig`:
        sklearn.metrics.explained_variance_score,
        sklearn.metrics.r2_score,
        sklearn.metrics.mean_squared_error,
        sklearn.metrics.mean_absolute_error

    Returns
    -------
    List[Callable]
        A list of the functions loaded

    Raises
    ------
    AttributeError:
        If the function cannot be loaded.
    """
    defaults = NormalizedConfig.DEFAULT_CONFIG_GLOBALS["evaluation"]["metrics"]
    funcs = list()
    for func_path in metric_list or defaults:
        func = pydoc.locate(func_path)
        if func is None:
            # Final attempt, load function from sklearn.metrics module.
            funcs.append(getattr(metrics, func_path))
        else:
            funcs.append(func)
    return funcs
Example 29
def main(transpose_inputs=False):
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train = train.T
        test  = test.T

    predicted = predict(train)
    r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
    print('R2 score (binary {} neighbours): {:.1%}'.format(
        ('movie' if transpose_inputs else 'user'),
        r2))