Python source code examples: sklearn.metrics.average_precision_score()
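The examples below show how sklearn.metrics.average_precision_score() is used in practice. As a quick orientation first, here is a minimal, self-contained sketch of the basic binary usage (the toy labels and scores are made up for illustration):

import numpy as np
from sklearn.metrics import average_precision_score

# Hypothetical toy data: binary ground truth and continuous scores.
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# AP summarizes the precision-recall curve as a weighted mean of the
# precisions at each threshold, weighted by the increase in recall.
print(average_precision_score(y_true, y_scores))  # approximately 0.83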

Example 1
def plot_precision_recall_curve(y_true, y_score, size=None):
    """plot_precision_recall_curve."""
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    if size is not None:
        plt.figure(figsize=(size, size))
        plt.axis('equal')
    plt.plot(recall, precision, lw=2, color='navy')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([-0.05, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.grid()
    plt.title('Precision-Recall AUC={0:0.2f}'.format(average_precision_score(
        y_true, y_score))) 
Example 2
def test(self, z, pos_edge_index, neg_edge_index):
        r"""Given latent variables :obj:`z`, positive edges
        :obj:`pos_edge_index` and negative edges :obj:`neg_edge_index`,
        computes area under the ROC curve (AUC) and average precision (AP)
        scores.

        Args:
            z (Tensor): The latent space :math:`\mathbf{Z}`.
            pos_edge_index (LongTensor): The positive edges to evaluate
                against.
            neg_edge_index (LongTensor): The negative edges to evaluate
                against.
        """
        pos_y = z.new_ones(pos_edge_index.size(1))
        neg_y = z.new_zeros(neg_edge_index.size(1))
        y = torch.cat([pos_y, neg_y], dim=0)

        pos_pred = self.decoder(z, pos_edge_index, sigmoid=True)
        neg_pred = self.decoder(z, neg_edge_index, sigmoid=True)
        pred = torch.cat([pos_pred, neg_pred], dim=0)

        y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy()

        return roc_auc_score(y, pred), average_precision_score(y, pred) 
Example 3
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr 
Example 4
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr 
Example 5
def test(data, model, epoch, args):
    model.eval()

    n_iters = 0
    ap_sum = 0.0

    progress_bar = tqdm(data)
    for batch_idx, sample_batched in enumerate(progress_bar):
        img, target = load_tensor_data(sample_batched, args.cuda, volatile=True)
        
        output = model(img)
        ap = average_precision_score(target.data, output.data) 
        n_iters += 1
        ap_sum += ap
        if batch_idx % args.log_interval == 0:
            m_ap = ap_sum / n_iters
            progress_bar.set_postfix(dict(AP='{:.2}'.format(m_ap)))

    m_ap = ap_sum / n_iters
    print('Test Epoch {}: Avg. Precision Score = {:.2};'.format(epoch, m_ap)) 
Example 6
def calculate_scores(y_predicted, y_true):
    """
    Function to calculate different performance scores
    """
    accuracy = accuracy_score(y_pred=y_predicted, y_true=y_true)
    precision = precision_score(y_pred=y_predicted, y_true=y_true)
    average_precision_score1 = average_precision_score(y_score=y_predicted, y_true=y_true)
    f1_score1 = f1_score(y_pred=y_predicted, y_true=y_true)

    print("Accuracy score:", accuracy)
    print("Precision score:", precision)
    print("Average Precision score:", average_precision_score1)
    print("F1 score:", f1_score1)
    print("Outlier detection and/or treatment completed.")

    return {"accuracy": accuracy,
            "precision": precision,
            "average_precision_score": average_precision_score1,
            "f1_score": f1_score1,
            } 
Example 7
def _average_precision_slow(y_true, y_score):
    """A second alternative implementation of average precision that closely
    follows the Wikipedia article's definition (see References). This should
    give identical results to `average_precision_score` for all inputs.

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
       <https://en.wikipedia.org/wiki/Average_precision>`_
    """
    precision, recall, threshold = precision_recall_curve(y_true, y_score)
    precision = list(reversed(precision))
    recall = list(reversed(recall))
    average_precision = 0
    for i in range(1, len(precision)):
        average_precision += precision[i] * (recall[i] - recall[i - 1])
    return average_precision 
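A quick way to sanity-check the docstring's claim, assuming _average_precision_slow and its imports are in scope (the random data below is purely illustrative):

import numpy as np
from sklearn.metrics import average_precision_score

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=100)
y_score = rng.rand(100)

# Both implementations sum precision over the same recall increments,
# so the results should agree up to floating-point error.
assert np.isclose(_average_precision_slow(y_true, y_score),
                  average_precision_score(y_true, y_score))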
Example 8
def test_average_precision_score_pos_label_errors():
    # Raise an error when pos_label is not in binary y_true
    y_true = np.array([0, 1])
    y_pred = np.array([0, 1])
    error_message = ("pos_label=2 is invalid. Set it to a label in y_true.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=2)
    # Raise an error for multilabel-indicator y_true with
    # pos_label other than 1
    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
    error_message = ("Parameter pos_label is fixed to 1 for multilabel"
                     "-indicator y_true. Do not set pos_label or set "
                     "pos_label to 1.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=0) 
Example 9
def test_score_scale_invariance():
    # Test that average_precision_score and roc_auc_score are invariant by
    # the scaling or shifting of probabilities
    # This test was expanded (added scaled_down) in response to github
    # issue #3864 (and others), where overly aggressive rounding was causing
    # problems for users with very small y_score values
    y_true, _, probas_pred = make_prediction(binary=True)

    roc_auc = roc_auc_score(y_true, probas_pred)
    roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred)
    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred)
    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled_up)
    assert_equal(roc_auc, roc_auc_scaled_down)
    assert_equal(roc_auc, roc_auc_shifted)

    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled_up)
    assert_equal(pr_auc, pr_auc_scaled_down)
    assert_equal(pr_auc, pr_auc_shifted) 
Example 10
def score_link_prediction(labels, scores):
    """
    Calculates the area under the ROC curve and the average precision score.

    Parameters
    ----------
    labels : array-like, shape [N]
        The ground truth labels
    scores : array-like, shape [N]
        The (unnormalized) scores of how likely each instance is to be positive

    Returns
    -------
    roc_auc : float
        Area under the ROC curve score
    ap : float
        Average precision score
    """

    return roc_auc_score(labels, scores), average_precision_score(labels, scores) 
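A hypothetical usage sketch for this helper; the labels and scores are invented, with higher scores meaning a more likely edge:

import numpy as np

labels = np.array([1, 0, 1, 1, 0])              # 1 = edge exists, 0 = no edge
scores = np.array([2.3, -0.4, 1.1, 0.2, -1.7])  # unnormalized model scores

roc_auc, ap = score_link_prediction(labels, scores)
print(roc_auc, ap)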
Example 11
def average_precision_compute_fn(y_preds, y_targets, mask, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()

    if mask is not None:
        y_true = y_true[:, mask]
        y_pred = y_pred[:, mask]

    return average_precision_score(y_true, y_pred) 
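A hedged usage sketch, assuming this helper is in scope and PyTorch is available; the tensors below are invented, and the raw logits are squashed to [0, 1] with a sigmoid before scoring:

import torch

# Hypothetical multilabel batch: 4 samples, 3 labels, raw logits.
y_targets = torch.tensor([[1, 0, 1], [0, 1, 1], [1, 1, 0], [0, 0, 1]])
y_logits = torch.randn(4, 3)

ap = average_precision_compute_fn(y_logits, y_targets, mask=None,
                                  activation=torch.sigmoid)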
Example 12
def get_ap(self, q_name, sorted_idx):
        rel   = self.__relevants[q_name]
        junk  = self.__junk[q_name]

        # construct ground-truth and scores:
        y_scores = np.zeros(self.N_images)
        y_true   = np.zeros(self.N_images)
        for e,i in enumerate(sorted_idx): y_scores[i] = self.N_images - e
        for i in rel: y_true[i] = 1

        # remove junk:
        y_scores = np.delete(y_scores, junk)
        y_true   = np.delete(y_true, junk)

        # compute ap:
        return average_precision_score(y_true, y_scores) 
Example 13
def plot_PR_curve(classifier):
    
    precision, recall, thresholds = precision_recall_curve(DataPrep.test_news['Label'], classifier)
    average_precision = average_precision_score(DataPrep.test_news['Label'], classifier)
    
    plt.step(recall, precision, color='b', alpha=0.2,
             where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2,
                     color='b')
    
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Random Forest Precision-Recall curve: AP={0:0.2f}'.format(
              average_precision)) 
Example 14
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] // batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()

    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)

    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []

    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]


Example 15
def evaluate_embedding_link_prediction(adj_matrix, node_pairs, embedding_matrix, norm=False):
    """Evaluate the node embeddings on the link prediction task.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :param node_pairs:
    :param embedding_matrix: np.ndarray, shape [n_nodes, embedding_dim]
        Embedding matrix
    :param norm: bool
        Whether to normalize the embeddings
    :return: float, float
        Average precision (AP) score and area under ROC curve (AUC) score
    """
    if norm:
        embedding_matrix = normalize(embedding_matrix)

    true = adj_matrix[node_pairs[:, 0], node_pairs[:, 1]].A1
    scores = (embedding_matrix[node_pairs[:, 0]] * embedding_matrix[node_pairs[:, 1]]).sum(1)

    auc_score, ap_score = roc_auc_score(true, scores), average_precision_score(true, scores)

    return auc_score, ap_score 
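A hypothetical end-to-end call, assuming the function above and its imports (numpy, scipy.sparse, sklearn) are available; the graph, node pairs, and embeddings are invented:

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0, 1, 0, 1],
                              [1, 0, 1, 0],
                              [0, 1, 0, 0],
                              [1, 0, 0, 0]]))
node_pairs = np.array([[0, 1], [0, 2], [1, 2], [2, 3]])  # pairs to score
emb = np.random.RandomState(0).randn(4, 8)               # 4 nodes, 8 dims

auc_score, ap_score = evaluate_embedding_link_prediction(adj, node_pairs, emb)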
Example 16
def __call__(self, args, env):

        import numpy as np
        import matplotlib.pyplot as plt
        from sklearn.metrics import average_precision_score
        from sklearn.metrics import precision_recall_curve
        from vergeml.plots import load_labels, load_predictions

        try:
            labels = load_labels(env)
        except FileNotFoundError:
            raise VergeMLError("Can't plot PR curve - not supported by model.")

        nclasses = len(labels)
        if args['class'] not in labels:
            raise VergeMLError("Unknown class: " + args['class'])

        try:
            y_test, y_score = load_predictions(env, nclasses)
        except FileNotFoundError:
            raise VergeMLError("Can't plot PR curve - not supported by model.")

        # From:
        # https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py

        ix = labels.index(args['class'])
        y_test = y_test[:, ix].astype(int)
        y_score = y_score[:, ix]

        precision, recall, _ = precision_recall_curve(y_test, y_score)
        average_precision = average_precision_score(y_test, y_score)

        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')

        plt.xlabel('Recall ({})'.format(args['class']))
        plt.ylabel('Precision ({})'.format(args['class']))
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall curve for @{0}: AP={1:0.2f}'.format(args['@AI'], average_precision))
        plt.show() 
Example 17
def estimate_predictive_performance(x_y,
                                    estimator=None,
                                    n_splits=10,
                                    random_state=1):
    """estimate_predictive_performance."""
    x, y = x_y
    cv = ShuffleSplit(n_splits=n_splits,
                      test_size=0.3,
                      random_state=random_state)
    scoring = make_scorer(average_precision_score)
    scores = cross_val_score(estimator, x, y, cv=cv, scoring=scoring)
    return scores 
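A usage sketch with an invented dataset and estimator. Note that make_scorer(average_precision_score), as used above, scores the estimator's predict() output (hard 0/1 labels); passing the built-in string scorer 'average_precision' instead would rank by decision_function or predict_proba:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

x_y = make_classification(n_samples=200, random_state=1)  # (X, y) tuple
scores = estimate_predictive_performance(x_y,
                                         estimator=LogisticRegression(max_iter=1000),
                                         n_splits=5)
print(scores.mean(), scores.std())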
Example 18
def perf(y_true, y_pred, y_score):
    """perf."""
    print('Accuracy: %.2f' % accuracy_score(y_true, y_pred))
    print(' AUC ROC: %.2f' % roc_auc_score(y_true, y_score))
    print('  AUC AP: %.2f' % average_precision_score(y_true, y_score))
    print()
    print('Classification Report:')
    print(classification_report(y_true, y_pred))
    print()
    plot_confusion_matrices(y_true, y_pred, size=int(len(set(y_true)) * 2.5))
    print()
    plot_aucs(y_true, y_score, size=10) 
Example 19
def estimate_model(positive_data_matrix=None,
                   negative_data_matrix=None,
                   target=None,
                   estimator=None,
                   n_jobs=4):
    """estimate_model."""
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    logger.info('Test set')
    logger.info(describe(X))
    logger.info('-' * 80)
    logger.info('Test Estimate')
    predictions = estimator.predict(X)
    margins = estimator.decision_function(X)
    logger.info(classification_report(y, predictions))
    apr = average_precision_score(y, margins)
    logger.info('APR: %.3f' % apr)
    roc = roc_auc_score(y, margins)
    logger.info('ROC: %.3f' % roc)

    logger.info('Cross-validated estimate')
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = model_selection.cross_val_score(
            estimator, X, y, cv=5,
            scoring=scoring, n_jobs=n_jobs)
        logger.info('%20s: %.3f +- %.3f' % (scoring,
                                            np.mean(scores),
                                            np.std(scores)))

    return roc, apr 
Example 20
def report_evaluation_metrics(y_true, y_pred):
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)

    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1)) 
Example 21
def calculate_roc_pr(model, sequence, mask=-1, return_pred=False):
    y_true = sequence.y
    y_pred = model.predict_generator(sequence, use_multiprocessing=True, workers=6)

    if y_true.ndim == 1:
        val_roc = roc_auc_score(y_true, y_pred)
        val_pr = average_precision_score(y_true, y_pred)

    elif y_true.ndim == 2:
        y_true = y_true.transpose()
        y_pred = y_pred.transpose()

        unmask_idx = [np.where(y != mask)[0] for y in y_true]
        val_roc = [roc_auc_score(yt[idx], yp[idx]) for (yt, yp, idx) in zip(y_true, y_pred, unmask_idx)]
        val_pr = [average_precision_score(yt[idx], yp[idx]) for (yt, yp, idx) in zip(y_true, y_pred, unmask_idx)]

        val_roc = np.array(val_roc).mean()
        val_pr = np.array(val_pr).mean()
        y_pred = y_pred.transpose()

    else:
        raise ValueError("Unsupported output shape for auc calculation")

    if return_pred:
        return val_roc, val_pr, y_pred
    else:
        return val_roc, val_pr 
Example 22
def auprc(labels, scores):
    ap = average_precision_score(labels, scores)
    return ap 
Example 23
def map_sklearn(y_true, y_pred):
    """Return the mean average precision (mAP), macro-averaged over classes."""
    n_classes = y_true.shape[1]
    aps = [average_precision_score(y_true[:, i], y_pred[:, i]) for i in range(n_classes)]
    aps = np.nan_to_num(aps)  # classes with no positives give NaN; treat them as 0
    return np.mean(aps)
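A hypothetical call with a small multilabel batch (every class below has at least one positive, so no NaNs arise):

import numpy as np

y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0],
                   [0, 0, 1], [1, 0, 0], [0, 1, 1]])
y_pred = np.random.RandomState(0).rand(6, 3)  # invented scores in [0, 1]

print(map_sklearn(y_true, y_pred))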
Example 24
def average_precision_score(conditions, prediction_scores, average='micro',
                            sample_weight=None):
    # average in {'micro', 'macro', 'samples', 'weighted'}
    return metrics.average_precision_score(conditions, prediction_scores,
                                           average=average,
                                           sample_weight=sample_weight)

# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(
#         description='This script trains and tests a model.')
#     parser.add_argument('gold_standard', help='file containing gold standars')
#     parser.add_argument(PREDICTIONS, help='file containing predictions')
#     parser.add_argument('output_fp', help='output file')
#     args = parser.parse_args()
#
#     hdf5_data = h5py.File(args.gold_standard, 'r')
#     split = hdf5_data[SPLIT].value
#     column = hdf5_data['macros'].value
#     hdf5_data.close()
#     conditions = column[split == 2]  # ground truth
#
#     predictions = np.load(args.predictions)
#
#     confusion_matrix = ConfusionMatrix(predictions, conditions)
#
#     results = load_json(args.output_fp)
#     results['confusion_matrix_stats'] = {
#         'confusion_matrix': confusion_matrix.cm.tolist(),
#         'overall_stats': confusion_matrix.stats(),
#         'per_class_stats': confusion_matrix.per_class_stats()
#     }
#     save_json(args.output_fp, results) 
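A hedged sketch of calling this thin wrapper, assuming the module imports sklearn's metrics (from sklearn import metrics); the multilabel data is invented:

import numpy as np

conditions = np.array([[1, 0], [0, 1], [1, 1], [0, 1]])
prediction_scores = np.array([[0.8, 0.3], [0.2, 0.7],
                              [0.6, 0.9], [0.4, 0.5]])

print(average_precision_score(conditions, prediction_scores, average='micro'))
print(average_precision_score(conditions, prediction_scores, average='macro'))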
Example 25
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions, of length n.
        probability_predictions (list) : predictions coming from an ML algorithm, also of length n.

    Returns:
        dict: PR AUC, the best cutoff with its precision and recall, and the
            full precision/recall/threshold arrays.

    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds} 
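A hypothetical usage, assuming the module aliases sklearn.metrics as skmetrics and defines _validate_predictions_and_labels_are_equal_length; the labels and probabilities are invented:

y_test = [0, 1, 1, 0, 1, 0, 1, 1]
probability_predictions = [0.2, 0.9, 0.7, 0.4, 0.6, 0.1, 0.8, 0.3]

pr = compute_pr(y_test, probability_predictions)
print(pr['pr_auc'], pr['best_pr_cutoff'])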
Example 26
def auprc(self, data, model, tt, name):
        scores = self.get_predictions_loss(data, model, tt)[0]
        labels = [prot["label"][:, 2] for prot in data[tt]]
        close_count = 0
        auprcs = []
        for preds, lbls in zip(scores, labels):
            if np.allclose(preds[:, 0], np.zeros_like(preds[:, 0]) + np.mean(preds[:, 0])):
                close_count += 1
            auprcs.append(average_precision_score(lbls, preds))
        if close_count > 0:
            printt("For {} proteins, all predicted scores are close to each other, auprc may be based on improper sorting".format(close_count))
        med_auprc = np.median(auprcs)
        printt("{} median auprc: {:0.3f}".format(name, med_auprc))
        return ["auprc_med_" + tt], [med_auprc] 
Example 27
def get_stats_detection(logit, label, n_classes=52):
  '''
  Calculate the accuracy and average precisions.
  '''
  logit = to_numpy(logit)
  label = to_numpy(label)
  scores = softmax(logit, axis=1)

  pred = np.argmax(logit, 1)
  length = label.shape[0]
  acc = np.sum(pred == label)/length

  keep_bg = label == 0
  acc_bg = np.sum(pred[keep_bg] == label[keep_bg])/label[keep_bg].shape[0]
  ratio_bg = np.sum(keep_bg)/length

  keep_action = label != 0
  acc_action = np.sum(
      pred[keep_action] == label[keep_action]) / label[keep_action].shape[0]

  # Average precision
  y_true = np.zeros((len(label), n_classes))
  y_true[np.arange(len(label)), label] = 1
  aps = average_precision_score(y_true, scores, average=None)
  aps = list(filter(lambda x: not np.isnan(x), aps))
  ap = np.mean(aps)

  return ap, acc, acc_bg, acc_action, ratio_bg, pred, label 
Example 28
def test(opt, model, dataloader):
  '''Test model.'''
  # Logging
  logger = logging.Logger(opt.load_ckpt_path, opt.split)
  stats = logging.Statistics(opt.ckpt_path, opt.split)
  logger.log(opt)

  logits, labels = [], []
  model.load(opt.load_ckpt_paths, opt.load_epoch)
  for step, data in enumerate(dataloader, 1):
    inputs, label = data
    info_acc, logit = model.test(inputs, label)
    logits.append(utils.to_numpy(logit.squeeze(0)))
    labels.append(utils.to_numpy(label))
    update = stats.update(label.size(0), info_acc)
    if utils.is_due(step, opt.print_every):
      utils.info('step {}/{}: {}'.format(step, len(dataloader), update))

  logits = np.concatenate(logits, axis=0)
  length, n_classes = logits.shape
  labels = np.concatenate(labels)
  scores = utils.softmax(logits, axis=1)

  # Accuracy
  preds = np.argmax(scores, axis=1)
  acc = np.sum(preds == labels) / length
  # Average precision
  y_true = np.zeros((length, n_classes))
  y_true[np.arange(length), labels] = 1
  aps = average_precision_score(y_true, scores, average=None)
  aps = list(filter(lambda x: not np.isnan(x), aps))
  mAP = np.mean(aps)

  logger.log('[Summary]: {}'.format(stats.summarize()))
  logger.log('Acc: {}, mAP: {}'.format(acc, mAP)) 
Example 29
def mean_ap_metric(predicts, targets):

    predict = predicts[:, ~np.all(targets == 0, axis=0)]
    target = targets[:, ~np.all(targets == 0, axis=0)]

    mean_auc = 0
    aps = [0]
    try:
        mean_auc = metrics.roc_auc_score(target, predict)
    except ValueError:
        print('The roc_auc curve requires a sufficient number of classes, '
              'which are missing in this sample.')
    try:
        aps = metrics.average_precision_score(target, predict, average=None)
    except ValueError:
        print('Average precision requires a sufficient number of samples '
              'in a batch, which are missing in this sample.')

    mean_ap = np.mean(aps)
    weights = np.sum(target.astype(float), axis=0)
    weights /= np.sum(weights)
    mean_wap = np.sum(np.multiply(aps, weights))
    all_aps = np.zeros((1, targets.shape[1]))
    all_aps[:, ~np.all(targets == 0, axis=0)] = aps

    return mean_auc, mean_ap, mean_wap, all_aps.flatten()
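A hypothetical call with invented data; the third class has no positives, so it is masked out before scoring and reported as 0 in all_aps:

import numpy as np

targets = np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0],
                    [0, 1, 0], [1, 0, 0]])
predicts = np.random.RandomState(0).rand(5, 3)  # invented scores

mean_auc, mean_ap, mean_wap, all_aps = mean_ap_metric(predicts, targets)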