Python源码示例:sklearn.metrics.adjusted_mutual_info_score()

示例1
def test_metrics_from_list():
    """
    Verify that ``ModelBuilder.metrics_from_list`` resolves metric callables.

    Two cases are covered: calling it with no arguments, which is expected to
    yield the four regression metrics listed below, and calling it with
    fully-qualified dotted names, which is expected to yield the matching
    ``sklearn.metrics`` functions in the same order.
    """
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    requested_names = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    resolved = ModelBuilder.metrics_from_list(requested_names)
    assert resolved == [metrics.adjusted_mutual_info_score, metrics.r2_score]
示例2
def evaluate_groups(true_groups, predicted):
    """ Score predicted groupings against ground truth, one sample at a time.

    For every sample in the batch, positions whose true group id is zero are
    dropped; on the remaining positions the AMI between the true ids and the
    arg-max predicted group is computed, together with the mean of the winning
    (max) prediction value.

    :param true_groups: 5-d array, documented as (B, 1, W, H, 1)
    :param predicted: 5-d array, documented as (B, K, W, H, 1)
    :return: (scores, confidences), two Python lists with one entry per sample
    """
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape
    n_samples, n_groups = predicted.shape[:2]

    # Flatten everything after the batch (and group) axis.
    flat_truth = true_groups.reshape(n_samples, -1)
    flat_pred = predicted.reshape(n_samples, n_groups, -1)

    # Hard assignment and the value that won it, both taken over the group axis.
    hard_assignment = flat_pred.argmax(1)
    winning_value = flat_pred.max(1)

    scores, confidences = [], []
    for truth_row, assigned_row, conf_row in zip(flat_truth, hard_assignment, winning_value):
        # Only positions with a non-zero true group id take part in the evaluation.
        keep = np.nonzero(truth_row != 0.0)[0]
        scores.append(adjusted_mutual_info_score(truth_row[keep], assigned_row[keep]))
        confidences.append(np.mean(conf_row[keep]))

    return scores, confidences
示例3
def evaluate_groups(true_groups, predicted):
    """ Per-sample AMI and mean confidence of a soft group assignment.

    The prediction is reduced over its second axis: ``argmax`` gives the
    assigned group and ``max`` gives the confidence of that assignment.
    Positions whose true group id equals zero are excluded from both results.

    :param true_groups: 5-d array, documented as (B, 1, W, H, 1)
    :param predicted: 5-d array, documented as (B, K, W, H, 1)
    :return: (scores, confidences), each a list of length B
    """
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape
    batch, num_groups = predicted.shape[:2]

    truth_2d = true_groups.reshape(batch, -1)
    soft_3d = predicted.reshape(batch, num_groups, -1)
    assigned_2d = soft_3d.argmax(1)
    conf_2d = soft_3d.max(1)

    ami_per_sample = []
    conf_per_sample = []
    for sample in range(batch):
        truth = truth_2d[sample]
        # Boolean mask of the positions that carry a non-zero true group id.
        labelled = truth != 0.0
        ami_per_sample.append(
            adjusted_mutual_info_score(truth[labelled], assigned_2d[sample][labelled])
        )
        conf_per_sample.append(np.mean(conf_2d[sample][labelled]))

    return ami_per_sample, conf_per_sample
示例4
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a one-line clustering report.

    The reference labels are read from the module-level name ``y``; the
    predicted labels and the inertia are read from the fitted estimator.
    Nothing is returned.

    Reported scores, in print order:

    * homogeneity          - each cluster holds members of a single class only
    * completeness         - all members of a class land in the same cluster
    * v-measure            - harmonic mean of homogeneity and completeness
    * adjusted-rand        - chance-corrected, permutation-invariant similarity
                             of reference and predicted labels (1 is perfect,
                             values near 0 mean random labelling)
    * adjusted-mutual-info - chance-corrected, permutation-invariant agreement
                             of reference and predicted labels (1 is perfect,
                             values near 0 mean random agreement)
    * silhouette           - compares each sample's mean distance to its own
                             cluster with its mean distance to the nearest
                             other cluster (Euclidean here); values near 0
                             indicate overlapping clusters

    :param estimator: object exposing ``fit``, ``inertia_`` and ``labels_``
    :param name: label printed in the first column
    :param data: samples handed to ``fit`` and to the silhouette score
    """
    estimator.fit(data)
    assigned = estimator.labels_

    row_template = ('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
                    'adjusted-mutual-info: %.3f \tsilhouette: %.3f')
    # Note: '%i' prints the inertia truncated to an integer.
    row_values = (
        name,
        estimator.inertia_,
        metrics.homogeneity_score(y, assigned),
        metrics.completeness_score(y, assigned),
        metrics.v_measure_score(y, assigned),
        metrics.adjusted_rand_score(y, assigned),
        metrics.adjusted_mutual_info_score(y, assigned),
        metrics.silhouette_score(data, assigned, metric='euclidean'),
    )
    print(row_template % row_values)
示例5
def evalClusteringOnLabels(clusters, groupLabels, verbose=True):
    """
    Evaluates clustering against labels
    Alternative methodology to label prediction for testing

    Three agreement scores between ``clusters`` and ``groupLabels`` are
    computed: adjusted mutual information, adjusted Rand index and
    Fowlkes-Mallows.

    :param clusters: cluster assignment per sample
    :param groupLabels: reference label per sample
    :param verbose: when true, print the three scores on one line
    :return: dict with keys 'MI', 'RAND' and 'FM' mapping to the scores
    """
    results = [
        metrics.adjusted_mutual_info_score(clusters, groupLabels),
        metrics.adjusted_rand_score(clusters, groupLabels),
        metrics.fowlkes_mallows_score(clusters, groupLabels),
    ]
    if verbose:
        # RAND must report results[1]; index 2 is the Fowlkes-Mallows score.
        print(f"MI: {results[0]:.2f}, RAND {results[1]:.2f}, FM: {results[2]:.2f}")
    # Values go through np.array so callers keep receiving numpy scalars.
    return dict(zip(['MI', 'RAND', 'FM'], np.array(results)))
示例6
def benchmarking(gtlabels, labels):
    """Compare predicted cluster labels with ground truth using four measures.

    Both sequences are cut to the first ``len(gtlabels)`` entries before
    scoring, so surplus predictions beyond the ground truth are ignored.

    :param gtlabels: ground-truth labels
    :param labels: predicted labels
    :return: tuple ``(ari, ami, nmi, acc)``
    """
    # TODO: Please note that the AMI definition used in the paper differs from that in the sklearn python package.
    # TODO: Please modify it accordingly.
    count = len(gtlabels)
    reference = gtlabels[:count]
    candidate = labels[:count]

    ari = metrics.adjusted_rand_score(reference, candidate)
    ami = metrics.adjusted_mutual_info_score(reference, candidate)
    nmi = metrics.normalized_mutual_info_score(reference, candidate)
    acc = clustering_accuracy(reference, candidate)
    return ari, ami, nmi, acc
示例7
def mutual_info_kmeans_scorer(self, min_similarity):
        """Return ``self.kmeans_scorer`` applied to adjusted mutual information.

        :param min_similarity: forwarded unchanged as the second argument
        :return: whatever ``self.kmeans_scorer`` returns
        """
        build_scorer = self.kmeans_scorer
        similarity_metric = metrics.adjusted_mutual_info_score
        return build_scorer(similarity_metric, min_similarity)
示例8
def mutual_info_dbscan_scorer(self, min_similarity):
        """Return ``self.dbscan_scorer`` applied to adjusted mutual information.

        :param min_similarity: forwarded unchanged as the second argument
        :return: whatever ``self.dbscan_scorer`` returns
        """
        build_scorer = self.dbscan_scorer
        similarity_metric = metrics.adjusted_mutual_info_score
        return build_scorer(similarity_metric, min_similarity)
示例9
def _compute_ami_score(labels, predictions):
  """Build an op that evaluates AMI through a Python callback, floored at zero.

  ``metrics.adjusted_mutual_info_score`` is wrapped with
  ``script_ops.py_func`` (declared output dtype float64), the result is
  converted with ``math_ops.to_float`` and then clamped from below with
  ``math_ops.maximum(0.0, ...)`` so negative scores are reported as 0.

  :param labels: reference labels passed to the callback
  :param predictions: predicted labels passed to the callback
  :return: the clamped score
  """
  raw_ami = script_ops.py_func(
      metrics.adjusted_mutual_info_score,
      [labels, predictions],
      [dtypes.float64],
      name='ami')
  ami_as_float = math_ops.to_float(raw_ami)
  return math_ops.maximum(0.0, ami_as_float)
示例10
def adjusted_mutual_information(x, tx, y, ty, ffactor=3, maxdev=3):
    """Adjusted mutual information between two discretized sequences.

    The inputs are first passed through ``discretized_sequences``. If the
    score function rejects the discretized values with ``ValueError``, it is
    retried once with axis 1 squeezed out of both sequences.

    :param x: first sequence
    :param tx: forwarded to ``discretized_sequences`` alongside ``x``
    :param y: second sequence
    :param ty: forwarded to ``discretized_sequences`` alongside ``y``
    :param ffactor: forwarded to ``discretized_sequences``
    :param maxdev: forwarded to ``discretized_sequences``
    :return: the adjusted mutual information score
    """
    binned_x, binned_y = discretized_sequences(x, tx, y, ty, ffactor, maxdev)
    try:
        score = adjusted_mutual_info_score(binned_x, binned_y)
    except ValueError:
        # Retry on flattened inputs; presumably the sequences came back as
        # column vectors - confirm against discretized_sequences.
        score = adjusted_mutual_info_score(binned_x.squeeze(1), binned_y.squeeze(1))
    return score
示例11
def predict(self, a, b, **kwargs):
        """Run the independence test on two variables.

        Both inputs are binned with ``bin_variable`` and the adjusted mutual
        information of the binned values is returned.

        :param a: input data
        :param b: input data
        :param kwargs: optional ``bins`` entry selecting the binning rule
            (defaults to ``'fd'``)
        :type a: array-like, numerical data
        :type b: array-like, numerical data
        :return: dependency statistic (documented as 1=Highly dependent,
            0=Not dependent)
        :rtype: float
        """
        binning_rule = kwargs.get('bins', 'fd')
        binned_a = bin_variable(a, bins=binning_rule)
        binned_b = bin_variable(b, bins=binning_rule)
        return metrics.adjusted_mutual_info_score(binned_a, binned_b)
示例12
def predict(self, a, b, **kwargs):
        """Compute the dependency statistic between ``a`` and ``b``.

        Each input is discretized by ``bin_variable`` using the same binning
        rule, then the two binned variables are compared with adjusted
        mutual information.

        :param a: input data
        :param b: input data
        :param kwargs: ``bins`` may be given to choose the binning rule;
            ``'fd'`` is used when it is absent
        :type a: array-like, numerical data
        :type b: array-like, numerical data
        :return: dependency statistic (documented as 1=Highly dependent,
            0=Not dependent)
        :rtype: float
        """
        rule = kwargs['bins'] if 'bins' in kwargs else 'fd'
        discretized = [bin_variable(variable, bins=rule) for variable in (a, b)]
        return metrics.adjusted_mutual_info_score(*discretized)
示例13
def bench_k_means(estimator, name, data):
    """Time the fit of ``estimator`` on ``data`` and print one benchmark row.

    The row holds, in order: name, fit duration in seconds, inertia,
    homogeneity, completeness, V-measure, adjusted Rand index, adjusted
    mutual information and the silhouette score. Reference labels and the
    silhouette sample size are read from the module-level names ``labels``
    and ``sample_size``. Nothing is returned.

    :param estimator: object exposing ``fit``, ``inertia_`` and ``labels_``
    :param name: label printed in the first column
    :param data: samples handed to ``fit`` and to the silhouette score
    """
    started = time()
    estimator.fit(data)
    fit_seconds = time() - started

    assigned = estimator.labels_
    row = (
        name,
        fit_seconds,
        estimator.inertia_,
        metrics.homogeneity_score(labels, assigned),
        metrics.completeness_score(labels, assigned),
        metrics.v_measure_score(labels, assigned),
        metrics.adjusted_rand_score(labels, assigned),
        metrics.adjusted_mutual_info_score(labels, assigned),
        metrics.silhouette_score(data, assigned,
                                 metric='euclidean',
                                 sample_size=sample_size),
    )
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f' % row)
示例14
def test_adjusted_mutual_info_score(self):
        """The ``df.metrics`` accessor must agree with a direct metric call.

        The accessor result is compared for equality with
        ``metrics.adjusted_mutual_info_score(self.target, self.pred)``.
        """
        via_accessor = self.df.metrics.adjusted_mutual_info_score()
        direct_call = metrics.adjusted_mutual_info_score(self.target, self.pred)
        self.assertEqual(via_accessor, direct_call)
示例15
def calculate_AMI(self, query_labels, cluster_labels, **kwargs):
        """Return the adjusted mutual information of two label assignments.

        :param query_labels: first label assignment
        :param cluster_labels: second label assignment
        :param kwargs: accepted but not used by this method
        :return: result of ``adjusted_mutual_info_score``
        """
        ami = adjusted_mutual_info_score(query_labels, cluster_labels)
        return ami
示例16
def bench_k_means(estimator, name, data):
    """Fit ``estimator``, measure how long it took, and print its scores.

    Printed columns: name, fit time (s), inertia, then homogeneity,
    completeness, V-measure, adjusted Rand, adjusted mutual information
    (all against the module-level ``labels``) and finally the Euclidean
    silhouette score sampled with the module-level ``sample_size``.
    Nothing is returned.

    :param estimator: object exposing ``fit``, ``inertia_`` and ``labels_``
    :param name: label printed in the first column
    :param data: samples handed to ``fit`` and to the silhouette score
    """
    t_start = time()
    estimator.fit(data)
    duration = time() - t_start
    inertia = estimator.inertia_

    # Scores that compare the reference labels with the fitted labels,
    # listed in print order.
    label_scorers = (
        metrics.homogeneity_score,
        metrics.completeness_score,
        metrics.v_measure_score,
        metrics.adjusted_rand_score,
        metrics.adjusted_mutual_info_score,
    )
    label_scores = tuple(scorer(labels, estimator.labels_) for scorer in label_scorers)

    silhouette = metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)

    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % ((name, duration, inertia) + label_scores + (silhouette,)))
示例17
def evaluate_clustering_performance(clusters, labels):
    """Print clustering quality scores separately for every set of dimensions.

    Clusters are grouped by their ``dimensions``. For each distinct group a
    label vector shaped like ``labels`` is built: points in the i-th cluster
    of the group get the value ``i + 1`` and points in no cluster stay 0.
    That vector is then scored against ``labels`` and the results are printed.

    :param clusters: objects exposing ``dimensions`` and ``data_point_ids``
    :param labels: reference label per data point
    """
    distinct_subspaces = {frozenset(cluster.dimensions) for cluster in clusters}

    # Evaluating performance in all dimensionality
    for subspace in distinct_subspaces:
        print("\nEvaluating clusters in dimension: ", list(subspace))

        # Clusters that live in exactly this set of dimensions
        members = [cluster for cluster in clusters if cluster.dimensions == subspace]

        assigned = np.zeros(np.shape(labels))
        for cluster_id, cluster in enumerate(members, start=1):
            assigned[list(cluster.data_point_ids)] = cluster_id

        print("Number of clusters: ", len(members))

        rand_index = metrics.adjusted_rand_score(labels, assigned)
        print("Adjusted Rand index: ", rand_index)

        mutual_info = metrics.adjusted_mutual_info_score(labels, assigned)
        print("Mutual Information: ", mutual_info)

        hcv = metrics.homogeneity_completeness_v_measure(labels, assigned)
        print("Homogeneity, completeness, V-measure: ", hcv)

        fowlkes_mallows = metrics.fowlkes_mallows_score(labels, assigned)
        print("Fowlkes-Mallows: ", fowlkes_mallows)
示例18
def compute_adjusted_evaluations(self, labels_families, predicted_clusters):
        """Store the chance-adjusted clustering scores on the instance.

        Sets ``self.adjusted_rand_score`` and
        ``self.adjusted_mutual_info_score``. Both are set to 0 when no
        reference labels are available; otherwise they are computed against
        ``predicted_clusters`` (AMI with ``average_method='arithmetic'``).

        :param labels_families: reference labels, or ``None``
        :param predicted_clusters: predicted cluster assignment
        :return: ``None``
        """
        if labels_families is not None:
            self.adjusted_rand_score = metrics.adjusted_rand_score(
                labels_families, predicted_clusters)
            self.adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(
                labels_families, predicted_clusters, average_method='arithmetic')
        else:
            self.adjusted_rand_score = 0
            self.adjusted_mutual_info_score = 0
示例19
def to_json(self):
        """Return the stored evaluation scores as a plain dict.

        :return: dict whose keys are the five score names below, each mapped
            to the attribute of the same name on the instance
        """
        exported_fields = (
            'homogeneity',
            'completeness',
            'v_measure',
            'adjusted_rand_score',
            'adjusted_mutual_info_score',
        )
        return {field: getattr(self, field) for field in exported_fields}
示例20
def adjusted_mutual_information(first_partition, second_partition):
    """Adjusted Mutual Information between two clusterings.

    Adjusted Mutual Information (AMI) corrects the Mutual Information (MI)
    score for chance: plain MI tends to grow with the number of clusters even
    when no additional information is actually shared. For two clusterings
    :math:`U` and :math:`V` the score has the form::

        AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [norm(H(U), H(V)) - E(MI(U, V))]

    where ``norm`` is the entropy normalizer applied by scikit-learn's
    ``adjusted_mutual_info_score`` with its default settings.

    The score does not depend on the actual label values (permuting community
    ids leaves it unchanged) and it is symmetric in its two arguments, which
    makes it suitable for comparing two label assignments when no ground
    truth is known.

    Be mindful that this function is an order of magnitude slower than other
    metrics, such as the Adjusted Rand Index.

    Both partitions are first validated with the module's coverage and
    overlap checks. Each partition is then converted into a list of community
    ids ordered by node, and the two lists are compared.

    :param first_partition: NodeClustering object
    :param second_partition: NodeClustering object
    :return: MatchingResult object

    :Example:

    >>> import networkx as nx
    >>> from cdlib import evaluation, algorithms
    >>> g = nx.karate_club_graph()
    >>> louvain_communities = algorithms.louvain(g)
    >>> leiden_communities = algorithms.leiden(g)
    >>> evaluation.adjusted_mutual_information(louvain_communities,leiden_communities)

    :Reference:

    1. Vinh, N. X., Epps, J., & Bailey, J. (2010). `Information theoretic measures for clusterings comparison: Variants, properties, normalization and correction for chance. <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf/>`_ Journal of Machine Learning Research, 11(Oct), 2837-2854.
    """

    __check_partition_coverage(first_partition, second_partition)
    __check_partition_overlap(first_partition, second_partition)

    def community_ids_by_node(partition):
        # Pair every node with the index of its community, order the pairs by
        # node (stable sort), and keep only the community indices.
        node_community_pairs = []
        for community_id, members in enumerate(partition.communities):
            for node in members:
                node_community_pairs.append((node, community_id))
        node_community_pairs.sort(key=lambda pair: pair[0])
        return [community_id for _, community_id in node_community_pairs]

    first_labels = community_ids_by_node(first_partition)
    second_labels = community_ids_by_node(second_partition)

    from sklearn.metrics import adjusted_mutual_info_score
    ami = adjusted_mutual_info_score(first_labels, second_labels)
    return MatchingResult(score=ami)