Python源码示例:sklearn.metrics.adjusted_rand_score()

示例1
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
        """Cluster the latent representation and score it against the labels.

        :param prediction_algorithm: ``"knn"`` clusters the latent space with
            ``KMeans`` (despite the name), ``"gmm"`` clusters it with ``GMM``.
        :return: ``(asw_score, nmi_score, ari_score, uca_score)`` when the
            dataset has more than one label, otherwise ``None``.
        :raises ValueError: if ``prediction_algorithm`` is neither ``"knn"``
            nor ``"gmm"`` (only checked when there is more than one label).
        """
        if self.gene_dataset.n_labels <= 1:
            # Nothing to score with a single label; callers receive None.
            return None

        # Fail before the latent space is computed instead of hitting an
        # unbound ``labels_pred`` further down.
        if prediction_algorithm not in ("knn", "gmm"):
            raise ValueError(
                "prediction_algorithm must be 'knn' or 'gmm', got %r"
                % (prediction_algorithm,)
            )

        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            # n_jobs>1 ?
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(latent)
        else:
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)

        # The silhouette uses the true labels; the other scores compare the
        # true labels with the predicted clusters.
        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f",
            asw_score,
            nmi_score,
            ari_score,
            uca_score,
        )
        return asw_score, nmi_score, ari_score, uca_score
示例2
def test_spectral_clustering(eigen_solver, assign_labels):
    """Spectral clustering on a two-block affinity matrix, dense and sparse.

    Checks that the two blocks are recovered exactly and that the fitted
    model survives a pickle round trip.
    """
    first_block = [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0]
    bridge = [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0]
    second_block = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    S = np.array([first_block] * 3 + [bridge] + [second_block] * 3)
    expected = [1, 1, 1, 0, 0, 0, 0]

    affinities = (S, sparse.csr_matrix(S))
    for affinity in affinities:
        estimator = SpectralClustering(
            random_state=0,
            n_clusters=2,
            affinity='precomputed',
            eigen_solver=eigen_solver,
            assign_labels=assign_labels,
        )
        model = estimator.fit(affinity)

        # Flip the labelling so that the first sample is labelled 1.
        found = model.labels_
        found = 1 - found if found[0] == 0 else found
        assert adjusted_rand_score(found, expected) == 1

        restored = pickle.loads(pickle.dumps(model))
        assert restored.n_clusters == model.n_clusters
        assert restored.eigen_solver == model.eigen_solver
        assert_array_equal(restored.labels_, model.labels_)
示例3
def test_discretize(n_samples):
    """Check ``discretize`` on a noisy class-assignment matrix.

    For 2 to 9 classes, random labels are turned into an indicator matrix,
    Gaussian noise is added, and the labels recovered by ``discretize`` must
    reach an adjusted Rand score above 0.8.

    :param n_samples: number of samples drawn for each class count.
    """
    random_state = np.random.RandomState(seed=8)
    for n_class in range(2, 10):
        # random class labels
        y_true = random_state.randint(0, n_class + 1, n_samples)
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is
        # the same dtype (float64).
        y_true = np.array(y_true, float)
        # noise class assignment matrix
        y_indicator = sparse.coo_matrix((np.ones(n_samples),
                                         (np.arange(n_samples),
                                          y_true)),
                                        shape=(n_samples,
                                               n_class + 1))
        y_true_noisy = (y_indicator.toarray()
                        + 0.1 * random_state.randn(n_samples,
                                                   n_class + 1))
        y_pred = discretize(y_true_noisy, random_state)
        assert adjusted_rand_score(y_true, y_pred) > 0.8
示例4
def calculate_metrics(y_true, y_pred,duration,clustering=False):
    """
    Return a one-row data frame of evaluation metrics plus the duration.

    For classification (``clustering == False``) the columns are precision,
    accuracy, error (``1 - accuracy``), recall and duration, with
    macro-averaged precision and recall. For clustering the columns are the
    adjusted Rand index (``ari``) and duration.
    """
    # The equality test (not truthiness) is kept so that non-bool arguments
    # select the same branch as before.
    if clustering == False:
        # ``np.float`` was removed in NumPy 1.24; builtin ``float`` is the
        # same float64 dtype.
        res = pd.DataFrame(data = np.zeros((1,5),dtype=float), index=[0],
            columns=['precision','accuracy','error','recall','duration'])
        res['precision'] = precision_score(y_true,y_pred,average='macro')
        res['accuracy'] = accuracy_score(y_true,y_pred)
        res['recall'] = recall_score(y_true,y_pred,average='macro')
        res['duration'] = duration
        res['error'] = 1-res['accuracy']
        return res
    else:
        res = pd.DataFrame(data = np.zeros((1,2),dtype=float), index=[0],
            columns=['ari','duration'])
        res['duration']=duration
        res['ari'] = adjusted_rand_score(y_pred,y_true)
        return res
示例5
def compare_segms_metric_ars(dict_segm_a, dict_segm_b, suffix=''):
    """ compute ARS for each pair of segmentation

    Keys of ``dict_segm_a`` that are missing from ``dict_segm_b`` are logged
    and skipped. When no key matches, an empty frame with the same index
    name and column is returned.

    :param {str: ndarray} dict_segm_a:
    :param {str: ndarray} dict_segm_b:
    :param str suffix: appended to the ``'ARS'`` column name
    :return DF: frame indexed by ``'image'`` with one ``'ARS' + suffix`` column
    """
    ars_column = 'ARS' + suffix
    # Rows are collected first: ``DataFrame.append`` was removed in pandas
    # 2.0 and re-copied the whole frame on every call.
    rows = []
    for n in dict_segm_a:
        if n not in dict_segm_b:
            logging.warning('particular key "%s" is missing in dictionary', n)
            continue
        y_a = dict_segm_a[n].ravel()
        y_b = dict_segm_b[n].ravel()
        rows.append({'image': n,
                     ars_column: metrics.adjusted_rand_score(y_a, y_b)})
    # Explicit columns keep ``set_index`` valid even when ``rows`` is empty.
    df_ars = pd.DataFrame(rows, columns=['image', ars_column])
    df_ars.set_index(['image'], inplace=True)
    return df_ars
示例6
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres with PCA + k-means on the model's features.

    The model output for every sample is stored in a 512-column buffer,
    reduced to ``args.n_clusters`` PCA components and clustered with k-means.
    Accuracy, NMI and ARI against the loader's labels are printed.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        is used as the row index into the feature and target buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 512))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            feat = model(x.to(device))
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
示例7
def test(model, test_loader, args):
    """Evaluate clustering accuracy, NMI and ARI of ``model`` on ``test_loader``.

    Each batch is assigned to the cluster with the highest probability from
    ``feat2prob(feat, model.center)``. The scores are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param test_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        selects the rows of the probability matrix to fill.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    preds = np.array([])
    targets = np.array([])
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            x, label = x.to(device), label.to(device)
            feat = model(x)
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            targets = np.append(targets, label.cpu().numpy())
            preds = np.append(preds, pred.cpu().numpy())
            idx = idx.detach().cpu().numpy()
            probs[idx, :] = prob.detach().cpu().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
示例8
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres with PCA + k-means on flattened features.

    The model output for every sample is flattened to one row, stored in a
    512-column buffer, reduced to ``args.n_clusters`` PCA components and
    clustered with k-means. Accuracy, NMI and ARI are printed.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        is used as the row index into the feature and target buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 512))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            x = x.to(device)
            feat = model(x)
            # One flat feature row per sample.
            feat = feat.view(x.size(0), -1)
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
示例9
def test(model, test_loader, args, epoch=0):
    """Evaluate clustering accuracy, NMI and ARI of ``model`` on ``test_loader``.

    Each batch is assigned to the cluster with the highest probability from
    ``feat2prob(output, model.center)``. The scores are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param test_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        selects the rows of the probability matrix to fill.
    :param args: object providing ``n_clusters``.
    :param epoch: accepted for caller compatibility; not used here.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    preds = np.array([])
    targets = np.array([])
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            x, label = x.to(device), label.to(device)
            output = model(x)
            prob = feat2prob(output, model.center)
            _, pred = prob.max(1)
            targets = np.append(targets, label.cpu().numpy())
            preds = np.append(preds, pred.cpu().numpy())
            idx = idx.detach().cpu().numpy()
            probs[idx, :] = prob.detach().cpu().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    return acc, nmi, ari, torch.from_numpy(probs)
示例10
def test(model, test_loader, args):
    """Score the model's cluster assignments on ``test_loader``.

    Predictions are the arg-max of ``feat2prob(feat, model.center)``;
    accuracy, NMI and ARI against the loader's labels are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param test_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        selects the rows of the probability matrix to fill.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    preds = np.array([])
    targets = np.array([])
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            x, label = x.to(device), label.to(device)
            feat = model(x)
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            targets = np.append(targets, label.cpu().numpy())
            preds = np.append(preds, pred.cpu().numpy())
            idx = idx.detach().cpu().numpy()
            probs[idx, :] = prob.detach().cpu().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
示例11
def init_prob_kmeans(model, eval_loader, args):
    """Run PCA + k-means on the model's features to obtain initial centres.

    Features are collected into a 512-column buffer, projected onto
    ``args.n_clusters`` PCA components and clustered; the clustering is
    scored (accuracy, NMI, ARI) against the loader's labels and printed.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        is used as the row index into the feature and target buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 512))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            feat = model(x.to(device))
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
示例12
def test(model, test_loader, args):
    """Compute clustering accuracy, NMI and ARI on ``test_loader``.

    Predictions are the arg-max of ``feat2prob(feat, model.center)``; the
    three scores are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param test_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        selects the rows of the probability matrix to fill.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    preds = np.array([])
    targets = np.array([])
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            x, label = x.to(device), label.to(device)
            feat = model(x)
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            targets = np.append(targets, label.cpu().numpy())
            preds = np.append(preds, pred.cpu().numpy())
            idx = idx.detach().cpu().numpy()
            probs[idx, :] = prob.detach().cpu().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
示例13
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres with PCA + k-means on the model's features.

    The model returns a pair; its second element is used as the feature and
    stored in a 512-column buffer. Features are reduced to
    ``args.n_clusters`` PCA components and clustered with k-means.
    Accuracy, NMI and ARI are printed.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, label, idx)`` batches; ``idx``
        is used as the row index into the feature and target buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 512))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            _, feat = model(x.to(device))
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
示例14
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres with PCA + k-means on flattened features.

    The second element of the model output is flattened to one row per
    sample and stored in a 1024-column buffer, then reduced to
    ``args.n_clusters`` PCA components and clustered with k-means.
    Accuracy, NMI and ARI are printed but not returned.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, _, label, idx)`` batches; the
        second item is ignored and ``idx`` is the row index into the buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 1024))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            # One flat feature row per sample.
            feat = feat.view(x.size(0), -1)
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
示例15
def test(model, eval_loader, args):
    """Evaluate clustering accuracy, NMI and ARI of ``model`` on ``eval_loader``.

    The second element of the model output is turned into cluster
    probabilities with ``feat2prob(feat, model.center)``; the arg-max is the
    predicted cluster. Results are written at the positions given by each
    batch's ``idx``. The scores are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param eval_loader: loader yielding ``(x, _, label, idx)`` batches; the
        second item is ignored.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    y_pred = np.zeros(n_samples)
    probs = np.zeros((n_samples, args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            prob = feat2prob(feat, model.center)
            idx = idx.detach().cpu().numpy()
            prob_np = prob.detach().cpu().numpy()
            y_pred[idx] = prob_np.argmax(1)
            targets[idx] = label.detach().cpu().numpy()
            probs[idx, :] = prob_np
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
示例16
def init_prob_kmeans(model, eval_loader, args):
    """Run PCA + k-means on flattened features to obtain initial centres.

    The second element of the model output is flattened per sample into a
    1024-column buffer, projected onto ``args.n_clusters`` PCA components
    and clustered. Accuracy, NMI and ARI are printed but not returned.

    :param model: network moved to ``device`` (defined outside this
        function) and put in eval mode.
    :param eval_loader: loader yielding ``(x, _, label, idx)`` batches; the
        second item is ignored and ``idx`` is the row index into the buffers.
    :param args: object providing ``n_clusters``.
    :return: ``(cluster_centers, probs)``.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    feats = np.zeros((n_samples, 1024))
    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            # One flat feature row per sample.
            feat = feat.view(x.size(0), -1)
            idx = idx.detach().cpu().numpy()
            feats[idx, :] = feat.detach().cpu().numpy()
            targets[idx] = label.detach().cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
示例17
def test(model, eval_loader, args):
    """Score the model's cluster assignments on ``eval_loader``.

    Cluster probabilities come from ``feat2prob(feat, model.center)`` on the
    second element of the model output; the arg-max is the prediction.
    Results are stored at the positions given by each batch's ``idx``, and
    accuracy, NMI and ARI are printed.

    :param model: network exposing a ``center`` attribute; put in eval mode.
    :param eval_loader: loader yielding ``(x, _, label, idx)`` batches; the
        second item is ignored.
    :param args: object providing ``n_clusters``.
    :return: ``(acc, nmi, ari, probs)`` with ``probs`` as a torch tensor.
    """
    model.eval()
    n_samples = len(eval_loader.dataset)
    targets = np.zeros(n_samples)
    y_pred = np.zeros(n_samples)
    probs = np.zeros((n_samples, args.n_clusters))
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            prob = feat2prob(feat, model.center)
            idx = idx.detach().cpu().numpy()
            prob_np = prob.detach().cpu().numpy()
            y_pred[idx] = prob_np.argmax(1)
            targets[idx] = label.detach().cpu().numpy()
            probs[idx, :] = prob_np
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
示例18
def test_DCSBM_fit_unsupervised(self):
        """Fit a DCSBM without labels on a sampled three-block directed graph.

        The recovered vertex assignments must reach an adjusted Rand score
        above 0.95 and the estimated probability matrix must match the
        generating one within an absolute tolerance of 0.12.
        """
        np.random.seed(12345)
        n_verts = 1500

        # Per-vertex degree corrections are drawn first to keep the RNG order.
        degree_corrections = np.random.beta(4, 1, n_verts)
        block_probs = np.array(
            [[0.7, 0.1, 0.1], [0.1, 0.9, 0.1], [0.05, 0.1, 0.75]]
        )
        block_sizes = np.array([500, 500, 500])
        labels = _n_to_labels(block_sizes)

        full_shape = (n_verts, n_verts)
        p_mat = _block_to_full(block_probs, labels, full_shape)
        p_mat = p_mat * np.outer(degree_corrections, degree_corrections)
        # Zero the diagonal: the sampled graph has no loops.
        p_mat = p_mat - np.diag(np.diag(p_mat))

        graph = sample_edges(p_mat, directed=True, loops=False)
        estimator = DCSBMEstimator(directed=True, loops=False)
        estimator.fit(graph)

        ari = adjusted_rand_score(labels, estimator.vertex_assignments_)
        assert ari > 0.95
        assert_allclose(p_mat, estimator.p_mat_, atol=0.12)
示例19
def test_discretize(seed=8):
    """Check ``discretize`` on noisy class-assignment matrices.

    For several sample counts and 2 to 9 classes, random labels are turned
    into an indicator matrix, Gaussian noise is added, and the labels
    recovered by ``discretize`` must reach an adjusted Rand score above 0.8.

    :param seed: seed for the ``RandomState`` driving labels and noise.
    """
    random_state = np.random.RandomState(seed)
    for n_samples in [50, 100, 150, 500]:
        for n_class in range(2, 10):
            # random class labels
            y_true = random_state.randint(0, n_class + 1, n_samples)
            # ``np.float`` was removed in NumPy 1.24; the builtin ``float``
            # is the same dtype (float64).
            y_true = np.array(y_true, float)
            # noise class assignment matrix
            y_indicator = sparse.coo_matrix((np.ones(n_samples),
                                             (np.arange(n_samples),
                                              y_true)),
                                            shape=(n_samples,
                                                   n_class + 1))
            y_true_noisy = (y_indicator.toarray()
                            + 0.1 * random_state.randn(n_samples,
                                                       n_class + 1))
            y_pred = discretize(y_true_noisy, random_state)
            assert_greater(adjusted_rand_score(y_true, y_pred), 0.8)
示例20
def column_average_ari(Zv, Zc, cc_state_object):
    """Return the adjusted Rand index averaged over all columns.

    For every column, the true row partition of its view (``Zc[Zv[col]]``)
    is compared with the partition inferred by ``cc_state_object`` for the
    view that column was assigned to.

    :param Zv: true view assignment, one entry per column.
    :param Zc: true row partitions, indexed by view.
    :param cc_state_object: state exposing ``Zv`` and ``views[i].Z``.
    :return: mean ARI as a float.
    :raises ZeroDivisionError: if ``Zv`` is empty.
    """
    from sklearn.metrics import adjusted_rand_score
    ari = 0
    n_cols = len(Zv)
    # ``range`` replaces the Python-2-only ``xrange``, which is a NameError
    # on Python 3.
    for col in range(n_cols):
        view_t = Zv[col]
        Zc_true = Zc[view_t]

        view_i = cc_state_object.Zv[col]
        Zc_inferred = cc_state_object.views[view_i].Z.tolist()
        ari += adjusted_rand_score(Zc_true, Zc_inferred)

    return ari/float(n_cols)
示例21
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one line of clustering scores.

    The scores compare ``estimator.labels_`` with ``y``, which is read from
    the enclosing scope rather than passed in.
    """
    estimator.fit(data)
    predicted = estimator.labels_

    # What each printed score means:
    # homogeneity:          each cluster contains only members of a single class (range 0 - 1)
    # completeness:         all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure:            harmonic mean of homogeneity and completeness
    # adjusted_rand:        similarity of the actual values and their predictions,
    #                       ignoring permutations and with chance normalization
    #                       (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #                       (range 0 - 1, with 0 being random agreement and 1 being perfect agreement)
    # silhouette:           uses the mean distance between a sample and all other points in the same class,
    #                       as well as the mean distance between a sample and all other points in the nearest
    #                       cluster to calculate a score (range: -1 to 1, with the former being incorrect,
    #                       and the latter standing for highly dense clustering).
    #                       0 indicates overlapping clusters.
    template = ('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
                'adjusted-mutual-info: %.3f \tsilhouette: %.3f')
    row = (
        name,
        estimator.inertia_,
        metrics.homogeneity_score(y, predicted),
        metrics.completeness_score(y, predicted),
        metrics.v_measure_score(y, predicted),
        metrics.adjusted_rand_score(y, predicted),
        metrics.adjusted_mutual_info_score(y, predicted),
        metrics.silhouette_score(data, predicted, metric='euclidean'),
    )
    print(template % row)
示例22
def evalClusteringOnLabels(clusters, groupLabels, verbose=True):
    """
    Evaluates clustering against labels
    Alternative methodology to label prediction for testing

    :param clusters: cluster assignment for each sample.
    :param groupLabels: reference label for each sample.
    :param verbose: when true, print the three scores.
    :return: dict with keys ``'MI'`` (adjusted mutual information),
        ``'RAND'`` (adjusted Rand index) and ``'FM'`` (Fowlkes-Mallows).
    """
    results = []
    results.append(metrics.adjusted_mutual_info_score(clusters, groupLabels))
    results.append(metrics.adjusted_rand_score(clusters, groupLabels))
    results.append(metrics.fowlkes_mallows_score(clusters, groupLabels))
    if verbose:
        # RAND is results[1]; the Fowlkes-Mallows value used to be printed
        # here by mistake.
        print(f"MI: {results[0]:.2f}, RAND {results[1]:.2f}, FM: {results[2]:.2f}")
    return dict(zip(['MI', 'RAND', 'FM'], np.array(results)))
示例23
def benchmarking(gtlabels, labels):
    """Score predicted ``labels`` against ground-truth ``gtlabels``.

    Only the first ``len(gtlabels)`` predictions are evaluated.

    :return: ``(ari, ami, nmi, acc)``.
    """
    # TODO: Please note that the AMI definition used in the paper differs from that in the sklearn python package.
    # TODO: Please modify it accordingly.
    numeval = len(gtlabels)
    truth = gtlabels[:numeval]
    predicted = labels[:numeval]

    ari = metrics.adjusted_rand_score(truth, predicted)
    ami = metrics.adjusted_mutual_info_score(truth, predicted)
    nmi = metrics.normalized_mutual_info_score(truth, predicted)
    acc = clustering_accuracy(truth, predicted)

    return ari, ami, nmi, acc
示例24
def test_spectral_clustering_sparse():
    """Spectral clustering on a sparse precomputed affinity recovers two blobs."""
    X, y = make_blobs(
        n_samples=20,
        random_state=0,
        centers=[[1, 1], [-1, -1]],
        cluster_std=0.01,
    )

    # Shift the RBF kernel down by 1e-4 and clip at zero before storing it
    # as a sparse COO matrix.
    kernel = rbf_kernel(X, gamma=1)
    clipped = np.maximum(kernel - 1e-4, 0)
    affinity = sparse.coo_matrix(clipped)

    model = SpectralClustering(
        random_state=0, n_clusters=2, affinity='precomputed'
    )
    labels = model.fit(affinity).labels_
    assert adjusted_rand_score(y, labels) == 1
示例25
def test_spectral_clustering_with_arpack_amg_solvers():
    """The arpack and amg solvers must give the same spectral clustering.

    Based on the toy example from plot_segmentation_toy.py. When amg is not
    available, requesting it must raise ``ValueError``.
    """
    # a small two coin image
    x, y = np.indices((40, 40))

    (cx1, cy1), (cx2, cy2) = (14, 12), (20, 25)
    radius1, radius2 = 8, 7

    in_first = (x - cx1) ** 2 + (y - cy1) ** 2 < radius1 ** 2
    in_second = (x - cx2) ** 2 + (y - cy2) ** 2 < radius2 ** 2

    circles = in_first | in_second
    mask = circles.copy()
    img = circles.astype(float)

    graph = img_to_graph(img, mask=mask)
    graph.data = np.exp(-graph.data / graph.data.std())

    labels_arpack = spectral_clustering(
        graph, n_clusters=2, eigen_solver='arpack', random_state=0)

    assert len(np.unique(labels_arpack)) == 2

    if not amg_loaded:
        assert_raises(
            ValueError, spectral_clustering,
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
        return

    labels_amg = spectral_clustering(
        graph, n_clusters=2, eigen_solver='amg', random_state=0)
    assert adjusted_rand_score(labels_arpack, labels_amg) == 1
示例26
def adjusted_rand_kmeans_scorer(self, min_similarity):
        """Return ``self.kmeans_scorer`` applied to the adjusted Rand score.

        ``min_similarity`` is forwarded unchanged as the second argument.
        """
        build_scorer = self.kmeans_scorer
        return build_scorer(metrics.adjusted_rand_score, min_similarity)
示例27
def adjusted_rand_dbscan_scorer(self, min_similarity):
        """Return ``self.dbscan_scorer`` applied to the adjusted Rand score.

        ``min_similarity`` is forwarded unchanged as the second argument.
        """
        build_scorer = self.dbscan_scorer
        return build_scorer(metrics.adjusted_rand_score, min_similarity)
示例28
def _compute_ari_score(labels, predictions):
  """Return the adjusted Rand score of the inputs as a float op, floored at 0."""
  raw_ari = script_ops.py_func(
      metrics.adjusted_rand_score,
      [labels, predictions],
      [dtypes.float64],
      name='ari')
  ari_score = math_ops.to_float(raw_ari)
  # The ARI itself can be negative, see
  # http://scikit-learn.org/stable/modules/clustering.html#adjusted-rand-score
  # so it is clamped to be non-negative here.
  return math_ops.maximum(0.0, ari_score)
示例29
def test_umap_clusterability_on_supervised_iris(supervised_iris_model, iris):
    """K-means with 3 clusters on the supervised embedding must match ``iris.target``."""
    points = supervised_iris_model.embedding_
    kmeans = KMeans(3)
    found = kmeans.fit_predict(points)
    ari = adjusted_rand_score(found, iris.target)
    assert_greater_equal(ari, 0.95)


# UMAP Inverse transform on Iris
# ------------------------------ 
示例30
def test_blobs_cluster():
    """K-means on a UMAP embedding of five blobs must recover them exactly."""
    data, labels = make_blobs(n_samples=500, n_features=10, centers=5)
    embedding = UMAP().fit_transform(data)
    found = KMeans(5).fit_predict(embedding)
    ari = adjusted_rand_score(labels, found)
    assert_equal(ari, 1.0)


# Multi-components Layout