|
| 1 | +""" |
| 2 | +.. _tutorials-stochastic-variability: |
| 3 | +
|
| 4 | +========================================================= |
| 5 | +Stochastic Variability in Community Detection Algorithms |
| 6 | +========================================================= |
| 7 | +
|
| 8 | +This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures (NMI, VI, RI) on both random and structured graphs. |
| 9 | +
|
| 10 | +""" |
| 11 | +# %% |
| 12 | +# Import Libraries |
| 13 | +import igraph as ig |
| 14 | +import numpy as np |
| 15 | +import matplotlib.pyplot as plt |
| 16 | +import itertools |
| 17 | + |
| 18 | +# %% |
| 19 | +# First, we generate a graph. |
| 20 | +# Generates a random Erdos-Renyi graph (no clear community structure) |
def generate_random_graph(n, p):
    """Return an Erdos-Renyi G(n, p) random graph.

    The graph has ``n`` vertices and each possible edge is present with
    probability ``p``. It serves as the example without a clear community
    structure.
    """
    graph = ig.Graph.Erdos_Renyi(n=n, p=p)
    return graph
| 23 | + |
| 24 | +# %% |
| 25 | +# Generates a clustered graph with clear communities using the Stochastic Block Model (SBM) |
def generate_clustered_graph(n, clusters, intra_p, inter_p):
    """Return a Stochastic Block Model graph with ``clusters`` communities.

    Parameters:
        n: total number of vertices in the graph.
        clusters: number of blocks (planted communities); must be >= 1.
        intra_p: edge probability between two vertices of the same block.
        inter_p: edge probability between vertices of different blocks.

    Returns:
        The graph produced by ``ig.Graph.SBM`` with exactly ``n`` vertices.

    Raises:
        ValueError: if ``clusters`` is smaller than 1.
    """
    if clusters < 1:
        raise ValueError("clusters must be a positive integer")

    # Give the n % clusters leftover vertices to the first blocks so that the
    # block sizes always add up to n instead of silently dropping vertices.
    base_size, leftover = divmod(n, clusters)
    block_sizes = [
        base_size + 1 if index < leftover else base_size
        for index in range(clusters)
    ]

    # intra_p on the diagonal (same block), inter_p everywhere else.
    prob_matrix = [
        [intra_p if i == j else inter_p for j in range(clusters)]
        for i in range(clusters)
    ]
    return ig.Graph.SBM(n, prob_matrix, block_sizes)
| 30 | + |
| 31 | +# %% |
| 32 | +# Computes pairwise similarity (NMI, VI, RI) between partitions |
def compute_pairwise_similarity(partitions, method):
    """Score every unordered pair of partitions with one comparison measure.

    Each pair drawn from ``partitions`` is passed to
    ``ig.compare_communities`` together with ``method``; the resulting scores
    are returned as a list in the order the pairs are generated. Fewer than
    two partitions yield an empty list.
    """
    return [
        ig.compare_communities(first, second, method=method)
        for first, second in itertools.combinations(partitions, 2)
    ]
| 39 | + |
| 40 | +# %% |
| 41 | +# Stochastic Community Detection |
| 42 | +# Runs Louvain's method iteratively to generate partitions |
| 43 | +# Computes similarity metrics: |
def run_experiment(graph, iterations=50):
    """Run multilevel community detection repeatedly and score the agreement.

    ``graph.community_multilevel()`` is called ``iterations`` times and the
    membership list of every run is kept. All pairs of runs are then compared
    with the "nmi", "vi" and "rand" measures.

    Returns:
        A tuple ``(nmi_scores, vi_scores, ri_scores)`` of pairwise score lists.
    """
    memberships = []
    for _ in range(iterations):
        memberships.append(graph.community_multilevel().membership)

    return tuple(
        compute_pairwise_similarity(memberships, method=measure)
        for measure in ("nmi", "vi", "rand")
    )
| 51 | + |
| 52 | +# %% |
| 53 | +# Parameters |
# Size of both graphs and edge probability of the random graph
n_nodes, p_random = 100, 0.05
# Number of planted communities in the clustered graph
clusters = 4
# High intra-cluster and low inter-cluster connection probability
p_intra, p_inter = 0.3, 0.01
| 59 | + |
| 60 | +# %% |
| 61 | +# Generate graphs |
# One graph without planted communities and one with them, both of n_nodes vertices
random_graph = generate_random_graph(n=n_nodes, p=p_random)
clustered_graph = generate_clustered_graph(
    n=n_nodes,
    clusters=clusters,
    intra_p=p_intra,
    inter_p=p_inter,
)
| 64 | + |
| 65 | +# %% |
| 66 | +# Run experiments |
# Same experiment on both graphs: the random graph first, then the clustered one
(
    (nmi_random, vi_random, ri_random),
    (nmi_clustered, vi_clustered, ri_clustered),
) = (run_experiment(graph) for graph in (random_graph, clustered_graph))
| 69 | + |
| 70 | +# %% |
| 71 | +# Let's plot the histograms |
# One row per similarity measure; left column = random graph, right = clustered
fig, axes = plt.subplots(3, 2, figsize=(12, 10))
measures = [
    (nmi_random, nmi_clustered, "NMI"),
    (vi_random, vi_clustered, "VI"),
    (ri_random, ri_clustered, "RI"),
]
colors = ["red", "blue", "green"]

for i, (random_scores, clustered_scores, measure) in enumerate(measures):
    left_ax, right_ax = axes[i]
    # Both histograms of a row share the same binning and colour
    hist_style = dict(bins=20, alpha=0.7, color=colors[i], edgecolor="black")

    left_ax.hist(random_scores, **hist_style)
    left_ax.set_title(f"Histogram of {measure} - Random Graph")
    left_ax.set_xlabel(f"{measure} Score")
    left_ax.set_ylabel("Frequency")

    right_ax.hist(clustered_scores, **hist_style)
    right_ax.set_title(f"Histogram of {measure} - Clustered Graph")
    right_ax.set_xlabel(f"{measure} Score")

plt.tight_layout()
plt.show()
| 88 | + |
| 89 | +# %% |
| 90 | +# The results are plotted as histograms for random vs. clustered graphs, highlighting differences in the detected community structures. |
| 91 | +# The key reason for the inconsistency in random graphs and the higher consistency in structured graphs is the strength of the community structure: |
| 92 | +# Random graphs: lack clear communities, leading to unstable partitions. Stochastic algorithms detect different structures across runs, resulting in low NMI, high VI, and inconsistent RI. |
| 93 | +# Structured graphs: have well-defined communities, so the detected partitions are more stable across multiple runs, leading to high NMI, low VI, and stable RI. |
| 94 | + |
| 95 | + |
| 96 | +# %% |
0 commit comments