Skip to content

Commit 1c5e1ad

Browse files
committed
Added stochastic_variability.py file
This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions using similarity measures (NMI, VI, RI) on both random and structured graphs.
1 parent 8322002 commit 1c5e1ad

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""
2+
.. _tutorials-stochastic-variability:
3+
4+
=========================================================
5+
Stochastic Variability in Community Detection Algorithms
6+
=========================================================
7+
8+
This example demonstrates the variability of stochastic community detection methods by analyzing the consistency of multiple partitions of the same graph, using three similarity measures (normalized mutual information, NMI; variation of information, VI; and the Rand index, RI) on both random and structured graphs.
9+
10+
"""
11+
# %%
12+
# Import Libraries
13+
import igraph as ig
14+
import numpy as np
15+
import matplotlib.pyplot as plt
16+
import itertools
17+
18+
# %%
19+
# First, we generate a graph.
20+
# Generates a random Erdos-Renyi graph (no clear community structure)
21+
def generate_random_graph(n, p):
    """Build an Erdos-Renyi graph by forwarding ``n`` and ``p`` to igraph."""
    random_graph = ig.Graph.Erdos_Renyi(n=n, p=p)
    return random_graph
23+
24+
# %%
25+
# Generates a clustered graph with clear communities using the Stochastic Block Model (SBM)
26+
def generate_clustered_graph(n, clusters, intra_p, inter_p):
    """Generate a graph with planted communities from a stochastic block model.

    Args:
        n: Total number of vertices.
        clusters: Number of blocks (communities); must be at least 1.
        intra_p: Edge probability between two vertices of the same block.
        inter_p: Edge probability between vertices of different blocks.

    Returns:
        The graph built by ``ig.Graph.SBM``; the block sizes always sum to
        ``n``.

    Raises:
        ValueError: If ``clusters`` is smaller than 1.
    """
    if clusters < 1:
        raise ValueError("clusters must be a positive integer")

    # Spread the remainder over the first blocks so that no vertex is dropped
    # when n is not a multiple of clusters.
    base_size, extra = divmod(n, clusters)
    block_sizes = [base_size + (1 if i < extra else 0) for i in range(clusters)]

    # Diagonal entries are within-block probabilities, the rest between-block.
    prob_matrix = [
        [intra_p if i == j else inter_p for j in range(clusters)]
        for i in range(clusters)
    ]
    return ig.Graph.SBM(n, prob_matrix, block_sizes)
30+
31+
# %%
32+
# Computes pairwise similarity (NMI, VI, RI) between partitions
33+
def compute_pairwise_similarity(partitions, method):
    """Compare every unordered pair of partitions with ``ig.compare_communities``.

    ``method`` is forwarded unchanged to ``ig.compare_communities``. The
    returned list holds one score per pair, in the order produced by
    ``itertools.combinations(partitions, 2)``.
    """
    return [
        ig.compare_communities(first, second, method=method)
        for first, second in itertools.combinations(partitions, 2)
    ]
39+
40+
# %%
41+
# Stochastic Community Detection
42+
# Runs Louvain's method iteratively to generate partitions
43+
# and then computes the pairwise similarity metrics (NMI, VI, RI) between those partitions.
44+
def run_experiment(graph, iterations=50):
    """Score how consistent repeated ``community_multilevel`` runs are.

    Calls ``graph.community_multilevel()`` ``iterations`` times, keeps the
    membership list of each run, and compares every pair of runs.

    Returns:
        A ``(nmi_scores, vi_scores, ri_scores)`` tuple of lists, one score
        per pair of runs for the "nmi", "vi" and "rand" methods.
    """
    memberships = []
    for _ in range(iterations):
        clustering = graph.community_multilevel()
        memberships.append(clustering.membership)

    # Evaluated in this order: NMI first, then VI, then the Rand index.
    nmi_scores, vi_scores, ri_scores = (
        compute_pairwise_similarity(memberships, method=name)
        for name in ("nmi", "vi", "rand")
    )
    return nmi_scores, vi_scores, ri_scores
51+
52+
# %%
53+
# Parameters
54+
n_nodes = 100    # Number of vertices requested for both graphs
p_random = 0.05  # Edge probability of the Erdos-Renyi graph
clusters = 4     # Number of blocks in the clustered graph
p_intra = 0.3    # High intra-cluster connection probability
p_inter = 0.01   # Low inter-cluster connection probability

# %%
# Generate graphs
random_graph = generate_random_graph(n=n_nodes, p=p_random)
clustered_graph = generate_clustered_graph(
    n=n_nodes,
    clusters=clusters,
    intra_p=p_intra,
    inter_p=p_inter,
)
64+
65+
# %%
66+
# Run experiments
67+
# Each call returns the pairwise (NMI, VI, RI) score lists for one graph.
random_results = run_experiment(graph=random_graph)
nmi_random, vi_random, ri_random = random_results

clustered_results = run_experiment(graph=clustered_graph)
nmi_clustered, vi_clustered, ri_clustered = clustered_results
69+
70+
# %%
71+
# Let's plot the histograms.
72+
# One row per similarity measure; left column is the random graph, right
# column is the clustered graph.
fig, axes = plt.subplots(3, 2, figsize=(12, 10))
measures = [
    (nmi_random, nmi_clustered, "NMI"),
    (vi_random, vi_clustered, "VI"),
    (ri_random, ri_clustered, "RI"),
]
colors = ["red", "blue", "green"]

for row_axes, color, (random_scores, clustered_scores, measure) in zip(axes, colors, measures):
    columns = (
        (random_scores, "Random Graph"),
        (clustered_scores, "Clustered Graph"),
    )
    # Both columns go through the same code path so they are labelled
    # identically (the clustered column previously had no y-axis label).
    for ax, (scores, graph_label) in zip(row_axes, columns):
        ax.hist(scores, bins=20, alpha=0.7, color=color, edgecolor="black")
        ax.set_title(f"Histogram of {measure} - {graph_label}")
        ax.set_xlabel(f"{measure} Score")
        ax.set_ylabel("Frequency")

plt.tight_layout()
plt.show()
88+
89+
# %%
90+
# The results are plotted as histograms for random vs. clustered graphs, highlighting differences in detected community structures.
91+
# The inconsistency on random graphs and the higher consistency on structured graphs both come down to the strength of the community structure:
92+
# Random graphs lack clear communities, which leads to unstable partitions: the stochastic algorithm detects different structures across runs, resulting in low NMI, high VI, and inconsistent RI.
93+
# Structured graphs have well-defined communities, so the detected partitions are more stable across multiple runs, leading to high NMI, low VI, and stable RI.
94+
95+
96+
# %%

0 commit comments

Comments
 (0)