Skip to content

Commit 893863e

Browse files
committed
Remove Dense NxN Where Sparse Works
1 parent fe5e3f5 commit 893863e

File tree

2 files changed

+109
-39
lines changed

2 files changed

+109
-39
lines changed

hypergraphx/dynamics/randwalk.py

Lines changed: 91 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,41 @@ def transition_matrix(HG: Hypergraph) -> sparse.spmatrix:
2222
----------
2323
[1] Timoteo Carletti, Federico Battiston, Giulia Cencetti, and Duccio Fanelli, Random walks on hypergraphs, Phys. Rev. E 96, 012308 (2017)
2424
"""
25-
N = HG.num_nodes()
26-
# assert if HG i connected
27-
assert HG.is_connected(), "The hypergraph is not connected"
28-
hedge_list = HG.get_edges()
29-
T = np.zeros((N, N))
30-
for l in hedge_list:
31-
for i in range(len(l)):
32-
for j in range(i + 1, len(l)):
33-
T[l[i], l[j]] += len(l) - 1
34-
T[l[j], l[i]] += len(l) - 1
35-
row_sums = T.sum(axis=1, keepdims=True)
36-
T = T / row_sums
37-
38-
T = sparse.csr_matrix(T)
25+
if not HG.is_connected():
26+
raise ValueError("The hypergraph is not connected")
27+
28+
# Build a sparse transition matrix using the binary incidence matrix B.
29+
# For each hyperedge e of size s, contribute (s-1) to every off-diagonal pair in e.
30+
# This corresponds to: M = B * diag(s_e - 1) * B^T, then zero the diagonal, then row-normalize.
31+
B, idx_to_node = HG.binary_incidence_matrix(return_mapping=True)
32+
_ = idx_to_node # mapping is relevant for callers; matrix is in index space.
33+
34+
edges = HG.get_edges()
35+
if len(edges) == 0:
36+
raise ValueError("Cannot compute a random walk on an empty hypergraph.")
37+
38+
w = np.asarray([len(e) - 1 for e in edges], dtype=float)
39+
if np.any(w < 0):
40+
raise ValueError("Invalid hyperedge size encountered.")
41+
42+
M = B @ sparse.diags(w, offsets=0, format="csr") @ B.T
43+
M = M.tocsr()
44+
M.setdiag(0)
45+
M.eliminate_zeros()
46+
47+
row_sums = np.asarray(M.sum(axis=1)).ravel()
48+
if np.any(row_sums == 0):
49+
# This can happen with isolated nodes (or empty edges), which contradicts connectivity anyway.
50+
raise ValueError(
51+
"Random-walk transition undefined: a node has no outgoing probability mass."
52+
)
53+
54+
inv_row = sparse.diags(1.0 / row_sums, offsets=0, format="csr")
55+
T = (inv_row @ M).tocsr()
3956
return T
4057

4158

42-
def random_walk(HG: Hypergraph, s: int, time: int) -> list:
59+
def random_walk(HG: Hypergraph, s, time: int) -> list:
4360
"""Compute the random walk on the hypergraph.
4461
4562
Parameters
@@ -56,15 +73,33 @@ def random_walk(HG: Hypergraph, s: int, time: int) -> list:
5673
nodes : list
5774
The list of nodes visited by the random walk.
5875
"""
59-
K = np.array(transition_matrix(HG).todense())
60-
nodes = [s]
61-
for t in range(time):
62-
next_node = np.random.choice(K.shape[0], p=K[nodes[-1], :])
63-
nodes.append(next_node)
64-
return nodes
65-
66-
67-
def RW_stationary_state(HG: Hypergraph) -> np.ndarray:
76+
if time < 0:
77+
raise ValueError("time must be non-negative.")
78+
79+
T, mapping = HG.binary_incidence_matrix(return_mapping=True)
80+
node_to_idx = {node: idx for idx, node in mapping.items()}
81+
if s not in node_to_idx:
82+
raise ValueError("Starting node is not in the hypergraph.")
83+
start_idx = node_to_idx[s]
84+
85+
P = transition_matrix(HG).tocsr()
86+
path_idx = [start_idx]
87+
for _ in range(time):
88+
cur = path_idx[-1]
89+
row = P.getrow(cur)
90+
if row.nnz == 0:
91+
# This should not happen for connected hypergraphs, but keep a safe fallback.
92+
path_idx.append(cur)
93+
continue
94+
next_idx = int(np.random.choice(row.indices, p=row.data))
95+
path_idx.append(next_idx)
96+
97+
return [mapping[i] for i in path_idx]
98+
99+
100+
def RW_stationary_state(
101+
HG: Hypergraph, *, tol: float = 1e-12, max_iter: int = 10000
102+
) -> np.ndarray:
68103
"""Compute the stationary state of the random walk on the hypergraph.
69104
70105
Parameters
@@ -77,10 +112,29 @@ def RW_stationary_state(HG: Hypergraph) -> np.ndarray:
77112
stationary_state : np.ndarray
78113
The stationary state of the random walk on the hypergraph.
79114
"""
80-
K = np.array(transition_matrix(HG).todense())
81-
stationary_state = np.linalg.solve(np.eye(K.shape[0]) - K.T, np.ones(K.shape[0]))
82-
stationary_state = stationary_state / np.sum(stationary_state)
83-
return stationary_state
115+
if tol <= 0:
116+
raise ValueError("tol must be positive.")
117+
if max_iter <= 0:
118+
raise ValueError("max_iter must be positive.")
119+
120+
P = transition_matrix(HG).tocsr()
121+
n = P.shape[0]
122+
pi = np.full(n, 1.0 / n, dtype=float)
123+
124+
# Power iteration on row-stochastic P: pi_{k+1} = pi_k P.
125+
for _ in range(max_iter):
126+
pi_next = pi @ P
127+
# L1 distance is natural for distributions.
128+
if np.linalg.norm(pi_next - pi, ord=1) < tol:
129+
pi = pi_next
130+
break
131+
pi = pi_next
132+
133+
total = pi.sum()
134+
if total == 0 or not np.isfinite(total):
135+
raise RuntimeError("Failed to compute a valid stationary distribution.")
136+
pi = pi / total
137+
return np.asarray(pi).ravel()
84138

85139

86140
def random_walk_density(HG: Hypergraph, s: np.ndarray, time: int) -> list:
@@ -98,12 +152,15 @@ def random_walk_density(HG: Hypergraph, s: np.ndarray, time: int) -> list:
98152
nodes : list
99153
The list of density vectors over time.
100154
"""
101-
assert np.isclose(np.sum(s), 1), "The vector is not a probability density"
155+
if not np.isclose(np.sum(s), 1):
156+
raise ValueError("The vector is not a probability density")
157+
if time < 0:
158+
raise ValueError("time must be non-negative.")
102159

103-
K = np.array(transition_matrix(HG).todense())
104-
starting_density = s
160+
P = transition_matrix(HG).tocsr()
161+
starting_density = np.asarray(s, dtype=float)
105162
density_list = [starting_density]
106-
for t in range(time):
107-
s = s @ K
108-
density_list.append(s)
163+
for _ in range(time):
164+
starting_density = starting_density @ P
165+
density_list.append(np.asarray(starting_density).ravel())
109166
return density_list

tests/dynamics/test_randwalk.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@ def test_random_walk_length():
3232

3333

3434
def test_stationary_state_properties():
35-
"""Test stationary state raises on singular system for small graphs."""
35+
"""Test stationary state is a valid distribution."""
3636
hg = _make_connected_hypergraph()
37-
with pytest.raises(np.linalg.LinAlgError):
38-
RW_stationary_state(hg)
37+
pi = RW_stationary_state(hg)
38+
assert np.all(pi >= 0)
39+
assert np.isclose(pi.sum(), 1.0)
3940

4041

4142
def test_random_walk_density_normalization():
@@ -48,7 +49,19 @@ def test_random_walk_density_normalization():
4849

4950

5051
def test_random_walk_density_invalid():
51-
"""Test invalid density vector raises assertion."""
52+
"""Test invalid density vector raises."""
5253
hg = _make_connected_hypergraph()
53-
with pytest.raises(AssertionError, match="probability"):
54+
with pytest.raises(ValueError, match="probability"):
5455
random_walk_density(hg, np.array([0.2, 0.2, 0.2]), time=1)
56+
57+
58+
def test_transition_matrix_large_sparse_chain():
59+
"""Smoke test: large N should remain sparse and fast enough."""
60+
N = 10_000
61+
edges = [(i, i + 1) for i in range(N - 1)]
62+
hg = Hypergraph(edge_list=edges)
63+
T = transition_matrix(hg).tocsr()
64+
assert T.shape == (N, N)
65+
assert T.nnz == 2 * (N - 1)
66+
rowsum = np.asarray(T.sum(axis=1)).ravel()
67+
assert np.allclose(rowsum, 1.0)

0 commit comments

Comments
 (0)