Skip to content

Commit 35331cb

Browse files
committed
Refine configuration model API and docs
1 parent 5830e37 commit 35331cb

File tree

4 files changed

+219
-26
lines changed

4 files changed

+219
-26
lines changed

docs/quickstart.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,30 @@ Projections and matrices
112112
A = adjacency_matrix(hg)
113113
G = clique_projection(hg)
114114
115+
Configuration model
116+
-------------------
117+
118+
Use ``configuration_model`` to randomize a hypergraph with a
119+
configuration-model-style MCMC sampler.
120+
121+
.. code-block:: python
122+
123+
from hypergraphx.generation import configuration_model
124+
125+
hg_rand = configuration_model(hg, n_steps=500, label="edge", seed=0)
126+
127+
The ``duplicate_output`` parameter controls how repeated sampled hyperedges are
128+
handled:
129+
130+
- ``"merge"`` (default): collapse duplicates into a simple hypergraph
131+
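- ``"count"``: return a weighted hypergraph whose edge weights equal sampled multiplicities (only supported for unweighted input hypergraphs; an ``InvalidParameterError`` is raised otherwise)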
132+
- ``"error"``: raise an error if repeated sampled hyperedges occur
133+
134+
.. code-block:: python
135+
136+
hg_rand = configuration_model(hg, n_steps=500, duplicate_output="merge", seed=0)
137+
hg_counts = configuration_model(hg, n_steps=500, duplicate_output="count", seed=0)
138+
115139
Measures
116140
--------
117141

hypergraphx/generation/configuration_model.py

Lines changed: 143 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,79 @@
88
from hypergraphx.generation._rng import np_rng, split_seed
99

1010

11+
def _build_hypergraph_from_sampled_edges(
    edges,
    *,
    duplicate_output="merge",
    weighted_output=False,
):
    """
    Assemble the output hypergraph from the hyperedges produced by a sampler.

    Every sampled hyperedge is canonicalized as a sorted tuple before
    counting, so two hyperedges with the same nodes in a different order are
    treated as repeats of each other.

    Parameters
    ----------
    edges : iterable
        Sampled hyperedges (each an iterable of sortable nodes), possibly
        containing repeats.
    duplicate_output : {"merge", "count", "error"}, default="merge"
        ``"merge"`` keeps a single copy of each distinct hyperedge,
        ``"count"`` stores the multiplicity of each distinct hyperedge as its
        weight, and ``"error"`` raises if any hyperedge occurs more than once.
    weighted_output : bool, default=False
        Whether the returned hypergraph is weighted in the ``"merge"`` and
        ``"error"`` modes; if so, every hyperedge gets weight 1. Not used in
        the ``"count"`` mode, whose result is always weighted.

    Returns
    -------
    Hypergraph
        Hypergraph containing the distinct sampled hyperedges.

    Raises
    ------
    InvalidParameterError
        If ``duplicate_output="error"`` and a hyperedge was sampled more than
        once, or if ``duplicate_output`` is not a recognized option.
    """
    multiplicity = Counter(tuple(sorted(edge)) for edge in edges)
    distinct_edges = list(multiplicity)

    if duplicate_output == "count":
        counted = Hypergraph(weighted=True)
        # Counter preserves insertion order, so values line up with the keys.
        counted.add_edges(distinct_edges, weights=list(multiplicity.values()))
        return counted

    if duplicate_output not in ("merge", "error"):
        raise InvalidParameterError(
            "duplicate_output must be one of: 'merge', 'count', 'error'."
        )

    has_repeats = any(count > 1 for count in multiplicity.values())
    if duplicate_output == "error" and has_repeats:
        raise InvalidParameterError(
            "Repeated sampled hyperedges are not allowed when "
            "duplicate_output='error'."
        )

    # "merge" and a repeat-free "error" build the same simple hypergraph.
    result = Hypergraph(weighted=weighted_output)
    if weighted_output:
        result.add_edges(distinct_edges, weights=[1] * len(distinct_edges))
    else:
        result.add_edges(distinct_edges)
    return result
52+
53+
1154
def _cm_MCMC(
1255
hypergraph,
1356
n_steps=1000,
1457
label="edge",
1558
n_clash=1,
16-
detailed=True,
59+
restrict_to_same_size=True,
60+
duplicate_output="merge",
1761
*,
1862
seed: int | None = None,
1963
):
2064
"""
2165
Conduct Markov Chain Monte Carlo in order to approximately
2266
sample from the space of appropriately-labeled graphs.
2367
n_steps: number of steps to perform
24-
label: the label space to use. Can take values in ['vertex' , 'stub', 'edge'].
68+
label: the label space to use. Can take values in ['vertex', 'edge'].
2569
n_clash: the number of clashes permitted when updating the edge counts in vertex-labeled MH.
2670
n_clash = 0 will be exact but very slow.
2771
n_clash >= 2 may lead to performance gains at the cost of decreased accuracy.
28-
detailed: if True, preserve the number of edges of given dimension incident to each node
72+
restrict_to_same_size: if True, only rewire pairs of hyperedges with the same size
73+
duplicate_output: controls how repeated sampled hyperedges are handled
2974
"""
3075
rng = np_rng(seed)
3176

3277
def proposal_generator(m):
33-
# Propose a transition in stub- and edge-labeled MH.
78+
# Propose a transition in edge-labeled MH.
3479

3580
def __proposal(edge_list):
3681
i, j = rng.integers(0, m, 2)
3782
f1, f2 = edge_list[i], edge_list[j]
38-
if detailed:
83+
if restrict_to_same_size:
3984
while len(f1) != len(f2):
4085
i, j = rng.integers(0, m, 2)
4186
f1, f2 = edge_list[i], edge_list[j]
@@ -73,7 +118,7 @@ def __pairwise_reshuffle(f1, f2):
73118
logger.debug("%s %s %s %s", f1, f2, g1, g2)
74119
return tuple(sorted(g1)), tuple(sorted(g2))
75120

76-
def stub_edge_mh(message=True):
121+
def edge_mh(message=True):
77122
mh_rounds = 0
78123
mh_steps = 0
79124
c_new = [list(c) for c in hypergraph.get_edges()]
@@ -92,10 +137,11 @@ def mh_step():
92137
mh_step()
93138
n += 1
94139

95-
new_h = Hypergraph()
96-
# check this behavior
97-
generated_edges = list(set([tuple(sorted(f)) for f in c_new]))
98-
new_h.add_edges(generated_edges)
140+
new_h = _build_hypergraph_from_sampled_edges(
141+
c_new,
142+
duplicate_output=duplicate_output,
143+
weighted_output=hypergraph.is_weighted(),
144+
)
99145
mh_steps += n
100146
mh_rounds += 1
101147

@@ -149,7 +195,7 @@ def vertex_labeled_mh(message=True):
149195
i, j = (ij[k_], ij[k_ + 1])
150196
k_ += 2
151197
f1, f2 = l[i], l[j]
152-
if detailed:
198+
if restrict_to_same_size:
153199
while len(f1) != len(f2):
154200
i, j = (ij[k_], ij[k_ + 1])
155201
k_ += 2
@@ -190,18 +236,20 @@ def vertex_labeled_mh(message=True):
190236
n_rejected,
191237
)
192238

193-
new_h = Hypergraph()
194-
new_h.add_edges([tuple(sorted(f)) for f in list(c.elements())])
239+
new_h = _build_hypergraph_from_sampled_edges(
240+
list(c.elements()),
241+
duplicate_output=duplicate_output,
242+
weighted_output=hypergraph.is_weighted(),
243+
)
195244
mh_steps += k - n_rejected
196245
mh_rounds += 1
197246
return new_h
198247

199-
if (label == "edge") or (label == "stub"):
200-
return stub_edge_mh()
248+
if label == "edge":
249+
return edge_mh()
201250
elif label == "vertex":
202251
return vertex_labeled_mh()
203-
else:
204-
logging.getLogger(__name__).warning("Not implemented")
252+
raise InvalidParameterError("label must be one of: 'edge', 'vertex'.")
205253

206254

207255
def configuration_model(
@@ -211,34 +259,99 @@ def configuration_model(
211259
order=None,
212260
size=None,
213261
n_clash=1,
214-
detailed=True,
262+
restrict_to_same_size=True,
263+
duplicate_output="merge",
215264
seed: int | None = None,
216265
):
217266
"""
218267
Sample a randomized hypergraph using a configuration-model-style MCMC.
219268
220-
Parameters are largely legacy; the key UX improvements are:
221-
- `seed=` controls the RNG (reproducible, does not rely on global `np.random.seed`)
222-
- when `order`/`size` is provided, only that size class is rewired
269+
The sampler supports two labeling conventions:
270+
271+
- ``label="edge"``: rewires pairs of hyperedges directly.
272+
- ``label="vertex"``: samples in the space of vertex-labeled hypergraphs
273+
using a collision-controlled update scheme.
274+
275+
By default, all hyperedges are eligible for rewiring. If ``order`` or
276+
``size`` is specified, only hyperedges of the selected size are rewired and
277+
all other hyperedges are copied unchanged into the output hypergraph.
278+
279+
Parameters
280+
----------
281+
hypergraph : Hypergraph
282+
Input hypergraph to randomize.
283+
n_steps : int, default=1000
284+
Number of MCMC update steps.
285+
label : {"edge", "vertex"}, default="edge"
286+
Labeling convention used by the sampler.
287+
order : int, optional
288+
Hyperedge order to rewire. If provided, only hyperedges of size
289+
``order + 1`` are randomized.
290+
size : int, optional
291+
Hyperedge size to rewire. Mutually exclusive with ``order``.
292+
n_clash : int, default=1
293+
Collision threshold used in the vertex-labeled sampler. Only relevant
294+
when ``label="vertex"``.
295+
restrict_to_same_size : bool, default=True
296+
If ``True``, proposals are restricted to pairs of hyperedges with the
297+
same size, so rewiring is performed within size classes.
298+
duplicate_output : {"merge", "count", "error"}, default="merge"
299+
Controls how repeated sampled hyperedges are handled in the returned
300+
object. ``"merge"`` collapses duplicates into a simple hypergraph,
301+
``"count"`` encodes multiplicities as edge weights, and ``"error"``
302+
raises if repeated sampled hyperedges occur.
303+
seed : int, optional
304+
Seed for the random number generator.
305+
306+
Returns
307+
-------
308+
Hypergraph
309+
Randomized hypergraph.
310+
311+
Raises
312+
------
313+
InvalidParameterError
314+
If both ``order`` and ``size`` are specified.
315+
InvalidParameterError
316+
If ``label`` is not one of ``"edge"`` or ``"vertex"``.
317+
InvalidParameterError
318+
If ``duplicate_output="count"`` is used with a weighted input
319+
hypergraph.
320+
321+
Notes
322+
-----
323+
In the size-restricted mode, the function first extracts the corresponding
324+
subhypergraph, randomizes it, and then reinserts the untouched hyperedges.
325+
The default ``duplicate_output="merge"`` returns a simple hypergraph and
326+
collapses repeated sampled hyperedges.
223327
224328
Examples
225329
--------
226330
>>> from hypergraphx import Hypergraph
227331
>>> from hypergraphx.generation import configuration_model
228332
>>> H = Hypergraph(edge_list=[(0, 1), (1, 2), (0, 1, 2)], weighted=False)
229333
>>> H2 = configuration_model(H, n_steps=10, label="edge", seed=0)
230-
>>> H.num_edges() == H2.num_edges()
334+
>>> isinstance(H2, Hypergraph)
335+
True
336+
337+
>>> H3 = configuration_model(H, n_steps=10, seed=0, duplicate_output="count")
338+
>>> H3.is_weighted()
231339
True
232340
"""
233341
if order is not None and size is not None:
234342
raise InvalidParameterError("Only one of order and size can be specified.")
343+
if duplicate_output == "count" and hypergraph.is_weighted():
344+
raise InvalidParameterError(
345+
"duplicate_output='count' is only supported for unweighted hypergraphs."
346+
)
235347
if order is None and size is None:
236348
return _cm_MCMC(
237349
hypergraph,
238350
n_steps=n_steps,
239351
label=label,
240352
n_clash=n_clash,
241-
detailed=detailed,
353+
restrict_to_same_size=restrict_to_same_size,
354+
duplicate_output=duplicate_output,
242355
seed=seed,
243356
)
244357

@@ -256,10 +369,15 @@ def configuration_model(
256369
n_steps=n_steps,
257370
label=label,
258371
n_clash=n_clash,
259-
detailed=detailed,
372+
restrict_to_same_size=restrict_to_same_size,
373+
duplicate_output=duplicate_output,
260374
seed=sub_seed,
261375
)
262376
for e in hypergraph.get_edges():
263377
if len(e) != size:
264-
shuffled.add_edge(e)
378+
if shuffled.is_weighted():
379+
weight = hypergraph.get_weight(e) if hypergraph.is_weighted() else 1
380+
shuffled.add_edge(e, weight=weight)
381+
else:
382+
shuffled.add_edge(e)
265383
return shuffled

hypergraphx/motifs/motifs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def _motifs_order_4(edges):
9191
for i in range(ROUNDS):
9292
logger.info("Computing config model motifs of order %s. Step: %s", order, i + 1)
9393
sub_seed = int(rng.integers(0, 2**32 - 1, dtype=np.uint32))
94-
e1 = configuration_model(hypergraph, label="stub", n_steps=STEPS, seed=sub_seed)
94+
e1 = configuration_model(hypergraph, label="edge", n_steps=STEPS, seed=sub_seed)
9595
if order == 3:
9696
m1 = _motifs_order_3(e1.get_edges())
9797
elif order == 4:

tests/generation/test_configuration_model.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pytest
33

44
from hypergraphx import Hypergraph
5+
from hypergraphx.exceptions import InvalidParameterError
56
from hypergraphx.generation.configuration_model import configuration_model
67

78

@@ -35,3 +36,53 @@ def test_configuration_model_invalid_args():
3536
hg = _make_hypergraph()
3637
with pytest.raises(ValueError, match="Only one"):
3738
configuration_model(hg, order=1, size=2)
39+
40+
41+
def test_configuration_model_rejects_stub_label():
    """Passing the removed ``"stub"`` label must raise InvalidParameterError."""
    hypergraph = _make_hypergraph()
    expected_message = "label must be one of"
    with pytest.raises(InvalidParameterError, match=expected_message):
        configuration_model(hypergraph, label="stub")
46+
47+
48+
def test_configuration_model_duplicate_output_merge_default():
    """Without ``duplicate_output``, repeated sampled hyperedges collapse."""
    edge_list = [(0, 1), (2, 3), (0, 2), (1, 3)]
    source = Hypergraph(edge_list=edge_list, weighted=False)

    result = configuration_model(source, n_steps=50, label="edge", seed=6)

    # The expected edges are specific to this seed and step count.
    assert not result.is_weighted()
    assert result.num_edges() == 2
    assert set(result.get_edges()) == {(0, 2), (1, 3)}
57+
58+
59+
def test_configuration_model_duplicate_output_count():
    """``duplicate_output="count"`` stores multiplicities as edge weights."""
    edge_list = [(0, 1), (2, 3), (0, 2), (1, 3)]
    source = Hypergraph(edge_list=edge_list, weighted=False)

    result = configuration_model(
        source,
        n_steps=50,
        label="edge",
        seed=6,
        duplicate_output="count",
    )

    assert result.is_weighted()
    assert result.num_edges() == 2
    # With this seed each surviving hyperedge was sampled twice.
    for edge in ((0, 2), (1, 3)):
        assert result.get_weight(edge) == 2
71+
72+
73+
def test_configuration_model_duplicate_output_error():
    """``duplicate_output="error"`` raises when a hyperedge is repeated."""
    edge_list = [(0, 1), (2, 3), (0, 2), (1, 3)]
    source = Hypergraph(edge_list=edge_list, weighted=False)
    sampler_kwargs = {
        "n_steps": 50,
        "label": "edge",
        "seed": 6,
        "duplicate_output": "error",
    }

    with pytest.raises(InvalidParameterError, match="Repeated sampled hyperedges"):
        configuration_model(source, **sampler_kwargs)
81+
82+
83+
def test_configuration_model_duplicate_output_count_rejects_weighted_input():
    """``duplicate_output="count"`` is refused for a weighted input hypergraph."""
    weighted_source = Hypergraph(
        edge_list=[(0, 1), (1, 2)],
        weighted=True,
        weights=[2.0, 3.0],
    )
    expected_message = "only supported for unweighted"

    with pytest.raises(InvalidParameterError, match=expected_message):
        configuration_model(
            weighted_source, n_steps=5, duplicate_output="count", seed=0
        )

0 commit comments

Comments
 (0)