88from hypergraphx .generation ._rng import np_rng , split_seed
99
1010
def _build_hypergraph_from_sampled_edges(
    edges,
    *,
    duplicate_output="merge",
    weighted_output=False,
):
    """
    Assemble a Hypergraph from a collection of sampled hyperedges.

    Every sampled edge is canonicalized as a sorted tuple of its nodes, so two
    samples containing the same nodes are treated as the same hyperedge.

    Parameters
    ----------
    edges : iterable of iterables
        Sampled hyperedges; each one must be sortable.
    duplicate_output : {"merge", "count", "error"}, default="merge"
        ``"merge"`` keeps one copy of each distinct hyperedge, ``"count"``
        returns a weighted hypergraph whose weights are the multiplicities,
        and ``"error"`` raises when any hyperedge was sampled more than once.
    weighted_output : bool, default=False
        For ``"merge"`` and ``"error"``: whether the returned hypergraph is
        weighted (every edge then gets weight 1). Not used by ``"count"``,
        which always returns a weighted hypergraph.

    Returns
    -------
    Hypergraph
        Hypergraph containing the distinct canonicalized hyperedges.

    Raises
    ------
    InvalidParameterError
        If ``duplicate_output`` is not a supported mode, or if it is
        ``"error"`` and a repeated hyperedge is present.
    """
    # Multiplicity of each canonical (sorted-tuple) hyperedge, in first-seen order.
    multiplicity = Counter(tuple(sorted(edge)) for edge in edges)
    unique_edges = list(multiplicity)

    if duplicate_output == "count":
        # Multiplicities become the edge weights; values() is aligned with
        # the key order used for unique_edges.
        counted = Hypergraph(weighted=True)
        counted.add_edges(unique_edges, weights=list(multiplicity.values()))
        return counted

    if duplicate_output not in ("merge", "error"):
        raise InvalidParameterError(
            "duplicate_output must be one of: 'merge', 'count', 'error'."
        )

    if duplicate_output == "error" and any(
        times_seen > 1 for times_seen in multiplicity.values()
    ):
        raise InvalidParameterError(
            "Repeated sampled hyperedges are not allowed when "
            "duplicate_output='error'."
        )

    # "merge" and a duplicate-free "error" share the same construction: one
    # copy of each distinct hyperedge, with unit weights if weighted.
    result = Hypergraph(weighted=weighted_output)
    if weighted_output:
        result.add_edges(unique_edges, weights=[1] * len(unique_edges))
    else:
        result.add_edges(unique_edges)
    return result
52+
53+
1154def _cm_MCMC (
1255 hypergraph ,
1356 n_steps = 1000 ,
1457 label = "edge" ,
1558 n_clash = 1 ,
16- detailed = True ,
59+ restrict_to_same_size = True ,
60+ duplicate_output = "merge" ,
1761 * ,
1862 seed : int | None = None ,
1963):
2064 """
2165 Conduct Markov Chain Monte Carlo in order to approximately
2266 sample from the space of appropriately-labeled graphs.
2367 n_steps: number of steps to perform
24- label: the label space to use. Can take values in ['vertex' , 'stub' , 'edge'].
68+ label: the label space to use. Can take values in ['vertex', 'edge'].
2569 n_clash: the number of clashes permitted when updating the edge counts in vertex-labeled MH.
2670 n_clash = 0 will be exact but very slow.
2771 n_clash >= 2 may lead to performance gains at the cost of decreased accuracy.
28- detailed: if True, preserve the number of edges of given dimension incident to each node
72+ restrict_to_same_size: if True, only rewire pairs of hyperedges with the same size
73+ duplicate_output: controls how repeated sampled hyperedges are handled
2974 """
3075 rng = np_rng (seed )
3176
3277 def proposal_generator (m ):
33- # Propose a transition in stub- and edge-labeled MH.
78+ # Propose a transition in edge-labeled MH.
3479
3580 def __proposal (edge_list ):
3681 i , j = rng .integers (0 , m , 2 )
3782 f1 , f2 = edge_list [i ], edge_list [j ]
38- if detailed :
83+ if restrict_to_same_size :
3984 while len (f1 ) != len (f2 ):
4085 i , j = rng .integers (0 , m , 2 )
4186 f1 , f2 = edge_list [i ], edge_list [j ]
@@ -73,7 +118,7 @@ def __pairwise_reshuffle(f1, f2):
73118 logger .debug ("%s %s %s %s" , f1 , f2 , g1 , g2 )
74119 return tuple (sorted (g1 )), tuple (sorted (g2 ))
75120
76- def stub_edge_mh (message = True ):
121+ def edge_mh (message = True ):
77122 mh_rounds = 0
78123 mh_steps = 0
79124 c_new = [list (c ) for c in hypergraph .get_edges ()]
@@ -92,10 +137,11 @@ def mh_step():
92137 mh_step ()
93138 n += 1
94139
95- new_h = Hypergraph ()
96- # check this behavior
97- generated_edges = list (set ([tuple (sorted (f )) for f in c_new ]))
98- new_h .add_edges (generated_edges )
140+ new_h = _build_hypergraph_from_sampled_edges (
141+ c_new ,
142+ duplicate_output = duplicate_output ,
143+ weighted_output = hypergraph .is_weighted (),
144+ )
99145 mh_steps += n
100146 mh_rounds += 1
101147
@@ -149,7 +195,7 @@ def vertex_labeled_mh(message=True):
149195 i , j = (ij [k_ ], ij [k_ + 1 ])
150196 k_ += 2
151197 f1 , f2 = l [i ], l [j ]
152- if detailed :
198+ if restrict_to_same_size :
153199 while len (f1 ) != len (f2 ):
154200 i , j = (ij [k_ ], ij [k_ + 1 ])
155201 k_ += 2
@@ -190,18 +236,20 @@ def vertex_labeled_mh(message=True):
190236 n_rejected ,
191237 )
192238
193- new_h = Hypergraph ()
194- new_h .add_edges ([tuple (sorted (f )) for f in list (c .elements ())])
239+ new_h = _build_hypergraph_from_sampled_edges (
240+ list (c .elements ()),
241+ duplicate_output = duplicate_output ,
242+ weighted_output = hypergraph .is_weighted (),
243+ )
195244 mh_steps += k - n_rejected
196245 mh_rounds += 1
197246 return new_h
198247
199- if ( label == "edge" ) or ( label == "stub" ) :
200- return stub_edge_mh ()
248+ if label == "edge" :
249+ return edge_mh ()
201250 elif label == "vertex" :
202251 return vertex_labeled_mh ()
203- else :
204- logging .getLogger (__name__ ).warning ("Not implemented" )
252+ raise InvalidParameterError ("label must be one of: 'edge', 'vertex'." )
205253
206254
207255def configuration_model (
@@ -211,34 +259,99 @@ def configuration_model(
211259 order = None ,
212260 size = None ,
213261 n_clash = 1 ,
214- detailed = True ,
262+ restrict_to_same_size = True ,
263+ duplicate_output = "merge" ,
215264 seed : int | None = None ,
216265):
217266 """
218267 Sample a randomized hypergraph using a configuration-model-style MCMC.
219268
220- Parameters are largely legacy; the key UX improvements are:
221- - `seed=` controls the RNG (reproducible, does not rely on global `np.random.seed`)
222- - when `order`/`size` is provided, only that size class is rewired
269+ The sampler supports two labeling conventions:
270+
271+ - ``label="edge"``: rewires pairs of hyperedges directly.
272+ - ``label="vertex"``: samples in the space of vertex-labeled hypergraphs
273+ using a collision-controlled update scheme.
274+
275+ By default, all hyperedges are eligible for rewiring. If ``order`` or
276+ ``size`` is specified, only hyperedges of the selected size are rewired and
277+ all other hyperedges are copied unchanged into the output hypergraph.
278+
279+ Parameters
280+ ----------
281+ hypergraph : Hypergraph
282+ Input hypergraph to randomize.
283+ n_steps : int, default=1000
284+ Number of MCMC update steps.
285+ label : {"edge", "vertex"}, default="edge"
286+ Labeling convention used by the sampler.
287+ order : int, optional
288+ Hyperedge order to rewire. If provided, only hyperedges of size
289+ ``order + 1`` are randomized.
290+ size : int, optional
291+ Hyperedge size to rewire. Mutually exclusive with ``order``.
292+ n_clash : int, default=1
293+ Collision threshold used in the vertex-labeled sampler. Only relevant
294+ when ``label="vertex"``.
295+ restrict_to_same_size : bool, default=True
296+ If ``True``, proposals are restricted to pairs of hyperedges with the
297+ same size, so rewiring is performed within size classes.
298+ duplicate_output : {"merge", "count", "error"}, default="merge"
299+ Controls how repeated sampled hyperedges are handled in the returned
300+ object. ``"merge"`` collapses duplicates into a simple hypergraph,
301+ ``"count"`` encodes multiplicities as edge weights, and ``"error"``
302+ raises if repeated sampled hyperedges occur.
303+ seed : int, optional
304+ Seed for the random number generator.
305+
306+ Returns
307+ -------
308+ Hypergraph
309+ Randomized hypergraph.
310+
311+ Raises
312+ ------
313+ InvalidParameterError
314+ If both ``order`` and ``size`` are specified.
315+ InvalidParameterError
316+ If ``label`` is not one of ``"edge"`` or ``"vertex"``.
317+ InvalidParameterError
318+ If ``duplicate_output="count"`` is used with a weighted input
319+ hypergraph.
320+
321+ Notes
322+ -----
323+ In the size-restricted mode, the function first extracts the corresponding
324+ subhypergraph, randomizes it, and then reinserts the untouched hyperedges.
325+ The default ``duplicate_output="merge"`` returns a simple hypergraph and
326+ collapses repeated sampled hyperedges.
223327
224328 Examples
225329 --------
226330 >>> from hypergraphx import Hypergraph
227331 >>> from hypergraphx.generation import configuration_model
228332 >>> H = Hypergraph(edge_list=[(0, 1), (1, 2), (0, 1, 2)], weighted=False)
229333 >>> H2 = configuration_model(H, n_steps=10, label="edge", seed=0)
230- >>> H.num_edges() == H2.num_edges()
334+ >>> isinstance(H2, Hypergraph)
335+ True
336+
337+ >>> H3 = configuration_model(H, n_steps=10, seed=0, duplicate_output="count")
338+ >>> H3.is_weighted()
231339 True
232340 """
233341 if order is not None and size is not None :
234342 raise InvalidParameterError ("Only one of order and size can be specified." )
343+ if duplicate_output == "count" and hypergraph .is_weighted ():
344+ raise InvalidParameterError (
345+ "duplicate_output='count' is only supported for unweighted hypergraphs."
346+ )
235347 if order is None and size is None :
236348 return _cm_MCMC (
237349 hypergraph ,
238350 n_steps = n_steps ,
239351 label = label ,
240352 n_clash = n_clash ,
241- detailed = detailed ,
353+ restrict_to_same_size = restrict_to_same_size ,
354+ duplicate_output = duplicate_output ,
242355 seed = seed ,
243356 )
244357
@@ -256,10 +369,15 @@ def configuration_model(
256369 n_steps = n_steps ,
257370 label = label ,
258371 n_clash = n_clash ,
259- detailed = detailed ,
372+ restrict_to_same_size = restrict_to_same_size ,
373+ duplicate_output = duplicate_output ,
260374 seed = sub_seed ,
261375 )
262376 for e in hypergraph .get_edges ():
263377 if len (e ) != size :
264- shuffled .add_edge (e )
378+ if shuffled .is_weighted ():
379+ weight = hypergraph .get_weight (e ) if hypergraph .is_weighted () else 1
380+ shuffled .add_edge (e , weight = weight )
381+ else :
382+ shuffled .add_edge (e )
265383 return shuffled
0 commit comments