Skip to content

Commit b69c538

Browse files
authored
Merge pull request #525 from pyt-team/frantzen/ahorn-multinetwork
Support for multi-network AHORN datasets
2 parents 5061329 + b201558 commit b69c538

File tree

2 files changed

+82
-11
lines changed

2 files changed

+82
-11
lines changed

test/datasets/test_ahorn.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Tests for AHORN dataset loader."""
22
# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
33

4+
from io import StringIO
5+
46
import networkx as nx
57
import pytest
68

@@ -50,3 +52,36 @@ def test_read_missing_dependency_raises() -> None:
5052
"""When `ahorn-loader` is not installed, read_ahorn_dataset raises RuntimeError."""
5153
with pytest.raises(RuntimeError, match=r"optional `ahorn-loader`"):
5254
read_ahorn_dataset("dummy.json")
55+
56+
57+
@pytest.mark.skipif(
58+
ahorn_loader is None, reason="Optional dependency `ahorn-loader` not installed."
59+
)
60+
def test_read_multi_network_dataset() -> None:
61+
"""Test reading a multi-network AHORN dataset from a mock file."""
62+
mock_data = """{"name": "Mock Multi-Network Dataset"}
63+
{"id": "network-001"}
64+
0 {"label": "node0_net1"}
65+
1 {"label": "node1_net1"}
66+
2 {"label": "node2_net1"}
67+
{"id": "network-002"}
68+
0,1 {"weight": 2.0}
69+
1,2 {"weight": 3.0}
70+
0,1,2 {}
71+
"""
72+
73+
result = read_ahorn_dataset(StringIO(mock_data))
74+
75+
assert isinstance(result, list)
76+
assert len(result) == 2
77+
78+
assert len(list(result[0].simplices)) == 3
79+
assert result[0].complex["id"] == "network-001"
80+
assert result[0].nodes[0]["label"] == "node0_net1"
81+
assert result[0].nodes[1]["label"] == "node1_net1"
82+
assert result[0].nodes[2]["label"] == "node2_net1"
83+
84+
assert len(list(result[1].simplices)) == 7
85+
assert result[1].complex["id"] == "network-002"
86+
assert result[1].simplices[(0, 1)]["weight"] == 2.0
87+
assert result[1].simplices[(1, 2)]["weight"] == 3.0

toponetx/datasets/ahorn.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
"""
2828

2929
import json
30+
from collections.abc import Iterable
3031
from pathlib import Path
3132
from typing import IO
3233

34+
from more_itertools import peekable, split_before
35+
3336
from toponetx.classes import CellComplex, SimplicialComplex
3437
from toponetx.classes.complex import Complex
3538

@@ -56,7 +59,9 @@ def _assert_ahorn_loader_installed() -> None:
5659
)
5760

5861

59-
def load_ahorn_dataset[T: Complex](name: str, create_using: type[T] | None = None) -> T:
62+
def load_ahorn_dataset[T: Complex](
63+
name: str, create_using: type[T] | None = None
64+
) -> T | Iterable[T]:
6065
"""Load the specified dataset from the Aachen Higher-Order Repository of Networks.
6166
6267
The dataset file will be stored in your system cache and can be deleted according
@@ -73,8 +78,9 @@ def load_ahorn_dataset[T: Complex](name: str, create_using: type[T] | None = Non
7378
7479
Returns
7580
-------
76-
Complex
77-
The complex representing the AHORN dataset.
81+
Complex or list[Complex]
82+
The complex representing the AHORN dataset. A list of complexes if the dataset
83+
contains multiple networks.
7884
7985
Raises
8086
------
@@ -96,7 +102,7 @@ def load_ahorn_dataset[T: Complex](name: str, create_using: type[T] | None = Non
96102

97103
def read_ahorn_dataset[T](
98104
path: str | Path | IO[str], create_using: type[T] | None = None
99-
) -> T:
105+
) -> T | Iterable[T]:
100106
"""Read an AHORN dataset from a local file or file-like object.
101107
102108
This function accepts file paths and file-like objects provided by users. When
@@ -113,8 +119,9 @@ def read_ahorn_dataset[T](
113119
114120
Returns
115121
-------
116-
Complex
117-
The complex representing the AHORN dataset.
122+
Complex or list[Complex]
123+
The complex representing the AHORN dataset. A list of complexes if the dataset
124+
contains multiple networks.
118125
119126
Raises
120127
------
@@ -138,9 +145,15 @@ def read_ahorn_dataset[T](
138145
raise RuntimeError(f"Failed to read dataset: {e!s}") from e
139146

140147

141-
def _read_ahorn_dataset[T](file: IO[str], create_using: type[T] | None = None) -> T:
148+
def _read_ahorn_dataset[T](
149+
file: Iterable[str], create_using: type[T] | None = None
150+
) -> T | Iterable[T]:
142151
"""Read AHORN dataset from file-like object.
143152
153+
Supports both single-network and multi-network datasets. Multi-network datasets are
154+
detected by checking if the first two lines both start with '{', indicating they are
155+
JSON objects representing separate networks.
156+
144157
Parameters
145158
----------
146159
file : IO
@@ -150,15 +163,38 @@ def _read_ahorn_dataset[T](file: IO[str], create_using: type[T] | None = None) -
150163
151164
Returns
152165
-------
153-
Complex
154-
The complex representing the AHORN dataset.
166+
Complex or list[Complex]
167+
The complex representing the AHORN dataset. A list of complexes if the dataset
168+
contains multiple networks.
155169
"""
156170
if create_using is None:
157171
create_using = SimplicialComplex
158172

159-
complex_obj = create_using(**json.loads(next(file)))
173+
# Convert to peekable iterator to detect multi-network datasets
174+
lines = peekable(file)
175+
176+
# Check if this is a multi-network dataset by peeking at the first two lines
177+
first_line = next(lines)
178+
is_multi_network = False
179+
try:
180+
is_multi_network = lines.peek().strip().startswith("{")
181+
except StopIteration:
182+
is_multi_network = False
183+
184+
# If multi-network, create empty complex; otherwise parse first line
185+
if is_multi_network:
186+
network_lines = list(
187+
split_before(lines, lambda line: line.strip().startswith("{"))
188+
)
189+
return [
190+
_read_ahorn_dataset(network, create_using=create_using)
191+
for network in network_lines
192+
]
193+
194+
complex_obj = create_using(**json.loads(first_line))
160195

161-
for line_num, line in enumerate(file, start=2):
196+
# Process remaining lines
197+
for line_num, line in enumerate(lines, start=2):
162198
try:
163199
elements_part, metadata = line.split(" ", maxsplit=1)
164200
elements = list(map(int, elements_part.split(",")))

0 commit comments

Comments (0)