Skip to content

Commit 488235b

Browse files
committed
nlq2sparql/ontology: prefer 21Aug2025 TTL (fallback to 11Aug); make slices deterministic; tests: reference ONTOLOGY_FILE instead of hardcoded name
1 parent a266e0c commit 488235b

File tree

2 files changed

+30
-7
lines changed

2 files changed

+30
-7
lines changed

shared/nlq2sparql/agents/ontology_agent.py

Lines changed: 28 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,19 @@
1111

1212
from .base import BaseAgent
1313

14-
ONTOLOGY_FILE = Path(__file__).resolve().parents[1] / "ontology" / "11Aug2025_ontology.ttl"
14+
# Prefer the newest ontology file if present; fall back to the previous placeholder to keep tests stable
15+
ONTOLOGY_DIR = Path(__file__).resolve().parents[1] / "ontology"
16+
_candidates = [
17+
"21Aug2025_ontology.ttl",
18+
"11Aug2025_ontology.ttl",
19+
]
20+
_selected = None
21+
for _name in _candidates:
22+
_path = ONTOLOGY_DIR / _name
23+
if _path.exists():
24+
_selected = _path
25+
break
26+
ONTOLOGY_FILE = _selected or (ONTOLOGY_DIR / "11Aug2025_ontology.ttl")
1527

1628

1729
@dataclass
@@ -77,12 +89,17 @@ async def run(self, question: str, max_neighbors: int = 30, mode: str = "ttl", d
7789
if mode == "ttl":
7890
# Return raw TTL snippet(s) verbatim for each matched subject
7991
snippets: List[str] = []
92+
# Deterministic subject ordering
93+
subjects_sorted = sorted(matched_subjects)
8094
# Limit subjects to avoid runaway size
81-
for subj in list(matched_subjects)[: max_neighbors]:
95+
for subj in subjects_sorted[: max_neighbors]:
8296
subj_ref = URIRef(subj)
8397
lines: List[str] = []
8498
# Collect triples for this subject
85-
for _, pred, obj in g.triples((subj_ref, None, None)):
99+
triples = list(g.triples((subj_ref, None, None)))
100+
# Deterministic predicate/object ordering
101+
triples.sort(key=lambda t: (str(t[1]), str(t[2])))
102+
for _, pred, obj in triples:
86103
if isinstance(obj, Literal):
87104
o_txt = f'"{obj}"@en' if obj.language == 'en' else f'"{obj}"'
88105
else:
@@ -93,7 +110,8 @@ async def run(self, question: str, max_neighbors: int = 30, mode: str = "ttl", d
93110
# If dataset filtering is enabled, skip subjects not matching allowed prefixes
94111
if allowed_prefixes and not any(header.startswith(p) for p in allowed_prefixes):
95112
continue
96-
snippet = header + "\n\t" + "\n\t".join(lines)
113+
# Sort lines for deterministic snippet content
114+
snippet = header + "\n\t" + "\n\t".join(sorted(lines))
97115
snippets.append(snippet)
98116
# Maintain backward-compatible empty structural fields so older tests / consumers don't break.
99117
result = {"tokens": tokens, "ttl_snippets": snippets, "nodes": [], "edges": [], "literals": [], "source": "unified_ontology_v1", "mode": "ttl"}
@@ -108,7 +126,9 @@ async def run(self, question: str, max_neighbors: int = 30, mode: str = "ttl", d
108126
if len(edges) >= max_neighbors:
109127
break
110128
subj_ref = URIRef(subj)
111-
for _, pred, obj in g.triples((subj_ref, None, None)):
129+
triples = list(g.triples((subj_ref, None, None)))
130+
triples.sort(key=lambda t: (str(t[1]), str(t[2])))
131+
for _, pred, obj in triples:
112132
if len(edges) >= max_neighbors:
113133
break
114134
if isinstance(obj, Literal):
@@ -124,7 +144,9 @@ async def run(self, question: str, max_neighbors: int = 30, mode: str = "ttl", d
124144
labels_map[s] = str(o)
125145
break
126146
nodes = [{"id": nid, "label": labels_map.get(nid)} for nid in sorted(added_nodes)]
127-
result = {"tokens": tokens, "nodes": nodes, "edges": edges, "literals": literals, "source": "unified_ontology_v1", "mode": "structured"}
147+
# Deterministic ordering of edges
148+
edges_sorted = sorted(edges, key=lambda e: (e["subject"], e["predicate"], e["object"]))
149+
result = {"tokens": tokens, "nodes": nodes, "edges": edges_sorted, "literals": literals, "source": "unified_ontology_v1", "mode": "structured"}
128150
self._set_cached_slice(cache_key, result)
129151
return result
130152

shared/nlq2sparql/tests/test_agents.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
sys.path.insert(0, str(ROOT))
1515

1616
from shared.nlq2sparql.agents import UnifiedOntologyAgent, ExampleRetrievalAgent, SupervisorAgent, RouterAgent
17+
from shared.nlq2sparql.agents.ontology_agent import ONTOLOGY_FILE
1718
from shared.nlq2sparql import prompt_builder
1819

1920

@@ -28,7 +29,7 @@ async def test_ontology_agent_slice_deterministic():
2829
for key in ["tokens", "nodes", "edges", "literals", "source"]:
2930
assert key in slice1
3031
# No mutation of ontology file (hash stable via size+first bytes heuristic)
31-
ontology_path = Path(__file__).parent.parent / "ontology" / "11Aug2025_ontology.ttl"
32+
ontology_path = Path(ONTOLOGY_FILE)
3233
before = ontology_path.stat().st_size
3334
_ = await agent.run(question="Different query for slice")
3435
after = ontology_path.stat().st_size

0 commit comments

Comments
 (0)