Skip to content

Commit 1c30deb

Browse files
committed
fix: expand KG seed fulltext to key phrase
1 parent 6c0d768 commit 1c30deb

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

lib/kg_hybrid_graph_rag.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,33 @@ def _retrieve_seed_nodes(
617617
}
618618
)
619619

620+
query_terms = _query_terms(query)
621+
if len(query_terms) >= 2:
622+
phrase_query = " ".join(query_terms[:2])
623+
if phrase_query and phrase_query != query:
624+
rows = postgres.execute_query(
625+
"""
626+
SELECT id, type, label, aliases, ts_rank(tsv, plainto_tsquery('english', %s)) as rank
627+
FROM kg_nodes
628+
WHERE tsv @@ plainto_tsquery('english', %s)
629+
ORDER BY rank DESC
630+
LIMIT %s
631+
""",
632+
(phrase_query, phrase_query, seed_k * 2),
633+
)
634+
for row in rows:
635+
rank = float(row[4] or 0.0)
636+
fulltext_candidates.append(
637+
{
638+
"id": row[0],
639+
"type": row[1],
640+
"label": row[2],
641+
"aliases": row[3] or [],
642+
"score": rank,
643+
"match_reason": "fulltext_phrase",
644+
}
645+
)
646+
620647
# Alias exact match (cheap + good for proper nouns)
621648
try:
622649
from lib.id_generators import normalize_label

tests/test_kg_hybrid_graph_rag_unit.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,28 @@ def generate_query_embedding(self, _query: str) -> list[float]:
165165
return [0.0] * 768
166166

167167

168+
class _FakePostgresPhraseMatch(_FakePostgres):
    """Fake Postgres client whose full-text search only hits one key phrase.

    Any query containing ``plainto_tsquery`` that is called with parameters
    yields a single "Future Barbados" row when the first parameter is exactly
    ``"future barbados"``, and no rows for any other search text. Node lookups
    by id (``FROM kg_nodes`` ... ``WHERE id IN``) resolve to the same node.
    Everything else is delegated to the parent fake.
    """

    def execute_query(self, sql: str, params: tuple[Any, ...] | None = None):
        """Return canned rows for the SQL shapes this fake recognises."""
        is_fulltext_search = "plainto_tsquery" in sql
        if is_fulltext_search and params:
            # Only the shortened key phrase matches; any other search text
            # (for example the full user query) produces no rows.
            if params[0] != "future barbados":
                return []
            phrase_hit = (
                "kg_future",
                "schema:Organization",
                "Future Barbados",
                ["Future Barbados"],
                0.8,
            )
            return [phrase_hit]

        is_node_lookup = "FROM kg_nodes" in sql and "WHERE id IN" in sql
        if is_node_lookup:
            return [("kg_future", "Future Barbados", "schema:Organization")]

        # Unrecognised SQL: fall back to the base fake's behaviour.
        return super().execute_query(sql, params)
188+
189+
168190
class _FakePostgresNameMatch(_FakePostgres):
169191
def execute_query(self, sql: str, params: tuple[Any, ...] | None = None):
170192
if "FROM kg_nodes" in sql and "embedding <=>" in sql:
@@ -270,6 +292,26 @@ def test_kg_hybrid_graph_rag_prefers_name_substring_matches() -> None:
270292
assert "Tameisha Rochester" not in seed_labels
271293

272294

295+
def test_kg_hybrid_graph_rag_adds_phrase_fulltext_candidates() -> None:
    """A long query still seeds the node matched by its short key phrase.

    The fake Postgres client returns a full-text hit only for the search text
    "future barbados", so the "Future Barbados" node should appear among the
    seeds only if the retrieval also searches with that shortened phrase.
    """
    from lib.kg_hybrid_graph_rag import kg_hybrid_graph_rag

    result = kg_hybrid_graph_rag(
        postgres=_FakePostgresPhraseMatch(),
        embedding_client=_FakeEmbedding(),
        query="Future Barbados recent 2026 budget debates",
        hops=1,
        seed_k=5,
        max_edges=10,
        max_citations=5,
    )

    labels_of_seeds = {seed["label"] for seed in result["seeds"]}
    assert "Future Barbados" in labels_of_seeds
313+
314+
273315
def test_kg_hybrid_graph_rag_respects_bill_citation_limit() -> None:
274316
from lib.kg_hybrid_graph_rag import kg_hybrid_graph_rag_with_bills
275317

0 commit comments

Comments
 (0)