Skip to content

Commit a2920bf

Browse files
committed
test(relationships): add test for standalone file yielding no extra chunks on expansion
1 parent c1fe53a commit a2920bf

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

openrag/components/indexer/vectordb/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ def get_file_ancestors(self, partition: str, file_id: str, max_ancestor_depth: i
578578
**(row.file_metadata or {}),
579579
}
580580
for row in result
581+
if row.relationship_id is not None # Only include files that are part of a relationship
581582
]
582583

583584
def get_ancestor_file_ids(self, partition: str, file_id: str, max_ancestor_depth: int | None = None) -> list[str]:

openrag/components/test_relationships.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,50 @@ def test_get_ancestor_file_ids_with_max_ancestor_depth(self, file_manager):
580580
assert ancestor_ids == ["chain_2", "chain_3"]
581581

582582

583+
class TestStandaloneFileNoExpansion:
    """Check that a standalone file triggers no chunk expansion.

    A file indexed without a relationship_id (and without a parent_id) must not
    pull in additional chunks when include_related and include_ancestors are
    both active.
    """

    def test_no_extra_chunks_for_file_without_relationship_id(self, file_manager):
        """A standalone file must not bring additional files on expansion.

        Mirrors the logic in _expand_with_related_chunks:
          - include_related: the guard `metadata.get("relationship_id")` is falsy,
            so no related lookup is issued and the related task set stays empty.
          - include_ancestors: get_file_ancestors returns only the file itself when
            there is no parent, so it is already in seen_ids — nothing new is added.

        NOTE(review): the same commit adds an
        `if row.relationship_id is not None` filter to get_file_ancestors. If the
        standalone row stores relationship_id as NULL, that filter looks like it
        would drop the row and make the ancestors assertions below fail —
        confirm against the full implementation.
        """
        partition = "test_partition"
        file_id = "standalone"

        file_manager.add_file_to_partition(
            partition=partition,
            file_id=file_id,
            file_metadata={"filename": "standalone.pdf"},
            # No relationship_id, no parent_id
        )

        # Sanity check: no relationship is keyed by the file's own id, so a
        # lookup using it returns nothing. The stored relationship_id itself is
        # verified by the direct query below.
        files = file_manager.get_files_by_relationship(
            partition=partition,
            relationship_id=file_id,  # non-existent → empty
        )
        assert files == [], "No files should share a relationship with a standalone file"

        with file_manager.Session() as session:
            # Bind file_id as a parameter instead of embedding it in the SQL text.
            row = session.execute(
                text("SELECT relationship_id FROM files WHERE file_id = :file_id"),
                {"file_id": file_id},
            ).fetchone()
            # fetchone() yields None when no row matches; fail with a clear
            # message rather than a TypeError on row[0].
            assert row is not None, "standalone file must exist in the files table"
            assert not row[0], "relationship_id must be falsy so include_related is skipped"

        # include_ancestors path: returns only the file itself → nothing new to add
        ancestors = file_manager.get_file_ancestors(
            partition=partition,
            file_id=file_id,
        )
        assert len(ancestors) == 1
        assert ancestors[0]["file_id"] == file_id, (
            "Only the file itself should be returned — no ancestors to expand with"
        )
625+
626+
583627
class TestFileModelFields:
584628
"""Test that File model correctly handles relationship fields."""
585629

0 commit comments

Comments
 (0)