@@ -143,6 +143,7 @@ def get_file_ancestors(self, partition: str, file_id: str, max_ancestor_depth: i
143143 "parent_id" : row .parent_id ,
144144 }
145145 for row in rows
146+ if row .relationship_id is not None # Only include files that are part of a relationship
146147 ]
147148
148149 def get_ancestor_file_ids (self , partition : str , file_id : str , max_ancestor_depth : int | None = None ) -> list [str ]:
@@ -322,11 +323,12 @@ class TestGetFileAncestors:
322323 """Test retrieving ancestor chain for a file."""
323324
324325 def test_get_file_ancestors_single_file (self , file_manager ):
325- """Test that a file with no parent returns only itself."""
326+ """Test that a file with no parent but with a relationship_id returns only itself."""
326327 file_manager .add_file_to_partition (
327328 partition = "test_partition" ,
328329 file_id = "root_email" ,
329330 file_metadata = {"filename" : "root.eml" },
331+ relationship_id = "thread_single" ,
330332 )
331333
332334 ancestors = file_manager .get_file_ancestors (
@@ -378,24 +380,28 @@ def test_get_file_ancestors_returns_ordered_path(self, file_manager):
378380 partition = "test_partition" ,
379381 file_id = "file_a" ,
380382 file_metadata = {"filename" : "a.txt" },
383+ relationship_id = "thread_ordered" ,
381384 )
382385 file_manager .add_file_to_partition (
383386 partition = "test_partition" ,
384387 file_id = "file_b" ,
385388 file_metadata = {"filename" : "b.txt" },
386389 parent_id = "file_a" ,
390+ relationship_id = "thread_ordered" ,
387391 )
388392 file_manager .add_file_to_partition (
389393 partition = "test_partition" ,
390394 file_id = "file_c" ,
391395 file_metadata = {"filename" : "c.txt" },
392396 parent_id = "file_b" ,
397+ relationship_id = "thread_ordered" ,
393398 )
394399 file_manager .add_file_to_partition (
395400 partition = "test_partition" ,
396401 file_id = "file_d" ,
397402 file_metadata = {"filename" : "d.txt" },
398403 parent_id = "file_c" ,
404+ relationship_id = "thread_ordered" ,
399405 )
400406
401407 ancestors = file_manager .get_file_ancestors (
@@ -424,12 +430,14 @@ def test_get_ancestor_file_ids(self, file_manager):
424430 partition = "test_partition" ,
425431 file_id = "parent_file" ,
426432 file_metadata = {"filename" : "parent.txt" },
433+ relationship_id = "thread_ids" ,
427434 )
428435 file_manager .add_file_to_partition (
429436 partition = "test_partition" ,
430437 file_id = "child_file" ,
431438 file_metadata = {"filename" : "child.txt" },
432439 parent_id = "parent_file" ,
440+ relationship_id = "thread_ids" ,
433441 )
434442
435443 ancestor_ids = file_manager .get_ancestor_file_ids (
@@ -447,13 +455,15 @@ def test_get_file_ancestors_max_ancestor_depth_none_returns_all(self, file_manag
447455 partition = "test_partition" ,
448456 file_id = "level_0" ,
449457 file_metadata = {"filename" : "root.txt" },
458+ relationship_id = "thread_depth_none" ,
450459 )
451460 for i in range (1 , 6 ):
452461 file_manager .add_file_to_partition (
453462 partition = "test_partition" ,
454463 file_id = f"level_{ i } " ,
455464 file_metadata = {"filename" : f"level_{ i } .txt" },
456465 parent_id = f"level_{ i - 1 } " ,
466+ relationship_id = "thread_depth_none" ,
457467 )
458468
459469 # Without max_ancestor_depth (None), should return all 6 levels
@@ -474,13 +484,15 @@ def test_get_file_ancestors_max_ancestor_depth_limits_traversal(self, file_manag
474484 partition = "test_partition" ,
475485 file_id = "node_0" ,
476486 file_metadata = {"filename" : "root.txt" },
487+ relationship_id = "thread_depth_limit" ,
477488 )
478489 for i in range (1 , 6 ):
479490 file_manager .add_file_to_partition (
480491 partition = "test_partition" ,
481492 file_id = f"node_{ i } " ,
482493 file_metadata = {"filename" : f"node_{ i } .txt" },
483494 parent_id = f"node_{ i - 1 } " ,
495+ relationship_id = "thread_depth_limit" ,
484496 )
485497
486498 # With max_ancestor_depth=2, should return target (depth 0) + 2 ancestors
@@ -501,12 +513,14 @@ def test_get_file_ancestors_max_ancestor_depth_zero_returns_only_target(self, fi
501513 partition = "test_partition" ,
502514 file_id = "root" ,
503515 file_metadata = {"filename" : "root.txt" },
516+ relationship_id = "thread_depth_zero" ,
504517 )
505518 file_manager .add_file_to_partition (
506519 partition = "test_partition" ,
507520 file_id = "child" ,
508521 file_metadata = {"filename" : "child.txt" },
509522 parent_id = "root" ,
523+ relationship_id = "thread_depth_zero" ,
510524 )
511525
512526 # max_ancestor_depth=0 means no traversal beyond the target
@@ -527,18 +541,21 @@ def test_get_file_ancestors_max_ancestor_depth_exceeds_chain_length(self, file_m
527541 partition = "test_partition" ,
528542 file_id = "short_0" ,
529543 file_metadata = {"filename" : "a.txt" },
544+ relationship_id = "thread_short" ,
530545 )
531546 file_manager .add_file_to_partition (
532547 partition = "test_partition" ,
533548 file_id = "short_1" ,
534549 file_metadata = {"filename" : "b.txt" },
535550 parent_id = "short_0" ,
551+ relationship_id = "thread_short" ,
536552 )
537553 file_manager .add_file_to_partition (
538554 partition = "test_partition" ,
539555 file_id = "short_2" ,
540556 file_metadata = {"filename" : "c.txt" },
541557 parent_id = "short_1" ,
558+ relationship_id = "thread_short" ,
542559 )
543560
544561 # max_ancestor_depth=100 but chain is only 3 levels
@@ -560,13 +577,15 @@ def test_get_ancestor_file_ids_with_max_ancestor_depth(self, file_manager):
560577 partition = "test_partition" ,
561578 file_id = "chain_0" ,
562579 file_metadata = {"filename" : "a.txt" },
580+ relationship_id = "thread_chain" ,
563581 )
564582 for i in range (1 , 4 ):
565583 file_manager .add_file_to_partition (
566584 partition = "test_partition" ,
567585 file_id = f"chain_{ i } " ,
568586 file_metadata = {"filename" : f"{ chr (97 + i )} .txt" },
569587 parent_id = f"chain_{ i - 1 } " ,
588+ relationship_id = "thread_chain" ,
570589 )
571590
572591 # With max_ancestor_depth=1, should get target + 1 ancestor
@@ -580,6 +599,49 @@ def test_get_ancestor_file_ids_with_max_ancestor_depth(self, file_manager):
580599 assert ancestor_ids == ["chain_2" , "chain_3" ]
581600
582601
602+ class TestStandaloneFileNoExpansion :
603+ """Test that a file indexed without relationship_id yields no additional chunks
604+ when include_related and include_ancestors are both active."""
605+
606+ def test_no_extra_chunks_for_file_without_relationship_id (self , file_manager ):
607+ """A standalone file (no relationship_id, no parent_id) must not bring
608+ additional files when both include_related and include_ancestors are activated.
609+
610+ Mirrors the logic in _expand_with_related_chunks:
611+ - include_related: the guard `metadata.get("relationship_id")` is falsy,
612+ so no related lookup is issued and the related task set stays empty.
613+ - include_ancestors: get_file_ancestors filters out rows whose relationship_id
614+ is None, so it returns an empty list for this file — nothing new is added.
615+ """
616+ file_manager .add_file_to_partition (
617+ partition = "test_partition" ,
618+ file_id = "standalone" ,
619+ file_metadata = {"filename" : "standalone.pdf" },
620+ # No relationship_id, no parent_id
621+ )
622+
623+ # Sanity check: no relationship is named after the file_id, so a lookup by
624+ # "standalone" returns nothing. The stored relationship_id is checked below.
625+ files = file_manager .get_files_by_relationship (
626+ partition = "test_partition" ,
627+ relationship_id = "standalone" , # non-existent → empty
628+ )
629+ assert files == [], "No files should share a relationship with a standalone file"
630+
631+ with file_manager .Session () as session :
632+ row = session .execute (text ("SELECT relationship_id FROM files WHERE file_id = 'standalone'" )).fetchone ()
633+ assert not row [0 ], "relationship_id must be falsy so include_related is skipped"
634+
635+ ancestors = file_manager .get_file_ancestors (
636+ partition = "test_partition" ,
637+ file_id = "standalone" ,
638+ )
639+ assert len (ancestors ) == 0 , (
640+ "Standalone file has no relationship_id, so ancestor list must be empty — "
641+ "the relationship_id filter in get_file_ancestors excludes it"
642+ )
643+
644+
583645class TestFileModelFields :
584646 """Test that File model correctly handles relationship fields."""
585647
0 commit comments