@@ -257,16 +257,16 @@ def _handle_overlaps(
257257 existing_file = cast ("FileNode" , existing_node )
258258 new_file = cast ("FileNode" , new_node )
259259
260- if existing_file ["hash" ] != new_file ["hash" ]:
261- # Find the parent directory and update the file
260+ # Always propagate URIs between files with identical hash
261+ if existing_file ["hash" ] == new_file ["hash" ]:
262+ self ._propagate_uri (existing_file , new_file )
263+ merged = True
264+ else :
265+ # Different hash - find the parent directory and update the file
262266 for tree in self ._merged_trees :
263267 if self ._update_file_in_tree (tree , existing_file , new_file ):
264268 merged = True
265269 break
266- else :
267- # Same hash - ensure URI is propagated
268- self ._propagate_uri (existing_file , new_file )
269- merged = True
270270
271271 return merged
272272
@@ -367,6 +367,26 @@ def _build_maps(self, new_tree: DirectoryNode | None = None) -> None:
367367 else :
368368 for tree in self ._merged_trees :
369369 self ._build_path_and_hash_maps (tree , self ._path_map , self ._hash_map )
370+ self ._propagate_uris_by_hash ()
371+
372+ def _propagate_uris_by_hash (self ) -> None :
373+ """
374+ Ensure all files with the same hash have the same URI.
375+
376+ This function ensures that if multiple file nodes have the same hash,
377+ but only some have URIs, the URI is propagated to all instances.
378+ """
379+ for file_nodes in self ._hash_map .values ():
380+ if len (file_nodes ) <= 1 :
381+ continue
382+
383+ uri = next ((node ["uri" ] for node in file_nodes if node ["uri" ]), "" )
384+ if not uri :
385+ continue
386+
387+ for node in file_nodes :
388+ if not node ["uri" ]:
389+ node ["uri" ] = uri
370390
371391 def _build_path_and_hash_maps (
372392 self ,
@@ -530,7 +550,11 @@ def _merge_file_child(
530550 if existing_child ["type" ] == "file" :
531551 # Propagate URI if needed
532552 self ._propagate_uri (cast ("FileNode" , existing_child ), source_file )
533- # Keep both files since they're at different paths
553+
554+ if source_file ["uri" ] and file_hash in self ._hash_map :
555+ for other_file in self ._hash_map [file_hash ]:
556+ if not other_file ["uri" ]:
557+ other_file ["uri" ] = source_file ["uri" ]
534558 target_dir ["children" ].append (source_file )
535559 else :
536560 # File only in source - add to target
@@ -562,9 +586,9 @@ def _update_directory_hash(self, dir_node: DirectoryNode) -> str:
562586
563587 for child in dir_node ["children" ]:
564588 if child ["type" ] == "file" :
565- child_hashes .append (cast (FileNode , child )["hash" ]) # noqa: TC006
589+ child_hashes .append (cast (" FileNode" , child )["hash" ])
566590 else :
567- child_hash = self ._update_directory_hash (cast (DirectoryNode , child )) # noqa: TC006
591+ child_hash = self ._update_directory_hash (cast (" DirectoryNode" , child ))
568592 child_hashes .append (child_hash )
569593
570594 child_hashes .sort () # Ensure consistent hash regardless of order
0 commit comments