Skip to content

Commit c443c14

Browse files
authored
Merge pull request #23 from dreadnode/users/raja/fix-artifact-few-missing-object-uris
fix: A Few Missing S3 URIs While Merging Trees
2 parents ec260df + e8d4133 commit c443c14

File tree

1 file changed

+33
-9
lines changed

1 file changed

+33
-9
lines changed

dreadnode/artifact/merger.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -257,16 +257,16 @@ def _handle_overlaps(
257257
existing_file = cast("FileNode", existing_node)
258258
new_file = cast("FileNode", new_node)
259259

260-
if existing_file["hash"] != new_file["hash"]:
261-
# Find the parent directory and update the file
260+
# Always propagate URIs between files with identical hash
261+
if existing_file["hash"] == new_file["hash"]:
262+
self._propagate_uri(existing_file, new_file)
263+
merged = True
264+
else:
265+
# Different hash - find the parent directory and update the file
262266
for tree in self._merged_trees:
263267
if self._update_file_in_tree(tree, existing_file, new_file):
264268
merged = True
265269
break
266-
else:
267-
# Same hash - ensure URI is propagated
268-
self._propagate_uri(existing_file, new_file)
269-
merged = True
270270

271271
return merged
272272

@@ -367,6 +367,26 @@ def _build_maps(self, new_tree: DirectoryNode | None = None) -> None:
367367
else:
368368
for tree in self._merged_trees:
369369
self._build_path_and_hash_maps(tree, self._path_map, self._hash_map)
370+
self._propagate_uris_by_hash()
371+
372+
def _propagate_uris_by_hash(self) -> None:
373+
"""
374+
Ensure all files with the same hash have the same URI.
375+
376+
This function ensures that if multiple file nodes have the same hash,
377+
but only some have URIs, the URI is propagated to all instances.
378+
"""
379+
for file_nodes in self._hash_map.values():
380+
if len(file_nodes) <= 1:
381+
continue
382+
383+
uri = next((node["uri"] for node in file_nodes if node["uri"]), "")
384+
if not uri:
385+
continue
386+
387+
for node in file_nodes:
388+
if not node["uri"]:
389+
node["uri"] = uri
370390

371391
def _build_path_and_hash_maps(
372392
self,
@@ -530,7 +550,11 @@ def _merge_file_child(
530550
if existing_child["type"] == "file":
531551
# Propagate URI if needed
532552
self._propagate_uri(cast("FileNode", existing_child), source_file)
533-
# Keep both files since they're at different paths
553+
554+
if source_file["uri"] and file_hash in self._hash_map:
555+
for other_file in self._hash_map[file_hash]:
556+
if not other_file["uri"]:
557+
other_file["uri"] = source_file["uri"]
534558
target_dir["children"].append(source_file)
535559
else:
536560
# File only in source - add to target
@@ -562,9 +586,9 @@ def _update_directory_hash(self, dir_node: DirectoryNode) -> str:
562586

563587
for child in dir_node["children"]:
564588
if child["type"] == "file":
565-
child_hashes.append(cast(FileNode, child)["hash"]) # noqa: TC006
589+
child_hashes.append(cast("FileNode", child)["hash"])
566590
else:
567-
child_hash = self._update_directory_hash(cast(DirectoryNode, child)) # noqa: TC006
591+
child_hash = self._update_directory_hash(cast("DirectoryNode", child))
568592
child_hashes.append(child_hash)
569593

570594
child_hashes.sort() # Ensure consistent hash regardless of order

0 commit comments

Comments
 (0)