Skip to content

Commit 2a1d874

Browse files
committed
use stream hashing instead of direct comparisons
1 parent a4f1267 commit 2a1d874

File tree

1 file changed

+10
-12
lines changed

1 file changed

+10
-12
lines changed

airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -153,11 +153,13 @@ def _replace_parent_streams_with_refs(self) -> None:
153153
replace any 'stream' fields in those configs that are dicts and deeply equal to another stream object
154154
with a $ref to the correct stream index.
155155
"""
156-
import copy
157-
158156
streams = self._normalized_manifest.get(STREAMS_TAG, [])
159-
# Use deep copy for comparison to avoid mutation issues
160-
stream_copies = [copy.deepcopy(s) for s in streams]
157+
158+
# Build a hash-to-index mapping for O(1) lookups
159+
stream_hash_to_index = {}
160+
for idx, stream in enumerate(streams):
161+
stream_hash = self._hash_object(stream)
162+
stream_hash_to_index[stream_hash] = idx
161163

162164
for idx, stream in enumerate(streams):
163165
retriever = stream.get("retriever")
@@ -182,14 +184,10 @@ def _replace_parent_streams_with_refs(self) -> None:
182184
continue
183185
stream_ref = parent_config.get("stream")
184186
# Only replace if it's a dict and matches any stream in the manifest
185-
for other_idx, other_stream in enumerate(stream_copies):
186-
if (
187-
stream_ref is not None
188-
and isinstance(stream_ref, dict)
189-
and stream_ref == other_stream
190-
):
191-
parent_config["stream"] = {"$ref": f"#/streams/{other_idx}"}
192-
break
187+
if stream_ref is not None and isinstance(stream_ref, dict):
188+
stream_ref_hash = self._hash_object(stream_ref)
189+
if stream_ref_hash in stream_hash_to_index:
190+
parent_config["stream"] = {"$ref": f"#/streams/{stream_hash_to_index[stream_ref_hash]}"}
193191

194192
def _clean_dangling_fields(self) -> None:
195193
"""

0 commit comments

Comments
 (0)