Skip to content

Commit 826d689

Browse files
sukantaroy01ajitpratap0uditchaudhary
authored
Fix: Resolve issue where PyAirbyte would fail if property names contain the dot character ('.'), e.g. with source-google-ads (#343)
Co-authored-by: Ajit Pratap Singh <18012955+ajitpratap0@users.noreply.github.com> Co-authored-by: uditchaudhary <udit.chaudhary@pixis.ai>
1 parent 1775cb4 commit 826d689

File tree

4 files changed

+54
-6
lines changed

4 files changed

+54
-6
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,6 @@ dmypy.json
130130

131131
# Cython debug symbols
132132
cython_debug/
133+
134+
# Pycharm
135+
.idea

airbyte/shared/catalog_providers.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717

1818
from airbyte import exceptions as exc
19+
from airbyte._util.name_normalizers import LowerCaseNormalizer
1920
from airbyte.strategies import WriteMethod, WriteStrategy
2021

2122

@@ -149,13 +150,24 @@ def get_primary_keys(
149150
if not pks:
150151
return []
151152

152-
joined_pks = [".".join(pk) for pk in pks]
153-
for pk in joined_pks:
154-
if "." in pk:
155-
msg = f"Nested primary keys are not yet supported. Found: {pk}"
156-
raise NotImplementedError(msg)
153+
normalized_pks: list[list[str]] = [
154+
[LowerCaseNormalizer.normalize(c) for c in pk] for pk in pks
155+
]
157156

158-
return joined_pks
157+
for pk_nodes in normalized_pks:
158+
if len(pk_nodes) != 1:
159+
raise exc.AirbyteError(
160+
message=(
161+
"Nested primary keys are not supported. "
162+
"Each PK column should have exactly one node. "
163+
),
164+
context={
165+
"stream_name": stream_name,
166+
"primary_key_nodes": pk_nodes,
167+
},
168+
)
169+
170+
return [pk_nodes[0] for pk_nodes in normalized_pks]
159171

160172
def get_cursor_key(
161173
self,

tests/integration_tests/fixtures/source-test/source_test/run.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,23 @@
6565
},
6666
},
6767
},
68+
{
69+
"name": "primary-key-with-dot",
70+
"description": "This stream has a primary key with dot similar what is there in GAds.",
71+
"source_defined_primary_key": [["table1.Column1"]],
72+
"source_defined_cursor": False,
73+
"supported_sync_modes": ["full_refresh"],
74+
"json_schema": {
75+
"$schema": "http://json-schema.org/draft-07/schema#",
76+
"type": "object",
77+
"properties": {
78+
"table1.Column1": {"type": "string"},
79+
"table1.Column2": {"type": "number"},
80+
"table1.empty_column": {"type": "string"},
81+
"table1.big_number": {"type": "number"},
82+
},
83+
},
84+
},
6885
]
6986
},
7087
}
@@ -137,6 +154,19 @@
137154
"emitted_at": 1704067200,
138155
},
139156
}
157+
sample_record_primary_key_with_dot = {
158+
"type": "RECORD",
159+
"record": {
160+
"data": {
161+
"table1.Column1": "value1",
162+
"table1.Column2": 1,
163+
"table1.empty_column": None,
164+
"table1.big_number": 1234567890123456,
165+
},
166+
"stream": "primary-key-with-dot",
167+
"emitted_at": 1704067200,
168+
},
169+
}
140170

141171

142172
def parse_args():
@@ -184,3 +214,5 @@ def run():
184214
print(json.dumps(sample_record2_stream1))
185215
elif stream["stream"]["name"] == "stream2":
186216
print(json.dumps(sample_record_stream2))
217+
elif stream["stream"]["name"] == "primary-key-with-dot":
218+
print(json.dumps(sample_record_primary_key_with_dot))

tests/unit_tests/test_text_normalization.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def test_case_insensitive_w_pretty_keys(
211211
("", "", True),
212212
("*", "", True),
213213
("!@$", "", True),
214+
("some.col", "some_col", False),
214215
],
215216
)
216217
def test_lower_case_normalizer(

0 commit comments

Comments
 (0)