Skip to content

Commit 81bd744

Browse files
feat: add MinMaxDatetime robust parsing and UI field hiding
- Add robust datetime parsing fallback to MinMaxDatetime using ab_datetime_try_parse
- Hide cursor_datetime_formats and datetime_format fields in UI with airbyte_hidden: true
- Expand parametrized tests with robust fallback scenarios that now succeed
- Maintain backward compatibility while simplifying configuration
- Keep datetime_format for API output formatting, decouple from cursor storage

Co-Authored-By: AJ Steers <[email protected]>
1 parent 9b2d88b commit 81bd744

File tree

3 files changed

+62
-8
lines changed

3 files changed

+62
-8
lines changed

airbyte_cdk/sources/declarative/datetime/min_max_datetime.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
1010
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
11+
from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse
1112

1213

1314
@dataclass
@@ -65,15 +66,23 @@ def get_datetime(
6566
if not datetime_format:
6667
datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
6768

68-
time = self._parser.parse(
69-
str(
70-
self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval"
71-
config,
72-
**additional_parameters,
69+
datetime_str = str(
70+
self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval"
71+
config,
72+
**additional_parameters,
73+
)
74+
)
75+
76+
try:
77+
time = self._parser.parse(datetime_str, datetime_format)
78+
except ValueError:
79+
parsed_dt = ab_datetime_try_parse(datetime_str)
80+
if parsed_dt is not None:
81+
time = parsed_dt
82+
else:
83+
raise ValueError(
84+
f"Unable to parse datetime '{datetime_str}' with format '{datetime_format}' or robust parsing"
7385
)
74-
),
75-
datetime_format,
76-
) # type: ignore # datetime is always cast to an interpolated string
7786

7887
if self.min_datetime:
7988
min_time = str(self.min_datetime.eval(config, **additional_parameters)) # type: ignore # min_datetime is always cast to an interpolated string

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,7 @@ definitions:
943943
type: array
944944
items:
945945
type: string
946+
airbyte_hidden: true
946947
description: |
947948
The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it.
948949
If none of the specified formats match, the system will attempt to parse the value using robust datetime parsing that handles most ISO8601/RFC3339 compliant formats.
@@ -2805,6 +2806,7 @@ definitions:
28052806
- "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
28062807
datetime_format:
28072808
title: Datetime Format
2809+
airbyte_hidden: true
28082810
description: |
28092811
Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
28102812
* **%s**: Epoch unix timestamp - `1686218963`

unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,30 @@ def test_parse_date_legacy_merge_datetime_format_in_cursor_datetime_format(
997997
["%Y-%m-%dT%H:%M:%S.%f%z", "%s"],
998998
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
999999
),
1000+
(
1001+
"test_robust_fallback_z_suffix",
1002+
"2021-01-01T00:00:00Z",
1003+
["%Y-%m-%d"],
1004+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
1005+
),
1006+
(
1007+
"test_robust_fallback_iso_with_colon_tz",
1008+
"2021-01-01T00:00:00+00:00",
1009+
["%Y-%m-%d"],
1010+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
1011+
),
1012+
(
1013+
"test_robust_fallback_date_only",
1014+
"2021-01-01",
1015+
["%s"],
1016+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
1017+
),
1018+
(
1019+
"test_robust_fallback_unix_timestamp_string",
1020+
"1609459200",
1021+
["%Y-%m-%d"],
1022+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
1023+
),
10001024
],
10011025
)
10021026
def test_parse_date(test_name, input_date, date_formats, expected_output_date):
@@ -1024,6 +1048,25 @@ def test_given_unknown_format_when_parse_date_then_raise_error():
10241048
slicer.parse_date("not-a-valid-datetime-string")
10251049

10261050

1051+
def test_minmax_datetime_robust_fallback():
    """Check that MinMaxDatetime still parses values its `datetime_format` cannot handle.

    Every input below is paired with a format that does not match it, so a
    successful result can only come from the fallback parsing path.
    """
    from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime

    # All inputs denote the same instant; this is the same expected value the
    # robust-fallback cases of `test_parse_date` use.
    expected = datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)

    test_cases = [
        ("2021-01-01T00:00:00Z", "%Y-%m-%d"),
        ("2021-01-01T00:00:00+00:00", "%Y-%m-%d"),
        ("1609459200", "%Y-%m-%d"),
    ]

    for input_date, incompatible_format in test_cases:
        min_max_dt = MinMaxDatetime(
            datetime=input_date, datetime_format=incompatible_format, parameters={}
        )
        result = min_max_dt.get_datetime({})
        # Compare the whole aware datetime rather than only year/month/day, so a
        # wrong time of day or UTC offset from the fallback is caught as well.
        assert result == expected, (
            f"fallback parsing of {input_date!r} with format "
            f"{incompatible_format!r} returned {result!r}"
        )
1068+
1069+
10271070
@pytest.mark.parametrize(
10281071
"test_name, input_dt, datetimeformat, datetimeformat_granularity, expected_output",
10291072
[

0 commit comments

Comments
 (0)