Skip to content

Commit 260198f

Browse files
feat: add support for multiple formats in datetime helper functions
Co-Authored-By: Aaron <AJ> Steers <[email protected]>
1 parent fe2f9a5 commit 260198f

File tree

2 files changed

+82
-3
lines changed

2 files changed

+82
-3
lines changed

airbyte_cdk/utils/datetime_helpers.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def ab_datetime_now() -> AirbyteDateTime:
358358
return AirbyteDateTime.from_datetime(datetime.now(timezone.utc))
359359

360360

361-
def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
361+
def ab_datetime_parse(dt_str: str | int, formats: list[str] | None = None, disallow_other_formats: bool = False) -> AirbyteDateTime:
362362
"""Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.
363363
364364
This implementation is as flexible as possible to handle various datetime formats.
@@ -374,6 +374,10 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
374374
Args:
375375
dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
376376
or other recognizable datetime format.
377+
formats: Optional list of format strings to try before falling back to more
378+
forgiving parsing logic. If provided, each format will be tried in order.
379+
disallow_other_formats: If True, only try the provided formats and raise an error
380+
if none match. If False (default), fall back to more forgiving parsing logic.
377381
378382
Returns:
379383
AirbyteDateTime: A timezone-aware datetime object.
@@ -388,6 +392,10 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
388392
'2023-03-14T15:00:00+00:00'
389393
>>> ab_datetime_parse("2023-03-14") # Date-only
390394
'2023-03-14T00:00:00+00:00'
395+
>>> ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d", "%Y/%m/%d"]) # With specific formats
396+
'2023-03-14T00:00:00+00:00'
397+
>>> ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d"], disallow_other_formats=True) # Only try specific formats
398+
'2023-03-14T00:00:00+00:00'
391399
"""
392400
try:
393401
# Handle numeric values as Unix timestamps (UTC)
@@ -408,6 +416,21 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
408416
f"Could not parse datetime string: expected string or integer, got {type(dt_str)}"
409417
)
410418

419+
# Try specific formats first if provided
420+
if formats:
421+
for fmt in formats:
422+
try:
423+
parsed = datetime.strptime(dt_str, fmt)
424+
if parsed.tzinfo is None:
425+
parsed = parsed.replace(tzinfo=timezone.utc)
426+
return AirbyteDateTime.from_datetime(parsed)
427+
except ValueError:
428+
continue
429+
430+
# If disallow_other_formats is True and none of the formats matched, raise an error
431+
if disallow_other_formats:
432+
raise ValueError(f"No format in {formats} matching {dt_str}")
433+
411434
# Handle date-only format first
412435
if ":" not in dt_str and dt_str.count("-") == 2 and "/" not in dt_str:
413436
try:
@@ -439,24 +462,39 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
439462
raise
440463
if "Timestamp value too large" in str(e):
441464
raise
465+
if "No format in" in str(e):
466+
raise
442467
raise ValueError(f"Could not parse datetime string: {dt_str}")
443468

444469

445-
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
470+
def ab_datetime_try_parse(
    dt_str: str | int,
    formats: list[str] | None = None,
    disallow_other_formats: bool = False,
) -> AirbyteDateTime | None:
    """Try to parse the input as a datetime, returning `None` instead of raising.

    This is a thin wrapper around `ab_datetime_parse()` that catches parsing errors
    (`ValueError` and `TypeError`) and returns `None` in their place. All arguments
    are forwarded unchanged, so the accepted inputs are the same as for
    `ab_datetime_parse()`.
    Always returns a timezone-aware datetime (defaults to `UTC` if no timezone specified).

    Args:
        dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
            or other recognizable datetime format.
        formats: Optional list of format strings to try before falling back to more
            forgiving parsing logic. If provided, each format will be tried in order.
        disallow_other_formats: If True, only try the provided formats and return None
            if none match. If False (default), fall back to more forgiving parsing logic.

    Returns:
        AirbyteDateTime | None: A timezone-aware datetime object, or None if parsing fails.

    Example:
        >>> ab_datetime_try_parse("2023-03-14T15:09:26Z")  # Returns AirbyteDateTime
        >>> ab_datetime_try_parse("2023-03-14 15:09:26Z")  # Missing 'T' delimiter still parsable
        >>> ab_datetime_try_parse("2023-03-14")  # Returns midnight UTC time
        >>> ab_datetime_try_parse("2023-03-14", formats=["%Y-%m-%d", "%Y/%m/%d"])  # With specific formats
        >>> ab_datetime_try_parse("2023-03-14", formats=["%Y-%m-%d"], disallow_other_formats=True)  # Only try specific formats
        >>> ab_datetime_try_parse("2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"], disallow_other_formats=True) is None
        True
    """
    try:
        # Forward the optional arguments by keyword so this wrapper keeps working
        # even if `ab_datetime_parse()` reorders or extends its optional parameters.
        return ab_datetime_parse(
            dt_str,
            formats=formats,
            disallow_other_formats=disallow_other_formats,
        )
    except (ValueError, TypeError):
        # Best-effort contract: any parse failure is reported as `None`.
        return None
462500

unit_tests/utils/test_datetime_helpers.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,44 @@ def test_epoch_millis():
262262
# Test roundtrip conversion
263263
dt3 = AirbyteDateTime.from_epoch_millis(dt.to_epoch_millis())
264264
assert dt3 == dt
265+
266+
267+
def test_parse_with_formats():
268+
"""Test parsing with specific formats."""
269+
# Test with a single format that matches
270+
dt = ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d"])
271+
assert str(dt) == "2023-03-14T00:00:00+00:00"
272+
273+
# Test with multiple formats where the first one matches
274+
dt = ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d", "%Y/%m/%d"])
275+
assert str(dt) == "2023-03-14T00:00:00+00:00"
276+
277+
# Test with multiple formats where the second one matches
278+
dt = ab_datetime_parse("2023/03/14", formats=["%Y-%m-%d", "%Y/%m/%d"])
279+
assert str(dt) == "2023-03-14T00:00:00+00:00"
280+
281+
# Test with disallow_other_formats=False (default) where no format matches but fallback parsing works
282+
dt = ab_datetime_parse("2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"])
283+
assert str(dt) == "2023-03-14T15:09:26+00:00"
284+
285+
# Test with disallow_other_formats=True where no format matches
286+
with pytest.raises(ValueError, match="No format in"):
287+
ab_datetime_parse("2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"], disallow_other_formats=True)
288+
289+
290+
def test_try_parse_with_formats():
291+
"""Test try_parse with formats and disallow_other_formats parameter."""
292+
# Test try_parse with formats
293+
dt = ab_datetime_try_parse("2023-03-14", formats=["%Y-%m-%d"])
294+
assert str(dt) == "2023-03-14T00:00:00+00:00"
295+
296+
# Test try_parse with multiple formats where the second one matches
297+
dt = ab_datetime_try_parse("2023/03/14", formats=["%Y-%m-%d", "%Y/%m/%d"])
298+
assert str(dt) == "2023-03-14T00:00:00+00:00"
299+
300+
# Test try_parse with disallow_other_formats=False (default) where no format matches but fallback parsing works
301+
dt = ab_datetime_try_parse("2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"])
302+
assert str(dt) == "2023-03-14T15:09:26+00:00"
303+
304+
# Test try_parse with disallow_other_formats=True where no format matches
305+
assert ab_datetime_try_parse("2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"], disallow_other_formats=True) is None

0 commit comments

Comments
 (0)