Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 45 additions & 3 deletions airbyte_cdk/utils/datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ def ab_datetime_now() -> AirbyteDateTime:
return AirbyteDateTime.from_datetime(datetime.now(timezone.utc))


def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
def ab_datetime_parse(
dt_str: str | int, formats: list[str] | None = None, disallow_other_formats: bool = False
) -> AirbyteDateTime:
"""Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.

This implementation is as flexible as possible to handle various datetime formats.
Expand All @@ -374,6 +376,10 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
Args:
dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
or other recognizable datetime format.
formats: Optional list of format strings to try before falling back to more
forgiving parsing logic. If provided, each format will be tried in order.
disallow_other_formats: If True, only try the provided formats and raise an error
if none match. If False (default), fall back to more forgiving parsing logic.

Returns:
AirbyteDateTime: A timezone-aware datetime object.
Expand All @@ -388,6 +394,10 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
'2023-03-14T15:00:00+00:00'
>>> ab_datetime_parse("2023-03-14") # Date-only
'2023-03-14T00:00:00+00:00'
>>> ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d", "%Y/%m/%d"]) # With specific formats
'2023-03-14T00:00:00+00:00'
>>> ab_datetime_parse("2023-03-14", formats=["%Y-%m-%d"], disallow_other_formats=True) # Only try specific formats
'2023-03-14T00:00:00+00:00'
"""
try:
# Handle numeric values as Unix timestamps (UTC)
Expand All @@ -408,6 +418,21 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
f"Could not parse datetime string: expected string or integer, got {type(dt_str)}"
)

# Try specific formats first if provided
if formats:
for fmt in formats:
try:
parsed = datetime.strptime(dt_str, fmt)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return AirbyteDateTime.from_datetime(parsed)
except ValueError:
continue

# If disallow_other_formats is True and none of the formats matched, raise an error
if disallow_other_formats:
raise ValueError(f"No format in {formats} matching {dt_str}")

# Handle date-only format next (any explicitly provided formats were already tried above)
if ":" not in dt_str and dt_str.count("-") == 2 and "/" not in dt_str:
try:
Expand Down Expand Up @@ -439,24 +464,41 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
raise
if "Timestamp value too large" in str(e):
raise
if "No format in" in str(e):
raise
raise ValueError(f"Could not parse datetime string: {dt_str}")


def ab_datetime_try_parse(
    dt_str: str | int, formats: list[str] | None = None, disallow_other_formats: bool = False
) -> AirbyteDateTime | None:
    """Try to parse the input as a datetime, failing gracefully instead of raising an exception.

    This is a thin wrapper around `ab_datetime_parse()` that catches parsing errors and
    returns `None` instead of raising an exception.
    The implementation is as flexible as possible to handle various datetime formats.
    Always returns a timezone-aware datetime (defaults to `UTC` if no timezone specified).

    Args:
        dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
            or other recognizable datetime format.
        formats: Optional list of format strings to try before falling back to more
            forgiving parsing logic. If provided, each format will be tried in order.
        disallow_other_formats: If True, only try the provided formats and return None
            if none match. If False (default), fall back to more forgiving parsing logic.

    Returns:
        AirbyteDateTime | None: A timezone-aware datetime object, or None if parsing fails.

    Example:
        >>> ab_datetime_try_parse("2023-03-14T15:09:26Z")  # Returns AirbyteDateTime
        >>> ab_datetime_try_parse("2023-03-14 15:09:26Z")  # Missing 'T' delimiter still parsable
        >>> ab_datetime_try_parse("2023-03-14")  # Returns midnight UTC time
        >>> ab_datetime_try_parse("2023-03-14", formats=["%Y-%m-%d", "%Y/%m/%d"])  # With specific formats
        >>> ab_datetime_try_parse("2023-03-14", formats=["%Y-%m-%d"], disallow_other_formats=True)  # Only try specific formats
    """
    try:
        # Forward the options by keyword so this wrapper keeps working even if
        # `ab_datetime_parse()` gains or reorders optional parameters.
        return ab_datetime_parse(
            dt_str, formats=formats, disallow_other_formats=disallow_other_formats
        )
    except (ValueError, TypeError):
        # Both error types are treated as "could not parse" and reported as None.
        return None

Expand Down
48 changes: 48 additions & 0 deletions unit_tests/utils/test_datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,51 @@ def test_epoch_millis():
# Test roundtrip conversion
dt3 = AirbyteDateTime.from_epoch_millis(dt.to_epoch_millis())
assert dt3 == dt


def test_parse_with_formats():
    """Check that `ab_datetime_parse` honours explicitly supplied format strings."""
    # Each row is (input string, formats to try, expected ISO rendering).
    successful_cases = [
        # A single format that matches.
        ("2023-03-14", ["%Y-%m-%d"], "2023-03-14T00:00:00+00:00"),
        # Several formats, the first one matches.
        ("2023-03-14", ["%Y-%m-%d", "%Y/%m/%d"], "2023-03-14T00:00:00+00:00"),
        # Several formats, the second one matches.
        ("2023/03/14", ["%Y-%m-%d", "%Y/%m/%d"], "2023-03-14T00:00:00+00:00"),
        # No format matches, but the default lenient fallback still parses the value.
        ("2023-03-14T15:09:26Z", ["%Y-%m-%d", "%Y/%m/%d"], "2023-03-14T15:09:26+00:00"),
    ]
    for raw_value, candidate_formats, expected in successful_cases:
        parsed = ab_datetime_parse(raw_value, formats=candidate_formats)
        assert str(parsed) == expected

    # With the fallback disabled, an input matching none of the formats must raise.
    strict_formats = ["%Y-%m-%d", "%Y/%m/%d"]
    with pytest.raises(ValueError, match="No format in"):
        ab_datetime_parse(
            "2023-03-14T15:09:26Z", formats=strict_formats, disallow_other_formats=True
        )


def test_try_parse_with_formats():
    """Check `ab_datetime_try_parse` with `formats` and `disallow_other_formats`."""
    # Each row is (input string, formats to try, expected ISO rendering).
    successful_cases = [
        # A single format that matches.
        ("2023-03-14", ["%Y-%m-%d"], "2023-03-14T00:00:00+00:00"),
        # Several formats, the second one matches.
        ("2023/03/14", ["%Y-%m-%d", "%Y/%m/%d"], "2023-03-14T00:00:00+00:00"),
        # No format matches, but the default lenient fallback still parses the value.
        ("2023-03-14T15:09:26Z", ["%Y-%m-%d", "%Y/%m/%d"], "2023-03-14T15:09:26+00:00"),
    ]
    for raw_value, candidate_formats, expected in successful_cases:
        parsed = ab_datetime_try_parse(raw_value, formats=candidate_formats)
        assert str(parsed) == expected

    # With the fallback disabled, an unmatched input yields None instead of raising.
    strict_result = ab_datetime_try_parse(
        "2023-03-14T15:09:26Z", formats=["%Y-%m-%d", "%Y/%m/%d"], disallow_other_formats=True
    )
    assert strict_result is None
Loading