Skip to content

Commit 50bb81f

Browse files
authored
update datetime parse logic
1 parent bd60307 commit 50bb81f

File tree

1 file changed

+59 -79 lines changed

airbyte_cdk/utils/datetime_helpers.py

Lines changed: 59 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@
8484
from datetime import datetime, timedelta, timezone
8585
from typing import Any, Optional, Union, overload
8686

87-
from dateutil import parser
87+
from dateutil import parser as dateutil_parser
8888
from typing_extensions import Never
8989
from whenever import Instant, LocalDateTime, OffsetDateTime, ZonedDateTime
9090

@@ -358,7 +358,12 @@ def ab_datetime_now() -> AirbyteDateTime:
358358
return AirbyteDateTime.from_datetime(datetime.now(timezone.utc))
359359

360360

361-
def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
361+
def ab_datetime_parse(
362+
dt_str: str | int,
363+
formats: list[str] | None = None,
364+
*,
365+
disallow_other_formats: bool = False,
366+
) -> AirbyteDateTime:
362367
"""Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.
363368
364369
This implementation is as flexible as possible to handle various datetime formats.
@@ -389,89 +394,64 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
389394
>>> ab_datetime_parse("2023-03-14") # Date-only
390395
'2023-03-14T00:00:00+00:00'
391396
"""
392-
try:
393-
# Handle numeric values as Unix timestamps (UTC)
394-
if isinstance(dt_str, int) or (
395-
isinstance(dt_str, str)
396-
and (dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit()))
397-
):
398-
timestamp = int(dt_str)
399-
if timestamp < 0:
400-
raise ValueError("Timestamp cannot be negative")
401-
if len(str(abs(timestamp))) > 10:
402-
raise ValueError("Timestamp value too large")
403-
instant = Instant.from_timestamp(timestamp)
404-
return AirbyteDateTime.from_datetime(instant.py_datetime())
405-
406-
if not isinstance(dt_str, str):
407-
raise ValueError(
408-
f"Could not parse datetime string: expected string or integer, got {type(dt_str)}"
409-
)
410-
411-
# Handle date-only format first
412-
if ":" not in dt_str and dt_str.count("-") == 2 and "/" not in dt_str:
397+
# Handle numeric values as Unix timestamps (UTC)
398+
if isinstance(dt_str, int):
399+
if timestamp < 0:
400+
raise ValueError("Timestamp cannot be negative")
401+
if len(str(abs(timestamp))) > 10:
402+
raise ValueError("Timestamp value too large")
403+
instant = Instant.from_timestamp(timestamp)
404+
return AirbyteDateTime.from_datetime(instant.py_datetime())
405+
406+
if formats:
407+
ex_list: list[Exception] = []
408+
for format in formats:
413409
try:
414-
year, month, day = map(int, dt_str.split("-"))
415-
if not (1 <= month <= 12 and 1 <= day <= 31):
416-
raise ValueError(f"Invalid date format: {dt_str}")
417-
instant = Instant.from_utc(year, month, day, 0, 0, 0)
418-
return AirbyteDateTime.from_datetime(instant.py_datetime())
419-
except (ValueError, TypeError):
420-
raise ValueError(f"Invalid date format: {dt_str}")
421-
422-
# Reject time-only strings without date
423-
if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
424-
raise ValueError(f"Missing date part in datetime string: {dt_str}")
425-
426-
# Try parsing standard ISO/RFC formats with whenever
427-
# Only attempt whenever parsing for specific ISO/RFC formats
428-
if (
429-
isinstance(dt_str, str)
430-
and "/" not in dt_str # Exclude non-standard date separators
431-
and (
432-
# ISO format with T delimiter and Z timezone or +00:00 timezone
433-
(("T" in dt_str) and ("Z" in dt_str or "+" in dt_str or "-" in dt_str))
434-
# ISO format with space delimiter and Z timezone
435-
or (" " in dt_str and "Z" in dt_str)
410+
result = OffsetDateTime.strptime(dt_str, format)
411+
except Exception as ex:
412+
ex_list.append(ex)
413+
else:
414+
# No exception
415+
return result
416+
417+
if disallow_other_formats:
418+
raise ValueError(
419+
f"Could not parse datetime string. {str(ex_list)}"
436420
)
437-
):
438-
# First try Instant.parse_common_iso for UTC formats
439-
try:
440-
instant = Instant.parse_common_iso(dt_str)
441-
return AirbyteDateTime.from_datetime(instant.py_datetime())
442-
except Exception:
443-
pass
444421

445-
# Then try Instant.parse_rfc3339 which is more flexible
446-
try:
447-
instant = Instant.parse_rfc3339(dt_str)
448-
return AirbyteDateTime.from_datetime(instant.py_datetime())
449-
except Exception:
450-
pass
422+
if not isinstance(dt_str, str):
423+
raise ValueError(
424+
f"Could not parse datetime string: expected string or integer, got {type(dt_str)}"
425+
)
451426

452-
# Try OffsetDateTime for non-UTC timezones
453-
try:
454-
offset_dt = OffsetDateTime.parse_common_iso(dt_str)
455-
return AirbyteDateTime.from_datetime(offset_dt.py_datetime())
456-
except Exception:
457-
pass
427+
# Else, value is a string
458428

459-
# Fall back to dateutil for other formats
429+
# Try parsing standard ISO/RFC formats with whenever
430+
try:
431+
instant = Instant.parse_common_iso(dt_str)
432+
return AirbyteDateTime.from_datetime(instant.py_datetime())
433+
except Exception:
434+
pass
435+
436+
# Handle int-like strings
437+
if (
438+
isinstance(dt_str, str) and (
439+
dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit())
440+
)
441+
):
460442
try:
461-
parsed = parser.parse(dt_str)
462-
if parsed.tzinfo is None:
463-
parsed = parsed.replace(tzinfo=timezone.utc)
464-
465-
return AirbyteDateTime.from_datetime(parsed)
466-
except (ValueError, TypeError):
467-
raise ValueError(f"Could not parse datetime string: {dt_str}")
468-
except ValueError as e:
469-
if "Invalid date format:" in str(e):
470-
raise
471-
if "Timestamp cannot be negative" in str(e):
472-
raise
473-
if "Timestamp value too large" in str(e):
474-
raise
443+
return ab_datetime_format(int(dt_str))
444+
except Exception:
445+
pass
446+
447+
# Fall back to dateutil for other formats
448+
try:
449+
parsed = dateutil_parser.parse(dt_str)
450+
if parsed.tzinfo is None:
451+
parsed = parsed.replace(tzinfo=timezone.utc)
452+
453+
return AirbyteDateTime.from_datetime(parsed)
454+
except (ValueError, TypeError):
475455
raise ValueError(f"Could not parse datetime string: {dt_str}")
476456

477457

0 commit comments

Comments (0)