|
84 | 84 | from datetime import datetime, timedelta, timezone |
85 | 85 | from typing import Any, Optional, Union, overload |
86 | 86 |
|
87 | | -from dateutil import parser |
| 87 | +from dateutil import parser as dateutil_parser |
88 | 88 | from typing_extensions import Never |
89 | 89 | from whenever import Instant, LocalDateTime, OffsetDateTime, ZonedDateTime |
90 | 90 |
|
@@ -358,7 +358,12 @@ def ab_datetime_now() -> AirbyteDateTime: |
358 | 358 | return AirbyteDateTime.from_datetime(datetime.now(timezone.utc)) |
359 | 359 |
|
360 | 360 |
|
361 | | -def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime: |
| 361 | +def ab_datetime_parse( |
| 362 | + dt_str: str | int, |
| 363 | + formats: list[str] | None = None, |
| 364 | + *, |
| 365 | + disallow_other_formats: bool = False, |
| 366 | +) -> AirbyteDateTime: |
362 | 367 | """Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness. |
363 | 368 |
|
364 | 369 | This implementation is as flexible as possible to handle various datetime formats. |
@@ -389,89 +394,64 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime: |
389 | 394 | >>> ab_datetime_parse("2023-03-14") # Date-only |
390 | 395 | '2023-03-14T00:00:00+00:00' |
391 | 396 | """ |
392 | | - try: |
393 | | - # Handle numeric values as Unix timestamps (UTC) |
394 | | - if isinstance(dt_str, int) or ( |
395 | | - isinstance(dt_str, str) |
396 | | - and (dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit())) |
397 | | - ): |
398 | | - timestamp = int(dt_str) |
399 | | - if timestamp < 0: |
400 | | - raise ValueError("Timestamp cannot be negative") |
401 | | - if len(str(abs(timestamp))) > 10: |
402 | | - raise ValueError("Timestamp value too large") |
403 | | - instant = Instant.from_timestamp(timestamp) |
404 | | - return AirbyteDateTime.from_datetime(instant.py_datetime()) |
405 | | - |
406 | | - if not isinstance(dt_str, str): |
407 | | - raise ValueError( |
408 | | - f"Could not parse datetime string: expected string or integer, got {type(dt_str)}" |
409 | | - ) |
410 | | - |
411 | | - # Handle date-only format first |
412 | | - if ":" not in dt_str and dt_str.count("-") == 2 and "/" not in dt_str: |
| 397 | + # Handle numeric values as Unix timestamps (UTC) |
| 398 | + if isinstance(dt_str, int): |
| 399 | + if timestamp < 0: |
| 400 | + raise ValueError("Timestamp cannot be negative") |
| 401 | + if len(str(abs(timestamp))) > 10: |
| 402 | + raise ValueError("Timestamp value too large") |
| 403 | + instant = Instant.from_timestamp(timestamp) |
| 404 | + return AirbyteDateTime.from_datetime(instant.py_datetime()) |
| 405 | + |
| 406 | + if formats: |
| 407 | + ex_list: list[Exception] = [] |
| 408 | + for format in formats: |
413 | 409 | try: |
414 | | - year, month, day = map(int, dt_str.split("-")) |
415 | | - if not (1 <= month <= 12 and 1 <= day <= 31): |
416 | | - raise ValueError(f"Invalid date format: {dt_str}") |
417 | | - instant = Instant.from_utc(year, month, day, 0, 0, 0) |
418 | | - return AirbyteDateTime.from_datetime(instant.py_datetime()) |
419 | | - except (ValueError, TypeError): |
420 | | - raise ValueError(f"Invalid date format: {dt_str}") |
421 | | - |
422 | | - # Reject time-only strings without date |
423 | | - if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2: |
424 | | - raise ValueError(f"Missing date part in datetime string: {dt_str}") |
425 | | - |
426 | | - # Try parsing standard ISO/RFC formats with whenever |
427 | | - # Only attempt whenever parsing for specific ISO/RFC formats |
428 | | - if ( |
429 | | - isinstance(dt_str, str) |
430 | | - and "/" not in dt_str # Exclude non-standard date separators |
431 | | - and ( |
432 | | - # ISO format with T delimiter and Z timezone or +00:00 timezone |
433 | | - (("T" in dt_str) and ("Z" in dt_str or "+" in dt_str or "-" in dt_str)) |
434 | | - # ISO format with space delimiter and Z timezone |
435 | | - or (" " in dt_str and "Z" in dt_str) |
| 410 | + result = OffsetDateTime.strptime(dt_str, format) |
| 411 | + except Exception as ex: |
| 412 | + ex_list.append(ex) |
| 413 | + else: |
| 414 | + # No exception |
| 415 | + return result |
| 416 | + |
| 417 | + if disallow_other_formats: |
| 418 | + raise ValueError( |
| 419 | + f"Could not parse datetime string. {str(ex_list)}" |
436 | 420 | ) |
437 | | - ): |
438 | | - # First try Instant.parse_common_iso for UTC formats |
439 | | - try: |
440 | | - instant = Instant.parse_common_iso(dt_str) |
441 | | - return AirbyteDateTime.from_datetime(instant.py_datetime()) |
442 | | - except Exception: |
443 | | - pass |
444 | 421 |
|
445 | | - # Then try Instant.parse_rfc3339 which is more flexible |
446 | | - try: |
447 | | - instant = Instant.parse_rfc3339(dt_str) |
448 | | - return AirbyteDateTime.from_datetime(instant.py_datetime()) |
449 | | - except Exception: |
450 | | - pass |
| 422 | + if not isinstance(dt_str, str): |
| 423 | + raise ValueError( |
| 424 | + f"Could not parse datetime string: expected string or integer, got {type(dt_str)}" |
| 425 | + ) |
451 | 426 |
|
452 | | - # Try OffsetDateTime for non-UTC timezones |
453 | | - try: |
454 | | - offset_dt = OffsetDateTime.parse_common_iso(dt_str) |
455 | | - return AirbyteDateTime.from_datetime(offset_dt.py_datetime()) |
456 | | - except Exception: |
457 | | - pass |
| 427 | + # Else, value is a string |
458 | 428 |
|
459 | | - # Fall back to dateutil for other formats |
| 429 | + # Try parsing standard ISO/RFC formats with whenever |
| 430 | + try: |
| 431 | + instant = Instant.parse_common_iso(dt_str) |
| 432 | + return AirbyteDateTime.from_datetime(instant.py_datetime()) |
| 433 | + except Exception: |
| 434 | + pass |
| 435 | + |
| 436 | + # Handle int-like strings |
| 437 | + if ( |
| 438 | + isinstance(dt_str, str) and ( |
| 439 | + dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit()) |
| 440 | + ) |
| 441 | + ): |
460 | 442 | try: |
461 | | - parsed = parser.parse(dt_str) |
462 | | - if parsed.tzinfo is None: |
463 | | - parsed = parsed.replace(tzinfo=timezone.utc) |
464 | | - |
465 | | - return AirbyteDateTime.from_datetime(parsed) |
466 | | - except (ValueError, TypeError): |
467 | | - raise ValueError(f"Could not parse datetime string: {dt_str}") |
468 | | - except ValueError as e: |
469 | | - if "Invalid date format:" in str(e): |
470 | | - raise |
471 | | - if "Timestamp cannot be negative" in str(e): |
472 | | - raise |
473 | | - if "Timestamp value too large" in str(e): |
474 | | - raise |
| 443 | + return ab_datetime_format(int(dt_str)) |
| 444 | + except Exception: |
| 445 | + pass |
| 446 | + |
| 447 | + # Fall back to dateutil for other formats |
| 448 | + try: |
| 449 | + parsed = dateutil_parser.parse(dt_str) |
| 450 | + if parsed.tzinfo is None: |
| 451 | + parsed = parsed.replace(tzinfo=timezone.utc) |
| 452 | + |
| 453 | + return AirbyteDateTime.from_datetime(parsed) |
| 454 | + except (ValueError, TypeError): |
475 | 455 | raise ValueError(f"Could not parse datetime string: {dt_str}") |
476 | 456 |
|
477 | 457 |
|
|
0 commit comments