Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions airbyte_cdk/utils/datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@

from dateutil import parser
from typing_extensions import Never
from whenever import Instant, LocalDateTime, ZonedDateTime
from whenever import Instant, LocalDateTime, OffsetDateTime, ZonedDateTime


class AirbyteDateTime(datetime):
Expand Down Expand Up @@ -423,7 +423,40 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
raise ValueError(f"Missing date part in datetime string: {dt_str}")

# Try parsing with dateutil for timezone handling
# Try parsing standard ISO/RFC formats with whenever
# Only attempt whenever parsing for specific ISO/RFC formats
if (
isinstance(dt_str, str)
and "/" not in dt_str # Exclude non-standard date separators
and (
# ISO format with T delimiter and Z timezone or +00:00 timezone
(("T" in dt_str) and ("Z" in dt_str or "+" in dt_str or "-" in dt_str))
# ISO format with space delimiter and Z timezone
or (" " in dt_str and "Z" in dt_str)
)
):
# First try Instant.parse_common_iso for UTC formats
try:
instant = Instant.parse_common_iso(dt_str)
return AirbyteDateTime.from_datetime(instant.py_datetime())
except Exception:
pass

# Then try Instant.parse_rfc3339 which is more flexible
try:
instant = Instant.parse_rfc3339(dt_str)
return AirbyteDateTime.from_datetime(instant.py_datetime())
except Exception:
pass

# Try OffsetDateTime for non-UTC timezones
try:
offset_dt = OffsetDateTime.parse_common_iso(dt_str)
return AirbyteDateTime.from_datetime(offset_dt.py_datetime())
except Exception:
pass

# Fall back to dateutil for other formats
try:
parsed = parser.parse(dt_str)
if parsed.tzinfo is None:
Expand Down
196 changes: 196 additions & 0 deletions performance_test_datetime_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import statistics
import time
from datetime import datetime, timezone

from dateutil import parser
from whenever import Instant, OffsetDateTime

from airbyte_cdk.utils.datetime_helpers import ab_datetime_parse

# Test formats
test_formats = [
    "2023-03-14T15:09:26Z",  # ISO format with T delimiter and Z timezone
    "2023-03-14T15:09:26+00:00",  # ISO format with +00:00 timezone
    "2023-03-14T15:09:26.123456Z",  # ISO format with microseconds
    "2023-03-14T15:09:26-04:00",  # ISO format with non-UTC timezone
    "2023-03-14 15:09:26Z",  # Missing T delimiter
    "2023-03-14 15:09:26",  # Missing T delimiter and timezone
    "2023-03-14",  # Date-only format
    "14/03/2023 15:09:26",  # Different date format
    "2023/03/14T15:09:26Z",  # Non-standard date separator
]

# Number of iterations for each test
iterations = 1000


def _time_parser(resolve, dt_str, runs):
    """Time ``runs`` calls of a parser on ``dt_str``.

    Args:
        resolve: Zero-argument callable returning the parse function. The
            lookup happens inside the ``try`` so that a parser entry point
            missing from the installed library counts as a parse failure
            instead of aborting the whole benchmark.
        dt_str: The datetime string to parse.
        runs: Number of timed calls made after one warm-up call.

    Returns:
        A list of per-call durations in milliseconds, or ``None`` when the
        parser could not be resolved or rejected the input.
    """
    try:
        parse = resolve()
        # Warmup; also detects inputs this parser cannot handle.
        parse(dt_str)

        timings = []
        for _ in range(runs):
            start = time.perf_counter()
            parse(dt_str)
            end = time.perf_counter()
            timings.append((end - start) * 1000)
    except Exception:
        # Deliberately broad: the parsers under test raise unrelated
        # exception types and any failure simply means "cannot parse".
        return None
    return timings


def _report(label, timings, failure_label=None):
    """Print the avg/min/max line for one parser and return its mean.

    Args:
        label: Name printed in front of the statistics.
        timings: Per-call durations in milliseconds, or ``None`` on failure.
        failure_label: Name printed when parsing failed; defaults to ``label``.

    Returns:
        The mean duration in milliseconds, or ``None`` when parsing failed.
    """
    if timings is None:
        shown = label if failure_label is None else failure_label
        print(f" {shown}: Failed to parse")
        return None

    avg = statistics.mean(timings)
    print(f" {label}: avg={avg:.3f}ms, min={min(timings):.3f}ms, max={max(timings):.3f}ms")
    return avg


def _speedup(whenever_avg, dateutil_avg):
    """Return ``dateutil_avg / whenever_avg``, or ``None`` when not computable.

    ``None`` is returned when either parser failed or when the whenever
    average is zero (which would otherwise divide by zero).
    """
    if whenever_avg is None or dateutil_avg is None or whenever_avg == 0:
        return None
    return dateutil_avg / whenever_avg


# whenever entry points, tried in order; the first one that accepts the input
# is the one whose timings are reported.
whenever_candidates = (
    ("Instant.parse_common_iso", lambda: Instant.parse_common_iso),
    ("Instant.parse_rfc3339", lambda: Instant.parse_rfc3339),
    ("OffsetDateTime.parse_common_iso", lambda: OffsetDateTime.parse_common_iso),
)

print("Performance Test: Datetime Parsing")
print("=" * 60)
print(f"Running {iterations} iterations for each format")
print("-" * 60)

results = {}

for dt_str in test_formats:
    print(f"\nFormat: {dt_str}")

    # Test whenever parsing: fall through the candidates until one succeeds.
    whenever_method = "None"
    whenever_times = None
    for candidate_name, resolve in whenever_candidates:
        whenever_times = _time_parser(resolve, dt_str, iterations)
        if whenever_times is not None:
            whenever_method = candidate_name
            break

    # Test dateutil parsing
    dateutil_times = _time_parser(lambda: parser.parse, dt_str, iterations)

    # Test ab_datetime_parse
    ab_times = _time_parser(lambda: ab_datetime_parse, dt_str, iterations)

    # Print results
    whenever_avg = _report(whenever_method, whenever_times, failure_label="whenever")
    dateutil_avg = _report("dateutil.parser.parse", dateutil_times)
    ab_avg = _report("ab_datetime_parse", ab_times)

    # Compare performance if both succeeded
    speedup = _speedup(whenever_avg, dateutil_avg)
    if speedup is not None:
        print(f" Performance: whenever is {speedup:.2f}x faster than dateutil")

    # Store results for summary
    results[dt_str] = {
        "whenever": {
            "success": whenever_times is not None,
            "method": whenever_method,
            "avg": whenever_avg,
        },
        "dateutil": {
            "success": dateutil_times is not None,
            "avg": dateutil_avg,
        },
        "ab_datetime_parse": {
            "success": ab_times is not None,
            "avg": ab_avg,
        },
    }

# Print summary
print("\n\nSummary")
print("=" * 60)
print("Format | whenever | dateutil | ab_datetime_parse | Speedup")
print("-" * 60)

for dt_str, result in results.items():
    whenever_avg = result["whenever"]["avg"]
    dateutil_avg = result["dateutil"]["avg"]
    ab_avg = result["ab_datetime_parse"]["avg"]

    # Compare against None explicitly so a legitimate 0.0 average is still shown.
    whenever_str = f"{whenever_avg:.3f}ms" if whenever_avg is not None else "N/A"
    dateutil_str = f"{dateutil_avg:.3f}ms" if dateutil_avg is not None else "N/A"
    ab_str = f"{ab_avg:.3f}ms" if ab_avg is not None else "N/A"

    speedup = _speedup(whenever_avg, dateutil_avg)
    speedup_str = f"{speedup:.2f}x" if speedup is not None else "N/A"

    print(f"{dt_str} | {whenever_str} | {dateutil_str} | {ab_str} | {speedup_str}")
110 changes: 110 additions & 0 deletions unit_tests/utils/test_datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import freezegun
import pytest
from dateutil import parser
from whenever import Instant

from airbyte_cdk.utils.datetime_helpers import (
AirbyteDateTime,
Expand Down Expand Up @@ -262,3 +264,111 @@ def test_epoch_millis():
# Test roundtrip conversion
dt3 = AirbyteDateTime.from_epoch_millis(dt.to_epoch_millis())
assert dt3 == dt


@pytest.mark.parametrize(
    "input_value,expected_parser",
    [
        # Formats that use the whenever parser
        ("2023-03-14", "whenever"),  # Date-only format
        (1678806566, "whenever"),  # Unix timestamp
        ("2023-03-14T15:09:26Z", "whenever"),  # ISO format with T delimiter
        ("2023-03-14T15:09:26+00:00", "whenever"),  # ISO format with timezone
        ("2023-03-14T15:09:26.123456Z", "whenever"),  # ISO format with microseconds
        ("2023-03-14T15:09:26-04:00", "whenever"),  # ISO format with non-UTC timezone
        ("2023-03-14 15:09:26Z", "whenever"),  # Missing T delimiter but with Z
        # Formats that still use the dateutil parser
        ("2023-03-14 15:09:26", "dateutil"),  # Missing T delimiter and timezone
        ("14/03/2023 15:09:26", "dateutil"),  # Different date format
        ("2023/03/14T15:09:26Z", "dateutil"),  # Non-standard date separator
    ],
)
def test_datetime_parser_selection(input_value, expected_parser, monkeypatch):
    """Test that the correct parser is used based on the input format.

    ``Instant`` and ``OffsetDateTime`` in ``datetime_helpers`` are replaced by
    stand-in classes whose static methods record the call and then delegate to
    the real ``whenever`` implementation; ``dateutil.parser.parse`` is wrapped
    the same way. After parsing, exactly the expected family of parsers must
    have been invoked.
    """
    # Local import: the real classes are needed as delegation targets.
    import whenever

    original_parser_parse = parser.parse

    # Which parser family was invoked during this call.
    called = {"whenever": False, "dateutil": False}

    def whenever_spy(owner, method_name):
        """Build a static method that flags the call, then delegates to ``owner``."""

        def spy(*args, **kwargs):
            called["whenever"] = True
            # Resolve the attribute lazily so a method missing from the
            # installed whenever version only fails when actually called.
            return getattr(owner, method_name)(*args, **kwargs)

        return staticmethod(spy)

    def spy_parser_parse(*args, **kwargs):
        called["dateutil"] = True
        return original_parser_parse(*args, **kwargs)

    # Only class-level constructors/parsers need stand-ins: the objects they
    # return are real whenever instances, so instance methods such as
    # ``py_datetime`` keep working without being mocked.
    class MockInstant:
        from_timestamp = whenever_spy(whenever.Instant, "from_timestamp")
        from_utc = whenever_spy(whenever.Instant, "from_utc")
        parse_common_iso = whenever_spy(whenever.Instant, "parse_common_iso")
        parse_rfc3339 = whenever_spy(whenever.Instant, "parse_rfc3339")

    class MockOffsetDateTime:
        parse_common_iso = whenever_spy(whenever.OffsetDateTime, "parse_common_iso")

    # Apply the mocks at the module level
    monkeypatch.setattr("airbyte_cdk.utils.datetime_helpers.Instant", MockInstant)
    monkeypatch.setattr("airbyte_cdk.utils.datetime_helpers.OffsetDateTime", MockOffsetDateTime)
    monkeypatch.setattr("airbyte_cdk.utils.datetime_helpers.parser.parse", spy_parser_parse)

    # Parse the datetime
    ab_datetime_parse(input_value)

    # Check which parser was used
    if expected_parser == "whenever":
        assert called["whenever"], f"Expected whenever parser to be used for {input_value}"
        assert not called["dateutil"], (
            f"Did not expect dateutil parser to be used for {input_value}"
        )
    else:
        assert called["dateutil"], f"Expected dateutil parser to be used for {input_value}"
        assert not called["whenever"], (
            f"Did not expect whenever parser to be used for {input_value}"
        )
Loading