diff --git a/airbyte_cdk/sources/declarative/datetime/__init__.py b/airbyte_cdk/sources/declarative/datetime/__init__.py index bf1f13e1e..c941b3045 100644 --- a/airbyte_cdk/sources/declarative/datetime/__init__.py +++ b/airbyte_cdk/sources/declarative/datetime/__init__.py @@ -1,7 +1,3 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - -from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime - -__all__ = ["MinMaxDatetime"] diff --git a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 130406fcc..2707ffe11 100644 --- a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -29,6 +29,8 @@ def parse(self, date: Union[str, int], format: str) -> datetime.datetime: return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc) elif format == "%s_as_float": return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc) + elif format == "%epoch_microseconds": + return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date)) elif format == "%ms": return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date)) elif "%_ms" in format: @@ -46,6 +48,8 @@ def format(self, dt: datetime.datetime, format: str) -> str: return str(int(dt.timestamp())) if format == "%s_as_float": return str(float(dt.timestamp())) + if format == "%epoch_microseconds": + return str(int(dt.timestamp() * 1_000_000)) if format == "%ms": # timstamp() returns a float representing the number of seconds since the unix epoch return str(int(dt.timestamp() * 1000)) diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py index 02982584a..382958b86 100644 --- a/airbyte_cdk/sources/declarative/interpolation/macros.py +++ b/airbyte_cdk/sources/declarative/interpolation/macros.py @@ -12,6 +12,8 @@ from dateutil import parser from isodate import parse_duration +from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser + """ This file contains macros that can be evaluated by a `JinjaInterpolation` object """ @@ -171,11 +173,7 @@ def format_datetime( dt_datetime = ( datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt) ) - if format == "%s": - return str(int(dt_datetime.timestamp())) - elif format == "%ms": - return str(int(dt_datetime.timestamp() * 1_000_000)) - return dt_datetime.strftime(format) + return DatetimeParser().format(dt=dt_datetime, format=format) _macros_list = [ diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 39058f834..1c2289c17 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -56,7 +56,7 @@ ) from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel -from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime +from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.decoders import ( Decoder, diff --git a/unit_tests/sources/declarative/datetime/test_datetime_parser.py b/unit_tests/sources/declarative/datetime/test_datetime_parser.py index 640abd6c2..b9da9852f 100644 --- a/unit_tests/sources/declarative/datetime/test_datetime_parser.py +++ b/unit_tests/sources/declarative/datetime/test_datetime_parser.py @@ -10,16 +10,14 @@ @pytest.mark.parametrize( - "test_name, input_date, date_format, expected_output_date", + "input_date, date_format, expected_output_date", [ ( - "test_parse_date_iso", "2021-01-01T00:00:00.000000+0000", "%Y-%m-%dT%H:%M:%S.%f%z", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), ( - "test_parse_date_iso_with_timezone_not_utc", "2021-01-01T00:00:00.000000+0400", "%Y-%m-%dT%H:%M:%S.%f%z", datetime.datetime( @@ -27,85 +25,103 @@ ), ), ( - "test_parse_timestamp", "1609459200", "%s", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), ( - "test_parse_timestamp_as_float", "1675092508.873709", "%s_as_float", datetime.datetime(2023, 1, 30, 15, 28, 28, 873709, tzinfo=datetime.timezone.utc), ), ( - "test_parse_ms_timestamp", + "1675092508873709", + "%epoch_microseconds", + datetime.datetime(2023, 1, 30, 15, 28, 28, 873709, tzinfo=datetime.timezone.utc), + ), + ( "1609459200001", "%ms", datetime.datetime(2021, 1, 1, 0, 0, 0, 1000, tzinfo=datetime.timezone.utc), ), ( - "test_parse_date_ms", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), ( - "test_parse_format_datetime_with__ms", "2021-11-22T08:41:55.640Z", "%Y-%m-%dT%H:%M:%S.%_msZ", datetime.datetime(2021, 11, 22, 8, 41, 55, 640000, tzinfo=datetime.timezone.utc), ), ], + ids=[ + "test_parse_date_iso", + "test_parse_date_iso_with_timezone_not_utc", + "test_parse_timestamp", + "test_parse_timestamp_as_float", + "test_parse_timestamp_microseconds", + "test_parse_ms_timestamp", + "test_parse_date_ms", + "test_parse_format_datetime_with__ms", + ], ) -def test_parse_date(test_name, input_date, date_format, expected_output_date): +def test_parse_date(input_date: str, date_format: str, expected_output_date: datetime.datetime): parser = DatetimeParser() output_date = parser.parse(input_date, date_format) assert output_date == expected_output_date @pytest.mark.parametrize( - "test_name, input_dt, datetimeformat, expected_output", + "input_dt, datetimeformat, expected_output", [ ( - "test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200", ), ( - "test_format_timestamp_ms", datetime.datetime(2021, 1, 1, 0, 0, 0, 1000, tzinfo=datetime.timezone.utc), "%ms", "1609459200001", ), ( - "test_format_timestamp_as_float", datetime.datetime(2023, 1, 30, 15, 28, 28, 873709, tzinfo=datetime.timezone.utc), "%s_as_float", "1675092508.873709", ), ( - "test_format_string", + datetime.datetime(2023, 1, 30, 15, 28, 28, 873709, tzinfo=datetime.timezone.utc), + "%epoch_microseconds", + "1675092508873709", + ), + ( datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01", ), ( - "test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "20210101", ), ( - "test_parse_format_datetime_with__ms", datetime.datetime(2021, 11, 22, 8, 41, 55, 640000, tzinfo=datetime.timezone.utc), "%Y-%m-%dT%H:%M:%S.%_msZ", "2021-11-22T08:41:55.640Z", ), ], + ids=[ + "test_format_timestamp", + "test_format_timestamp_ms", + "test_format_timestamp_as_float", + "test_format_timestamp_microseconds", + "test_format_string", + "test_format_to_number", + "test_parse_format_datetime_with__ms", + ], ) -def test_format_datetime(test_name, input_dt, datetimeformat, expected_output): +def test_format_datetime(input_dt: datetime.datetime, datetimeformat: str, expected_output: str): parser = DatetimeParser() output_date = parser.format(input_dt, datetimeformat) assert output_date == expected_output diff --git a/unit_tests/sources/declarative/extractors/test_record_filter.py b/unit_tests/sources/declarative/extractors/test_record_filter.py index 24956078f..03274e732 100644 --- a/unit_tests/sources/declarative/extractors/test_record_filter.py +++ b/unit_tests/sources/declarative/extractors/test_record_filter.py @@ -5,7 +5,7 @@ import pytest -from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime +from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.extractors.record_filter import ( ClientSideIncrementalRecordFilterDecorator, RecordFilter, diff --git a/unit_tests/sources/declarative/interpolation/test_macros.py b/unit_tests/sources/declarative/interpolation/test_macros.py index 8543cd507..5fbea2601 100644 --- a/unit_tests/sources/declarative/interpolation/test_macros.py +++ b/unit_tests/sources/declarative/interpolation/test_macros.py @@ -79,7 +79,7 @@ def test_macros_export(test_name, fn_name, found_in_macros): ), ( "2022-01-01T01:01:01Z", - "%ms", + "%epoch_microseconds", "%Y-%m-%dT%H:%M:%SZ", "1640998861000000", ), diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index be9177638..b1a9cad2c 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -32,7 +32,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import SessionTokenProvider from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel -from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime +from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.decoders import JsonDecoder, PaginationDecoderDecorator from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordFilter, RecordSelector