11#
22# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33#
4+ from datetime import datetime , timedelta , timezone
45from typing import List , Mapping , Optional
6+ from unittest .mock import Mock
57
68import pytest
79
1113 RecordFilter ,
1214)
1315from airbyte_cdk .sources .declarative .incremental import (
14- CursorFactory ,
15- DatetimeBasedCursor ,
16- GlobalSubstreamCursor ,
17- PerPartitionWithGlobalCursor ,
16+ ConcurrentPerPartitionCursor ,
17+ ConcurrentCursorFactory ,
1818)
19+ from airbyte_cdk .sources .streams .concurrent .cursor import ConcurrentCursor , CursorField
1920from airbyte_cdk .sources .declarative .interpolation import InterpolatedString
2021from airbyte_cdk .sources .declarative .models import (
2122 CustomRetriever ,
2425)
2526from airbyte_cdk .sources .declarative .partition_routers import SubstreamPartitionRouter
2627from airbyte_cdk .sources .declarative .types import StreamSlice
28+ from airbyte_cdk .sources .streams .concurrent .state_converters .datetime_stream_state_converter import (
29+ CustomFormatConcurrentStreamStateConverter ,
30+ )
2731from airbyte_cdk .sources .types import Record
32+ from airbyte_cdk .utils .datetime_helpers import ab_datetime_parse , ab_datetime_now
2833
2934DATE_FORMAT = "%Y-%m-%d"
3035RECORDS_TO_FILTER_DATE_FORMAT = [
@@ -272,25 +277,27 @@ def test_client_side_record_filter_decorator_no_parent_stream(
272277 records_to_filter : List [Mapping ],
273278 expected_record_ids : List [int ],
274279):
275- date_time_based_cursor = DatetimeBasedCursor (
276- start_datetime = MinMaxDatetime (
277- datetime = "2021-01-01" , datetime_format = DATE_FORMAT , parameters = {}
280+ datetime_based_cursor = ConcurrentCursor (
281+ stream_name = "any_stream" ,
282+ stream_namespace = None ,
283+ stream_state = stream_state ,
284+ message_repository = Mock (),
285+ connector_state_manager = Mock (),
286+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
287+ datetime_format = datetime_format
278288 ),
279- end_datetime = MinMaxDatetime (datetime = end_datetime , parameters = {}) if end_datetime else None ,
280- step = "P10Y" ,
281- cursor_field = InterpolatedString .create ("created_at" , parameters = {}),
282- datetime_format = datetime_format ,
283- cursor_granularity = "P1D" ,
284- config = {},
285- parameters = {},
289+ cursor_field = CursorField ("created_at" ),
290+ slice_boundary_fields = ("start" , "end" ),
291+ start = datetime (2021 , 1 , 1 , tzinfo = timezone .utc ),
292+ end_provider = lambda : ab_datetime_parse (end_datetime ) if end_datetime else ab_datetime_now (),
293+ slice_range = timedelta (days = 365 * 10 ),
286294 )
287- date_time_based_cursor .set_initial_state (stream_state )
288295
289296 record_filter_decorator = ClientSideIncrementalRecordFilterDecorator (
290297 config = {},
291298 condition = record_filter_expression ,
292299 parameters = {},
293- cursor = date_time_based_cursor ,
300+ cursor = datetime_based_cursor ,
294301 )
295302
296303 filtered_records = list (
@@ -341,7 +348,7 @@ def test_client_side_record_filter_decorator_no_parent_stream(
341348 }
342349 ],
343350 },
344- "per_partition_with_global " ,
351+ "global_substream " ,
345352 [2 , 3 ],
346353 ),
347354 # Use PerPartitionWithGlobalCursor with partition state missing, global cursor used
@@ -363,23 +370,26 @@ def test_client_side_record_filter_decorator_no_parent_stream(
363370def test_client_side_record_filter_decorator_with_cursor_types (
364371 stream_state : Optional [Mapping ], cursor_type : str , expected_record_ids : List [int ]
365372):
366- def date_time_based_cursor_factory () -> DatetimeBasedCursor :
367- return DatetimeBasedCursor (
368- start_datetime = MinMaxDatetime (
369- datetime = "2021-01-01" , datetime_format = DATE_FORMAT , parameters = {}
370- ),
371- end_datetime = MinMaxDatetime (
372- datetime = "2021-01-05" , datetime_format = DATE_FORMAT , parameters = {}
373+ def date_time_based_cursor_factory (stream_state , runtime_lookback_window ) -> ConcurrentCursor :
374+ return ConcurrentCursor (
375+ stream_name = "any_stream" ,
376+ stream_namespace = None ,
377+ stream_state = stream_state ,
378+ message_repository = Mock (),
379+ connector_state_manager = Mock (),
380+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
381+ datetime_format = DATE_FORMAT
373382 ),
374- step = "P10Y" ,
375- cursor_field = InterpolatedString .create ("created_at" , parameters = {}),
376- datetime_format = DATE_FORMAT ,
377- cursor_granularity = "P1D" ,
378- config = {},
379- parameters = {},
383+ cursor_field = CursorField ("created_at" ),
384+ slice_boundary_fields = ("start" , "end" ),
385+ start = datetime (2021 , 1 , 1 , tzinfo = timezone .utc ),
386+ end_provider = lambda : datetime (2021 , 1 , 5 , tzinfo = timezone .utc ),
387+ slice_range = timedelta (days = 365 * 10 ),
388+ cursor_granularity = timedelta (days = 1 ),
389+ lookback_window = runtime_lookback_window ,
380390 )
381391
382- date_time_based_cursor = date_time_based_cursor_factory ()
392+ date_time_based_cursor = date_time_based_cursor_factory (stream_state , timedelta ( 0 ) )
383393
384394 substream_cursor = None
385395 partition_router = SubstreamPartitionRouter (
@@ -401,29 +411,26 @@ def date_time_based_cursor_factory() -> DatetimeBasedCursor:
401411 if cursor_type == "datetime" :
402412 # Use only the ConcurrentCursor created above
403413 pass # No additional cursor needed
404- elif cursor_type == "global_substream" :
414+ elif cursor_type in [ "global_substream" , "per_partition_with_global" ] :
405415 # Create a ConcurrentPerPartitionCursor; use_global_cursor is enabled only for "global_substream"
406- substream_cursor = GlobalSubstreamCursor (
407- stream_cursor = date_time_based_cursor ,
416+ substream_cursor = ConcurrentPerPartitionCursor (
417+ cursor_factory = ConcurrentCursorFactory ( date_time_based_cursor_factory ) ,
408418 partition_router = partition_router ,
409- )
410- if stream_state :
411- substream_cursor .set_initial_state (stream_state )
412- elif cursor_type == "per_partition_with_global" :
413- # Create PerPartitionWithGlobalCursor instance
414- substream_cursor = PerPartitionWithGlobalCursor (
415- cursor_factory = CursorFactory (date_time_based_cursor_factory ),
416- partition_router = partition_router ,
417- stream_cursor = date_time_based_cursor ,
419+ stream_name = "a_stream" ,
420+ stream_namespace = None ,
421+ stream_state = stream_state ,
422+ message_repository = Mock (),
423+ connector_state_manager = Mock (),
424+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
425+ datetime_format = DATE_FORMAT
426+ ),
427+ cursor_field = CursorField ("created_at" ),
428+ use_global_cursor = cursor_type == "global_substream" ,
429+ attempt_to_create_cursor_if_not_provided = True ,
418430 )
419431 else :
420432 raise ValueError (f"Unsupported cursor type: { cursor_type } " )
421433
422- if substream_cursor and stream_state :
423- substream_cursor .set_initial_state (stream_state )
424- elif stream_state :
425- date_time_based_cursor .set_initial_state (stream_state )
426-
427434 # Create the record_filter_decorator with appropriate cursor
428435 record_filter_decorator = ClientSideIncrementalRecordFilterDecorator (
429436 config = {},
0 commit comments