|
20 | 20 | ClientSideIncrementalRecordFilterDecorator, |
21 | 21 | ) |
22 | 22 | from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor |
| 23 | +from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import ( |
| 24 | + PerPartitionWithGlobalCursor, |
| 25 | +) |
23 | 26 | from airbyte_cdk.sources.declarative.interpolation import InterpolatedString |
24 | 27 | from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource |
25 | 28 | from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( |
|
32 | 35 | ModelToComponentFactory, |
33 | 36 | ) |
34 | 37 | from airbyte_cdk.sources.declarative.requesters import HttpRequester |
35 | | -from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever |
| 38 | +from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever |
36 | 39 | from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import ( |
37 | 40 | DeclarativePartitionFactory, |
38 | 41 | StreamSlicerPartitionGenerator, |
@@ -231,21 +234,7 @@ def _group_streams( |
231 | 234 | stream_state=stream_state, |
232 | 235 | ) |
233 | 236 |
|
234 | | - retriever = declarative_stream.retriever |
235 | | - |
236 | | - # This is an optimization so that we don't invoke any cursor or state management flows within the |
237 | | - # low-code framework because state management is handled through the ConcurrentCursor. |
238 | | - if declarative_stream and isinstance(retriever, SimpleRetriever): |
239 | | - # Also a temporary hack. In the legacy Stream implementation, as part of the read, |
240 | | - # set_initial_state() is called to instantiate incoming state on the cursor. Although we no |
241 | | - # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components |
242 | | - # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator |
243 | | - # still rely on a DatetimeBasedCursor that is properly initialized with state. |
244 | | - if retriever.cursor: |
245 | | - retriever.cursor.set_initial_state(stream_state=stream_state) |
246 | | - # We zero it out here, but since this is a cursor reference, the state is still properly |
247 | | - # instantiated for the other components that reference it |
248 | | - retriever.cursor = None |
| 237 | + retriever = self._get_retriever(declarative_stream, stream_state) |
249 | 238 |
|
250 | 239 | partition_generator = StreamSlicerPartitionGenerator( |
251 | 240 | DeclarativePartitionFactory( |
@@ -305,6 +294,60 @@ def _group_streams( |
305 | 294 | cursor=final_state_cursor, |
306 | 295 | ) |
307 | 296 | ) |
| 297 | + elif ( |
| 298 | + incremental_sync_component_definition |
| 299 | + and incremental_sync_component_definition.get("type", "") |
| 300 | + == DatetimeBasedCursorModel.__name__ |
| 301 | + and self._stream_supports_concurrent_partition_processing( |
| 302 | + declarative_stream=declarative_stream |
| 303 | + ) |
| 304 | + and hasattr(declarative_stream.retriever, "stream_slicer") |
| 305 | + and isinstance( |
| 306 | + declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor |
| 307 | + ) |
| 308 | + ): |
| 309 | + stream_state = state_manager.get_stream_state( |
| 310 | + stream_name=declarative_stream.name, namespace=declarative_stream.namespace |
| 311 | + ) |
| 312 | + partition_router = declarative_stream.retriever.stream_slicer._partition_router |
| 313 | + |
| 314 | + perpartition_cursor = ( |
| 315 | + self._constructor.create_concurrent_cursor_from_perpartition_cursor( |
| 316 | + state_manager=state_manager, |
| 317 | + model_type=DatetimeBasedCursorModel, |
| 318 | + component_definition=incremental_sync_component_definition, |
| 319 | + stream_name=declarative_stream.name, |
| 320 | + stream_namespace=declarative_stream.namespace, |
| 321 | + config=config or {}, |
| 322 | + stream_state=stream_state, |
| 323 | + partition_router=partition_router, |
| 324 | + ) |
| 325 | + ) |
| 326 | + |
| 327 | + retriever = self._get_retriever(declarative_stream, stream_state) |
| 328 | + |
| 329 | + partition_generator = StreamSlicerPartitionGenerator( |
| 330 | + DeclarativePartitionFactory( |
| 331 | + declarative_stream.name, |
| 332 | + declarative_stream.get_json_schema(), |
| 333 | + retriever, |
| 334 | + self.message_repository, |
| 335 | + ), |
| 336 | + perpartition_cursor, |
| 337 | + ) |
| 338 | + |
| 339 | + concurrent_streams.append( |
| 340 | + DefaultStream( |
| 341 | + partition_generator=partition_generator, |
| 342 | + name=declarative_stream.name, |
| 343 | + json_schema=declarative_stream.get_json_schema(), |
| 344 | + availability_strategy=AlwaysAvailableAvailabilityStrategy(), |
| 345 | + primary_key=get_primary_key_from_stream(declarative_stream.primary_key), |
| 346 | + cursor_field=perpartition_cursor.cursor_field.cursor_field_key, |
| 347 | + logger=self.logger, |
| 348 | + cursor=perpartition_cursor, |
| 349 | + ) |
| 350 | + ) |
308 | 351 | else: |
309 | 352 | synchronous_streams.append(declarative_stream) |
310 | 353 | else: |
@@ -395,6 +438,27 @@ def _stream_supports_concurrent_partition_processing( |
395 | 438 | return False |
396 | 439 | return True |
397 | 440 |
|
def _get_retriever(
    self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
) -> Retriever:
    """Return the stream's retriever, detaching its low-code cursor when applicable.

    Only ``SimpleRetriever`` instances are modified; any other retriever is
    returned exactly as found on ``declarative_stream``.

    Args:
        declarative_stream: Stream whose ``retriever`` attribute is read.
        stream_state: Incoming state handed to the retriever's cursor (when it
            has one) through ``set_initial_state`` before the cursor is detached.

    Returns:
        The same retriever object that ``declarative_stream.retriever`` refers
        to. For a ``SimpleRetriever`` its ``cursor`` attribute has been set to
        ``None`` as a side effect.
    """
    stream_retriever = declarative_stream.retriever

    # Nothing to adjust unless we are dealing with a SimpleRetriever.
    if not declarative_stream or not isinstance(stream_retriever, SimpleRetriever):
        return stream_retriever

    # Temporary hack: the legacy Stream implementation calls set_initial_state() during
    # the read to load incoming state into the cursor. Concurrent checkpointing no longer
    # depends on the legacy low-code cursor, but low-code components such as
    # StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
    # still rely on a DatetimeBasedCursor that has been initialized with state.
    if stream_retriever.cursor:
        stream_retriever.cursor.set_initial_state(stream_state=stream_state)

    # Optimization: state management is handled through the ConcurrentCursor, so the
    # retriever must not invoke any cursor or state management flows of the low-code
    # framework. Only the retriever's reference is cleared; the cursor object itself
    # stays initialized for the other components that reference it.
    stream_retriever.cursor = None

    return stream_retriever
| 461 | + |
398 | 462 | @staticmethod |
399 | 463 | def _select_streams( |
400 | 464 | streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog |
|
0 commit comments