|
9 | 9 | from collections import OrderedDict |
10 | 10 | from copy import deepcopy |
11 | 11 | from datetime import timedelta |
12 | | -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional |
| 12 | +from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union |
13 | 13 |
|
| 14 | +from airbyte_cdk.models import AirbyteStateMessage, AirbyteStateBlob, AirbyteStreamState, AirbyteStateType, StreamDescriptor |
14 | 15 | from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager |
15 | 16 | from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import ( |
16 | 17 | Timer, |
17 | 18 | iterate_with_last_flag_and_state, |
18 | 19 | ) |
| 20 | +# NOTE: this file depends on the declarative package. If per-partition cursors are ever needed outside the declarative package, we will need to introduce an interface here to avoid circular dependencies.
19 | 21 | from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter |
20 | 22 | from airbyte_cdk.sources.message import MessageRepository |
21 | 23 | from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import ( |
@@ -124,6 +126,7 @@ def __init__( |
124 | 126 | self._timer = Timer() |
125 | 127 |
|
126 | 128 | self._set_initial_state(stream_state) |
| 129 | + self._synced_some_data = False |
127 | 130 |
|
128 | 131 | @property |
129 | 132 | def cursor_field(self) -> CursorField: |
@@ -154,6 +157,7 @@ def state(self) -> MutableMapping[str, Any]: |
154 | 157 |
|
155 | 158 | def close_partition(self, partition: Partition) -> None: |
156 | 159 | # Attempt to retrieve the stream slice |
| 160 | + logger.warning(f"GODO: stream {self._stream_name} closing partition {partition.to_slice()}") |
157 | 161 | stream_slice: Optional[StreamSlice] = partition.to_slice() # type: ignore[assignment] |
158 | 162 |
|
159 | 163 | # Ensure stream_slice is not None |
@@ -209,8 +213,10 @@ def ensure_at_least_one_state_emitted(self) -> None: |
209 | 213 | if not any( |
210 | 214 | semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items() |
211 | 215 | ): |
212 | | - self._global_cursor = self._new_global_cursor |
213 | | - self._lookback_window = self._timer.finish() |
| 216 | + if self._synced_some_data: |
| 217 | + # we only update those if we actually synced some data |
| 218 | + self._global_cursor = self._new_global_cursor |
| 219 | + self._lookback_window = self._timer.finish() |
214 | 220 | self._parent_state = self._partition_router.get_stream_state() |
215 | 221 | self._emit_state_message(throttle=False) |
216 | 222 |
|
@@ -454,6 +460,7 @@ def observe(self, record: Record) -> None: |
454 | 460 | except ValueError: |
455 | 461 | return |
456 | 462 |
|
| 463 | + self._synced_some_data = True |
457 | 464 | record_cursor = self._connector_state_converter.output_format( |
458 | 465 | self._connector_state_converter.parse_value(record_cursor_value) |
459 | 466 | ) |
@@ -522,3 +529,23 @@ def _get_cursor(self, record: Record) -> ConcurrentCursor: |
522 | 529 |
|
def limit_reached(self) -> bool:
    """Report whether the partition count has gone past the switch-over limit.

    Returns:
        True when ``self._number_of_partitions`` is strictly greater than
        ``self.SWITCH_TO_GLOBAL_LIMIT``; False otherwise (including equality).
    """
    partition_count = self._number_of_partitions
    threshold = self.SWITCH_TO_GLOBAL_LIMIT
    return partition_count > threshold
| 532 | + |
| 533 | + @staticmethod |
| 534 | + def get_parent_state(stream_state: Optional[StreamState], parent_stream_name: str) -> Optional[AirbyteStateMessage]: |
| 535 | + return AirbyteStateMessage( |
| 536 | + type=AirbyteStateType.STREAM, |
| 537 | + stream=AirbyteStreamState( |
| 538 | + stream_descriptor=StreamDescriptor(parent_stream_name, None), |
| 539 | + stream_state=AirbyteStateBlob(stream_state["parent_state"][parent_stream_name]) |
| 540 | + ) |
| 541 | + ) if stream_state and "parent_state" in stream_state else None |
| 542 | + |
| 543 | + @staticmethod |
| 544 | + def get_global_state(stream_state: Optional[StreamState], parent_stream_name: str) -> Optional[AirbyteStateMessage]: |
| 545 | + return AirbyteStateMessage( |
| 546 | + type=AirbyteStateType.STREAM, |
| 547 | + stream=AirbyteStreamState( |
| 548 | + stream_descriptor=StreamDescriptor(parent_stream_name, None), |
| 549 | + stream_state=AirbyteStateBlob(stream_state["state"]) |
| 550 | + ) |
| 551 | + ) if stream_state and "state" in stream_state else None |
0 commit comments