Skip to content

Commit 509ea05

Browse files
committed
Refactored switching logic
1 parent 8d3bfce commit 509ea05

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ class ConcurrentPerPartitionCursor(Cursor):
5858
ConcurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
5959
"""
6060

61-
DEFAULT_MAX_PARTITIONS_NUMBER = 1000
61+
DEFAULT_MAX_PARTITIONS_NUMBER = 10_000
62+
SWITCH_TO_GLOBAL_LIMIT = 1000
6263
_NO_STATE: Mapping[str, Any] = {}
6364
_NO_CURSOR_STATE: Mapping[str, Any] = {}
6465
_GLOBAL_STATE_KEY = "state"
@@ -99,7 +100,7 @@ def __init__(
99100
self._new_global_cursor: Optional[StreamState] = None
100101
self._lookback_window: int = 0
101102
self._parent_state: Optional[StreamState] = None
102-
self._over_limit: int = 0
103+
self._number_of_partitions: int = 0
103104
self._use_global_cursor: bool = False
104105
self._partition_serializer = PerPartitionKeySerializer()
105106

@@ -233,8 +234,8 @@ def _ensure_partition_limit(self) -> None:
233234
or removed due to being the oldest.
234235
"""
235236
with self._lock:
237+
self._number_of_partitions += 1
236238
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
237-
self._over_limit += 1
238239
# Try removing finished partitions first
239240
for partition_key in list(self._cursor_per_partition.keys()):
240241
if (
@@ -245,7 +246,7 @@ def _ensure_partition_limit(self) -> None:
245246
partition_key
246247
) # Remove the oldest partition
247248
logger.warning(
248-
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
249+
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions}."
249250
)
250251
break
251252
else:
@@ -254,7 +255,7 @@ def _ensure_partition_limit(self) -> None:
254255
1
255256
] # Remove the oldest partition
256257
logger.warning(
257-
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
258+
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions}."
258259
)
259260

260261
def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -355,6 +356,10 @@ def _set_global_state(self, stream_state: Mapping[str, Any]) -> None:
355356

356357
def observe(self, record: Record) -> None:
357358
if not self._use_global_cursor and self.limit_reached():
359+
logger.info(
360+
f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
361+
f"Switching to global cursor for {self._stream_name}."
362+
)
358363
self._use_global_cursor = True
359364

360365
if not record.associated_slice:
@@ -397,4 +402,4 @@ def _get_cursor(self, record: Record) -> ConcurrentCursor:
397402
return cursor
398403

399404
def limit_reached(self) -> bool:
400-
return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
405+
return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT

0 commit comments

Comments
 (0)