Skip to content

Commit 8ea12d3

Browse files
Pijukatel and vdusek
authored and committed
fix: Fix incorrect use of desired concurrency ratio (apify#780)
### Description

The concurrency ratio was not used correctly in the Python version of the autoscaled pool. Align it with the JavaScript implementation of the autoscaled pool. Add a test.

### Issues

- Closes: apify#759

---------

Co-authored-by: Vlada Dusek <[email protected]>
1 parent 319c1ff commit 8ea12d3

File tree

2 files changed

+56
-1
lines changed

2 files changed

+56
-1
lines changed

src/crawlee/_autoscaling/autoscaled_pool.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,7 @@ def _autoscale(self) -> None:
195195
"""Inspect system load status and adjust desired concurrency if necessary. Do not call directly."""
196196
status = self._system_status.get_historical_system_info()
197197

198-
min_current_concurrency = math.floor(self._desired_concurrency_ratio * self.current_concurrency)
198+
min_current_concurrency = math.floor(self._desired_concurrency_ratio * self.desired_concurrency)
199199
should_scale_up = (
200200
status.is_system_idle
201201
and self._desired_concurrency < self._max_concurrency

tests/unit/_autoscaling/test_autoscaled_pool.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import asyncio
66
from contextlib import suppress
77
from datetime import datetime, timedelta, timezone
8+
from itertools import chain, repeat
89
from typing import TYPE_CHECKING, TypeVar, cast
910
from unittest.mock import Mock
1011

@@ -208,6 +209,60 @@ def get_historical_system_info() -> SystemInfo:
208209
await pool_run_task
209210

210211

212+
async def test_autoscales_uses_desired_concurrency_ratio(system_status: SystemStatus | Mock) -> None:
    """Verify that the desired concurrency ratio can hold back desired concurrency.

    Exactly one task is reported as ready; after that no task is ever ready again. The reported system load is
    low and `max_concurrency=4`, so scaling up would otherwise be permitted. With `desired_concurrency_ratio=1`,
    however, current concurrency would first have to reach desired concurrency before desired concurrency may
    grow, and since no further task becomes ready, that never happens. Desired concurrency is therefore expected
    to stay at its initial value of 2 for the whole observation window.
    """

    async def short_task() -> None:
        await asyncio.sleep(0.1)

    # Both futures are created up front; the "not ready" one is reused for every call after the first.
    ready_once = future(True)
    never_ready = future(False)
    readiness_answers = chain([ready_once], repeat(never_ready))

    def is_task_ready_function() -> Awaitable[bool]:
        return next(readiness_answers)

    def is_finished_function() -> Awaitable[bool]:
        return future(False)

    def get_historical_system_info() -> SystemInfo:
        # Every resource reports a load (0.3) well below its limit (0.9).
        return SystemInfo(
            cpu_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
            memory_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
            event_loop_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
            client_info=LoadRatioInfo(limit_ratio=0.9, actual_ratio=0.3),
        )

    historical_info_mock = cast(Mock, system_status.get_historical_system_info)
    historical_info_mock.side_effect = get_historical_system_info

    concurrency_settings = ConcurrencySettings(
        min_concurrency=2,
        desired_concurrency=2,
        max_concurrency=4,
    )
    pool = AutoscaledPool(
        system_status=system_status,
        run_task_function=short_task,
        is_task_ready_function=is_task_ready_function,
        is_finished_function=is_finished_function,
        concurrency_settings=concurrency_settings,
        autoscale_interval=timedelta(seconds=0.1),
        desired_concurrency_ratio=1,
    )

    pool_run_task = asyncio.create_task(pool.run(), name='pool run task')
    try:
        # Sample desired concurrency five times, 0.1 s apart (the same period as the autoscale interval).
        remaining_checks = 5
        while remaining_checks > 0:
            assert pool.desired_concurrency == 2
            await asyncio.sleep(0.1)
            remaining_checks -= 1
    finally:
        pool_run_task.cancel()
        try:
            await pool_run_task
        except asyncio.CancelledError:
            # Cancellation is the expected way for this pool run to end.
            pass
264+
265+
211266
async def test_max_tasks_per_minute_works(system_status: SystemStatus | Mock) -> None:
212267
done_count = 0
213268

0 commit comments

Comments
 (0)