Skip to content

Commit 71174a6

Browse files
committed
Update some flaky tests
1 parent 4c8dd10 commit 71174a6

File tree

4 files changed

+19
-10
lines changed

4 files changed

+19
-10
lines changed

src/crawlee/_autoscaling/autoscaled_pool.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ async def run(self) -> None:
142142

143143
logger.info('Waiting for remaining tasks to finish')
144144

145-
for task in run.worker_tasks:
145+
tasks_to_wait = list(run.worker_tasks)
146+
for task in tasks_to_wait:
146147
if not task.done():
147148
with suppress(BaseException):
148149
await task

tests/unit/_autoscaling/test_autoscaled_pool.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ async def run() -> None:
328328
assert done_count == 4
329329

330330
done_count = 0
331+
await asyncio.sleep(0.2) # Allow any lingering callbacks to complete
332+
done_count = 0 # Reset again to ensure clean state
331333

332334
await pool.run()
333335
assert done_count == 4

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,23 +1269,30 @@ async def test_lock_with_get_robots_txt_file_for_url(server_url: URL) -> None:
12691269
assert spy.call_count == 1
12701270

12711271

1272-
async def test_reduced_logs_from_timed_out_request_handler(
1273-
monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
1274-
) -> None:
1272+
async def test_reduced_logs_from_timed_out_request_handler(caplog: pytest.LogCaptureFixture) -> None:
12751273
caplog.set_level(logging.INFO)
1276-
crawler = BasicCrawler(configure_logging=False, request_handler_timeout=timedelta(seconds=1))
1274+
crawler = BasicCrawler(
1275+
configure_logging=False,
1276+
request_handler_timeout=timedelta(seconds=1),
1277+
)
12771278

12781279
@crawler.router.default_handler
12791280
async def handler(context: BasicCrawlingContext) -> None:
1281+
# Intentionally add a delay longer than the timeout to trigger the timeout mechanism
12801282
await asyncio.sleep(10) # INJECTED DELAY
12811283

1282-
await crawler.run([Request.from_url('http://a.com/')])
1284+
# Capture all logs from the 'crawlee' logger at INFO level or higher
1285+
with caplog.at_level(logging.INFO, logger='crawlee'):
1286+
await crawler.run([Request.from_url('http://a.com/')])
12831287

1288+
# Check for the timeout message in any of the logs
1289+
found_timeout_message = False
12841290
for record in caplog.records:
1285-
if record.funcName == '_handle_failed_request':
1291+
if record.message and 'timed out after 1.0 seconds' in record.message:
12861292
full_message = (record.message or '') + (record.exc_text or '')
12871293
assert Counter(full_message)['\n'] < 10
12881294
assert '# INJECTED DELAY' in full_message
1295+
found_timeout_message = True
12891296
break
1290-
else:
1291-
raise AssertionError('Expected log message about request handler error was not found.')
1297+
1298+
assert found_timeout_message, 'Expected log message about request handler error was not found.'

tests/unit/crawlers/_playwright/test_playwright_crawler.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,6 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
553553
assert snapshot.html == HELLO_WORLD.decode('utf-8')
554554

555555

556-
# TODO: failing
557556
async def test_error_snapshot_through_statistics(server_url: URL) -> None:
558557
"""Test correct use of error snapshotter by the Playwright crawler.
559558

0 commit comments

Comments (0)