From 6afdb345275f643419b09f4da086e78a581330b7 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Thu, 4 Dec 2025 00:16:46 +0000 Subject: [PATCH 1/3] reduce the number of warnings in tests --- pyproject.toml | 4 ++++ src/crawlee/_utils/context.py | 4 ++-- src/crawlee/_utils/recurring_task.py | 3 ++- src/crawlee/events/_event_manager.py | 4 +--- tests/unit/_utils/test_system.py | 3 +++ .../test_playwright_browser_controller.py | 6 +++++- tests/unit/conftest.py | 1 + tests/unit/storages/test_key_value_store.py | 21 +++++++++++-------- 8 files changed, 30 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f5a619268..ae6c029e51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -221,6 +221,10 @@ timeout = 300 markers = [ "run_alone: marks tests that must run in isolation", ] +filterwarnings = [ + "ignore:websockets.legacy is deprecated:DeprecationWarning", + "ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning", +] [tool.mypy] python_version = "3.10" diff --git a/src/crawlee/_utils/context.py b/src/crawlee/_utils/context.py index 43c95f62e2..fb750cf0e7 100644 --- a/src/crawlee/_utils/context.py +++ b/src/crawlee/_utils/context.py @@ -1,6 +1,6 @@ from __future__ import annotations -import asyncio +import inspect from collections.abc import Callable from functools import wraps from typing import Any, TypeVar @@ -44,4 +44,4 @@ async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: return await method(self, *args, **kwargs) - return async_wrapper if asyncio.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value] + return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value] diff --git a/src/crawlee/_utils/recurring_task.py b/src/crawlee/_utils/recurring_task.py index d0c20249e9..3a6553b6c0 100644 --- a/src/crawlee/_utils/recurring_task.py +++ b/src/crawlee/_utils/recurring_task.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import inspect from logging import getLogger from typing import TYPE_CHECKING @@ -49,7 +50,7 @@ async def _wrapper(self) -> None: """ sleep_time_secs = self.delay.total_seconds() while True: - await self.func() if asyncio.iscoroutinefunction(self.func) else self.func() + await self.func() if inspect.iscoroutinefunction(self.func) else self.func() await asyncio.sleep(sleep_time_secs) def start(self) -> None: diff --git a/src/crawlee/events/_event_manager.py b/src/crawlee/events/_event_manager.py index 65a41dd0d0..c623b341c1 100644 --- a/src/crawlee/events/_event_manager.py +++ b/src/crawlee/events/_event_manager.py @@ -174,11 +174,9 @@ async def listener_wrapper(event_data: EventData) -> None: # to avoid blocking the event loop coro = ( listener(*bound_args.args, **bound_args.kwargs) - if asyncio.iscoroutinefunction(listener) + if inspect.iscoroutinefunction(listener) else asyncio.to_thread(cast('Callable[..., None]', listener), *bound_args.args, **bound_args.kwargs) ) - # Note: use `asyncio.iscoroutinefunction` rather then `inspect.iscoroutinefunction` since it works with - # unittests.mock.AsyncMock. See https://github.com/python/cpython/issues/84753. listener_task = asyncio.create_task(coro, name=f'Task-{event.value}-{listener.__name__}') self._listener_tasks.add(listener_task) diff --git a/tests/unit/_utils/test_system.py b/tests/unit/_utils/test_system.py index 1813b151a6..2f33de5510 100644 --- a/tests/unit/_utils/test_system.py +++ b/tests/unit/_utils/test_system.py @@ -26,6 +26,9 @@ def test_get_cpu_info_returns_valid_values() -> None: assert 0 <= cpu_info.used_ratio <= 1 +# Suppress the warning because there were no problems with using `fork' in our test environment. +# However, there are serialization issues in other cases. +@pytest.mark.filterwarnings('ignore:This process .* is multi-threaded, use of fork:DeprecationWarning') @pytest.mark.skipif(sys.platform != 'linux', reason='Improved estimation available only on Linux') def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None: """Test that memory usage estimation is not overestimating memory usage by counting shared memory multiple times. diff --git a/tests/unit/browsers/test_playwright_browser_controller.py b/tests/unit/browsers/test_playwright_browser_controller.py index e7b8d517ff..7f8e513a83 100644 --- a/tests/unit/browsers/test_playwright_browser_controller.py +++ b/tests/unit/browsers/test_playwright_browser_controller.py @@ -6,7 +6,7 @@ from unittest.mock import AsyncMock import pytest -from playwright.async_api import Browser, Playwright, async_playwright +from playwright.async_api import Browser, BrowserContext, Page, Playwright, async_playwright from crawlee.browsers import PlaywrightBrowserController, PlaywrightPersistentBrowser @@ -115,6 +115,10 @@ async def test_memory_leak_on_concurrent_context_creation() -> None: # Prepare mocked browser with relevant methods and attributes mocked_browser = AsyncMock() mocked_context_launcher = AsyncMock() + mocked_context = AsyncMock(spec=BrowserContext) + + mocked_context_launcher.return_value = mocked_context + mocked_context.new_page.return_value = AsyncMock(spec=Page) async def delayed_launch_persistent_context(*args: Any, **kwargs: Any) -> Any: """Ensure that both calls to create context overlap in time.""" diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 6a22d0cf79..ed8c4a720d 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -185,6 +185,7 @@ def redirect_http_server(unused_tcp_port_factory: Callable[[], int]) -> Iterator timeout_graceful_shutdown=10, log_level='error', access_log=False, + ws='websockets-sansio', ) server = TestServer(config=config) yield from serve_in_thread(server) diff --git a/tests/unit/storages/test_key_value_store.py b/tests/unit/storages/test_key_value_store.py index 3324bbc853..5b789d2843 100644 --- a/tests/unit/storages/test_key_value_store.py +++ b/tests/unit/storages/test_key_value_store.py @@ -1095,25 +1095,28 @@ async def test_validate_name(storage_client: StorageClient, name: str, *, is_val @pytest.mark.parametrize( - 'tested_storage_client', + 'tested_storage_client_class', [ - pytest.param(MemoryStorageClient(), id='tested=MemoryStorageClient'), - pytest.param(FileSystemStorageClient(), id='tested=FileSystemStorageClient'), - pytest.param(SqlStorageClient(), id='tested=SqlStorageClient'), + pytest.param(MemoryStorageClient, id='tested=MemoryStorageClient'), + pytest.param(FileSystemStorageClient, id='tested=FileSystemStorageClient'), + pytest.param(SqlStorageClient, id='tested=SqlStorageClient'), ], ) @pytest.mark.parametrize( - 'global_storage_client', + 'global_storage_client_class', [ - pytest.param(MemoryStorageClient(), id='global=MemoryStorageClient'), - pytest.param(FileSystemStorageClient(), id='global=FileSystemStorageClient'), - pytest.param(SqlStorageClient(), id='global=SqlStorageClient'), + pytest.param(MemoryStorageClient, id='global=MemoryStorageClient'), + pytest.param(FileSystemStorageClient, id='global=FileSystemStorageClient'), + pytest.param(SqlStorageClient, id='global=SqlStorageClient'), ], ) async def test_get_auto_saved_value_various_global_clients( - tmp_path: Path, tested_storage_client: StorageClient, global_storage_client: StorageClient + tmp_path: Path, tested_storage_client_class: type[StorageClient], global_storage_client_class: type[StorageClient] ) -> None: """Ensure that persistence is working for all clients regardless of what is set in service locator.""" + tested_storage_client = tested_storage_client_class() + global_storage_client = global_storage_client_class() + service_locator.set_configuration( Configuration( storage_dir=str(tmp_path), From e95bb5b6f69bb9d26df5521d8eb7027cf2a707f0 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Thu, 4 Dec 2025 15:14:47 +0000 Subject: [PATCH 2/3] do not suppress warning about `fork` --- tests/unit/_utils/test_system.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/_utils/test_system.py b/tests/unit/_utils/test_system.py index 2f33de5510..1813b151a6 100644 --- a/tests/unit/_utils/test_system.py +++ b/tests/unit/_utils/test_system.py @@ -26,9 +26,6 @@ def test_get_cpu_info_returns_valid_values() -> None: assert 0 <= cpu_info.used_ratio <= 1 -# Suppress the warning because there were no problems with using `fork' in our test environment. -# However, there are serialization issues in other cases. -@pytest.mark.filterwarnings('ignore:This process .* is multi-threaded, use of fork:DeprecationWarning') @pytest.mark.skipif(sys.platform != 'linux', reason='Improved estimation available only on Linux') def test_memory_estimation_does_not_overestimate_due_to_shared_memory() -> None: """Test that memory usage estimation is not overestimating memory usage by counting shared memory multiple times. From 9191e4554b84f613bcbc78c77d897eb6a57a06ff Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Fri, 5 Dec 2025 15:44:42 +0200 Subject: [PATCH 3/3] Update pyproject.toml Co-authored-by: Vlada Dusek --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ae6c029e51..f28b1e0620 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -221,6 +221,9 @@ timeout = 300 markers = [ "run_alone: marks tests that must run in isolation", ] +# Ignore DeprecationWarnings coming from Uvicorn's internal imports. Uvicorn relies on deprecated +# modules from `websockets`, which triggers warnings during tests. These are safe to ignore until +# Uvicorn updates its internals. filterwarnings = [ "ignore:websockets.legacy is deprecated:DeprecationWarning", "ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning",