diff --git a/pyproject.toml b/pyproject.toml
index de32bba8..0bfcc017 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,13 +39,14 @@ dependencies = [
     "crawlee@git+https://github.com/apify/crawlee-python.git@master",
     "cachetools>=5.5.0",
     "cryptography>=42.0.0",
-    "httpx>=0.27.0",
     # TODO: ensure compatibility with the latest version of lazy-object-proxy
     # https://github.com/apify/apify-sdk-python/issues/460
+    "impit>=0.5.3",
     "lazy-object-proxy<1.11.0",
     "more_itertools>=10.2.0",
     "typing-extensions>=4.1.0",
     "websockets>=14.0",
+    "yarl>=1.18.0",
 ]
 
 [project.optional-dependencies]
@@ -81,7 +82,6 @@ dev = [
     "types-cachetools~=6.0.0.20250525",
     "uvicorn[standard]",
     "werkzeug~=3.1.0", # Werkzeug is used by httpserver
-    "yarl~=1.20.0", # yarl is used by crawlee
 ]
 
 [tool.hatch.build.targets.wheel]
@@ -213,12 +213,12 @@ exclude = []
 
 [[tool.mypy.overrides]]
 module = [
-    'bs4',
-    'lazy_object_proxy',
-    'nest_asyncio',
-    'playwright.*',
-    'scrapy.*',
-    'selenium.*',
+    'bs4', # Documentation
+    'httpx', # Documentation
+    'lazy_object_proxy', # Untyped and stubs not available
+    'playwright.*', # Documentation
+    'scrapy.*', # Untyped and stubs not available
+    'selenium.*', # Documentation
 ]
 ignore_missing_imports = true
diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py
index 730c76ab..a654cdd8 100644
--- a/src/apify/_proxy_configuration.py
+++ b/src/apify/_proxy_configuration.py
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
 import ipaddress
+import json
 import re
 from dataclasses import dataclass, field
 from re import Pattern
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin, urlparse
 
-import httpx
+import impit
+from yarl import URL
 
 from apify_shared.consts import ApifyEnvVars
 from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration
@@ -231,7 +233,7 @@ async def new_proxy_info(
             return None
 
         if self._uses_apify_proxy:
-            parsed_url = httpx.URL(proxy_info.url)
+            parsed_url = URL(proxy_info.url)
             username = self._get_username(session_id)
 
             return ProxyInfo(
@@ -275,11 +277,11 @@ async def _check_access(self) -> None:
             return
 
         status = None
-        async with httpx.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
+        async with impit.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
             for _ in range(2):
                 try:
                     response = await client.get(proxy_status_url)
-                    status = response.json()
+                    status = json.loads(response.text)
                     break
                 except Exception:  # noqa: S110
                     # retry on connection errors
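The substance of the `_proxy_configuration.py` change is the swap from `httpx.AsyncClient` to `impit.AsyncClient` in `_check_access`, with the response body now decoded via `json.loads(response.text)` instead of `response.json()`. A minimal standalone sketch of the new pattern, using only the constructor arguments and attributes visible in the diff (`proxy=`, `timeout=`, `.get()`, `.text`); the URLs and return shape are illustrative:

```python
from __future__ import annotations

import asyncio
import json
from typing import Any

import impit


async def check_proxy_access(proxy_url: str, proxy_status_url: str) -> Any:
    """Fetch the proxy status page through the proxy, mirroring _check_access."""
    async with impit.AsyncClient(proxy=proxy_url, timeout=10) as client:
        for _ in range(2):  # two attempts, as in the diff's retry loop
            try:
                response = await client.get(proxy_status_url)
                # The diff decodes the body manually instead of calling response.json().
                return json.loads(response.text)
            except Exception:  # noqa: S112
                continue  # retry on connection errors
    return None


if __name__ == '__main__':
    # Illustrative placeholders, not real endpoints.
    status = asyncio.run(
        check_proxy_access('http://user:pass@localhost:8000', 'http://localhost:8080/')
    )
    print(status)
```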
diff --git a/src/apify/log.py b/src/apify/log.py
index d5440345..ac32fcd4 100644
--- a/src/apify/log.py
+++ b/src/apify/log.py
@@ -27,13 +27,6 @@ def _configure_logging() -> None:
     else:
         apify_client_logger.setLevel(level)
 
-    # Silence HTTPX logger unless debug logging is requested
-    httpx_logger = logging.getLogger('httpx')
-    if level > logging.DEBUG:
-        httpx_logger.setLevel(logging.WARNING)
-    else:
-        httpx_logger.setLevel(level)
-
     # Use configured log level for apify logger
     apify_logger = logging.getLogger('apify')
     configure_logger(apify_logger, remove_old_handlers=True)
diff --git a/src/apify/scrapy/_logging_config.py b/src/apify/scrapy/_logging_config.py
index caf30313..c72f2619 100644
--- a/src/apify/scrapy/_logging_config.py
+++ b/src/apify/scrapy/_logging_config.py
@@ -10,7 +10,7 @@
 
 # Define logger names.
 _PRIMARY_LOGGERS = ['apify', 'apify_client', 'scrapy']
-_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'httpx', 'protego', 'twisted']
+_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted']
 _ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS
 
 
@@ -37,9 +37,6 @@ def initialize_logging() -> None:
     for logger_name in [None, *_ALL_LOGGERS]:
         _configure_logger(logger_name, logging_level, handler)
 
-    # Set the 'httpx' logger to a less verbose level.
-    logging.getLogger('httpx').setLevel('WARNING')
-
     # Monkey-patch Scrapy's logging configuration to re-apply our settings.
     original_configure_logging = scrapy_logging.configure_logging
 
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 07b6c758..66ced9c3 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -96,14 +96,9 @@ def apify_token() -> str:
     return api_token
 
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def apify_client_async(apify_token: str) -> ApifyClientAsync:
-    """Create an instance of the ApifyClientAsync.
-
-    This fixture can't be session-scoped, because then you start getting `RuntimeError: Event loop is closed` errors,
-    because `httpx.AsyncClient` in `ApifyClientAsync` tries to reuse the same event loop across requests,
-    but `pytest-asyncio` closes the event loop after each test, and uses a new one for the next test.
-    """
+    """Create an instance of the ApifyClientAsync."""
     api_url = os.getenv(_API_URL_ENV_VAR)
     return ApifyClientAsync(apify_token, api_url=api_url)
 
diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py
index a079ba38..4fd49a9e 100644
--- a/tests/unit/actor/test_actor_create_proxy_configuration.py
+++ b/tests/unit/actor/test_actor_create_proxy_configuration.py
@@ -25,7 +25,7 @@ def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) ->
     return ApifyClientAsync()
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_basic_proxy_configuration_creation(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -68,7 +68,7 @@ def request_handler(request: Request, response: Response) -> Response:
     await Actor.exit()
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_proxy_configuration_with_actor_proxy_input(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
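Worth spelling out why the integration fixture above can now be session-scoped: per the deleted docstring, `httpx.AsyncClient` inside `ApifyClientAsync` kept using the event loop it was created on, while `pytest-asyncio` closes that loop after each test, so a shared client eventually raised `RuntimeError: Event loop is closed`. With httpx out of the dependency chain, that constraint presumably goes away. A sketch of the resulting shape, assuming the underlying client no longer binds to a single loop; `_API_URL_ENV_VAR` is the conftest constant referenced in the diff, whose real value is defined elsewhere in the file:

```python
import os

import pytest
from apify_client import ApifyClientAsync

_API_URL_ENV_VAR = 'APIFY_TEST_API_URL'  # placeholder name; the real constant lives elsewhere in conftest.py


@pytest.fixture(scope='session')
def apify_client_async(apify_token: str) -> ApifyClientAsync:
    """Create a single ApifyClientAsync shared by every test in the session."""
    # Session scope is safe only because the HTTP client is no longer pinned
    # to the event loop that was current at construction time.
    api_url = os.getenv(_API_URL_ENV_VAR)
    return ApifyClientAsync(apify_token, api_url=api_url)
```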
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index d44aa986..02b8868e 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -7,7 +7,7 @@
 from logging import getLogger
 from typing import TYPE_CHECKING, Any, get_type_hints
 
-import httpx
+import impit
 import pytest
 from pytest_httpserver import HTTPServer
 
@@ -193,14 +193,15 @@ def httpserver(make_httpserver: HTTPServer) -> Iterator[HTTPServer]:
 
 
 @pytest.fixture
-def patched_httpx_client(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
-    """Patch httpx client to drop proxy settings."""
+def patched_impit_client(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
+    """Patch impit client to drop proxy settings."""
 
-    class ProxylessAsyncClient(httpx.AsyncClient):
-        def __init__(self, *args: Any, **kwargs: Any) -> None:
-            kwargs.pop('proxy', None)
-            super().__init__(*args, **kwargs)
+    original_async_client = impit.AsyncClient
 
-    monkeypatch.setattr(httpx, 'AsyncClient', ProxylessAsyncClient)
+    def proxyless_async_client(*args: Any, **kwargs: Any) -> impit.AsyncClient:
+        kwargs.pop('proxy', None)
+        return original_async_client(*args, **kwargs)
+
+    monkeypatch.setattr(impit, 'AsyncClient', proxyless_async_client)
     yield
     monkeypatch.undo()
diff --git a/tests/unit/test_proxy_configuration.py b/tests/unit/test_proxy_configuration.py
index 0a232ad8..506f586a 100644
--- a/tests/unit/test_proxy_configuration.py
+++ b/tests/unit/test_proxy_configuration.py
@@ -377,7 +377,7 @@ async def test_new_proxy_info_rotating_urls_with_sessions() -> None:
         assert proxy_info.url == proxy_urls[0]
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_with_valid_configuration(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -420,7 +420,7 @@ async def test_initialize_without_password_or_token() -> None:
         await proxy_configuration.initialize()
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_with_manual_password(monkeypatch: pytest.MonkeyPatch, httpserver: HTTPServer) -> None:
     dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
     monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)
@@ -442,7 +442,7 @@ async def test_initialize_with_manual_password(monkeypatch: pytest.MonkeyPatch,
     assert proxy_configuration.is_man_in_the_middle is False
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_prefering_password_from_env_over_calling_api(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -471,7 +471,7 @@ async def test_initialize_prefering_password_from_env_over_calling_api(
     assert len(patched_apify_client.calls['user']['get']) == 0  # type: ignore[attr-defined]
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 @pytest.mark.skip(reason='There are issues with log propagation to caplog, see issue #462.')
 async def test_initialize_with_manual_password_different_than_user_one(
     monkeypatch: pytest.MonkeyPatch,
@@ -506,7 +506,7 @@ async def test_initialize_with_manual_password_different_than_user_one(
     assert 'The Apify Proxy password you provided belongs to a different user' in caplog.records[0].message
 
 
-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_when_not_connected(monkeypatch: pytest.MonkeyPatch, httpserver: HTTPServer) -> None:
     dummy_connection_error = 'DUMMY_CONNECTION_ERROR'
     dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
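One detail of the rewritten `patched_impit_client` fixture deserves a note: the httpx version intercepted the proxy setting by subclassing `httpx.AsyncClient`, whereas the impit version wraps the constructor in a plain factory function, presumably because `impit.AsyncClient` is a compiled extension type that is less friendly to subclassing (an assumption; the diff does not state the reason). The wrap-and-patch idea itself is generic; a self-contained sketch with a stand-in class, all names illustrative:

```python
from __future__ import annotations

import sys
from typing import Any

import pytest


class FakeAsyncClient:
    """Stand-in for a client whose constructor we want to sanitize."""

    def __init__(self, *, proxy: str | None = None) -> None:
        self.proxy = proxy


def test_proxy_kwarg_is_dropped(monkeypatch: pytest.MonkeyPatch) -> None:
    module = sys.modules[__name__]
    original = FakeAsyncClient  # keep a reference to the unpatched constructor

    def proxyless(*args: Any, **kwargs: Any) -> FakeAsyncClient:
        kwargs.pop('proxy', None)  # same trick as patched_impit_client
        return original(*args, **kwargs)

    monkeypatch.setattr(module, 'FakeAsyncClient', proxyless)

    client = FakeAsyncClient(proxy='http://localhost:8080')  # resolves to the patch
    assert client.proxy is None
```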
diff --git a/uv.lock b/uv.lock
index 36bfa94a..46c1a472 100644
--- a/uv.lock
+++ b/uv.lock
@@ -36,11 +36,12 @@ dependencies = [
     { name = "cachetools" },
     { name = "crawlee" },
     { name = "cryptography" },
-    { name = "httpx" },
+    { name = "impit" },
     { name = "lazy-object-proxy" },
     { name = "more-itertools" },
     { name = "typing-extensions" },
     { name = "websockets" },
+    { name = "yarl" },
 ]
 
 [package.optional-dependencies]
@@ -68,7 +69,6 @@ dev = [
     { name = "types-cachetools" },
     { name = "uvicorn", extra = ["standard"] },
     { name = "werkzeug" },
-    { name = "yarl" },
 ]
 
 [package.metadata]
@@ -78,12 +78,13 @@ requires-dist = [
     { name = "cachetools", specifier = ">=5.5.0" },
     { name = "crawlee", git = "https://github.com/apify/crawlee-python.git?rev=master" },
     { name = "cryptography", specifier = ">=42.0.0" },
-    { name = "httpx", specifier = ">=0.27.0" },
+    { name = "impit", specifier = ">=0.5.3" },
     { name = "lazy-object-proxy", specifier = "<1.11.0" },
     { name = "more-itertools", specifier = ">=10.2.0" },
     { name = "scrapy", marker = "extra == 'scrapy'", specifier = ">=2.11.0" },
     { name = "typing-extensions", specifier = ">=4.1.0" },
     { name = "websockets", specifier = ">=14.0" },
+    { name = "yarl", specifier = ">=1.18.0" },
 ]
 provides-extras = ["scrapy"]
 
@@ -107,7 +108,6 @@ dev = [
     { name = "types-cachetools", specifier = "~=6.0.0.20250525" },
     { name = "uvicorn", extras = ["standard"] },
     { name = "werkzeug", specifier = "~=3.1.0" },
-    { name = "yarl", specifier = "~=1.20.0" },
 ]
 
 [[package]]
@@ -724,19 +724,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
-[[package]]
-name = "httpcore"
-version = "1.0.9"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "certifi" },
-    { name = "h11" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
-]
-
 [[package]]
 name = "httptools"
 version = "0.6.4"
@@ -773,21 +760,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4d/dc/7decab5c404d1d2cdc1bb330b1bf70e83d6af0396fd4fc76fc60c0d522bf/httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8", size = 87682, upload-time = "2024-10-16T19:44:46.46Z" },
 ]
 
-[[package]]
-name = "httpx"
-version = "0.28.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "anyio" },
-    { name = "certifi" },
-    { name = "httpcore" },
-    { name = "idna" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
-]
-
 [[package]]
 name = "hyperlink"
 version = "21.0.0"
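Finally, on the new direct `yarl` dependency: within `_proxy_configuration.py`, `httpx.URL` was used only to parse the proxy URL in `new_proxy_info`, and `yarl.URL` covers that job with a slightly different attribute surface (for instance `user` where `httpx.URL` has `username`). A quick sketch of the parsing, with a made-up URL in the shape Apify Proxy URLs take:

```python
from yarl import URL

# Illustrative proxy URL; credentials and host are placeholders.
parsed_url = URL('http://groups-RESIDENTIAL:secret@proxy.example.com:8000')

assert parsed_url.scheme == 'http'
assert parsed_url.host == 'proxy.example.com'
assert parsed_url.port == 8000
assert parsed_url.user == 'groups-RESIDENTIAL'  # httpx.URL calls this .username
assert parsed_url.password == 'secret'
```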