16 changes: 8 additions & 8 deletions pyproject.toml
@@ -39,13 +39,14 @@ dependencies = [
     "crawlee@git+https://github.com/apify/crawlee-python.git@master",
     "cachetools>=5.5.0",
     "cryptography>=42.0.0",
-    "httpx>=0.27.0",
     # TODO: ensure compatibility with the latest version of lazy-object-proxy
     # https://github.com/apify/apify-sdk-python/issues/460
+    "impit>=0.5.3",
     "lazy-object-proxy<1.11.0",
     "more_itertools>=10.2.0",
     "typing-extensions>=4.1.0",
     "websockets>=14.0",
+    "yarl>=1.18.0",
 ]

 [project.optional-dependencies]
@@ -81,7 +82,6 @@ dev = [
     "types-cachetools~=6.0.0.20250525",
     "uvicorn[standard]",
     "werkzeug~=3.1.0", # Werkzeug is used by httpserver
-    "yarl~=1.20.0", # yarl is used by crawlee
 ]

 [tool.hatch.build.targets.wheel]
@@ -213,12 +213,12 @@ exclude = []

 [[tool.mypy.overrides]]
 module = [
-    'bs4',
-    'lazy_object_proxy',
-    'nest_asyncio',
-    'playwright.*',
-    'scrapy.*',
-    'selenium.*',
+    'bs4', # Documentation
+    'httpx', # Documentation
+    'lazy_object_proxy', # Untyped and stubs not available
+    'playwright.*', # Documentation
+    'scrapy.*', # Untyped and stubs not available
+    'selenium.*', # Documentation
 ]
 ignore_missing_imports = true

10 changes: 6 additions & 4 deletions src/apify/_proxy_configuration.py
@@ -1,13 +1,15 @@
 from __future__ import annotations

 import ipaddress
+import json
 import re
 from dataclasses import dataclass, field
 from re import Pattern
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin, urlparse

-import httpx
+import impit
+from yarl import URL

 from apify_shared.consts import ApifyEnvVars
 from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration
@@ -231,7 +233,7 @@ async def new_proxy_info(
             return None

         if self._uses_apify_proxy:
-            parsed_url = httpx.URL(proxy_info.url)
+            parsed_url = URL(proxy_info.url)
             username = self._get_username(session_id)

             return ProxyInfo(
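
`yarl.URL` exposes the same components the surrounding code reads off the parsed proxy URL (scheme, host, port, credentials), so the swap from `httpx.URL` is mechanical. A minimal sketch; the proxy URL here is made up:

```python
from yarl import URL

# Hypothetical proxy URL, only to show the accessors involved.
parsed_url = URL('http://groups-RESIDENTIAL:secret@proxy.apify.com:8000')

assert parsed_url.scheme == 'http'
assert parsed_url.host == 'proxy.apify.com'
assert parsed_url.port == 8000
assert parsed_url.user == 'groups-RESIDENTIAL'
```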
@@ -275,11 +277,11 @@ async def _check_access(self) -> None:
             return

         status = None
-        async with httpx.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
+        async with impit.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
             for _ in range(2):
                 try:
                     response = await client.get(proxy_status_url)
-                    status = response.json()
+                    status = json.loads(response.text)
                     break
                 except Exception:  # noqa: S110
                     # retry on connection errors
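The client swap keeps the httpx call shape; the visible difference is that the body is decoded with `json.loads(response.text)` instead of a `response.json()` helper. A minimal sketch of the new flow, using only the impit calls that appear above (`AsyncClient(proxy=..., timeout=...)`, `.get()`, `.text`); the function name and retry shape are illustrative, mirroring `_check_access`:

```python
import json

import impit


async def fetch_proxy_status(status_url: str, proxy_url: str) -> dict | None:
    """Fetch a JSON status document through a proxy, retrying once on failure."""
    async with impit.AsyncClient(proxy=proxy_url, timeout=10) as client:
        for _ in range(2):
            try:
                response = await client.get(status_url)
                # impit responses are decoded by hand here, mirroring the
                # json.loads(response.text) call in _check_access above.
                return json.loads(response.text)
            except Exception:  # retry once on connection errors
                continue
    return None


# Usage (illustrative URLs):
# asyncio.run(fetch_proxy_status('http://localhost:8080/status', 'http://localhost:8000'))
```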
7 changes: 0 additions & 7 deletions src/apify/log.py
@@ -27,13 +27,6 @@ def _configure_logging() -> None:
     else:
         apify_client_logger.setLevel(level)

-    # Silence HTTPX logger unless debug logging is requested
-    httpx_logger = logging.getLogger('httpx')
-    if level > logging.DEBUG:
-        httpx_logger.setLevel(logging.WARNING)
-    else:
-        httpx_logger.setLevel(level)
-
     # Use configured log level for apify logger
     apify_logger = logging.getLogger('apify')
     configure_logger(apify_logger, remove_old_handlers=True)
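
Nothing replaces the deleted block, since the SDK no longer depends on httpx. For reference, the general shape of the pattern it used, with a hypothetical logger name:

```python
import logging

level = logging.INFO  # stand-in for the level resolved earlier in _configure_logging

# Cap a chatty third-party logger at WARNING unless debug logging is requested.
noisy_logger = logging.getLogger('some_chatty_library')  # hypothetical name
noisy_logger.setLevel(logging.WARNING if level > logging.DEBUG else level)
```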
5 changes: 1 addition & 4 deletions src/apify/scrapy/_logging_config.py
@@ -10,7 +10,7 @@

 # Define logger names.
 _PRIMARY_LOGGERS = ['apify', 'apify_client', 'scrapy']
-_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'httpx', 'protego', 'twisted']
+_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted']
 _ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS

@@ -37,9 +37,6 @@ def initialize_logging() -> None:
     for logger_name in [None, *_ALL_LOGGERS]:
         _configure_logger(logger_name, logging_level, handler)

-    # Set the 'httpx' logger to a less verbose level.
-    logging.getLogger('httpx').setLevel('WARNING')
-
     # Monkey-patch Scrapy's logging configuration to re-apply our settings.
     original_configure_logging = scrapy_logging.configure_logging
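For context, the monkey-patch the surviving lines set up re-applies the SDK's logging configuration after Scrapy resets it. A sketch, assuming `scrapy.utils.log` is imported as `scrapy_logging` as in the surrounding file:

```python
from typing import Any

from scrapy.utils import log as scrapy_logging

# Keep a reference to Scrapy's original configure_logging, then wrap it so
# our handler/level configuration is re-applied after every Scrapy reset.
original_configure_logging = scrapy_logging.configure_logging


def _patched_configure_logging(*args: Any, **kwargs: Any) -> None:
    original_configure_logging(*args, **kwargs)
    # ... re-apply _configure_logger(...) for all loggers here ...


scrapy_logging.configure_logging = _patched_configure_logging
```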
9 changes: 2 additions & 7 deletions tests/integration/conftest.py
@@ -96,14 +96,9 @@ def apify_token() -> str:
     return api_token


-@pytest.fixture
+@pytest.fixture(scope='session')
 def apify_client_async(apify_token: str) -> ApifyClientAsync:
-    """Create an instance of the ApifyClientAsync.
-
-    This fixture can't be session-scoped, because then you start getting `RuntimeError: Event loop is closed` errors,
-    because `httpx.AsyncClient` in `ApifyClientAsync` tries to reuse the same event loop across requests,
-    but `pytest-asyncio` closes the event loop after each test, and uses a new one for the next test.
-    """
+    """Create an instance of the ApifyClientAsync."""
     api_url = os.getenv(_API_URL_ENV_VAR)

     return ApifyClientAsync(apify_token, api_url=api_url)
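Widening the fixture to session scope works now presumably because the impit-based client stack no longer pins pooled connections to a single event loop. A sketch of the failure mode the removed docstring described (URL illustrative):

```python
import asyncio

import httpx

client = httpx.AsyncClient()


async def call_api() -> None:
    await client.get('https://example.com')


asyncio.run(call_api())  # runs on loop A, which asyncio.run() then closes
# A second asyncio.run(call_api()) would reuse connections bound to loop A
# and raise `RuntimeError: Event loop is closed` - the old reason this
# fixture had to stay function-scoped.
```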
4 changes: 2 additions & 2 deletions tests/unit/actor/test_actor_create_proxy_configuration.py
@@ -25,7 +25,7 @@ def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) ->
     return ApifyClientAsync()


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_basic_proxy_configuration_creation(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -68,7 +68,7 @@ def request_handler(request: Request, response: Response) -> Response:
     await Actor.exit()


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_proxy_configuration_with_actor_proxy_input(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
17 changes: 9 additions & 8 deletions tests/unit/conftest.py
@@ -7,7 +7,7 @@
 from logging import getLogger
 from typing import TYPE_CHECKING, Any, get_type_hints

-import httpx
+import impit
 import pytest
 from pytest_httpserver import HTTPServer

@@ -193,14 +193,15 @@ def httpserver(make_httpserver: HTTPServer) -> Iterator[HTTPServer]:


 @pytest.fixture
-def patched_httpx_client(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
-    """Patch httpx client to drop proxy settings."""
+def patched_impit_client(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
+    """Patch impit client to drop proxy settings."""

-    class ProxylessAsyncClient(httpx.AsyncClient):
-        def __init__(self, *args: Any, **kwargs: Any) -> None:
-            kwargs.pop('proxy', None)
-            super().__init__(*args, **kwargs)
+    original_async_client = impit.AsyncClient
+
+    def proxyless_async_client(*args: Any, **kwargs: Any) -> impit.AsyncClient:
+        kwargs.pop('proxy', None)
+        return original_async_client(*args, **kwargs)

-    monkeypatch.setattr(httpx, 'AsyncClient', ProxylessAsyncClient)
+    monkeypatch.setattr(impit, 'AsyncClient', proxyless_async_client)
     yield
     monkeypatch.undo()
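
Note the switch from a subclass to a factory function: presumably `impit.AsyncClient`, as a compiled extension type, is easier to patch through a wrapper than through inheritance. A hypothetical test exercising the renamed fixture:

```python
import impit
import pytest


@pytest.mark.usefixtures('patched_impit_client')
async def test_proxy_kwarg_is_dropped() -> None:
    # With the fixture active, the `proxy` kwarg is discarded before
    # construction, so even an unreachable proxy URL cannot break it.
    client = impit.AsyncClient(proxy='http://127.0.0.1:9')
    assert client is not None
```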
10 changes: 5 additions & 5 deletions tests/unit/test_proxy_configuration.py
@@ -377,7 +377,7 @@ async def test_new_proxy_info_rotating_urls_with_sessions() -> None:
         assert proxy_info.url == proxy_urls[0]


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_with_valid_configuration(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -420,7 +420,7 @@ async def test_initialize_without_password_or_token() -> None:
         await proxy_configuration.initialize()


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_with_manual_password(monkeypatch: pytest.MonkeyPatch, httpserver: HTTPServer) -> None:
     dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
     monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)
@@ -442,7 +442,7 @@ async def test_initialize_with_manual_password(monkeypatch: pytest.MonkeyPatch,
     assert proxy_configuration.is_man_in_the_middle is False


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_prefering_password_from_env_over_calling_api(
     monkeypatch: pytest.MonkeyPatch,
     httpserver: HTTPServer,
@@ -471,7 +471,7 @@ async def test_initialize_prefering_password_from_env_over_calling_api(
     assert len(patched_apify_client.calls['user']['get']) == 0  # type: ignore[attr-defined]


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 @pytest.mark.skip(reason='There are issues with log propagation to caplog, see issue #462.')
 async def test_initialize_with_manual_password_different_than_user_one(
     monkeypatch: pytest.MonkeyPatch,
@@ -506,7 +506,7 @@ async def test_initialize_with_manual_password_different_than_user_one(
     assert 'The Apify Proxy password you provided belongs to a different user' in caplog.records[0].message


-@pytest.mark.usefixtures('patched_httpx_client')
+@pytest.mark.usefixtures('patched_impit_client')
 async def test_initialize_when_not_connected(monkeypatch: pytest.MonkeyPatch, httpserver: HTTPServer) -> None:
     dummy_connection_error = 'DUMMY_CONNECTION_ERROR'
     dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
36 changes: 4 additions & 32 deletions uv.lock
