Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ dev = [
"pydoc-markdown~=4.8.0",
"pytest-asyncio~=1.1.0",
"pytest-cov~=6.2.0",
"pytest-httpserver>=1.1.3",
"pytest-timeout>=2.4.0",
"pytest-xdist~=3.8.0",
"pytest~=8.4.0",
"respx~=0.22.0",
"ruff~=0.12.0",
"setuptools", # setuptools are used by pytest but not explicitly required
"uvicorn[standard]",
"werkzeug~=3.0.0", # Werkzeug is used by httpserver
"yarl~=1.20.0", # yarl is used by crawlee
]

[tool.hatch.build.targets.wheel]
Expand Down
65 changes: 37 additions & 28 deletions tests/unit/actor/test_actor_create_proxy_configuration.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from unittest.mock import Mock

import httpx
import pytest

from apify_client import ApifyClientAsync
Expand All @@ -11,7 +11,8 @@
from apify import Actor

if TYPE_CHECKING:
from respx import MockRouter
from pytest_httpserver import HTTPServer
from werkzeug import Request, Response

from ..conftest import ApifyClientAsyncPatcher

Expand All @@ -24,25 +25,29 @@ def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) ->
return ApifyClientAsync()


@pytest.mark.usefixtures('patched_httpx_client')
async def test_basic_proxy_configuration_creation(
monkeypatch: pytest.MonkeyPatch,
respx_mock: MockRouter,
httpserver: HTTPServer,
patched_apify_client: ApifyClientAsync,
) -> None:
dummy_proxy_status_url = 'http://dummy-proxy-status-url.com'
dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN')
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)

route = respx_mock.get(dummy_proxy_status_url)
route.mock(
httpx.Response(
200,
json={
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
)
call_mock = Mock()

def request_handler(request: Request, response: Response) -> Response:
call_mock(request.url)
return response

httpserver.expect_oneshot_request('/').with_post_hook(request_handler).respond_with_json(
{
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
status=200,
)

groups = ['GROUP1', 'GROUP2']
Expand All @@ -58,32 +63,36 @@ async def test_basic_proxy_configuration_creation(
assert proxy_configuration._country_code == country_code

assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore[attr-defined]
assert len(route.calls) == 1
assert call_mock.call_count == 1

await Actor.exit()


@pytest.mark.usefixtures('patched_httpx_client')
async def test_proxy_configuration_with_actor_proxy_input(
monkeypatch: pytest.MonkeyPatch,
respx_mock: MockRouter,
httpserver: HTTPServer,
patched_apify_client: ApifyClientAsync,
) -> None:
dummy_proxy_status_url = 'http://dummy-proxy-status-url.com'
dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
dummy_proxy_url = 'http://dummy-proxy.com:8000'

monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN')
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)

route = respx_mock.get(dummy_proxy_status_url)
route.mock(
httpx.Response(
200,
json={
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
)
call_mock = Mock()

def request_handler(request: Request, response: Response) -> Response:
call_mock(request.url)
return response

httpserver.expect_request('/').with_post_hook(request_handler).respond_with_json(
{
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
status=200,
)

await Actor.init()
Expand Down Expand Up @@ -138,6 +147,6 @@ async def test_proxy_configuration_with_actor_proxy_input(
)

assert len(patched_apify_client.calls['user']['get']) == 2 # type: ignore[attr-defined]
assert len(route.calls) == 2
assert call_mock.call_count == 2

await Actor.exit()
57 changes: 35 additions & 22 deletions tests/unit/actor/test_request_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@

import re
from dataclasses import dataclass
from typing import Any, get_args
from typing import TYPE_CHECKING, Any, get_args
from unittest.mock import Mock

import pytest
import respx
from httpx import Response
from yarl import URL

from crawlee._request import UserData
from crawlee._types import HttpMethod

from apify.storages._request_list import URL_NO_COMMAS_REGEX, RequestList

if TYPE_CHECKING:
from pytest_httpserver import HTTPServer
from werkzeug import Request, Response


@pytest.mark.parametrize(
argnames='request_method',
Expand Down Expand Up @@ -67,37 +71,48 @@ async def test_request_list_open_request_types(
assert request.headers.root == optional_input.get('headers', {})


@respx.mock
async def test_request_list_open_from_url_correctly_send_requests() -> None:
async def test_request_list_open_from_url_correctly_send_requests(httpserver: HTTPServer) -> None:
"""Test that requests are sent to expected urls."""
request_list_sources_input: list[dict[str, Any]] = [
{
'requestsFromUrl': 'https://abc.dev/file.txt',
'requestsFromUrl': httpserver.url_for('/file.txt'),
'method': 'GET',
},
{
'requestsFromUrl': 'https://www.abc.dev/file2',
'requestsFromUrl': httpserver.url_for('/file2'),
'method': 'PUT',
},
{
'requestsFromUrl': 'https://www.something.som',
'requestsFromUrl': httpserver.url_for('/something'),
'method': 'POST',
'headers': {'key': 'value'},
'payload': 'some_payload',
'userData': {'another_key': 'another_value'},
},
]

routes = [respx.get(entry['requestsFromUrl']) for entry in request_list_sources_input]
routes: dict[str, Mock] = {}

def request_handler(request: Request, response: Response) -> Response:
routes[request.url]()
return response

for entry in request_list_sources_input:
path = str(URL(entry['requestsFromUrl']).path)
httpserver.expect_oneshot_request(path).with_post_hook(request_handler).respond_with_data(status=200)
routes[entry['requestsFromUrl']] = Mock()

await RequestList.open(request_list_sources_input=request_list_sources_input)

for route in routes:
assert route.called
assert len(routes) == len(request_list_sources_input)

for entity in request_list_sources_input:
entity_url = entity['requestsFromUrl']
assert entity_url in routes
assert routes[entity_url].called

@respx.mock
async def test_request_list_open_from_url() -> None:

async def test_request_list_open_from_url(httpserver: HTTPServer) -> None:
"""Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
expected_simple_url = 'https://www.someurl.com'
expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
Expand All @@ -111,11 +126,11 @@ class MockedUrlInfo:

mocked_urls = (
MockedUrlInfo(
'https://abc.dev/file.txt',
httpserver.url_for('/file.txt'),
'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1),
),
MockedUrlInfo(
'https://www.abc.dev/file2',
httpserver.url_for('/file2'),
'some stuff{} more stuff{} www.false_positive.com'.format(*expected_remote_urls_2),
),
)
Expand All @@ -132,7 +147,8 @@ class MockedUrlInfo:
},
]
for mocked_url in mocked_urls:
respx.get(mocked_url.url).mock(return_value=Response(200, text=mocked_url.response_text))
path = str(URL(mocked_url.url).path)
httpserver.expect_oneshot_request(path).respond_with_data(status=200, response_data=mocked_url.response_text)

request_list = await RequestList.open(request_list_sources_input=request_list_sources_input)
generated_requests = []
Expand All @@ -143,23 +159,20 @@ class MockedUrlInfo:
assert {generated_request.url for generated_request in generated_requests} == expected_urls


@respx.mock
async def test_request_list_open_from_url_additional_inputs() -> None:
async def test_request_list_open_from_url_additional_inputs(httpserver: HTTPServer) -> None:
"""Test that all generated request properties are correctly populated from input values."""
expected_url = 'https://www.someurl.com'
example_start_url_input: dict[str, Any] = {
'requestsFromUrl': 'https://crawlee.dev/file.txt',
'requestsFromUrl': httpserver.url_for('/file.txt'),
'method': 'POST',
'headers': {'key': 'value'},
'payload': 'some_payload',
'userData': {'another_key': 'another_value'},
}

respx.get(example_start_url_input['requestsFromUrl']).mock(return_value=Response(200, text=expected_url))
httpserver.expect_oneshot_request('/file.txt').respond_with_data(status=200, response_data=expected_url)

request_list = await RequestList.open(request_list_sources_input=[example_start_url_input])
request = await request_list.fetch_next_request()

# Check all properties correctly created for request
assert request
assert request.url == expected_url
Expand Down
39 changes: 38 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import inspect
import os
from collections import defaultdict
from logging import getLogger
from typing import TYPE_CHECKING, Any, get_type_hints

import httpx
import pytest
from pytest_httpserver import HTTPServer

from apify_client import ApifyClientAsync
from apify_shared.consts import ApifyEnvVars
Expand All @@ -18,7 +21,7 @@
import apify._actor

if TYPE_CHECKING:
from collections.abc import Callable
from collections.abc import Callable, Iterator
from pathlib import Path


Expand Down Expand Up @@ -187,3 +190,37 @@ def memory_storage_client() -> MemoryStorageClient:
configuration.write_metadata = True

return MemoryStorageClient.from_config(configuration)


@pytest.fixture(scope='session')
def make_httpserver() -> Iterator[HTTPServer]:
    """Start one local HTTP server that is shared by the whole test session.

    The `werkzeug` logger is disabled before the server is created (presumably to keep
    per-request log lines out of the test output - confirm). The server is bound to
    127.0.0.1 in threaded mode, started, and yielded. On teardown `clear()` is called on
    it, and it is stopped if it still reports itself as running.
    """
    getLogger('werkzeug').disabled = True

    http_server = HTTPServer(host='127.0.0.1', threaded=True)
    http_server.start()

    yield http_server

    http_server.clear()  # type: ignore[no-untyped-call]
    if not http_server.is_running():
        # Already stopped elsewhere; nothing left to shut down.
        return
    http_server.stop()  # type: ignore[no-untyped-call]


@pytest.fixture
def httpserver(make_httpserver: HTTPServer) -> Iterator[HTTPServer]:
    """Hand the session-wide HTTP server to a single test.

    Yields the server produced by `make_httpserver` and calls `clear()` on it after the
    test finishes, so the same server instance can be reused by the next test.
    """
    yield make_httpserver
    make_httpserver.clear()  # type: ignore[no-untyped-call]


@pytest.fixture
def patched_httpx_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """Patch `httpx.AsyncClient` with a subclass that drops the `proxy` argument.

    While the fixture is active, every `httpx.AsyncClient(...)` looked up through the
    `httpx` module is built by `ProxylessAsyncClient`, which removes the `proxy` keyword
    argument before delegating to the real constructor.
    """

    class ProxylessAsyncClient(httpx.AsyncClient):
        """`httpx.AsyncClient` that discards any `proxy` keyword argument."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            # Silently ignore a missing key: callers may or may not pass `proxy`.
            kwargs.pop('proxy', None)
            super().__init__(*args, **kwargs)

    # No explicit `monkeypatch.undo()` here: the `monkeypatch` fixture reverts its changes
    # on teardown by itself, and `undo()` would also roll back patches that the test or
    # other fixtures made through the same `monkeypatch` instance.
    monkeypatch.setattr(httpx, 'AsyncClient', ProxylessAsyncClient)
Loading
Loading