Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ dev = [
"pydoc-markdown~=4.8.0",
"pytest-asyncio~=1.1.0",
"pytest-cov~=6.2.0",
"pytest-httpserver>=1.1.3",
"pytest-timeout>=2.4.0",
"pytest-xdist~=3.8.0",
"pytest~=8.4.0",
"respx~=0.22.0",
"ruff~=0.12.0",
"setuptools", # setuptools are used by pytest but not explicitly required
"uvicorn[standard]",
"werkzeug~=3.0.0", # Werkzeug is used by httpserver
"yarl~=1.20.0", # yarl is used by crawlee
]

[tool.hatch.build.targets.wheel]
Expand Down
82 changes: 54 additions & 28 deletions tests/unit/actor/test_actor_create_proxy_configuration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
from unittest.mock import Mock

import httpx
import pytest
Expand All @@ -11,7 +12,10 @@
from apify import Actor

if TYPE_CHECKING:
from respx import MockRouter
from collections.abc import Iterator

from pytest_httpserver import HTTPServer
from werkzeug import Request, Response

from ..conftest import ApifyClientAsyncPatcher

Expand All @@ -24,25 +28,43 @@ def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) ->
return ApifyClientAsync()


@pytest.fixture
def patched_httpx_client(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
    """Replace `httpx.AsyncClient` with a subclass that ignores the `proxy` argument.

    While the fixture is active, any `httpx.AsyncClient(...)` constructed through the
    `httpx` module attribute silently drops a `proxy` keyword argument before delegating
    to the real constructor. All other positional and keyword arguments are forwarded
    unchanged.

    The patch is applied inside `monkeypatch.context()`, so only this fixture's own
    change is reverted on teardown. Calling `monkeypatch.undo()` directly would also
    revert every other change registered on the shared `monkeypatch` instance
    (for example `setenv` calls made by the test itself).
    """

    class ProxylessAsyncClient(httpx.AsyncClient):
        def __init__(self, *args: Any, **kwargs: Any) -> None:
            # Discard the proxy setting (if any); everything else is passed through.
            kwargs.pop('proxy', None)
            super().__init__(*args, **kwargs)

    with monkeypatch.context() as scoped_patch:
        scoped_patch.setattr(httpx, 'AsyncClient', ProxylessAsyncClient)
        yield


@pytest.mark.usefixtures('patched_httpx_client')
async def test_basic_proxy_configuration_creation(
monkeypatch: pytest.MonkeyPatch,
respx_mock: MockRouter,
httpserver: HTTPServer,
patched_apify_client: ApifyClientAsync,
) -> None:
dummy_proxy_status_url = 'http://dummy-proxy-status-url.com'
dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN')
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)

route = respx_mock.get(dummy_proxy_status_url)
route.mock(
httpx.Response(
200,
json={
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
)
call_mock = Mock()

def request_handler(request: Request, response: Response) -> Response:
call_mock(request.url)
return response

httpserver.expect_oneshot_request('/').with_post_hook(request_handler).respond_with_json(
{
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
status=200,
)

groups = ['GROUP1', 'GROUP2']
Expand All @@ -58,32 +80,36 @@ async def test_basic_proxy_configuration_creation(
assert proxy_configuration._country_code == country_code

assert len(patched_apify_client.calls['user']['get']) == 1 # type: ignore[attr-defined]
assert len(route.calls) == 1
assert call_mock.call_count == 1

await Actor.exit()


@pytest.mark.usefixtures('patched_httpx_client')
async def test_proxy_configuration_with_actor_proxy_input(
monkeypatch: pytest.MonkeyPatch,
respx_mock: MockRouter,
httpserver: HTTPServer,
patched_apify_client: ApifyClientAsync,
) -> None:
dummy_proxy_status_url = 'http://dummy-proxy-status-url.com'
dummy_proxy_status_url = str(httpserver.url_for('/')).removesuffix('/')
dummy_proxy_url = 'http://dummy-proxy.com:8000'

monkeypatch.setenv(ApifyEnvVars.TOKEN.value, 'DUMMY_TOKEN')
monkeypatch.setenv(ApifyEnvVars.PROXY_STATUS_URL.value, dummy_proxy_status_url)

route = respx_mock.get(dummy_proxy_status_url)
route.mock(
httpx.Response(
200,
json={
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
)
call_mock = Mock()

def request_handler(request: Request, response: Response) -> Response:
call_mock(request.url)
return response

httpserver.expect_request('/').with_post_hook(request_handler).respond_with_json(
{
'connected': True,
'connectionError': None,
'isManInTheMiddle': True,
},
status=200,
)

await Actor.init()
Expand Down Expand Up @@ -138,6 +164,6 @@ async def test_proxy_configuration_with_actor_proxy_input(
)

assert len(patched_apify_client.calls['user']['get']) == 2 # type: ignore[attr-defined]
assert len(route.calls) == 2
assert call_mock.call_count == 2

await Actor.exit()
57 changes: 35 additions & 22 deletions tests/unit/actor/test_request_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@

import re
from dataclasses import dataclass
from typing import Any, get_args
from typing import TYPE_CHECKING, Any, get_args
from unittest.mock import Mock

import pytest
import respx
from httpx import Response
from yarl import URL

from crawlee._request import UserData
from crawlee._types import HttpMethod

from apify.storages._request_list import URL_NO_COMMAS_REGEX, RequestList

if TYPE_CHECKING:
from pytest_httpserver import HTTPServer
from werkzeug import Request, Response


@pytest.mark.parametrize(
argnames='request_method',
Expand Down Expand Up @@ -67,37 +71,48 @@ async def test_request_list_open_request_types(
assert request.headers.root == optional_input.get('headers', {})


@respx.mock
async def test_request_list_open_from_url_correctly_send_requests() -> None:
async def test_request_list_open_from_url_correctly_send_requests(httpserver: HTTPServer) -> None:
"""Test that requests are sent to expected urls."""
request_list_sources_input: list[dict[str, Any]] = [
{
'requestsFromUrl': 'https://abc.dev/file.txt',
'requestsFromUrl': httpserver.url_for('/file.txt'),
'method': 'GET',
},
{
'requestsFromUrl': 'https://www.abc.dev/file2',
'requestsFromUrl': httpserver.url_for('/file2'),
'method': 'PUT',
},
{
'requestsFromUrl': 'https://www.something.som',
'requestsFromUrl': httpserver.url_for('/something'),
'method': 'POST',
'headers': {'key': 'value'},
'payload': 'some_payload',
'userData': {'another_key': 'another_value'},
},
]

routes = [respx.get(entry['requestsFromUrl']) for entry in request_list_sources_input]
routes: dict[str, Mock] = {}

def request_handler(request: Request, response: Response) -> Response:
routes[request.url]()
return response

for entry in request_list_sources_input:
path = str(URL(entry['requestsFromUrl']).path)
httpserver.expect_oneshot_request(path).with_post_hook(request_handler).respond_with_data(status=200)
routes[entry['requestsFromUrl']] = Mock()

await RequestList.open(request_list_sources_input=request_list_sources_input)

for route in routes:
assert route.called
assert len(routes) == len(request_list_sources_input)

for entity in request_list_sources_input:
entity_url = entity['requestsFromUrl']
assert entity_url in routes
assert routes[entity_url].called

@respx.mock
async def test_request_list_open_from_url() -> None:

async def test_request_list_open_from_url(httpserver: HTTPServer) -> None:
"""Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
expected_simple_url = 'https://www.someurl.com'
expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
Expand All @@ -111,11 +126,11 @@ class MockedUrlInfo:

mocked_urls = (
MockedUrlInfo(
'https://abc.dev/file.txt',
httpserver.url_for('/file.txt'),
'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1),
),
MockedUrlInfo(
'https://www.abc.dev/file2',
httpserver.url_for('/file2'),
'some stuff{} more stuff{} www.false_positive.com'.format(*expected_remote_urls_2),
),
)
Expand All @@ -132,7 +147,8 @@ class MockedUrlInfo:
},
]
for mocked_url in mocked_urls:
respx.get(mocked_url.url).mock(return_value=Response(200, text=mocked_url.response_text))
path = str(URL(mocked_url.url).path)
httpserver.expect_oneshot_request(path).respond_with_data(status=200, response_data=mocked_url.response_text)

request_list = await RequestList.open(request_list_sources_input=request_list_sources_input)
generated_requests = []
Expand All @@ -143,23 +159,20 @@ class MockedUrlInfo:
assert {generated_request.url for generated_request in generated_requests} == expected_urls


@respx.mock
async def test_request_list_open_from_url_additional_inputs() -> None:
async def test_request_list_open_from_url_additional_inputs(httpserver: HTTPServer) -> None:
"""Test that all generated request properties are correctly populated from input values."""
expected_url = 'https://www.someurl.com'
example_start_url_input: dict[str, Any] = {
'requestsFromUrl': 'https://crawlee.dev/file.txt',
'requestsFromUrl': httpserver.url_for('/file.txt'),
'method': 'POST',
'headers': {'key': 'value'},
'payload': 'some_payload',
'userData': {'another_key': 'another_value'},
}

respx.get(example_start_url_input['requestsFromUrl']).mock(return_value=Response(200, text=expected_url))
httpserver.expect_oneshot_request('/file.txt').respond_with_data(status=200, response_data=expected_url)

request_list = await RequestList.open(request_list_sources_input=[example_start_url_input])
request = await request_list.fetch_next_request()

# Check all properties correctly created for request
assert request
assert request.url == expected_url
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
import inspect
import os
from collections import defaultdict
from logging import getLogger
from typing import TYPE_CHECKING, Any, get_type_hints

import pytest
from pytest_httpserver import HTTPServer

from apify_client import ApifyClientAsync
from apify_shared.consts import ApifyEnvVars
Expand All @@ -18,7 +20,7 @@
import apify._actor

if TYPE_CHECKING:
from collections.abc import Callable
from collections.abc import Callable, Iterator
from pathlib import Path


Expand Down Expand Up @@ -187,3 +189,23 @@ def memory_storage_client() -> MemoryStorageClient:
configuration.write_metadata = True

return MemoryStorageClient.from_config(configuration)


@pytest.fixture(scope='session')
def make_httpserver() -> Iterator[HTTPServer]:
    """Start one `HTTPServer` on 127.0.0.1 for the whole test session.

    The `werkzeug` logger is disabled before the server starts and is not re-enabled
    afterwards. On session teardown the server is cleared and, if it is still running,
    stopped.
    """
    # Disable werkzeug's logger for the rest of the session.
    getLogger('werkzeug').disabled = True

    http_server = HTTPServer(threaded=True, host='127.0.0.1')
    http_server.start()

    yield http_server

    # Teardown: reset the server, then shut it down unless it has already stopped.
    http_server.clear()  # type: ignore[no-untyped-call]
    if http_server.is_running():
        http_server.stop()  # type: ignore[no-untyped-call]


@pytest.fixture
def httpserver(make_httpserver: HTTPServer) -> Iterator[HTTPServer]:
    """Hand the session-wide HTTP server to a single test and clear it afterwards.

    The same server object produced by `make_httpserver` is yielded; after the test
    finishes, `clear()` is called on it so the next test does not see this test's
    server state (presumably registered handlers and request log - confirm against
    the pytest-httpserver documentation).
    """
    yield make_httpserver
    make_httpserver.clear()  # type: ignore[no-untyped-call]
Loading
Loading