Skip to content

Commit dc49aa9

Browse files
authored
chore: add uvicorn server for tests (#1092)
### Description - Add uvicorn server for testing instead of making requests to external services - Adapted all tests that made requests to any external sites, for example `crawlee.dev` - Adapted almost all tests using `httpbin`. ~~Excludes:~~ - ~~test_send_request_with_proxy_disabled[curl, httpx] - proxy-py, instead of a `ProxyError` error, returns a 400 response when accessing `uvicorn`. The test does not make a real request to `httpbin.org`~~ - ~~test_proxy_disabled[curl, httpx] - same~~ - ~~test_http_2[httpx] - `uvicorn` only supports HTTP/1.1~~ - The `httpbin` fixture has been removed. The remaining tests make requests to `https://apify.com/`. This is sufficient for testing, and it is much more stable. ### Issues - Closes: #929 - Closes: #743
1 parent 12b13e3 commit dc49aa9

16 files changed

+937
-348
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ dev = [
110110
"types-colorama~=0.4.15.20240106",
111111
"types-psutil~=7.0.0.20250218",
112112
"types-python-dateutil~=2.9.0.20240316",
113+
"uvicorn[standard]~=0.34.0",
113114
]
114115

115116
[tool.hatch.build.targets.wheel]

tests/__init__.py

Whitespace-only changes.

tests/unit/__init__.py

Whitespace-only changes.

tests/unit/browsers/test_browser_pool.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,49 +10,49 @@
1010
from yarl import URL
1111

1212

13-
async def test_default_plugin_new_page_creation(httpbin: URL) -> None:
13+
async def test_default_plugin_new_page_creation(server_url: URL) -> None:
1414
async with BrowserPool() as browser_pool:
1515
page_1 = await browser_pool.new_page()
16-
await page_1.page.goto(str(httpbin / 'get'))
16+
await page_1.page.goto(str(server_url))
1717
assert page_1.browser_type == 'chromium'
18-
assert page_1.page.url == str(httpbin / 'get')
18+
assert page_1.page.url == str(server_url)
1919
assert '<html' in await page_1.page.content() # there is some HTML content
2020
assert browser_pool.total_pages_count == 1
2121

2222
page_2 = await browser_pool.new_page()
23-
await page_2.page.goto(str(httpbin / 'status/200'))
23+
await page_2.page.goto(str(server_url / 'status/200'))
2424
assert page_2.browser_type == 'chromium'
25-
assert page_2.page.url == str(httpbin / 'status/200')
25+
assert page_2.page.url == str(server_url / 'status/200')
2626
assert '<html' in await page_1.page.content() # there is some HTML content
2727
assert browser_pool.total_pages_count == 2
2828

2929
await page_1.page.close()
3030
await page_2.page.close()
3131

3232

33-
async def test_multiple_plugins_new_page_creation(httpbin: URL) -> None:
33+
async def test_multiple_plugins_new_page_creation(server_url: URL) -> None:
3434
plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
3535
plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')
3636

3737
async with BrowserPool([plugin_chromium, plugin_firefox]) as browser_pool:
3838
assert browser_pool.plugins == [plugin_chromium, plugin_firefox]
3939

4040
page_1 = await browser_pool.new_page()
41-
await page_1.page.goto(str(httpbin / 'get'))
41+
await page_1.page.goto(str(server_url))
4242
assert page_1.browser_type == 'chromium'
43-
assert page_1.page.url == str(httpbin / 'get')
43+
assert page_1.page.url == str(server_url)
4444
assert '<html' in await page_1.page.content() # there is some HTML content
4545

4646
page_2 = await browser_pool.new_page()
47-
await page_2.page.goto(str(httpbin / 'headers'))
47+
await page_2.page.goto(str(server_url / 'headers'))
4848
assert page_2.browser_type == 'firefox'
49-
assert page_2.page.url == str(httpbin / 'headers')
49+
assert page_2.page.url == str(server_url / 'headers')
5050
assert '<html' in await page_2.page.content() # there is some HTML content
5151

5252
page_3 = await browser_pool.new_page()
53-
await page_3.page.goto(str(httpbin / 'user-agent'))
53+
await page_3.page.goto(str(server_url / 'user-agent'))
5454
assert page_3.browser_type == 'chromium'
55-
assert page_3.page.url == str(httpbin / 'user-agent')
55+
assert page_3.page.url == str(server_url / 'user-agent')
5656
assert '<html' in await page_3.page.content() # there is some HTML content
5757

5858
await page_1.page.close()
@@ -62,7 +62,7 @@ async def test_multiple_plugins_new_page_creation(httpbin: URL) -> None:
6262
assert browser_pool.total_pages_count == 3
6363

6464

65-
async def test_new_page_with_each_plugin(httpbin: URL) -> None:
65+
async def test_new_page_with_each_plugin(server_url: URL) -> None:
6666
plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
6767
plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')
6868

@@ -74,12 +74,12 @@ async def test_new_page_with_each_plugin(httpbin: URL) -> None:
7474
assert pages[0].browser_type == 'chromium'
7575
assert pages[1].browser_type == 'firefox'
7676

77-
await pages[0].page.goto(str(httpbin / 'get'))
78-
assert pages[0].page.url == str(httpbin / 'get')
77+
await pages[0].page.goto(str(server_url))
78+
assert pages[0].page.url == str(server_url)
7979
assert '<html' in await pages[0].page.content() # there is some HTML content
8080

81-
await pages[1].page.goto(str(httpbin / 'headers'))
82-
assert pages[1].page.url == str(httpbin / 'headers')
81+
await pages[1].page.goto(str(server_url / 'headers'))
82+
assert pages[1].page.url == str(server_url / 'headers')
8383
assert '<html' in await pages[1].page.content()
8484

8585
for page in pages:
@@ -88,16 +88,16 @@ async def test_new_page_with_each_plugin(httpbin: URL) -> None:
8888
assert browser_pool.total_pages_count == 2
8989

9090

91-
async def test_with_default_plugin_constructor(httpbin: URL) -> None:
91+
async def test_with_default_plugin_constructor(server_url: URL) -> None:
9292
async with BrowserPool.with_default_plugin(headless=True, browser_type='firefox') as browser_pool:
9393
assert len(browser_pool.plugins) == 1
9494
assert isinstance(browser_pool.plugins[0], PlaywrightBrowserPlugin)
9595

9696
page = await browser_pool.new_page()
9797
assert page.browser_type == 'firefox'
9898

99-
await page.page.goto(str(httpbin / 'get'))
100-
assert page.page.url == str(httpbin / 'get')
99+
await page.page.goto(str(server_url))
100+
assert page.page.url == str(server_url)
101101
assert '<html' in await page.page.content() # there is some HTML content
102102

103103
await page.page.close()
@@ -119,13 +119,13 @@ async def test_new_page_with_invalid_plugin() -> None:
119119
await browser_pool.new_page(browser_plugin=plugin_2)
120120

121121

122-
async def test_resource_management(httpbin: URL) -> None:
122+
async def test_resource_management(server_url: URL) -> None:
123123
playwright_plugin = PlaywrightBrowserPlugin(browser_type='chromium')
124124

125125
async with BrowserPool([playwright_plugin]) as browser_pool:
126126
page = await browser_pool.new_page()
127-
await page.page.goto(str(httpbin / 'get'))
128-
assert page.page.url == str(httpbin / 'get')
127+
await page.page.goto(str(server_url))
128+
assert page.page.url == str(server_url)
129129
assert '<html' in await page.page.content() # there is some HTML content
130130
assert browser_pool.total_pages_count == 1
131131

@@ -153,10 +153,10 @@ async def test_methods_raise_error_when_not_active() -> None:
153153
assert browser_pool.active is True
154154

155155

156-
async def test_with_plugin_contains_page_options(httpbin: URL) -> None:
156+
async def test_with_plugin_contains_page_options(server_url: URL) -> None:
157157
plugin = PlaywrightBrowserPlugin(browser_new_context_options={'user_agent': 'My Best User-Agent'})
158158
async with BrowserPool(plugins=[plugin]) as browser_pool:
159159
test_page = await browser_pool.new_page()
160-
await test_page.page.goto(str(httpbin / 'user-agent'))
160+
await test_page.page.goto(str(server_url / 'user-agent'))
161161
assert 'My Best User-Agent' in await test_page.page.content()
162162
await test_page.page.close()

tests/unit/browsers/test_playwright_browser_controller.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ async def test_initial_state(browser: Browser) -> None:
4646
assert controller.has_free_capacity
4747

4848

49-
async def test_open_and_close_page(controller: PlaywrightBrowserController, httpbin: URL) -> None:
49+
async def test_open_and_close_page(controller: PlaywrightBrowserController, server_url: URL) -> None:
5050
page = await controller.new_page()
51-
await page.goto(str(httpbin))
51+
await page.goto(str(server_url))
5252

5353
assert page in controller.pages
5454
assert controller.pages_count == 1

tests/unit/browsers/test_playwright_browser_plugin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ async def test_initial_state() -> None:
3434
assert plugin.max_open_pages_per_browser == 10
3535

3636

37-
async def test_new_browser(plugin: PlaywrightBrowserPlugin, httpbin: URL) -> None:
37+
async def test_new_browser(plugin: PlaywrightBrowserPlugin, server_url: URL) -> None:
3838
browser_controller = await plugin.new_browser()
3939

4040
assert browser_controller.is_browser_connected
4141

4242
page = await browser_controller.new_page()
43-
await page.goto(str(httpbin))
43+
await page.goto(str(server_url))
4444

4545
await page.close()
4646
await browser_controller.close()

tests/unit/conftest.py

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,26 @@
55

66
import logging
77
import os
8-
from typing import TYPE_CHECKING, Any, Callable, Optional, cast
8+
from typing import TYPE_CHECKING, Callable, Optional, cast
99

1010
import pytest
1111
from proxy import Proxy
12-
from yarl import URL
12+
from uvicorn.config import Config
1313

1414
from crawlee import service_locator
1515
from crawlee.configuration import Configuration
1616
from crawlee.fingerprint_suite._browserforge_adapter import get_available_header_network
1717
from crawlee.proxy_configuration import ProxyInfo
1818
from crawlee.storage_clients import MemoryStorageClient
1919
from crawlee.storages import KeyValueStore, _creation_management
20+
from tests.unit.server import TestServer, app, serve_in_thread
2021

2122
if TYPE_CHECKING:
22-
from collections.abc import AsyncGenerator
23+
from collections.abc import AsyncGenerator, Iterator
2324
from pathlib import Path
2425

26+
from yarl import URL
27+
2528

2629
@pytest.fixture
2730
def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]:
@@ -96,41 +99,6 @@ def _set_crawler_log_level(pytestconfig: pytest.Config, monkeypatch: pytest.Monk
9699
monkeypatch.setattr(_log_config, 'get_configured_log_level', lambda: getattr(logging, loglevel.upper()))
97100

98101

99-
@pytest.fixture
100-
def httpbin() -> URL:
101-
class URLWrapper:
102-
def __init__(self, url: URL) -> None:
103-
self.url = url
104-
105-
def __getattr__(self, name: str) -> Any:
106-
result = getattr(self.url, name)
107-
return_type = getattr(result, '__annotations__', {}).get('return', None)
108-
109-
if return_type == 'URL':
110-
111-
def wrapper(*args: Any, **kwargs: Any) -> URLWrapper:
112-
return URLWrapper(result(*args, **kwargs))
113-
114-
return wrapper
115-
116-
return result
117-
118-
def with_path(
119-
self, path: str, *, keep_query: bool = True, keep_fragment: bool = True, encoded: bool = False
120-
) -> URLWrapper:
121-
return URLWrapper(
122-
URL.with_path(self.url, path, keep_query=keep_query, keep_fragment=keep_fragment, encoded=encoded)
123-
)
124-
125-
def __truediv__(self, other: Any) -> URLWrapper:
126-
return self.with_path(other)
127-
128-
def __str__(self) -> str:
129-
return str(self.url)
130-
131-
return cast('URL', URLWrapper(URL(os.environ.get('HTTPBIN_URL', 'https://httpbin.org'))))
132-
133-
134102
@pytest.fixture
135103
async def proxy_info(unused_tcp_port: int) -> ProxyInfo:
136104
username = 'user'
@@ -199,3 +167,32 @@ async def key_value_store() -> AsyncGenerator[KeyValueStore, None]:
199167
kvs = await KeyValueStore.open()
200168
yield kvs
201169
await kvs.drop()
170+
171+
172+
@pytest.fixture(scope='session')
173+
def http_server(unused_tcp_port_factory: Callable[[], int]) -> Iterator[TestServer]:
174+
"""Create and start an HTTP test server."""
175+
config = Config(app=app, lifespan='off', loop='asyncio', port=unused_tcp_port_factory())
176+
server = TestServer(config=config)
177+
yield from serve_in_thread(server)
178+
179+
180+
@pytest.fixture(scope='session')
181+
def server_url(http_server: TestServer) -> URL:
182+
"""Provide the base URL of the test server."""
183+
return http_server.url
184+
185+
186+
# It is needed only in some tests, so we use the standard `scope=function`
187+
@pytest.fixture
188+
def redirect_http_server(unused_tcp_port_factory: Callable[[], int]) -> Iterator[TestServer]:
189+
"""Create and start an HTTP test server."""
190+
config = Config(app=app, lifespan='off', loop='asyncio', port=unused_tcp_port_factory())
191+
server = TestServer(config=config)
192+
yield from serve_in_thread(server)
193+
194+
195+
@pytest.fixture
196+
def redirect_server_url(redirect_http_server: TestServer) -> URL:
197+
"""Provide the base URL of the test server."""
198+
return redirect_http_server.url

0 commit comments

Comments
 (0)