Skip to content

Commit fd0193f

Browse files
authored
chore: improve http crawler tests and cookies tests (#962)
### Description - Improve cookies tests for better stability - Update `parametrize` in http crawler tests to use `pytest.param`
1 parent d357841 commit fd0193f

File tree

2 files changed

+26
-24
lines changed

2 files changed

+26
-24
lines changed

tests/unit/crawlers/_http/test_http_crawler.py

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,14 @@ async def test_handles_redirects(
133133
@pytest.mark.parametrize(
134134
('additional_http_error_status_codes', 'ignore_http_error_status_codes', 'expected_number_error'),
135135
[
136-
([], [], 1),
137-
([403], [], 3),
138-
([], [403], 0),
139-
([403], [403], 3),
140-
],
141-
ids=[
142-
'default_behavior', # error without retry for all 4xx statuses
143-
'additional_status_codes', # make retry for codes in `additional_http_error_status_codes` list
144-
'ignore_error_status_codes', # take as successful status codes from the `ignore_http_error_status_codes` list
145-
'additional_and_ignore', # check precedence for `additional_http_error_status_codes`
136+
# error without retry for all 4xx statuses
137+
pytest.param([], [], 1, id='default_behavior'),
138+
# make retry for codes in `additional_http_error_status_codes` list
139+
pytest.param([403], [], 3, id='additional_status_codes'),
140+
# take as successful status codes from the `ignore_http_error_status_codes` list
141+
pytest.param([], [403], 0, id='ignore_error_status_codes'),
142+
# check precedence for `additional_http_error_status_codes`
143+
pytest.param([403], [403], 3, id='additional_and_ignore'),
146144
],
147145
)
148146
async def test_handles_client_errors(
@@ -258,9 +256,7 @@ async def test_http_status_statistics(crawler: HttpCrawler, server: respx.MockRo
258256

259257

260258
@pytest.mark.parametrize(
261-
'http_client_class',
262-
[CurlImpersonateHttpClient, HttpxHttpClient],
263-
ids=['curl', 'httpx'],
259+
'http_client_class', [pytest.param(CurlImpersonateHttpClient, id='curl'), pytest.param(HttpxHttpClient, id='httpx')]
264260
)
265261
async def test_sending_payload_as_raw_data(http_client_class: type[BaseHttpClient], httpbin: URL) -> None:
266262
http_client = http_client_class()
@@ -295,9 +291,7 @@ async def request_handler(context: HttpCrawlingContext) -> None:
295291

296292

297293
@pytest.mark.parametrize(
298-
'http_client_class',
299-
[CurlImpersonateHttpClient, HttpxHttpClient],
300-
ids=['curl', 'httpx'],
294+
'http_client_class', [pytest.param(CurlImpersonateHttpClient, id='curl'), pytest.param(HttpxHttpClient, id='httpx')]
301295
)
302296
async def test_sending_payload_as_form_data(http_client_class: type[BaseHttpClient], httpbin: URL) -> None:
303297
http_client = http_client_class()
@@ -327,9 +321,7 @@ async def request_handler(context: HttpCrawlingContext) -> None:
327321

328322

329323
@pytest.mark.parametrize(
330-
'http_client_class',
331-
[CurlImpersonateHttpClient, HttpxHttpClient],
332-
ids=['curl', 'httpx'],
324+
'http_client_class', [pytest.param(CurlImpersonateHttpClient, id='curl'), pytest.param(HttpxHttpClient, id='httpx')]
333325
)
334326
async def test_sending_payload_as_json(http_client_class: type[BaseHttpClient], httpbin: URL) -> None:
335327
http_client = http_client_class()
@@ -360,9 +352,7 @@ async def request_handler(context: HttpCrawlingContext) -> None:
360352

361353

362354
@pytest.mark.parametrize(
363-
'http_client_class',
364-
[CurlImpersonateHttpClient, HttpxHttpClient],
365-
ids=['curl', 'httpx'],
355+
'http_client_class', [pytest.param(CurlImpersonateHttpClient, id='curl'), pytest.param(HttpxHttpClient, id='httpx')]
366356
)
367357
async def test_sending_url_query_params(http_client_class: type[BaseHttpClient], httpbin: URL) -> None:
368358
http_client = http_client_class()
@@ -435,8 +425,14 @@ async def test_isolation_cookies(http_client_class: type[BaseHttpClient], httpbi
435425
response_cookies: dict[str, dict[str, str]] = {}
436426

437427
crawler = HttpCrawler(
438-
session_pool=SessionPool(max_pool_size=1),
428+
session_pool=SessionPool(
429+
max_pool_size=1,
430+
create_session_settings={
431+
'max_error_score': 50,
432+
},
433+
),
439434
http_client=http_client,
435+
max_request_retries=10,
440436
concurrency_settings=ConcurrencySettings(max_concurrency=1),
441437
)
442438

tests/unit/crawlers/_playwright/test_playwright_crawler.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,15 @@ async def test_isolation_cookies(*, use_incognito_pages: bool, httpbin: URL) ->
270270
response_cookies: dict[str, dict[str, str]] = {}
271271

272272
crawler = PlaywrightCrawler(
273-
session_pool=SessionPool(max_pool_size=1),
273+
session_pool=SessionPool(
274+
max_pool_size=1,
275+
create_session_settings={
276+
'max_error_score': 50,
277+
},
278+
),
274279
use_incognito_pages=use_incognito_pages,
275280
concurrency_settings=ConcurrencySettings(max_concurrency=1),
281+
max_request_retries=10,
276282
)
277283

278284
@crawler.router.default_handler

0 commit comments

Comments
 (0)