|
4 | 4 |
|
5 | 5 | from __future__ import annotations |
6 | 6 |
|
| 7 | +import asyncio |
7 | 8 | import json |
8 | 9 | import logging |
| 10 | +from datetime import timedelta |
9 | 11 | from typing import TYPE_CHECKING, Any, Literal |
10 | 12 | from unittest import mock |
11 | | -from unittest.mock import Mock |
| 13 | +from unittest.mock import AsyncMock, Mock |
12 | 14 |
|
13 | 15 | import pytest |
14 | 16 |
|
@@ -925,3 +927,43 @@ async def handler(context: PlaywrightCrawlingContext) -> None: |
925 | 927 | await context.enqueue_links(rq_id=queue_id, rq_name=queue_name, rq_alias=queue_alias) |
926 | 928 |
|
927 | 929 | await crawler.run([str(server_url / 'start_enqueue')]) |
| 930 | + |
| 931 | + |
async def test_navigation_timeout_on_slow_page_load(server_url: URL) -> None:
    """A page that loads slower than `navigation_timeout` must fail without reaching the handler."""
    crawler = PlaywrightCrawler(
        navigation_timeout=timedelta(seconds=1),
        max_request_retries=0,
    )

    handler_mock = AsyncMock()
    crawler.router.default_handler(handler_mock)

    failure_mock = AsyncMock()
    crawler.failed_request_handler(failure_mock)

    # The server delays for 2 seconds, which exceeds the 1-second navigation timeout.
    stats = await crawler.run([str((server_url / 'slow').with_query(delay=2))])

    assert stats.requests_failed == 1
    assert stats.requests_finished == 0

    # The request handler must never have run.
    assert handler_mock.call_count == 0

    # The failure callback ran exactly once and received a timeout as its error argument.
    assert failure_mock.call_count == 1
    assert isinstance(failure_mock.call_args[0][1], asyncio.TimeoutError)
| 954 | + |
async def test_slow_navigation_does_not_count_toward_handler_timeout(server_url: URL) -> None:
    """Time spent navigating must not be charged against `request_handler_timeout`."""
    crawler = PlaywrightCrawler(
        request_handler_timeout=timedelta(seconds=0.5),
        max_request_retries=0,
    )

    handler_mock = AsyncMock()
    crawler.router.default_handler(handler_mock)

    # Navigation takes 1 second (exceeds handler timeout), but should still succeed
    stats = await crawler.run([str((server_url / 'slow').with_query(delay=1))])

    assert stats.requests_failed == 0
    assert stats.requests_finished == 1
    assert handler_mock.call_count == 1
0 commit comments