|
18 | 18 | import pytest |
19 | 19 |
|
20 | 20 | from crawlee import ConcurrencySettings, Glob, service_locator |
21 | | -from crawlee._request import Request |
| 21 | +from crawlee._request import Request, RequestState |
22 | 22 | from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpMethod |
23 | 23 | from crawlee._utils.robots import RobotsTxtFile |
24 | 24 | from crawlee.configuration import Configuration |
@@ -1768,3 +1768,37 @@ async def handler(_: BasicCrawlingContext) -> None: |
1768 | 1768 |
|
1769 | 1769 | # Wait for crawler to finish |
1770 | 1770 | await crawler_task |
| 1771 | + |
| 1772 | + |
async def test_new_request_error_handler() -> None:
    """Test the case where the error handler returns a brand-new request.

    The default handler raises for the original request. The error handler responds with a different
    request that has the same URL and the original unique key suffixed with `|test`. After the run,
    both requests are looked up in the request queue and their recorded states are checked.
    """
    request_queue = await RequestQueue.open()
    crawler = BasicCrawler(request_manager=request_queue)

    original = Request.from_url('https://a.placeholder.com')

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # Only requests without the `|test` marker fail; the one built by the error handler passes.
        if '|test' not in context.request.unique_key:
            raise ValueError('This error should not be handled by error handler')

    @crawler.error_handler
    async def error_handler(context: BasicCrawlingContext, error: Exception) -> Request | None:
        # Answer the failure with a new request instead of the one that failed.
        replacement_key = f'{context.request.unique_key}|test'
        return Request.from_url(context.request.url, unique_key=replacement_key)

    await crawler.run([original])

    stored_original = await request_queue.get_request(original.unique_key)
    stored_replacement = await request_queue.get_request(f'{original.unique_key}|test')

    # The original request is expected to be recorded in the error-handler state.
    assert stored_original is not None
    assert stored_original.state == RequestState.ERROR_HANDLER

    # The request created by the error handler is expected to be recorded in the request-handler state.
    assert stored_replacement is not None
    assert stored_replacement.state == RequestState.REQUEST_HANDLER
0 commit comments