|
2 | 2 | import re |
3 | 3 | import traceback |
4 | 4 |
|
| 5 | +import crawlee.router |
| 6 | + |
5 | 7 |
|
6 | 8 | def _get_only_innermost_exception(error: BaseException) -> BaseException: |
7 | | - """Get innermost exception by following __cause__ and __context__ attributes of exception.""" |
| 9 | + """Get innermost exception by following __cause__ and __context__ attributes of exception. |
| 10 | +
|
| 11 | + If the innermost exception is UserHandlerTimeoutError, return whatever caused that if possible. |
| 12 | + """ |
| 13 | + if type(error) is crawlee.router.UserHandlerTimeoutError: |
| 14 | + if error.__cause__: |
| 15 | + return error.__cause__ |
| 16 | + if error.__context__: |
| 17 | + return error.__context__ |
| 18 | + return error |
| 19 | + |
8 | 20 | if error.__cause__: |
9 | 21 | return _get_only_innermost_exception(error.__cause__) |
10 | 22 | if error.__context__: |
@@ -43,13 +55,20 @@ def reduce_asyncio_timeout_error_to_relevant_traceback_parts( |
43 | 55 | def _get_traceback_parts_for_innermost_exception(error: Exception) -> list[str]: |
44 | 56 | innermost_error = _get_only_innermost_exception(error) |
45 | 57 | return traceback.format_exception( |
46 | | - type(innermost_error), value=innermost_error, tb=innermost_error.__traceback__, chain=True |
| 58 | + type(innermost_error), value=innermost_error, tb=innermost_error.__traceback__, chain=False |
47 | 59 | ) |
48 | 60 |
|
49 | 61 |
|
50 | 62 | def get_one_line_error_summary_if_possible(error: Exception) -> str: |
51 | 63 | if isinstance(error, asyncio.exceptions.TimeoutError): |
52 | | - most_relevant_part = ',' + reduce_asyncio_timeout_error_to_relevant_traceback_parts(error)[-1] |
| 64 | + relevant_part = reduce_asyncio_timeout_error_to_relevant_traceback_parts(error) |
| 65 | + most_relevant_part = (',' + relevant_part[-1]) if len(relevant_part) else '' |
| 66 | + elif isinstance(error, crawlee.router.UserHandlerTimeoutError): |
| 67 | + # Error comes from a user-defined handler. The first two lines should be the location of the `UserHandlerTimeoutError` in crawlee |
| 68 | + # code, and the third line should be the topmost user error. |
| 69 | + traceback_parts = _get_traceback_parts_for_innermost_exception(error) |
| 70 | + relevant_index_from_start = 3 |
| 71 | + most_relevant_part = traceback_parts[2] if len(traceback_parts) >= relevant_index_from_start else '' |
53 | 72 | elif 'playwright._impl._errors.Error' in str(error.__class__): |
54 | 73 | # Playwright autogenerated errors are often very long, so we do not try to summarize them at all as they anyway |
55 | 74 | # point to deep internals. |
|
0 commit comments