|
2 | 2 |
|
3 | 3 | import asyncio
|
4 | 4 | import base64
|
| 5 | +import contextlib |
| 6 | +import io |
| 7 | +import json |
5 | 8 | import json as jsonlib
|
6 | 9 | import random
|
| 10 | +import re |
7 | 11 | import time
|
8 | 12 | from collections.abc import Callable
|
| 13 | +from datetime import datetime, timezone |
| 14 | +from enum import Enum |
9 | 15 | from http import HTTPStatus
|
10 | 16 | from typing import TYPE_CHECKING, Any, TypeVar, cast
|
11 | 17 |
|
12 | 18 | import impit
|
13 |
| -from apify_shared.utils import ( |
14 |
| - is_content_type_json, |
15 |
| - is_content_type_text, |
16 |
| - is_content_type_xml, |
17 |
| - is_file_or_bytes, |
18 |
| - maybe_extract_enum_member_value, |
19 |
| -) |
20 | 19 |
|
21 | 20 | from apify_client.errors import InvalidResponseBodyError
|
22 | 21 |
|
|
29 | 28 |
|
30 | 29 | PARSE_DATE_FIELDS_MAX_DEPTH = 3
|
31 | 30 | PARSE_DATE_FIELDS_KEY_SUFFIX = 'At'
|
32 |
| - |
33 | 31 | RECORD_NOT_FOUND_EXCEPTION_TYPES = ['record-not-found', 'record-or-token-not-found']
|
34 | 32 |
|
35 | 33 | T = TypeVar('T')
|
36 | 34 | StopRetryingType = Callable[[], None]
|
| 35 | +ListOrDict = TypeVar('ListOrDict', list, dict) |
| 36 | + |
| 37 | + |
def filter_out_none_values_recursively(dictionary: dict) -> dict:
    """Build a copy of `dictionary` in which keys holding None are dropped at every nesting level.

    The actual work is done by `filter_out_none_values_recursively_internal`, whose return annotation
    is `dict | None`; the cast only narrows that annotation for type checkers.
    """
    filtered = filter_out_none_values_recursively_internal(dictionary)
    return cast('dict', filtered)
| 41 | + |
| 42 | + |
def filter_out_none_values_recursively_internal(
    dictionary: dict,
    *,
    remove_empty_dicts: bool | None = None,
) -> dict | None:
    """Return a copy of `dictionary` without None values, descending into nested dictionaries.

    This internal variant exists so the public wrapper can keep a plain `dict` return type
    without needing complicated overloads.

    Args:
        dictionary: The mapping to clean. Only values that are themselves dicts are descended into;
            lists and other containers are copied over as they are.
        remove_empty_dicts: When truthy, an empty result is reported as None instead of `{}`.
            Nested dictionaries are processed with this flag set to True when it is True or None
            here, and set to False otherwise.

    Returns:
        The cleaned dictionary, or None when the result is empty and `remove_empty_dicts` is truthy.
    """
    # The flag handed down to nested dictionaries does not depend on the item, so compute it once.
    prune_nested = remove_empty_dicts is None or remove_empty_dicts is True

    cleaned = {}
    for key, value in dictionary.items():
        kept = value
        if isinstance(kept, dict):
            kept = filter_out_none_values_recursively_internal(kept, remove_empty_dicts=prune_nested)
        # Covers both original None values and nested dictionaries that were pruned away.
        if kept is None:
            continue
        cleaned[key] = kept

    if remove_empty_dicts and not cleaned:
        return None
    return cleaned
| 64 | + |
| 65 | + |
def parse_date_fields(data: ListOrDict, max_depth: int = PARSE_DATE_FIELDS_MAX_DEPTH) -> ListOrDict:
    """Convert date strings found in a list or dictionary into datetimes, down to a limited depth.

    A dictionary value is converted when its key ends with `PARSE_DATE_FIELDS_KEY_SUFFIX`, it is a string
    and it matches the `%Y-%m-%dT%H:%M:%S.%fZ` format; the resulting datetime is tagged with UTC.
    Strings that do not match the format are kept unchanged.

    Args:
        data: The list or dictionary to process. New containers are built; the input is not modified.
        max_depth: Remaining nesting budget. Once it is negative, `data` is returned untouched.

    Returns:
        A new list or dictionary with the recognized date strings replaced, or `data` itself when
        the depth budget is exhausted or `data` is neither a list nor a dictionary.
    """
    if max_depth < 0 or not isinstance(data, (list, dict)):
        return data

    if isinstance(data, list):
        return [parse_date_fields(element, max_depth - 1) for element in data]

    converted = {}
    for name, raw in data.items():
        if name.endswith(PARSE_DATE_FIELDS_KEY_SUFFIX) and isinstance(raw, str):
            try:
                converted[name] = datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=timezone.utc)
            except ValueError:
                # Not in the expected timestamp format, keep the original string.
                converted[name] = raw
        elif isinstance(raw, dict):
            converted[name] = parse_date_fields(raw, max_depth - 1)
        elif isinstance(raw, list):
            # The depth is not reduced here, the list branch above reduces it for each element.
            converted[name] = parse_date_fields(raw, max_depth)
        else:
            converted[name] = raw
    return converted
| 90 | + |
| 91 | + |
def is_content_type_json(content_type: str) -> bool:
    """Tell whether the content type starts with `application/json`, ignoring letter case."""
    json_prefix = re.compile(r'^application/json', flags=re.IGNORECASE)
    return json_prefix.search(content_type) is not None
| 95 | + |
| 96 | + |
def is_content_type_xml(content_type: str) -> bool:
    """Tell whether the content type starts with `application/` and ends with `xml`, ignoring letter case."""
    xml_pattern = re.compile(r'^application/.*xml$', flags=re.IGNORECASE)
    return xml_pattern.search(content_type) is not None
| 100 | + |
| 101 | + |
def is_content_type_text(content_type: str) -> bool:
    """Tell whether the content type starts with `text/`, ignoring letter case."""
    text_prefix = re.compile(r'^text/', flags=re.IGNORECASE)
    return text_prefix.search(content_type) is not None
| 105 | + |
| 106 | + |
def is_file_or_bytes(value: Any) -> bool:
    """Tell whether the value is bytes, a bytearray or an `io.IOBase` instance.

    Relying on IOBase rather than duck typing is a deliberate simplification: a duck-typed check
    would be very complex (judging from how the 'requests' library does it), and this covers
    the vast majority of use cases. It can be refined later if it causes issues.
    """
    binary_like_types = (bytes, bytearray, io.IOBase)
    return isinstance(value, binary_like_types)
| 115 | + |
| 116 | + |
def json_dumps(obj: Any) -> str:
    """Serialize the object to a JSON string using the shared encoder settings.

    Non-ASCII characters are emitted as they are, the output is indented by two spaces, and values
    the encoder cannot serialize natively are converted with `str`.
    """
    encoder = json.JSONEncoder(ensure_ascii=False, indent=2, default=str)
    return encoder.encode(obj)
| 120 | + |
| 121 | + |
def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
    """Unwrap an Enum member to its value; anything that is not an Enum member is returned unchanged."""
    return maybe_enum_member.value if isinstance(maybe_enum_member, Enum) else maybe_enum_member
37 | 127 |
|
38 | 128 |
|
39 | 129 | def to_safe_id(id: str) -> str:
|
|
0 commit comments