Skip to content

Commit 09dd8ac

Browse files
authored
feat: Unify datetime handling, remove utcnow() (#52)
1 parent 8f83747 commit 09dd8ac

File tree

9 files changed

+56
-61
lines changed

9 files changed

+56
-61
lines changed

src/apify/_utils.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -234,13 +234,6 @@ async def _force_remove(filename: str) -> None:
234234
await remove(filename)
235235

236236

237-
def _json_serializer(obj: Any) -> str: # TODO: Decide how to parse/dump/handle datetimes!
238-
if isinstance(obj, (datetime)):
239-
return obj.isoformat(timespec='milliseconds') + 'Z'
240-
else:
241-
return str(obj)
242-
243-
244237
def _filter_out_none_values_recursively(dictionary: Dict) -> Dict:
245238
"""Return copy of the dictionary, recursively omitting all keys for which values are None."""
246239
return cast(dict, _filter_out_none_values_recursively_internal(dictionary))
@@ -261,7 +254,7 @@ def _filter_out_none_values_recursively_internal(dictionary: Dict, remove_empty_
261254

262255
def _json_dumps(obj: Any) -> str:
263256
"""Dump JSON to a string with the correct settings and serializer."""
264-
return json.dumps(obj, ensure_ascii=False, indent=2, default=_json_serializer)
257+
return json.dumps(obj, ensure_ascii=False, indent=2, default=str)
265258

266259

267260
uuid_regex = re.compile('[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', re.I)

src/apify/actor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import os
44
import sys
55
import traceback
6-
from datetime import datetime
6+
from datetime import datetime, timezone
77
from types import TracebackType
88
from typing import Any, Awaitable, Callable, Dict, List, Optional, Type, TypeVar, Union, cast
99

@@ -247,7 +247,7 @@ def _get_system_info(self) -> Dict:
247247
memory_usage_bytes = _get_memory_usage_bytes()
248248
# This is in camel case to be compatible with the events from the platform
249249
result = {
250-
'createdAt': datetime.utcnow().isoformat(timespec='milliseconds') + 'Z',
250+
'createdAt': datetime.now(timezone.utc).isoformat(timespec='milliseconds'),
251251
'cpuCurrentUsage': cpu_usage_percent,
252252
'memCurrentBytes': memory_usage_bytes,
253253
}

src/apify/memory_storage/resource_clients/dataset.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import json
22
import os
33
import uuid
4-
from datetime import datetime
4+
from datetime import datetime, timezone
55
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Tuple, Union
66

77
import aioshutil
@@ -49,9 +49,9 @@ def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage', id:
4949
self._client = client
5050
self._name = name
5151
self._dataset_entries = {}
52-
self._created_at = datetime.utcnow()
53-
self._accessed_at = datetime.utcnow()
54-
self._modified_at = datetime.utcnow()
52+
self._created_at = datetime.now(timezone.utc)
53+
self._accessed_at = datetime.now(timezone.utc)
54+
self._modified_at = datetime.now(timezone.utc)
5555

5656
async def get(self) -> Optional[Dict]:
5757
"""Retrieve the dataset.
@@ -315,10 +315,10 @@ def to_dataset_info(self) -> Dict:
315315

316316
async def _update_timestamps(self, has_been_modified: bool) -> None:
317317
"""Update the timestamps of the dataset."""
318-
self._accessed_at = datetime.utcnow()
318+
self._accessed_at = datetime.now(timezone.utc)
319319

320320
if has_been_modified:
321-
self._modified_at = datetime.utcnow()
321+
self._modified_at = datetime.now(timezone.utc)
322322

323323
dataset_info = self.to_dataset_info()
324324
await _update_metadata(data=dataset_info, entity_directory=self._dataset_directory, write_metadata=self._client._write_metadata)
@@ -370,9 +370,9 @@ def _find_or_cache_dataset_by_possible_id(client: 'MemoryStorage', entry_name_or
370370
id: Union[str, None] = None
371371
name: Union[str, None] = None
372372
item_count = 0
373-
created_at = datetime.utcnow()
374-
accessed_at = datetime.utcnow()
375-
modified_at = datetime.utcnow()
373+
created_at = datetime.now(timezone.utc)
374+
accessed_at = datetime.now(timezone.utc)
375+
modified_at = datetime.now(timezone.utc)
376376
entries: Dict[str, Dict] = {}
377377

378378
has_seen_metadata_file = False
@@ -389,9 +389,9 @@ def _find_or_cache_dataset_by_possible_id(client: 'MemoryStorage', entry_name_or
389389
id = metadata['id']
390390
name = metadata['name']
391391
item_count = metadata['itemCount']
392-
created_at = datetime.strptime(metadata['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
393-
accessed_at = datetime.strptime(metadata['accessedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
394-
modified_at = datetime.strptime(metadata['modifiedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
392+
created_at = datetime.fromisoformat(metadata['createdAt'])
393+
accessed_at = datetime.fromisoformat(metadata['accessedAt'])
394+
modified_at = datetime.fromisoformat(metadata['modifiedAt'])
395395

396396
continue
397397

src/apify/memory_storage/resource_clients/key_value_store.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import pathlib
66
import uuid
77
import warnings
8-
from datetime import datetime
8+
from datetime import datetime, timezone
99
from operator import itemgetter
1010
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, Optional, Union
1111

@@ -49,9 +49,9 @@ def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage', id:
4949
self._client = client
5050
self._name = name
5151
self._key_value_entries = {}
52-
self._created_at = datetime.utcnow()
53-
self._accessed_at = datetime.utcnow()
54-
self._modified_at = datetime.utcnow()
52+
self._created_at = datetime.now(timezone.utc)
53+
self._accessed_at = datetime.now(timezone.utc)
54+
self._modified_at = datetime.now(timezone.utc)
5555

5656
async def get(self) -> Optional[Dict]:
5757
"""Retrieve the key-value store.
@@ -315,10 +315,10 @@ def to_key_value_store_info(self) -> Dict:
315315
}
316316

317317
async def _update_timestamps(self, has_been_modified: bool) -> None:
318-
self._accessed_at = datetime.utcnow()
318+
self._accessed_at = datetime.now(timezone.utc)
319319

320320
if has_been_modified:
321-
self._modified_at = datetime.utcnow()
321+
self._modified_at = datetime.now(timezone.utc)
322322

323323
kv_store_info = self.to_key_value_store_info()
324324
await _update_metadata(data=kv_store_info, entity_directory=self._key_value_store_directory, write_metadata=self._client._write_metadata)
@@ -339,9 +339,9 @@ def _find_or_cache_key_value_store_by_possible_id(client: 'MemoryStorage', entry
339339

340340
id: Union[str, None] = None
341341
name: Union[str, None] = None
342-
created_at = datetime.utcnow()
343-
accessed_at = datetime.utcnow()
344-
modified_at = datetime.utcnow()
342+
created_at = datetime.now(timezone.utc)
343+
accessed_at = datetime.now(timezone.utc)
344+
modified_at = datetime.now(timezone.utc)
345345
internal_records: Dict[str, Dict] = {}
346346

347347
# Access the key value store folder
@@ -353,9 +353,9 @@ def _find_or_cache_key_value_store_by_possible_id(client: 'MemoryStorage', entry
353353
metadata = json.load(f)
354354
id = metadata['id']
355355
name = metadata['name']
356-
created_at = datetime.strptime(metadata['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
357-
accessed_at = datetime.strptime(metadata['accessedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
358-
modified_at = datetime.strptime(metadata['modifiedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
356+
created_at = datetime.fromisoformat(metadata['createdAt'])
357+
accessed_at = datetime.fromisoformat(metadata['accessedAt'])
358+
modified_at = datetime.fromisoformat(metadata['modifiedAt'])
359359

360360
continue
361361

src/apify/memory_storage/resource_clients/request_queue.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import json
22
import os
33
import uuid
4-
from datetime import datetime
4+
from datetime import datetime, timezone
55
from typing import TYPE_CHECKING, Dict, List, Optional, Union
66

77
import aioshutil
@@ -43,9 +43,9 @@ def __init__(self, *, base_storage_directory: str, client: 'MemoryStorage', id:
4343
self._client = client
4444
self._name = name
4545
self._requests = {}
46-
self._created_at = datetime.utcnow()
47-
self._accessed_at = datetime.utcnow()
48-
self._modified_at = datetime.utcnow()
46+
self._created_at = datetime.now(timezone.utc)
47+
self._accessed_at = datetime.now(timezone.utc)
48+
self._modified_at = datetime.now(timezone.utc)
4949

5050
async def get(self) -> Optional[Dict]:
5151
"""Retrieve the request queue.
@@ -300,10 +300,10 @@ def to_request_queue_info(self) -> Dict:
300300
}
301301

302302
async def _update_timestamps(self, has_been_modified: bool) -> None:
303-
self._accessed_at = datetime.utcnow()
303+
self._accessed_at = datetime.now(timezone.utc)
304304

305305
if has_been_modified:
306-
self._modified_at = datetime.utcnow()
306+
self._modified_at = datetime.now(timezone.utc)
307307

308308
request_queue_info = self.to_request_queue_info()
309309
await _update_metadata(data=request_queue_info, entity_directory=self._request_queue_directory, write_metadata=self._client._write_metadata)
@@ -336,7 +336,7 @@ def _calculate_order_no(self, request: Dict, forefront: Optional[bool]) -> Optio
336336
if request.get('handledAt') is not None:
337337
return None
338338

339-
timestamp = int(round(datetime.utcnow().timestamp()))
339+
timestamp = int(round(datetime.now(timezone.utc).timestamp()))
340340

341341
return -timestamp if forefront else timestamp
342342

@@ -356,9 +356,9 @@ def _find_or_cache_request_queue_by_possible_id(client: 'MemoryStorage', entry_n
356356

357357
id: Union[str, None] = None
358358
name: Union[str, None] = None
359-
created_at = datetime.utcnow()
360-
accessed_at = datetime.utcnow()
361-
modified_at = datetime.utcnow()
359+
created_at = datetime.now(timezone.utc)
360+
accessed_at = datetime.now(timezone.utc)
361+
modified_at = datetime.now(timezone.utc)
362362
handled_request_count = 0
363363
pending_request_count = 0
364364
entries: List[Dict] = []
@@ -372,9 +372,9 @@ def _find_or_cache_request_queue_by_possible_id(client: 'MemoryStorage', entry_n
372372
metadata = json.load(f)
373373
id = metadata['id']
374374
name = metadata['name']
375-
created_at = datetime.strptime(metadata['createdAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
376-
accessed_at = datetime.strptime(metadata['accessedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
377-
modified_at = datetime.strptime(metadata['modifiedAt'], '%Y-%m-%dT%H:%M:%S.%fZ')
375+
created_at = datetime.fromisoformat(metadata['createdAt'])
376+
accessed_at = datetime.fromisoformat(metadata['accessedAt'])
377+
modified_at = datetime.fromisoformat(metadata['modifiedAt'])
378378
handled_request_count = metadata['handledRequestCount']
379379
pending_request_count = metadata['pendingRequestCount']
380380

tests/integration/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,10 @@ if you need something really complex:
113113
async def test_something(self, make_actor: ActorFactory) -> None:
114114
actor_source_files = {
115115
'src/utils.py': """
116-
from datetime import datetime
116+
from datetime import datetime, timezone
117117
118118
def get_current_datetime():
119-
return datetime.utcnow()
119+
return datetime.now(timezone.utc)
120120
""",
121121
'src/main.py': """
122122
import asyncio

tests/integration/test_fixtures.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,10 @@ async def test_source_files(self, make_actor: ActorFactory) -> None:
5252
test_started_at = datetime.now(timezone.utc)
5353
actor_source_files = {
5454
'src/utils.py': """
55-
from datetime import datetime
55+
from datetime import datetime, timezone
5656
5757
def get_current_datetime():
58-
return datetime.utcnow()
58+
return datetime.now(timezone.utc)
5959
""",
6060
'src/main.py': """
6161
import asyncio
@@ -77,7 +77,7 @@ async def main():
7777
output_record = await actor.last_run().key_value_store().get_record('OUTPUT')
7878
assert output_record is not None
7979

80-
output_datetime = datetime.fromisoformat(output_record['value']).replace(tzinfo=timezone.utc)
80+
output_datetime = datetime.fromisoformat(output_record['value'])
8181
assert output_datetime > test_started_at
8282
assert output_datetime < datetime.now(timezone.utc)
8383

tests/unit/memory_storage/resource_clients/test_request_queue.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import os
2-
from datetime import datetime
2+
from datetime import datetime, timezone
33

44
import pytest
55

@@ -132,7 +132,7 @@ async def test_update_record(request_queue_client: RequestQueueClient) -> None:
132132
rq_info_before_update = await request_queue_client.get()
133133
assert rq_info_before_update is not None
134134
assert rq_info_before_update['pendingRequestCount'] == 1
135-
request_update_info = await request_queue_client.update_request({**request, 'handledAt': datetime.utcnow()})
135+
request_update_info = await request_queue_client.update_request({**request, 'handledAt': datetime.now(timezone.utc)})
136136
assert request_update_info['wasAlreadyHandled'] is False
137137
rq_info_after_update = await request_queue_client.get()
138138
assert rq_info_after_update is not None

tests/unit/test_utils.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import asyncio
2-
import datetime
32
import io
43
import os
54
import uuid
65
from collections import OrderedDict
6+
from datetime import datetime, timezone
77
from enum import Enum
88

99
import pytest
@@ -51,7 +51,7 @@ def test__fetch_and_parse_env_var(monkeypatch: pytest.MonkeyPatch) -> None:
5151
assert _fetch_and_parse_env_var(ApifyEnvVars.MEMORY_MBYTES) == 1024
5252
assert _fetch_and_parse_env_var(ApifyEnvVars.META_ORIGIN) == 'API'
5353
assert _fetch_and_parse_env_var(ApifyEnvVars.STARTED_AT) == \
54-
datetime.datetime(2022, 12, 2, 15, 19, 34, 907000, tzinfo=datetime.timezone.utc)
54+
datetime(2022, 12, 2, 15, 19, 34, 907000, tzinfo=timezone.utc)
5555

5656
assert _fetch_and_parse_env_var('DUMMY_BOOL') == '1' # type: ignore
5757
assert _fetch_and_parse_env_var('DUMMY_DATETIME') == '2022-12-02T15:19:34.907Z' # type: ignore
@@ -96,7 +96,7 @@ def test__maybe_parse_bool() -> None:
9696

9797
def test__maybe_parse_datetime() -> None:
9898
assert _maybe_parse_datetime('2022-12-02T15:19:34.907Z') == \
99-
datetime.datetime(2022, 12, 2, 15, 19, 34, 907000, tzinfo=datetime.timezone.utc)
99+
datetime(2022, 12, 2, 15, 19, 34, 907000, tzinfo=timezone.utc)
100100
assert _maybe_parse_datetime('2022-12-02T15:19:34.907') == '2022-12-02T15:19:34.907'
101101
assert _maybe_parse_datetime('anything') == 'anything'
102102

@@ -275,14 +275,16 @@ def test__json_dumps() -> None:
275275
"number": 456,
276276
"nested": {
277277
"abc": "def"
278-
}
278+
},
279+
"datetime": "2022-01-01 00:00:00+00:00"
279280
}"""
280-
actual = _json_dumps({ # TODO: add a date into the object after datetime serialization format is finalized
281+
actual = _json_dumps({
281282
'string': '123',
282283
'number': 456,
283284
'nested': {
284285
'abc': 'def',
285286
},
287+
'datetime': datetime(2022, 1, 1, tzinfo=timezone.utc),
286288
})
287289
assert actual == expected
288290

@@ -322,11 +324,11 @@ def test__budget_ow() -> None:
322324
_budget_ow({
323325
'a': 123,
324326
'b': 'string',
325-
'c': datetime.datetime.utcnow(),
327+
'c': datetime.now(timezone.utc),
326328
}, {
327329
'a': (int, True),
328330
'b': (str, False),
329-
'c': (datetime.datetime, True),
331+
'c': (datetime, True),
330332
})
331333
with pytest.raises(ValueError, match='required'):
332334
_budget_ow({}, {'id': (str, True)})

0 commit comments

Comments
 (0)