Skip to content

Commit 5b416ee

Browse files
committed
Better naming + safety measures
1 parent 649227d commit 5b416ee

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

src/apify/storage_clients/_file_system/_key_value_store_client.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import json
3+
import logging
34
from pathlib import Path
45

56
from more_itertools import flatten
@@ -11,6 +12,8 @@
1112

1213
from apify._configuration import Configuration
1314

15+
logger = logging.getLogger(__name__)
16+
1417

1518
class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
1619
"""Apify-specific implementation of the `FileSystemKeyValueStoreClient`.
@@ -31,7 +34,7 @@ async def purge(self) -> None:
3134
# First try to find the alternative format of the input file and process it if it exists.
3235
for file_path in self.path_to_kvs.glob('*'):
3336
if file_path.name in configuration.input_key_candidates:
34-
await self._process_input_json(file_path)
37+
await self._sanitize_input_json(file_path)
3538

3639
async with self._lock:
3740
files_to_keep = set(
@@ -50,18 +53,29 @@ async def purge(self) -> None:
5053
update_modified_at=True,
5154
)
5255

53-
async def _process_input_json(self, path: Path) -> None:
54-
"""Process simple input json file to format expected by the FileSystemKeyValueStoreClient.
56+
async def _sanitize_input_json(self, path: Path) -> None:
    """Transform an input JSON file to match the naming convention expected by the FileSystemKeyValueStoreClient.

    For example: INPUT.json -> INPUT, INPUT.json.metadata

    If a record already exists under the canonical input key, the file at `path` is left
    untouched and only a warning is logged.

    Args:
        path: Path of the candidate input file to convert.

    Raises:
        OSError: If the file cannot be read or removed.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    configuration = Configuration.get_global_configuration()
    canonical_key = configuration.canonical_input_key

    # Check for an existing record before touching the file: a redundant file is ignored anyway,
    # so there is no reason to read or parse it (or to fail on it when it is malformed).
    if await self.record_exists(key=canonical_key):
        logger.warning(f'Redundant input file found: {path}')
        return

    def _read_input() -> object:
        # Passing bytes lets `json.loads` detect the encoding (UTF-8/16/32) itself instead of
        # relying on the locale's default text encoding.
        return json.loads(path.read_bytes())

    # Both the disk read and the parsing run in a worker thread so the event loop is not blocked.
    input_data = await asyncio.to_thread(_read_input)

    logger.info(f'Renaming input file: {path.name} -> {canonical_key}')

    # The source file is removed before the value is stored.
    # NOTE(review): presumably `set_value` may write to the very same path when `path.name`
    # already equals the canonical key, so the order must not be swapped - confirm.
    await asyncio.to_thread(path.unlink, missing_ok=True)
    await self.set_value(key=canonical_key, value=input_data)
6579

6680
@override
6781
async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:

0 commit comments

Comments
 (0)