2
2
import json
3
3
from pathlib import Path
4
4
5
+ from more_itertools import flatten
5
6
from typing_extensions import override
6
7
7
8
from crawlee ._consts import METADATA_FILENAME
8
9
from crawlee .storage_clients ._file_system import FileSystemKeyValueStoreClient
10
+ from crawlee .storage_clients .models import KeyValueStoreRecord
9
11
10
12
from apify ._configuration import Configuration
11
13
@@ -24,16 +26,24 @@ async def purge(self) -> None:
24
26
It deletes all files in the key-value store directory, except for the metadata file and
25
27
the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
26
28
"""
27
- kvs_input_key = Configuration .get_global_configuration (). input_key
29
+ configuration = Configuration .get_global_configuration ()
28
30
29
31
# First try to find the alternative format of the input file and process it if it exists.
30
32
for file_path in self .path_to_kvs .glob ('*' ):
31
- if file_path .name == f'{ kvs_input_key } .json' :
33
+ if (
34
+ file_path .name in configuration .input_key_candidates
35
+ and file_path .name != configuration .canonical_input_key
36
+ ):
32
37
await self ._process_input_json (file_path )
33
38
34
39
async with self ._lock :
40
+ files_to_keep = set (
41
+ flatten ([key , f'{ key } .{ METADATA_FILENAME } ' ] for key in configuration .input_key_candidates )
42
+ )
43
+ files_to_keep .add (METADATA_FILENAME )
44
+
35
45
for file_path in self .path_to_kvs .glob ('*' ):
36
- if file_path .name in { METADATA_FILENAME , kvs_input_key , f' { kvs_input_key } . { METADATA_FILENAME } ' } :
46
+ if file_path .name in files_to_keep :
37
47
continue
38
48
if file_path .is_file ():
39
49
await asyncio .to_thread (file_path .unlink , missing_ok = True )
@@ -55,3 +65,15 @@ async def _process_input_json(self, path: Path) -> None:
55
65
f .close ()
56
66
await asyncio .to_thread (path .unlink , missing_ok = True )
57
67
await self .set_value (key = path .stem , value = input_data )
68
+
69
@override
async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:
    """Return the record stored under `key`, resolving input-key aliases.

    When `key` is one of the global configuration's `input_key_candidates`, every
    candidate is looked up in order and the first record found is returned, so the
    input can be read regardless of which candidate name it was stored under.
    Any other key is passed straight to the parent implementation.

    Args:
        key: The key of the record to retrieve.

    Returns:
        The first matching record, or `None` if no record exists.
    """
    configuration = Configuration.get_global_configuration()

    # Keys that are not input-key candidates need no alias resolution.
    if key not in configuration.input_key_candidates:
        return await super().get_value(key=key)

    for candidate in configuration.input_key_candidates:
        record = await super().get_value(key=candidate)
        if record is not None:
            return record

    # `key` is itself one of the candidates and was already looked up in the loop
    # above, so repeating the lookup would only cost an extra storage read.
    return None
0 commit comments