Skip to content

Commit d04cf6c

Browse files
committed
ApifyFileSystemStorageClient will have the same cache key as FileSystemStorageClient
1 parent 0e03e0e commit d04cf6c

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

src/apify/storage_clients/_file_system/_storage_client.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient
1111

1212
if TYPE_CHECKING:
13+
from collections.abc import Hashable
14+
1315
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
1416

1517

@@ -21,6 +23,13 @@ class ApifyFileSystemStorageClient(FileSystemStorageClient):
2123
except for the metadata file and the `INPUT.json` file.
2224
"""
2325

26+
@override
def get_storage_client_cache_key(self, configuration: Configuration) -> Hashable:
    """Return the cache key a plain `FileSystemStorageClient` yields for `configuration`.

    Using the same key as `FileSystemStorageClient` prevents the storage path from
    potentially being purged twice. Whichever of the two clients opens a storage
    first is then also used by later open calls made through the other one.
    """
    plain_client = FileSystemStorageClient()
    return plain_client.get_storage_client_cache_key(configuration)
32+
2433
@override
2534
async def create_kvs_client(
2635
self,

tests/unit/test_apify_storages.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
import pytest
66

7+
from crawlee.storage_clients import FileSystemStorageClient
8+
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
79
from crawlee.storage_clients.models import StorageMetadata
810
from crawlee.storages._base import Storage
911

10-
from apify import Configuration
12+
from apify import Actor, Configuration
1113
from apify.storage_clients import ApifyStorageClient
1214
from apify.storage_clients._apify import ApifyDatasetClient, ApifyKeyValueStoreClient, ApifyRequestQueueClient
15+
from apify.storage_clients._file_system import ApifyFileSystemKeyValueStoreClient, ApifyFileSystemStorageClient
1316
from apify.storages import Dataset, KeyValueStore, RequestQueue
1417

1518

@@ -61,3 +64,31 @@ def create_metadata(id: str) -> StorageMetadata:
6164
# Equivalent configuration results in same storage clients.
6265
assert storage_1 is storage_4
6366
assert storage_3 is storage_5
67+
68+
69+
async def test_no_double_purge_for_filesystem_storage_client() -> None:
    """Check that a value set on the Actor is still readable after a request queue is opened."""
    key = 'some key'
    stored_value = 'some value'

    async with Actor():
        await Actor.set_value(key, stored_value)

        # The request queue persists its state through a KVS, so opening it tries to open the
        # same default KVS that is already open - but through a different client,
        # the FileSystemStorageClient.
        await Actor.open_request_queue()

        retrieved_value = await Actor.get_value(key)
        assert stored_value == retrieved_value
79+
80+
81+
async def test_first_filesystem_storage_client_wins() -> None:
    """Test that the first file-system storage client variant to open a storage wins.

    When two different FileSystemStorageClient variants are used to open the same storage,
    both opens resolve to the same storage, backed by the client that opened it first.
    """
    # Default store: the Apify variant opens it first, the plain variant second.
    default_opened_first = await KeyValueStore.open(storage_client=ApifyFileSystemStorageClient())
    default_opened_second = await KeyValueStore.open(storage_client=FileSystemStorageClient())

    # Store named 'a': the plain variant opens it first, the Apify variant second.
    named_opened_first = await KeyValueStore.open(name='a', storage_client=FileSystemStorageClient())
    named_opened_second = await KeyValueStore.open(name='a', storage_client=ApifyFileSystemStorageClient())

    assert default_opened_first is default_opened_second
    assert type(default_opened_second._client) is ApifyFileSystemKeyValueStoreClient

    assert named_opened_first is named_opened_second
    assert type(named_opened_second._client) is FileSystemKeyValueStoreClient

0 commit comments

Comments
 (0)