Skip to content

Commit 14c5395

Browse files
committed
Add special caching for ApifyClient
TODO: Merge update from NDU storages
1 parent 5bf51f7 commit 14c5395

File tree

5 files changed

+83
-2625
lines changed

5 files changed

+83
-2625
lines changed

src/apify/storage_clients/_apify/_storage_client.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,72 +9,71 @@
99
from ._dataset_client import ApifyDatasetClient
1010
from ._key_value_store_client import ApifyKeyValueStoreClient
1111
from ._request_queue_client import ApifyRequestQueueClient
12+
from apify._configuration import Configuration as ApifyConfiguration
1213
from apify._utils import docs_group
1314

1415
if TYPE_CHECKING:
15-
from crawlee.configuration import Configuration
16+
from collections.abc import Hashable
17+
18+
from crawlee.configuration import Configuration as CrawleeConfiguration
1619

1720

1821
@docs_group('Storage clients')
1922
class ApifyStorageClient(StorageClient):
2023
"""Apify storage client."""
2124

25+
# This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
26+
_lsp_violation_error_message_template = (
27+
'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
28+
)
29+
30+
@override
31+
def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
32+
if isinstance(configuration, ApifyConfiguration):
33+
return f'{configuration.api_base_url},{configuration.token}'
34+
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
35+
2236
@override
2337
async def create_dataset_client(
2438
self,
2539
*,
2640
id: str | None = None,
2741
name: str | None = None,
28-
configuration: Configuration | None = None,
42+
alias: str | None = None,
43+
configuration: CrawleeConfiguration | None = None,
2944
) -> ApifyDatasetClient:
30-
# Import here to avoid circular imports.
31-
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
32-
3345
configuration = configuration or ApifyConfiguration.get_global_configuration()
3446
if isinstance(configuration, ApifyConfiguration):
3547
return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
3648

37-
raise TypeError(
38-
f'Expected "configuration" to be an instance of "apify.Configuration", '
39-
f'but got {type(configuration).__name__} instead.'
40-
)
49+
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
4150

4251
@override
4352
async def create_kvs_client(
4453
self,
4554
*,
4655
id: str | None = None,
4756
name: str | None = None,
48-
configuration: Configuration | None = None,
57+
alias: str | None = None,
58+
configuration: CrawleeConfiguration | None = None,
4959
) -> ApifyKeyValueStoreClient:
50-
# Import here to avoid circular imports.
51-
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
52-
5360
configuration = configuration or ApifyConfiguration.get_global_configuration()
5461
if isinstance(configuration, ApifyConfiguration):
5562
return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
5663

57-
raise TypeError(
58-
f'Expected "configuration" to be an instance of "apify.Configuration", '
59-
f'but got {type(configuration).__name__} instead.'
60-
)
64+
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
6165

6266
@override
6367
async def create_rq_client(
6468
self,
6569
*,
6670
id: str | None = None,
6771
name: str | None = None,
68-
configuration: Configuration | None = None,
72+
alias: str | None = None,
73+
configuration: CrawleeConfiguration | None = None,
6974
) -> ApifyRequestQueueClient:
70-
# Import here to avoid circular imports.
71-
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
72-
7375
configuration = configuration or ApifyConfiguration.get_global_configuration()
7476
if isinstance(configuration, ApifyConfiguration):
7577
return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
7678

77-
raise TypeError(
78-
f'Expected "configuration" to be an instance of "apify.Configuration", '
79-
f'but got {type(configuration).__name__} instead.'
80-
)
79+
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))

src/apify/storage_clients/_file_system/_storage_client.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@ async def create_kvs_client(
2727
*,
2828
id: str | None = None,
2929
name: str | None = None,
30+
alias: str | None = None,
3031
configuration: Configuration | None = None,
3132
) -> FileSystemKeyValueStoreClient:
3233
configuration = configuration or Configuration.get_global_configuration()
33-
client = await ApifyFileSystemKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
34+
client = await ApifyFileSystemKeyValueStoreClient.open(
35+
id=id, name=name, alias=alias, configuration=configuration
36+
)
3437
await self._purge_if_needed(client, configuration)
3538
return client
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from unittest import mock
2+
from unittest.mock import AsyncMock
3+
4+
import pytest
5+
6+
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
7+
from crawlee.storages._base import Storage
8+
9+
from apify import Configuration
10+
from apify.storage_clients import ApifyStorageClient
11+
from apify.storage_clients._apify import ApifyDatasetClient, ApifyKeyValueStoreClient, ApifyRequestQueueClient
12+
13+
14+
@pytest.mark.parametrize(
15+
('storage', '_storage_client'),
16+
[
17+
(Dataset, ApifyDatasetClient),
18+
(KeyValueStore, ApifyKeyValueStoreClient),
19+
(RequestQueue, ApifyRequestQueueClient),
20+
],
21+
)
22+
async def test_get_additional_cache_key(
23+
storage: Storage, _storage_client: ApifyDatasetClient | ApifyKeyValueStoreClient | ApifyRequestQueueClient
24+
) -> None:
25+
"""Test that Storages based on `ApifyStorageClient` include `token` and `api_base_url` in additional cache key."""
26+
storage_names = iter(['1', '2', '3', '1', '3'])
27+
28+
apify_storage_client = ApifyStorageClient()
29+
30+
config_1 = Configuration(token='a')
31+
config_2 = Configuration(token='b')
32+
config_3 = Configuration(token='a', api_base_url='https://super_custom_api.com')
33+
34+
config_4 = Configuration(token='a')
35+
config_5 = Configuration(token='a', api_base_url='https://super_custom_api.com')
36+
37+
mocked_open = AsyncMock(spec=_storage_client.open)
38+
mocked_open.get_metadata = AsyncMock(storage_names)
39+
40+
with mock.patch.object(_storage_client, 'open', mocked_open):
41+
storage_1 = await storage.open(storage_client=apify_storage_client, configuration=config_1)
42+
storage_2 = await storage.open(storage_client=apify_storage_client, configuration=config_2)
43+
storage_3 = await storage.open(storage_client=apify_storage_client, configuration=config_3)
44+
storage_4 = await storage.open(storage_client=apify_storage_client, configuration=config_4)
45+
storage_5 = await storage.open(storage_client=apify_storage_client, configuration=config_5)
46+
47+
# Different configuration results in different storage clients.
48+
assert storage_1 is not storage_2
49+
assert storage_1 is not storage_3
50+
assert storage_2 is not storage_3
51+
52+
# Equivalent configuration results in same storage clients.
53+
assert storage_1 is storage_4
54+
assert storage_3 is storage_5

tests/unit/storage_clients/test_file_system.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ async def test_purge_preserves_input_file_and_metadata() -> None:
2222
kvs_storage_client = await ApifyFileSystemKeyValueStoreClient.open(
2323
id=None,
2424
name='test-kvs',
25+
alias=None,
2526
configuration=configuration,
2627
)
2728

0 commit comments

Comments
 (0)