Skip to content

Commit 094512f

Browse files
committed
add memory storage from_config constructor
1 parent 6ee1190 commit 094512f

File tree

7 files changed: 67 additions and 45 deletions.

docs/guides/code/request_storage/purge_explicitly_example.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55

66
async def main() -> None:
7-
storage_client = MemoryStorageClient()
7+
storage_client = MemoryStorageClient.from_config()
88
# highlight-next-line
99
await storage_client.purge_on_start()
1010

src/crawlee/_service_locator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def get_storage_client(self) -> BaseStorageClient:
7575
if self._storage_client is None:
7676
from crawlee.memory_storage_client import MemoryStorageClient
7777

78-
self._storage_client = MemoryStorageClient()
78+
self._storage_client = MemoryStorageClient.from_config()
7979

8080
return self._storage_client
8181

src/crawlee/memory_storage_client/_memory_storage_client.py

Lines changed: 34 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@
1010

1111
from typing_extensions import override
1212

13-
from crawlee import service_locator
1413
from crawlee._utils.docs import docs_group
1514
from crawlee.base_storage_client import BaseStorageClient
15+
from crawlee.configuration import Configuration
1616
from crawlee.memory_storage_client._dataset_client import DatasetClient
1717
from crawlee.memory_storage_client._dataset_collection_client import DatasetCollectionClient
1818
from crawlee.memory_storage_client._key_value_store_client import KeyValueStoreClient
@@ -22,7 +22,7 @@
2222

2323
if TYPE_CHECKING:
2424
from crawlee.base_storage_client._types import ResourceClient
25-
from crawlee.configuration import Configuration
25+
2626

2727
TResourceClient = TypeVar('TResourceClient', DatasetClient, KeyValueStoreClient, RequestQueueClient)
2828

@@ -57,38 +57,31 @@ class MemoryStorageClient(BaseStorageClient):
5757

5858
def __init__(
5959
self,
60-
configuration: Configuration | None = None,
6160
*,
62-
write_metadata: bool | None = None,
63-
persist_storage: bool | None = None,
64-
storage_dir: str | None = None,
65-
default_request_queue_id: str | None = None,
66-
default_key_value_store_id: str | None = None,
67-
default_dataset_id: str | None = None,
61+
write_metadata: bool,
62+
persist_storage: bool,
63+
storage_dir: str,
64+
default_request_queue_id: str,
65+
default_key_value_store_id: str,
66+
default_dataset_id: str,
6867
) -> None:
6968
"""A default constructor.
7069
71-
All parameters are optional and can be set either directly or via the configuration object. The defaults
72-
are taken from the configuration object.
73-
7470
Args:
75-
configuration: The configuration object.
7671
write_metadata: Whether to write metadata to the storage.
7772
persist_storage: Whether to persist the storage.
7873
storage_dir: Path to the storage directory.
7974
default_request_queue_id: The default request queue ID.
8075
default_key_value_store_id: The default key-value store ID.
8176
default_dataset_id: The default dataset ID.
8277
"""
83-
config = configuration or service_locator.get_configuration()
84-
8578
# Set the internal attributes.
86-
self._write_metadata = write_metadata or config.write_metadata
87-
self._persist_storage = persist_storage or config.persist_storage
88-
self._storage_dir = storage_dir or config.storage_dir
89-
self._default_request_queue_id = default_request_queue_id or config.default_request_queue_id
90-
self._default_key_value_store_id = default_key_value_store_id or config.default_key_value_store_id
91-
self._default_dataset_id = default_dataset_id or config.default_dataset_id
79+
self._write_metadata = write_metadata
80+
self._persist_storage = persist_storage
81+
self._storage_dir = storage_dir
82+
self._default_request_queue_id = default_request_queue_id
83+
self._default_key_value_store_id = default_key_value_store_id
84+
self._default_dataset_id = default_dataset_id
9285

9386
self.datasets_handled: list[DatasetClient] = []
9487
self.key_value_stores_handled: list[KeyValueStoreClient] = []
@@ -97,6 +90,26 @@ def __init__(
9790
self._purged_on_start = False # Indicates whether a purge was already performed on this instance.
9891
self._purge_lock = asyncio.Lock()
9992

93+
@classmethod
94+
def from_config(cls, config: Configuration | None = None) -> MemoryStorageClient:
95+
"""Create a new instance based on the provided configuration.
96+
97+
All the memory storage client parameters are taken from the configuration object.
98+
99+
Args:
100+
config: The configuration object.
101+
"""
102+
config = config or Configuration.get_global_configuration()
103+
104+
return cls(
105+
write_metadata=config.write_metadata,
106+
persist_storage=config.persist_storage,
107+
storage_dir=config.storage_dir,
108+
default_request_queue_id=config.default_request_queue_id,
109+
default_key_value_store_id=config.default_key_value_store_id,
110+
default_dataset_id=config.default_dataset_id,
111+
)
112+
100113
@property
101114
def write_metadata(self) -> bool:
102115
"""Whether to write metadata to the storage."""

tests/unit/_memory_storage_client/test_memory_storage_client.py

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,13 @@
1717
async def test_write_metadata(tmp_path: Path) -> None:
1818
dataset_name = 'test'
1919
dataset_no_metadata_name = 'test-no-metadata'
20-
ms = MemoryStorageClient(
20+
ms = MemoryStorageClient.from_config(
2121
Configuration(
2222
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
2323
write_metadata=True,
2424
),
2525
)
26-
ms_no_metadata = MemoryStorageClient(
26+
ms_no_metadata = MemoryStorageClient.from_config(
2727
Configuration(
2828
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
2929
write_metadata=False,
@@ -48,7 +48,7 @@ async def test_write_metadata(tmp_path: Path) -> None:
4848
],
4949
)
5050
async def test_persist_storage(persist_storage: bool, tmp_path: Path) -> None: # noqa: FBT001
51-
ms = MemoryStorageClient(
51+
ms = MemoryStorageClient.from_config(
5252
Configuration(
5353
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
5454
persist_storage=persist_storage,
@@ -82,18 +82,20 @@ async def test_persist_storage(persist_storage: bool, tmp_path: Path) -> None:
8282

8383
def test_persist_storage_set_to_false_via_string_env_var(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
8484
monkeypatch.setenv('CRAWLEE_PERSIST_STORAGE', 'false')
85-
ms = MemoryStorageClient(Configuration(crawlee_storage_dir=str(tmp_path))) # type: ignore[call-arg]
85+
ms = MemoryStorageClient.from_config(
86+
Configuration(crawlee_storage_dir=str(tmp_path)), # type: ignore[call-arg]
87+
)
8688
assert ms.persist_storage is False
8789

8890

8991
def test_persist_storage_set_to_false_via_numeric_env_var(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
9092
monkeypatch.setenv('CRAWLEE_PERSIST_STORAGE', '0')
91-
ms = MemoryStorageClient(Configuration(crawlee_storage_dir=str(tmp_path))) # type: ignore[call-arg]
93+
ms = MemoryStorageClient.from_config(Configuration(crawlee_storage_dir=str(tmp_path))) # type: ignore[call-arg]
9294
assert ms.persist_storage is False
9395

9496

9597
def test_persist_storage_true_via_constructor_arg(tmp_path: Path) -> None:
96-
ms = MemoryStorageClient(
98+
ms = MemoryStorageClient.from_config(
9799
Configuration(
98100
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
99101
persist_storage=True,
@@ -104,20 +106,24 @@ def test_persist_storage_true_via_constructor_arg(tmp_path: Path) -> None:
104106

105107
def test_default_write_metadata_behavior(tmp_path: Path) -> None:
106108
# Default behavior
107-
ms = MemoryStorageClient(Configuration(crawlee_storage_dir=str(tmp_path))) # type: ignore[call-arg]
109+
ms = MemoryStorageClient.from_config(
110+
Configuration(crawlee_storage_dir=str(tmp_path)), # type: ignore[call-arg]
111+
)
108112
assert ms.write_metadata is True
109113

110114

111115
def test_write_metadata_set_to_false_via_env_var(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
112116
# Test if env var changes write_metadata to False
113117
monkeypatch.setenv('CRAWLEE_WRITE_METADATA', 'false')
114-
ms = MemoryStorageClient(Configuration(crawlee_storage_dir=str(tmp_path))) # type: ignore[call-arg]
118+
ms = MemoryStorageClient.from_config(
119+
Configuration(crawlee_storage_dir=str(tmp_path)), # type: ignore[call-arg]
120+
)
115121
assert ms.write_metadata is False
116122

117123

118124
def test_write_metadata_false_via_constructor_arg_overrides_env_var(tmp_path: Path) -> None:
119125
# Test if constructor arg takes precedence over env var value
120-
ms = MemoryStorageClient(
126+
ms = MemoryStorageClient.from_config(
121127
Configuration(
122128
write_metadata=False,
123129
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
@@ -127,7 +133,7 @@ def test_write_metadata_false_via_constructor_arg_overrides_env_var(tmp_path: Pa
127133

128134

129135
async def test_purge_datasets(tmp_path: Path) -> None:
130-
ms = MemoryStorageClient(
136+
ms = MemoryStorageClient.from_config(
131137
Configuration(
132138
write_metadata=True,
133139
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
@@ -150,7 +156,7 @@ async def test_purge_datasets(tmp_path: Path) -> None:
150156

151157

152158
async def test_purge_key_value_stores(tmp_path: Path) -> None:
153-
ms = MemoryStorageClient(
159+
ms = MemoryStorageClient.from_config(
154160
Configuration(
155161
write_metadata=True,
156162
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
@@ -185,7 +191,7 @@ async def test_purge_key_value_stores(tmp_path: Path) -> None:
185191

186192

187193
async def test_purge_request_queues(tmp_path: Path) -> None:
188-
ms = MemoryStorageClient(
194+
ms = MemoryStorageClient.from_config(
189195
Configuration(
190196
write_metadata=True,
191197
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
@@ -207,7 +213,7 @@ async def test_purge_request_queues(tmp_path: Path) -> None:
207213

208214

209215
async def test_not_implemented_method(tmp_path: Path) -> None:
210-
ms = MemoryStorageClient(
216+
ms = MemoryStorageClient.from_config(
211217
Configuration(
212218
write_metadata=True,
213219
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
@@ -230,21 +236,21 @@ async def test_default_storage_path_used(monkeypatch: pytest.MonkeyPatch) -> Non
230236
monkeypatch.delenv('CRAWLEE_STORAGE_DIR', raising=False)
231237

232238
# Initialize the service locator with default configuration
233-
msc = MemoryStorageClient()
239+
msc = MemoryStorageClient.from_config()
234240
assert msc.storage_dir == './storage'
235241

236242

237243
async def test_storage_path_from_env_var_overrides_default(monkeypatch: pytest.MonkeyPatch) -> None:
238244
# We expect the env var to override the default value
239245
monkeypatch.setenv('CRAWLEE_STORAGE_DIR', './env_var_storage_dir')
240246
service_locator.set_configuration(Configuration())
241-
ms = MemoryStorageClient()
247+
ms = MemoryStorageClient.from_config()
242248
assert ms.storage_dir == './env_var_storage_dir'
243249

244250

245251
async def test_parametrized_storage_path_overrides_env_var() -> None:
246252
# We expect the parametrized value to be used
247-
ms = MemoryStorageClient(
253+
ms = MemoryStorageClient.from_config(
248254
Configuration(crawlee_storage_dir='./parametrized_storage_dir'), # type: ignore[call-arg]
249255
)
250256
assert ms.storage_dir == './parametrized_storage_dir'

tests/unit/conftest.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from yarl import URL
1212

1313
from crawlee import service_locator
14+
from crawlee.configuration import Configuration
1415
from crawlee.memory_storage_client import MemoryStorageClient
1516
from crawlee.proxy_configuration import ProxyInfo
1617
from crawlee.storages import _creation_management
@@ -164,8 +165,10 @@ async def disabled_proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, Non
164165
@pytest.fixture
165166
def memory_storage_client(tmp_path: Path) -> MemoryStorageClient:
166167
"""A fixture for testing the memory storage client and its resource clients."""
167-
return MemoryStorageClient(
168-
write_metadata=True,
168+
config = Configuration(
169169
persist_storage=True,
170-
storage_dir=str(tmp_path),
170+
write_metadata=True,
171+
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
171172
)
173+
174+
return MemoryStorageClient.from_config(config)

tests/unit/test_configuration.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ async def test_storage_not_persisted_when_disabled(tmp_path: Path, httpbin: URL)
3131
write_metadata=False,
3232
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
3333
)
34-
storage_client = MemoryStorageClient(config)
34+
storage_client = MemoryStorageClient.from_config(config)
3535
service_locator.set_storage_client(storage_client)
3636

3737
crawler = HttpCrawler()
@@ -53,7 +53,7 @@ async def test_storage_persisted_when_enabled(tmp_path: Path, httpbin: URL) -> N
5353
write_metadata=True,
5454
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
5555
)
56-
storage_client = MemoryStorageClient(config)
56+
storage_client = MemoryStorageClient.from_config(config)
5757
service_locator.set_storage_client(storage_client)
5858

5959
crawler = HttpCrawler()

tests/unit/test_service_locator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def test_storage_client() -> None:
4040
default_storage_client = service_locator.get_storage_client()
4141
assert isinstance(default_storage_client, MemoryStorageClient)
4242

43-
custom_storage_client = MemoryStorageClient()
43+
custom_storage_client = MemoryStorageClient.from_config()
4444
service_locator.set_storage_client(custom_storage_client)
4545
storage_client = service_locator.get_storage_client()
4646
assert storage_client == custom_storage_client

0 commit comments