Skip to content

Commit de429dc

Browse files
committed
Apify storage client fixes
1 parent 44d8e09 commit de429dc

File tree

5 files changed

+81
-44
lines changed

5 files changed

+81
-44
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
"apify-client>=1.12.0",
3838
"apify-shared>=1.3.0",
3939
"cachetools>=5.5.0",
40-
"crawlee@git+https://github.com/apify/crawlee-python.git@0c4cfc9ada06e35f63213e6a937c4e85defcbecf",
40+
"crawlee@git+https://github.com/apify/crawlee-python.git@master",
4141
"cryptography>=42.0.0",
4242
"httpx>=0.27.0",
4343
# TODO: ensure compatibility with the latest version of lazy-object-proxy

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -105,9 +105,6 @@ async def open(
105105
f'(api_public_base_url={api_public_base_url}).'
106106
)
107107

108-
if id and name:
109-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
110-
111108
# Create Apify client with the provided token and API URL.
112109
apify_client_async = ApifyClientAsync(
113110
token=token,
@@ -118,23 +115,40 @@ async def open(
118115
)
119116
apify_datasets_client = apify_client_async.datasets()
120117

118+
# If both id and name are provided, raise an error.
119+
if id and name:
120+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
121+
122+
# If id is provided, get the storage by ID.
123+
if id and name is None:
124+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
125+
121126
# If name is provided, get or create the storage by name.
122-
if name is not None and id is None:
127+
if name and id is None:
123128
id = DatasetMetadata.model_validate(
124129
await apify_datasets_client.get_or_create(name=name),
125130
).id
131+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
126132

127133
# If both id and name are None, try to get the default storage ID from environment variables.
128134
if id is None and name is None:
129-
id = getattr(configuration, 'default_dataset_id', None)
135+
id = configuration.default_dataset_id
136+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
130137

131-
if id is None:
132-
raise ValueError(
133-
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
134-
)
138+
# Fetch its metadata.
139+
metadata = await apify_dataset_client.get()
140+
141+
# If metadata is None, it means the storage does not exist, so we create it.
142+
if metadata is None:
143+
id = DatasetMetadata.model_validate(
144+
await apify_datasets_client.get_or_create(),
145+
).id
146+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
135147

136-
# Get the client for the specific storage by ID.
137-
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
148+
# Verify that the storage exists by fetching its metadata again.
149+
metadata = await apify_dataset_client.get()
150+
if metadata is None:
151+
raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
138152

139153
return cls(
140154
api_client=apify_dataset_client,

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -97,9 +97,6 @@ async def open(
9797
f'(api_public_base_url={api_public_base_url}).'
9898
)
9999

100-
if id and name:
101-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
102-
103100
# Create Apify client with the provided token and API URL.
104101
apify_client_async = ApifyClientAsync(
105102
token=token,
@@ -110,23 +107,40 @@ async def open(
110107
)
111108
apify_kvss_client = apify_client_async.key_value_stores()
112109

110+
# If both id and name are provided, raise an error.
111+
if id and name:
112+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
113+
114+
# If id is provided, get the storage by ID.
115+
if id and name is None:
116+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
117+
113118
# If name is provided, get or create the storage by name.
114-
if name is not None and id is None:
119+
if name and id is None:
115120
id = ApifyKeyValueStoreMetadata.model_validate(
116121
await apify_kvss_client.get_or_create(name=name),
117122
).id
123+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
118124

119125
# If both id and name are None, try to get the default storage ID from environment variables.
120126
if id is None and name is None:
121-
id = getattr(configuration, 'default_key_value_store_id', None)
127+
id = configuration.default_key_value_store_id
128+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
122129

123-
if id is None:
124-
raise ValueError(
125-
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
126-
)
130+
# Fetch its metadata.
131+
metadata = await apify_kvs_client.get()
132+
133+
# If metadata is None, it means the storage does not exist, so we create it.
134+
if metadata is None:
135+
id = ApifyKeyValueStoreMetadata.model_validate(
136+
await apify_kvss_client.get_or_create(),
137+
).id
138+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
127139

128-
# Get the client for the specific storage by ID.
129-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
140+
# Verify that the storage exists by fetching its metadata again.
141+
metadata = await apify_kvs_client.get()
142+
if metadata is None:
143+
raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
130144

131145
return cls(
132146
api_client=apify_kvs_client,

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ class ApifyRequestQueueClient(RequestQueueClient):
3939
def __init__(
4040
self,
4141
*,
42-
metadata: RequestQueueMetadata,
4342
api_client: RequestQueueClientAsync,
4443
api_public_base_url: str,
4544
lock: asyncio.Lock,
@@ -48,8 +47,6 @@ def __init__(
4847
4948
Preferably use the `ApifyRequestQueueClient.open` class method to create a new instance.
5049
"""
51-
self._metadata = metadata
52-
5350
self._api_client = api_client
5451
"""The Apify request queue client for API operations."""
5552

@@ -122,9 +119,6 @@ async def open(
122119
f'(api_public_base_url={api_public_base_url}).'
123120
)
124121

125-
if id and name:
126-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
127-
128122
# Create Apify client with the provided token and API URL.
129123
apify_client_async = ApifyClientAsync(
130124
token=token,
@@ -135,29 +129,42 @@ async def open(
135129
)
136130
apify_rqs_client = apify_client_async.request_queues()
137131

132+
# If both id and name are provided, raise an error.
133+
if id and name:
134+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
135+
136+
# If id is provided, get the storage by ID.
137+
if id and name is None:
138+
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
139+
138140
# If name is provided, get or create the storage by name.
139-
if name is not None and id is None:
141+
if name and id is None:
140142
id = RequestQueueMetadata.model_validate(
141143
await apify_rqs_client.get_or_create(name=name),
142144
).id
145+
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
143146

144147
# If both id and name are None, try to get the default storage ID from environment variables.
145148
if id is None and name is None:
146-
id = getattr(configuration, 'default_request_queue_id', None)
149+
id = configuration.default_request_queue_id
150+
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
147151

148-
if id is None:
149-
raise ValueError(
150-
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
151-
)
152+
# Fetch its metadata.
153+
metadata = await apify_rq_client.get()
152154

153-
# Get the client for the specific storage by ID.
154-
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
155+
# If metadata is None, it means the storage does not exist, so we create it.
156+
if metadata is None:
157+
id = RequestQueueMetadata.model_validate(
158+
await apify_rqs_client.get_or_create(),
159+
).id
160+
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
155161

156-
# Fetch its metadata.
157-
metadata = RequestQueueMetadata.model_validate(await apify_rq_client.get())
162+
# Verify that the storage exists by fetching its metadata again.
163+
metadata = await apify_rq_client.get()
164+
if metadata is None:
165+
raise ValueError(f'Opening request queue with id={id} and name={name} failed.')
158166

159167
return cls(
160-
metadata=metadata,
161168
api_client=apify_rq_client,
162169
api_public_base_url=api_public_base_url,
163170
lock=asyncio.Lock(),
@@ -477,10 +484,12 @@ async def _list_head(
477484
if cached_request and cached_request.hydrated:
478485
items.append(cached_request.hydrated)
479486

487+
metadata = await self.get_metadata()
488+
480489
return RequestQueueHead(
481490
limit=limit,
482-
had_multiple_clients=self._metadata.had_multiple_clients,
483-
queue_modified_at=self._metadata.modified_at,
491+
had_multiple_clients=metadata.had_multiple_clients,
492+
queue_modified_at=metadata.modified_at,
484493
items=items,
485494
queue_has_locked_requests=self._queue_has_locked_requests,
486495
lock_time=lock_time,

uv.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)