Skip to content

Commit 287a119

Browse files
committed
add async metadata getters
1 parent 3bacab7 commit 287a119

12 files changed

+42
-58
lines changed

docs/03_concepts/code/conditional_actor_charge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ async def main() -> None:
66
# Check the dataset because there might already be items
77
# if the run migrated or was restarted
88
default_dataset = await Actor.open_dataset()
9-
charged_items = default_dataset.metadata.item_count
9+
metadata = await default_dataset.get_metadata()
10+
charged_items = metadata.item_count
1011

1112
# highlight-start
1213
if Actor.get_charging_manager().get_pricing_info().is_pay_per_event:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
"apify-client>=1.12.0",
3838
"apify-shared>=1.3.0",
3939
"cachetools>=5.5.0",
40-
"crawlee@git+https://github.com/apify/crawlee-python.git@new-storage-clients",
40+
"crawlee@git+https://github.com/apify/crawlee-python.git@9dfac4b8afb8027979d85947f0db303f384b7158",
4141
"cryptography>=42.0.0",
4242
"httpx>=0.27.0",
4343
# TODO: ensure compatibility with the latest version of lazy-object-proxy

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ class ApifyDatasetClient(DatasetClient):
3737
def __init__(
3838
self,
3939
*,
40-
metadata: DatasetMetadata,
4140
api_client: DatasetClientAsync,
4241
api_public_base_url: str,
4342
lock: asyncio.Lock,
@@ -46,8 +45,6 @@ def __init__(
4645
4746
Preferably use the `ApifyDatasetClient.open` class method to create a new instance.
4847
"""
49-
self._metadata = metadata
50-
5148
self._api_client = api_client
5249
"""The Apify dataset client for API operations."""
5350

@@ -57,10 +54,10 @@ def __init__(
5754
self._lock = lock
5855
"""A lock to ensure that only one operation is performed at a time."""
5956

60-
@property
6157
@override
62-
def metadata(self) -> DatasetMetadata:
63-
return self._metadata
58+
async def get_metadata(self) -> DatasetMetadata:
59+
metadata = await self._api_client.get()
60+
return DatasetMetadata.model_validate(metadata)
6461

6562
@classmethod
6663
async def open(
@@ -138,11 +135,7 @@ async def open(
138135
# Get the client for the specific storage by ID.
139136
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
140137

141-
# Fetch its metadata.
142-
metadata = DatasetMetadata.model_validate(await apify_dataset_client.get())
143-
144138
return cls(
145-
metadata=metadata,
146139
api_client=apify_dataset_client,
147140
api_public_base_url=api_public_base_url,
148141
lock=asyncio.Lock(),
@@ -178,8 +171,6 @@ async def payloads_generator() -> AsyncIterator[str]:
178171
items = await self._check_and_serialize(data)
179172
await self._api_client.push_items(items=items)
180173

181-
await self._update_metadata()
182-
183174
@override
184175
async def get_data(
185176
self,
@@ -209,9 +200,7 @@ async def get_data(
209200
flatten=flatten,
210201
view=view,
211202
)
212-
result = DatasetItemsListPage.model_validate(vars(response))
213-
await self._update_metadata()
214-
return result
203+
return DatasetItemsListPage.model_validate(vars(response))
215204

216205
@override
217206
async def iterate_items(
@@ -240,13 +229,6 @@ async def iterate_items(
240229
):
241230
yield item
242231

243-
await self._update_metadata()
244-
245-
async def _update_metadata(self) -> None:
246-
"""Update the dataset metadata file with current information."""
247-
metadata = await self._api_client.get()
248-
self._metadata = DatasetMetadata.model_validate(metadata)
249-
250232
@classmethod
251233
async def _check_and_serialize(cls, item: JsonSerializable, index: int | None = None) -> str:
252234
"""Serialize a given item to JSON, checks its serializability and size against a limit.

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
2929
def __init__(
3030
self,
3131
*,
32-
metadata: KeyValueStoreMetadata,
3332
api_client: KeyValueStoreClientAsync,
3433
api_public_base_url: str,
3534
lock: asyncio.Lock,
@@ -38,8 +37,6 @@ def __init__(
3837
3938
Preferably use the `ApifyKeyValueStoreClient.open` class method to create a new instance.
4039
"""
41-
self._metadata = metadata
42-
4340
self._api_client = api_client
4441
"""The Apify KVS client for API operations."""
4542

@@ -49,10 +46,10 @@ def __init__(
4946
self._lock = lock
5047
"""A lock to ensure that only one operation is performed at a time."""
5148

52-
@property
5349
@override
54-
def metadata(self) -> KeyValueStoreMetadata:
55-
return self._metadata
50+
async def get_metadata(self) -> KeyValueStoreMetadata:
51+
metadata = await self._api_client.get()
52+
return KeyValueStoreMetadata.model_validate(metadata)
5653

5754
@classmethod
5855
async def open(
@@ -130,11 +127,7 @@ async def open(
130127
# Get the client for the specific storage by ID.
131128
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
132129

133-
# Fetch its metadata.
134-
metadata = KeyValueStoreMetadata.model_validate(await apify_kvs_client.get())
135-
136130
return cls(
137-
metadata=metadata,
138131
api_client=apify_kvs_client,
139132
api_public_base_url=api_public_base_url,
140133
lock=asyncio.Lock(),
@@ -229,9 +222,10 @@ async def get_public_url(self, key: str) -> str:
229222
public_url = (
230223
URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
231224
)
225+
metadata = await self.get_metadata()
232226

233-
if self.metadata.model_extra is not None:
234-
url_signing_secret_key = self.metadata.model_extra.get('urlSigningSecretKey')
227+
if metadata.model_extra is not None:
228+
url_signing_secret_key = metadata.model_extra.get('urlSigningSecretKey')
235229
if url_signing_secret_key is not None:
236230
public_url = public_url.with_query(signature=create_hmac_signature(url_signing_secret_key, key))
237231

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ def __init__(
7070
self._should_check_for_forefront_requests = False
7171
"""Whether to check for forefront requests in the next list_head call."""
7272

73-
@property
7473
@override
75-
def metadata(self) -> RequestQueueMetadata:
76-
return self._metadata
74+
async def get_metadata(self) -> RequestQueueMetadata:
75+
metadata = await self._api_client.get()
76+
return RequestQueueMetadata.model_validate(metadata)
7777

7878
@classmethod
7979
async def open(

tests/integration/test_actor_dataset.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ async def main() -> None:
104104
dataset_by_name_2 = await Actor.open_dataset(name=dataset_name)
105105
assert dataset_by_name_1 is dataset_by_name_2
106106

107-
dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1.metadata.id)
108-
dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1.metadata.id)
107+
dataset_1_metadata = await dataset_by_name_1.get_metadata()
108+
dataset_by_id_1 = await Actor.open_dataset(id=dataset_1_metadata.id)
109+
dataset_by_id_2 = await Actor.open_dataset(id=dataset_1_metadata.id)
109110
assert dataset_by_id_1 is dataset_by_name_1
110111
assert dataset_by_id_2 is dataset_by_id_1
111112

@@ -129,7 +130,7 @@ async def test_force_cloud(
129130

130131
async with Actor:
131132
dataset = await Actor.open_dataset(name=dataset_name, force_cloud=True)
132-
dataset_id = dataset.metadata.id
133+
dataset_id = (await dataset.get_metadata()).id
133134

134135
await dataset.push_data(dataset_item)
135136

tests/integration/test_actor_key_value_store.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ async def main() -> None:
4545
kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name)
4646
assert kvs_by_name_1 is kvs_by_name_2
4747

48-
kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1.metadata.id)
49-
kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1.metadata.id)
48+
kvs_1_metadata = await kvs_by_name_1.get_metadata()
49+
kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_1_metadata.id)
50+
kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_1_metadata.id)
5051
assert kvs_by_id_1 is kvs_by_name_1
5152
assert kvs_by_id_2 is kvs_by_id_1
5253

@@ -69,7 +70,7 @@ async def test_force_cloud(
6970

7071
async with Actor:
7172
key_value_store = await Actor.open_key_value_store(name=key_value_store_name, force_cloud=True)
72-
key_value_store_id = key_value_store.metadata.id
73+
key_value_store_id = (await key_value_store.get_metadata()).id
7374

7475
await key_value_store.set_value('foo', 'bar')
7576

@@ -209,9 +210,10 @@ async def main() -> None:
209210
record_key = 'public-record-key'
210211

211212
kvs = await Actor.open_key_value_store()
212-
assert kvs.metadata.model_extra is not None
213+
metadata = await kvs.get_metadata()
214+
assert metadata.model_extra is not None
213215

214-
url_signing_secret_key = kvs.metadata.model_extra.get('urlSigningSecretKey')
216+
url_signing_secret_key = metadata.model_extra.get('urlSigningSecretKey')
215217
assert url_signing_secret_key is not None
216218

217219
await kvs.set_value(record_key, {'exposedData': 'test'}, 'application/json')

tests/integration/test_actor_request_queue.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ async def main() -> None:
4545
rq_by_name_2 = await Actor.open_request_queue(name=rq_name)
4646
assert rq_by_name_1 is rq_by_name_2
4747

48-
rq_by_id_1 = await Actor.open_request_queue(id=rq_by_name_1.metadata.id)
49-
rq_by_id_2 = await Actor.open_request_queue(id=rq_by_name_1.metadata.id)
48+
rq_1_metadata = await rq_by_name_1.get_metadata()
49+
rq_by_id_1 = await Actor.open_request_queue(id=rq_1_metadata.id)
50+
rq_by_id_2 = await Actor.open_request_queue(id=rq_1_metadata.id)
5051
assert rq_by_id_1 is rq_by_name_1
5152
assert rq_by_id_2 is rq_by_id_1
5253

@@ -69,7 +70,7 @@ async def test_force_cloud(
6970

7071
async with Actor:
7172
request_queue = await Actor.open_request_queue(name=request_queue_name, force_cloud=True)
72-
request_queue_id = request_queue.metadata.id
73+
request_queue_id = (await request_queue.get_metadata()).id
7374

7475
request_info = await request_queue.add_request(Request.from_url('http://example.com'))
7576

tests/unit/actor/test_actor_dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ async def test_open_dataset_returns_same_references() -> None:
2121
dataset_by_name_2 = await Actor.open_dataset(name=dataset_name)
2222
assert dataset_by_name_1 is dataset_by_name_2
2323

24-
dataset_by_id_1 = await Actor.open_dataset(id=dataset_by_name_1.metadata.id)
25-
dataset_by_id_2 = await Actor.open_dataset(id=dataset_by_name_1.metadata.id)
24+
dataset_1_metadata = await dataset_by_name_1.get_metadata()
25+
dataset_by_id_1 = await Actor.open_dataset(id=dataset_1_metadata.id)
26+
dataset_by_id_2 = await Actor.open_dataset(id=dataset_1_metadata.id)
2627

2728
assert dataset_by_id_1 is dataset_by_name_1
2829
assert dataset_by_id_2 is dataset_by_id_1

tests/unit/actor/test_actor_key_value_store.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ async def test_open_returns_same_references() -> None:
2121
kvs_by_name_2 = await Actor.open_key_value_store(name=kvs_name)
2222
assert kvs_by_name_1 is kvs_by_name_2
2323

24-
kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_by_name_1.metadata.id)
25-
kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_by_name_1.metadata.id)
24+
kvs_1_metadata = await kvs_by_name_1.get_metadata()
25+
kvs_by_id_1 = await Actor.open_key_value_store(id=kvs_1_metadata.id)
26+
kvs_by_id_2 = await Actor.open_key_value_store(id=kvs_1_metadata.id)
2627
assert kvs_by_id_1 is kvs_by_name_1
2728
assert kvs_by_id_2 is kvs_by_id_1
2829

0 commit comments

Comments
 (0)