Skip to content

Commit 4270cb9

Browse files
committed
Fixes
1 parent 9f0470a commit 4270cb9

File tree

9 files changed

+101
-52
lines changed

9 files changed

+101
-52
lines changed

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import asyncio
44
import warnings
5+
from datetime import datetime
56
from logging import getLogger
67
from typing import TYPE_CHECKING, Any
78

@@ -65,7 +66,18 @@ def __init__(
6566
@override
6667
async def get_metadata(self) -> DatasetMetadata:
6768
metadata = await self._api_client.get()
68-
return DatasetMetadata.model_validate(metadata)
69+
70+
if metadata is None:
71+
raise ValueError('Failed to retrieve dataset metadata.')
72+
73+
return DatasetMetadata(
74+
id=metadata.id,
75+
name=metadata.name,
76+
created_at=datetime.fromisoformat(metadata.created_at.replace('Z', '+00:00')),
77+
modified_at=datetime.fromisoformat(metadata.modified_at.replace('Z', '+00:00')),
78+
accessed_at=datetime.fromisoformat(metadata.accessed_at.replace('Z', '+00:00')),
79+
item_count=int(metadata.item_count),
80+
)
6981

7082
@classmethod
7183
async def open(

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import asyncio
44
import warnings
5+
from datetime import datetime
56
from logging import getLogger
67
from typing import TYPE_CHECKING, Any
78

@@ -11,7 +12,7 @@
1112
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
1213

1314
from ._api_client_creation import create_storage_api_client
14-
from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
15+
from ._models import ApifyKeyValueStoreMetadata
1516

1617
if TYPE_CHECKING:
1718
from collections.abc import AsyncIterator
@@ -54,7 +55,18 @@ def __init__(
5455
@override
5556
async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
5657
metadata = await self._api_client.get()
57-
return ApifyKeyValueStoreMetadata.model_validate(metadata)
58+
59+
if metadata is None:
60+
raise ValueError('Failed to retrieve key-value store metadata.')
61+
62+
return ApifyKeyValueStoreMetadata(
63+
id=metadata.id,
64+
name=metadata.name,
65+
created_at=datetime.fromisoformat(metadata.created_at.replace('Z', '+00:00')),
66+
modified_at=datetime.fromisoformat(metadata.modified_at.replace('Z', '+00:00')),
67+
accessed_at=datetime.fromisoformat(metadata.accessed_at.replace('Z', '+00:00')),
68+
url_signing_secret_key=metadata.url_signing_secret_key,
69+
)
5870

5971
@classmethod
6072
async def open(
@@ -143,14 +155,13 @@ async def iterate_keys(
143155
count = 0
144156

145157
while True:
146-
response = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key)
147-
list_key_page = KeyValueStoreListKeysPage.model_validate(response)
158+
list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key)
148159

149160
for item in list_key_page.items:
150161
# Convert KeyValueStoreKeyInfo to KeyValueStoreRecordMetadata
151162
record_metadata = KeyValueStoreRecordMetadata(
152163
key=item.key,
153-
size=item.size,
164+
size=int(item.size),
154165
content_type='application/octet-stream', # Content type not available from list_keys
155166
)
156167
yield record_metadata

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,31 +78,34 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata:
7878
Returns:
7979
Request queue metadata with accurate counts and timestamps, combining API data with local estimates.
8080
"""
81-
response = await self._api_client.get()
81+
metadata = await self._api_client.get()
8282

83-
if response is None:
83+
if metadata is None:
8484
raise ValueError('Failed to fetch request queue metadata from the API.')
8585

86-
total_request_count = int(response.total_request_count)
87-
handled_request_count = int(response.handled_request_count)
88-
pending_request_count = int(response.pending_request_count)
89-
created_at = datetime.fromisoformat(response.created_at)
90-
modified_at = datetime.fromisoformat(response.modified_at)
91-
accessed_at = datetime.fromisoformat(response.accessed_at)
86+
total_request_count = int(metadata.total_request_count)
87+
handled_request_count = int(metadata.handled_request_count)
88+
pending_request_count = int(metadata.pending_request_count)
89+
created_at = datetime.fromisoformat(metadata.created_at.replace('Z', '+00:00'))
90+
modified_at = datetime.fromisoformat(metadata.modified_at.replace('Z', '+00:00'))
91+
accessed_at = datetime.fromisoformat(metadata.accessed_at.replace('Z', '+00:00'))
9292

9393
# Enhance API response with local estimations to account for propagation delays (API data can be delayed
9494
# by a few seconds, while local estimates are immediately accurate).
9595
return ApifyRequestQueueMetadata(
96-
id=response.id,
97-
name=response.name,
96+
id=metadata.id,
97+
name=metadata.name,
9898
total_request_count=max(total_request_count, self._implementation.metadata.total_request_count),
9999
handled_request_count=max(handled_request_count, self._implementation.metadata.handled_request_count),
100100
pending_request_count=pending_request_count,
101101
created_at=min(created_at, self._implementation.metadata.created_at),
102102
modified_at=max(modified_at, self._implementation.metadata.modified_at),
103103
accessed_at=max(accessed_at, self._implementation.metadata.accessed_at),
104-
had_multiple_clients=response.had_multiple_clients or self._implementation.metadata.had_multiple_clients,
105-
stats=RequestQueueStats.model_validate(response.stats, by_alias=True), # ty: ignore[possibly-missing-attribute]
104+
had_multiple_clients=metadata.had_multiple_clients or self._implementation.metadata.had_multiple_clients,
105+
stats=RequestQueueStats.model_validate(
106+
metadata.stats.model_dump(by_alias=True) if metadata.stats else {},
107+
by_alias=True,
108+
),
106109
)
107110

108111
@classmethod
@@ -151,7 +154,7 @@ async def open(
151154
raw_metadata = await api_client.get()
152155
if raw_metadata is None:
153156
raise ValueError('Failed to retrieve request queue metadata from the API.')
154-
metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata)
157+
metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata.model_dump(by_alias=True))
155158

156159
return cls(
157160
api_client=api_client,

src/apify/storage_clients/_apify/_request_queue_shared_client.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -121,18 +121,17 @@ async def add_batch_of_requests(
121121

122122
if new_requests:
123123
# Prepare requests for API by converting to dictionaries.
124-
requests_dict = [
125-
request.model_dump(
126-
by_alias=True,
127-
)
128-
for request in new_requests
129-
]
124+
requests_dict = [request.model_dump(by_alias=True) for request in new_requests]
130125

131126
# Send requests to API.
132-
api_response = AddRequestsResponse.model_validate(
133-
await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
127+
batch_response = await self._api_client.batch_add_requests(
128+
requests=requests_dict,
129+
forefront=forefront,
134130
)
135131

132+
batch_response_dict = batch_response.model_dump(by_alias=True)
133+
api_response = AddRequestsResponse.model_validate(batch_response_dict)
134+
136135
# Add the locally known already present processed requests based on the local cache.
137136
api_response.processed_requests.extend(already_present_requests)
138137

@@ -312,7 +311,8 @@ async def _get_request_by_id(self, request_id: str) -> Request | None:
312311
if response is None:
313312
return None
314313

315-
return Request.model_validate(response)
314+
response_dict = response.model_dump(by_alias=True)
315+
return Request.model_validate(response_dict)
316316

317317
async def _ensure_head_is_non_empty(self) -> None:
318318
"""Ensure that the queue head has requests if they are available in the queue."""
@@ -442,7 +442,7 @@ async def _list_head(
442442
self.metadata.had_multiple_clients = list_and_lost_data.had_multiple_clients
443443

444444
for request_data in list_and_lost_data.items:
445-
request = Request.model_validate(request_data)
445+
request = Request.model_validate(request_data.model_dump(by_alias=True))
446446
request_id = request_data.id
447447

448448
# Skip requests without ID or unique key
@@ -473,7 +473,8 @@ async def _list_head(
473473
# After adding new requests to the forefront, any existing leftover locked request is kept in the end.
474474
self._queue_head.append(leftover_id)
475475

476-
return RequestQueueHead.model_validate(list_and_lost_data)
476+
list_and_lost_dict = list_and_lost_data.model_dump(by_alias=True)
477+
return RequestQueueHead.model_validate(list_and_lost_dict)
477478

478479
def _cache_request(
479480
self,

src/apify/storage_clients/_apify/_request_queue_single_client.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -147,22 +147,20 @@ async def add_batch_of_requests(
147147

148148
if new_requests:
149149
# Prepare requests for API by converting to dictionaries.
150-
requests_dict = [
151-
request.model_dump(
152-
by_alias=True,
153-
)
154-
for request in new_requests
155-
]
150+
requests_dict = [request.model_dump(by_alias=True) for request in new_requests]
156151

157152
# Send requests to API.
158-
api_response = AddRequestsResponse.model_validate(
159-
await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
160-
)
153+
batch_response = await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
154+
batch_response_dict = batch_response.model_dump(by_alias=True)
155+
api_response = AddRequestsResponse.model_validate(batch_response_dict)
156+
161157
# Add the locally known already present processed requests based on the local cache.
162158
api_response.processed_requests.extend(already_present_requests)
159+
163160
# Remove unprocessed requests from the cache
164161
for unprocessed_request in api_response.unprocessed_requests:
165-
self._requests_cache.pop(unique_key_to_request_id(unprocessed_request.unique_key), None)
162+
request_id = unique_key_to_request_id(unprocessed_request.unique_key)
163+
self._requests_cache.pop(request_id, None)
166164

167165
else:
168166
api_response = AddRequestsResponse(
@@ -292,12 +290,12 @@ async def _list_head(self) -> None:
292290
# Should this warn only once? This situation may fall outside the expected context if other consumers consume at the same time
293291

294292
if response.queue_modified_at:
295-
modified_at = datetime.fromisoformat(response.queue_modified_at)
293+
modified_at = datetime.fromisoformat(response.queue_modified_at.replace('Z', '+00:00'))
296294
self.metadata.modified_at = max(self.metadata.modified_at, modified_at)
297295

298296
# Update the cached data
299297
for request_data in response.items:
300-
request = Request.model_validate(request_data)
298+
request = Request.model_validate(request_data.model_dump(by_alias=True))
301299
request_id = request_data.id
302300

303301
if request_id in self._requests_in_progress:
@@ -329,7 +327,8 @@ async def _get_request_by_id(self, id: str) -> Request | None:
329327
if response is None:
330328
return None
331329

332-
request = Request.model_validate(response)
330+
response_dict = response.model_dump(by_alias=True)
331+
request = Request.model_validate(response_dict)
333332

334333
# Updated local caches
335334
if id in self._requests_in_progress:
@@ -380,7 +379,7 @@ async def _init_caches(self) -> None:
380379
"""
381380
response = await self._api_client.list_requests(limit=10_000)
382381
for request_data in response.items:
383-
request = Request.model_validate(request_data)
382+
request = Request.model_validate(request_data.model_dump(by_alias=True))
384383
request_id = request_data.id
385384

386385
if request.was_already_handled:

tests/integration/actor/test_actor_request_queue.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,12 @@ async def main() -> None:
9898
stats_after = _rq.stats
9999
Actor.log.info(stats_after)
100100

101-
assert (stats_after['writeCount'] - stats_before['writeCount']) == 1
101+
assert stats_after is not None
102+
assert stats_after.write_count is not None
103+
assert stats_before is not None
104+
assert stats_before.write_count is not None
105+
106+
assert (stats_after.write_count - stats_before.write_count) == 1
102107

103108
actor = await make_actor(label='rq-deduplication', main_func=main)
104109
run_result = await run_actor(actor)
@@ -147,7 +152,12 @@ async def main() -> None:
147152
stats_after = _rq.stats
148153
Actor.log.info(stats_after)
149154

150-
assert (stats_after['writeCount'] - stats_before['writeCount']) == 2
155+
assert stats_after is not None
156+
assert stats_after.write_count is not None
157+
assert stats_before is not None
158+
assert stats_before.write_count is not None
159+
160+
assert (stats_after.write_count - stats_before.write_count) == 2
151161

152162
actor = await make_actor(label='rq-deduplication', main_func=main)
153163
run_result = await run_actor(actor)
@@ -193,6 +203,9 @@ async def main() -> None:
193203
stats_before = _rq.stats
194204
Actor.log.info(stats_before)
195205

206+
assert stats_before is not None
207+
assert stats_before.write_count is not None
208+
196209
# Add batches of some new and some already present requests in workers
197210
async def add_requests_worker() -> None:
198211
await rq.add_requests(requests[: next(batch_size)])
@@ -207,7 +220,10 @@ async def add_requests_worker() -> None:
207220
stats_after = _rq.stats
208221
Actor.log.info(stats_after)
209222

210-
assert (stats_after['writeCount'] - stats_before['writeCount']) == len(requests)
223+
assert stats_after is not None
224+
assert stats_after.write_count is not None
225+
226+
assert (stats_after.write_count - stats_before.write_count) == len(requests)
211227

212228
actor = await make_actor(label='rq-parallel-deduplication', main_func=main)
213229
run_result = await run_actor(actor)

tests/integration/apify_api/test_request_queue.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ async def test_request_queue_had_multiple_clients(
930930
# Check that it is correctly in the API
931931
api_response = await api_client.get()
932932
assert api_response
933-
assert api_response['hadMultipleClients'] is True
933+
assert api_response.had_multiple_clients is True
934934

935935

936936
async def test_request_queue_not_had_multiple_clients(
@@ -949,7 +949,7 @@ async def test_request_queue_not_had_multiple_clients(
949949
api_client = apify_client_async.request_queue(request_queue_id=rq.id)
950950
api_response = await api_client.get()
951951
assert api_response
952-
assert api_response['hadMultipleClients'] is False
952+
assert api_response.had_multiple_clients is False
953953

954954

955955
async def test_request_queue_simple_and_full_at_the_same_time(
@@ -1165,6 +1165,9 @@ async def test_request_queue_deduplication_unprocessed_requests(
11651165
stats_before = _rq.stats
11661166
Actor.log.info(stats_before)
11671167

1168+
assert stats_before is not None
1169+
assert stats_before.write_count is not None
1170+
11681171
def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dict[str, list[dict]]:
11691172
"""Simulate API returning unprocessed requests."""
11701173
return {
@@ -1176,7 +1179,7 @@ def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dic
11761179
}
11771180

11781181
with mock.patch(
1179-
'apify_client.clients.resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests',
1182+
'apify_client._resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests',
11801183
side_effect=return_unprocessed_requests,
11811184
):
11821185
# Simulate failed API call for adding requests. Request was not processed and should not be cached.
@@ -1191,4 +1194,7 @@ def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dic
11911194
stats_after = _rq.stats
11921195
Actor.log.info(stats_after)
11931196

1194-
assert (stats_after['writeCount'] - stats_before['writeCount']) == 1
1197+
assert stats_after is not None
1198+
assert stats_after.write_count is not None
1199+
1200+
assert (stats_after.write_count - stats_before.write_count) == 1

tests/unit/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callabl
6262
def _prepare_test_env() -> None:
6363
if hasattr(apify._actor.Actor, '__wrapped__'):
6464
delattr(apify._actor.Actor, '__wrapped__')
65+
6566
apify._actor.Actor._is_initialized = False
6667

6768
# Set the environment variable for the local storage directory to the temporary path.

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)