Skip to content

Commit 70a1b71

Browse files
committed
Merge branch 'remove-request-id' into unique-key
2 parents 5763b48 + bedb6dd commit 70a1b71

File tree

11 files changed

+144 additions, -118 deletions

docs/03_concepts/code/03_rq.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,10 @@ async def main() -> None:
2020

2121
# If you try to add an existing request again, it will not do anything
2222
add_request_info = await queue.add_request(
23-
Request.from_url('http://different-example.com/5')
23+
Request.from_url('http://example.com/5')
2424
)
2525
Actor.log.info(f'Add request info: {add_request_info}')
2626

27-
processed_request = await queue.get_request(add_request_info.id)
28-
Actor.log.info(f'Processed request: {processed_request}')
29-
3027
# Finally, process the queue until all requests are handled
3128
while not await queue.is_finished():
3229
# Fetch the next unhandled request in the queue

docs/04_upgrading/upgrading_to_v3.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
2-
id: upgrading-to-v2
3-
title: Upgrading to v2
2+
id: upgrading-to-v3
3+
title: Upgrading to v3
44
---
55

66
This page summarizes the breaking changes between Apify Python SDK v2.x and v3.0.

src/apify/scrapy/requests.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
122122

123123
# Update the meta field with the meta field from the apify_request
124124
meta = scrapy_request.meta or {}
125-
meta.update({'apify_request_id': apify_request.id, 'apify_request_unique_key': apify_request.unique_key})
125+
meta.update({'apify_request_unique_key': apify_request.unique_key})
126126
# scrapy_request.meta is a property, so we have to set it like this
127127
scrapy_request._meta = meta # noqa: SLF001
128128

@@ -134,7 +134,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
134134
url=apify_request.url,
135135
method=apify_request.method,
136136
meta={
137-
'apify_request_id': apify_request.id,
138137
'apify_request_unique_key': apify_request.unique_key,
139138
},
140139
)

src/apify/storage_clients/_apify/_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ class CachedRequest(BaseModel):
9494
Only internal structure.
9595
"""
9696

97-
id: str
98-
"""The ID of the request."""
97+
unique_key: str
98+
"""Unique key of the request."""
9999

100100
was_already_handled: bool
101101
"""Whether the request was already handled."""

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 80 additions & 58 deletions
Large diffs are not rendered by default.

tests/integration/test_actor_request_queue.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,10 @@ async def test_force_cloud(
8484
apify_client_async: ApifyClientAsync,
8585
apify_named_rq: RequestQueue,
8686
) -> None:
87+
request_queue_id = (await apify_named_rq.get_metadata()).id
8788
request_info = await apify_named_rq.add_request(Request.from_url('http://example.com'))
88-
request_queue_client = apify_client_async.request_queue(apify_named_rq.id)
89+
assert request_info.id is not None
90+
request_queue_client = apify_client_async.request_queue(request_queue_id)
8991

9092
request_queue_details = await request_queue_client.get()
9193
assert request_queue_details is not None

tests/integration/test_request_queue.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -405,38 +405,35 @@ async def main() -> None:
405405
assert run_result.status == 'SUCCEEDED'
406406

407407

408-
async def test_get_request_by_id(
408+
async def test_get_request_by_unique_key(
409409
make_actor: MakeActorFunction,
410410
run_actor: RunActorFunction,
411411
) -> None:
412-
"""Test retrieving specific requests by their ID."""
412+
"""Test retrieving specific requests by their unique_key."""
413413

414414
async def main() -> None:
415415
async with Actor:
416416
rq = await Actor.open_request_queue()
417417
Actor.log.info('Request queue opened')
418418

419-
# Add a request and get its ID
419+
# Add a request and get its unique_key
420420
add_result = await rq.add_request('https://example.com/test')
421-
request_id = add_result.id
422-
Actor.log.info(f'Request added with ID: {request_id}')
421+
request_unique_key = add_result.unique_key
422+
Actor.log.info(f'Request added with unique_key: {request_unique_key}')
423423

424-
# Retrieve the request by ID
425-
retrieved_request = await rq.get_request(request_id)
424+
# Retrieve the request by unique_key
425+
retrieved_request = await rq.get_request(request_unique_key)
426426
assert retrieved_request is not None, f'retrieved_request={retrieved_request}'
427427
assert retrieved_request.url == 'https://example.com/test', f'retrieved_request.url={retrieved_request.url}'
428-
assert retrieved_request.id == request_id, (
429-
f'retrieved_request.id={retrieved_request.id}',
430-
f'request_id={request_id}',
431-
)
432-
Actor.log.info('Request retrieved successfully by ID')
428+
assert retrieved_request.unique_key == request_unique_key, (f'{request_unique_key=}',)
429+
Actor.log.info('Request retrieved successfully by unique_key')
433430

434-
# Test with non-existent ID
435-
non_existent_request = await rq.get_request('non-existent-id')
431+
# Test with non-existent unique_key
432+
non_existent_request = await rq.get_request('non-existent-unique_key')
436433
assert non_existent_request is None, f'non_existent_request={non_existent_request}'
437-
Actor.log.info('Non-existent ID correctly returned None')
434+
Actor.log.info('Non-existent unique_key correctly returned None')
438435

439-
actor = await make_actor(label='rq-get-by-id-test', main_func=main)
436+
actor = await make_actor(label='rq-get-by-unique-key-test', main_func=main)
440437
run_result = await run_actor(actor)
441438
assert run_result.status == 'SUCCEEDED'
442439

tests/unit/scrapy/requests/test_to_apify_request.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def test_with_id_and_unique_key(spider: Spider) -> None:
6666
url='https://example.com',
6767
method='GET',
6868
meta={
69-
'apify_request_id': 'abc123',
7069
'apify_request_unique_key': 'https://example.com',
7170
'userData': {'some_user_data': 'hello'},
7271
},
@@ -77,7 +76,6 @@ def test_with_id_and_unique_key(spider: Spider) -> None:
7776

7877
assert apify_request.url == 'https://example.com'
7978
assert apify_request.method == 'GET'
80-
assert apify_request.id == 'abc123'
8179
assert apify_request.unique_key == 'https://example.com'
8280

8381
user_data = apify_request.user_data

tests/unit/scrapy/requests/test_to_scrapy_request.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ def test_without_reconstruction(spider: Spider) -> None:
3636
assert isinstance(scrapy_request, Request)
3737
assert apify_request.url == scrapy_request.url
3838
assert apify_request.method == scrapy_request.method
39-
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
4039
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
4140

4241

@@ -56,7 +55,6 @@ def test_without_reconstruction_with_optional_fields(spider: Spider) -> None:
5655
assert isinstance(scrapy_request, Request)
5756
assert apify_request.url == scrapy_request.url
5857
assert apify_request.method == scrapy_request.method
59-
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
6058
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
6159

6260
scrapy_request_headers = scrapy_request.headers.get('authorization')
@@ -82,7 +80,6 @@ def test_with_reconstruction(spider: Spider) -> None:
8280
assert isinstance(scrapy_request, Request)
8381
assert apify_request.url == scrapy_request.url
8482
assert apify_request.method == scrapy_request.method
85-
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
8683
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
8784
assert apify_request.user_data == scrapy_request.meta.get('userData')
8885

@@ -106,7 +103,6 @@ def test_with_reconstruction_with_optional_fields(spider: Spider) -> None:
106103
assert isinstance(scrapy_request, Request)
107104
assert apify_request.url == scrapy_request.url
108105
assert apify_request.method == scrapy_request.method
109-
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
110106
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
111107

112108
scrapy_request_headers = scrapy_request.headers.get('authorization')
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import pytest
2+
3+
from apify.storage_clients._apify._request_queue_client import unique_key_to_request_id
4+
5+
6+
def test_unique_key_to_request_id_length() -> None:
7+
unique_key = 'exampleKey123'
8+
request_id = unique_key_to_request_id(unique_key, request_id_length=15)
9+
assert len(request_id) == 15, 'Request ID should have the correct length.'
10+
11+
12+
def test_unique_key_to_request_id_consistency() -> None:
13+
unique_key = 'consistentKey'
14+
request_id_1 = unique_key_to_request_id(unique_key)
15+
request_id_2 = unique_key_to_request_id(unique_key)
16+
assert request_id_1 == request_id_2, 'The same unique key should generate consistent request IDs.'
17+
18+
19+
@pytest.mark.parametrize(
20+
('unique_key', 'expected_request_id'),
21+
[
22+
('abc', 'ungWv48BzpBQUDe'),
23+
('uniqueKey', 'xiWPs083cree7mH'),
24+
('', '47DEQpj8HBSaTIm'),
25+
('测试中文', 'lKPdJkdvw8MXEUp'),
26+
('test+/=', 'XZRQjhoG0yjfnYD'),
27+
],
28+
ids=[
29+
'basic_abc',
30+
'keyword_uniqueKey',
31+
'empty_string',
32+
'non_ascii_characters',
33+
'url_unsafe_characters',
34+
],
35+
)
36+
def test_unique_key_to_request_id_matches_known_values(unique_key: str, expected_request_id: str) -> None:
37+
request_id = unique_key_to_request_id(unique_key)
38+
assert request_id == expected_request_id, f'Unique key "{unique_key}" should produce the expected request ID.'

0 commit comments

Comments (0)