Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions docs/03_concepts/code/03_rq.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@ async def main() -> None:

# If you try to add an existing request again, it will not do anything
add_request_info = await queue.add_request(
Request.from_url('http://different-example.com/5')
Request.from_url('http://example.com/5')
)
Actor.log.info(f'Add request info: {add_request_info}')

processed_request = await queue.get_request(add_request_info.id)
Actor.log.info(f'Processed request: {processed_request}')

# Finally, process the queue until all requests are handled
while not await queue.is_finished():
# Fetch the next unhandled request in the queue
Expand Down
4 changes: 2 additions & 2 deletions docs/04_upgrading/upgrading_to_v3.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
id: upgrading-to-v2
title: Upgrading to v2
id: upgrading-to-v3
title: Upgrading to v3
---

This page summarizes the breaking changes between Apify Python SDK v2.x and v3.0.
Expand Down
3 changes: 1 addition & 2 deletions src/apify/scrapy/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ

# Update the meta field with the meta field from the apify_request
meta = scrapy_request.meta or {}
meta.update({'apify_request_id': apify_request.id, 'apify_request_unique_key': apify_request.unique_key})
meta.update({'apify_request_unique_key': apify_request.unique_key})
# scrapy_request.meta is a property, so we have to set it like this
scrapy_request._meta = meta # noqa: SLF001

Expand All @@ -134,7 +134,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
url=apify_request.url,
method=apify_request.method,
meta={
'apify_request_id': apify_request.id,
'apify_request_unique_key': apify_request.unique_key,
},
)
Expand Down
4 changes: 2 additions & 2 deletions src/apify/storage_clients/_apify/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ class CachedRequest(BaseModel):
Only internal structure.
"""

id: str
"""The ID of the request."""
unique_key: str
"""Unique key of the request."""

was_already_handled: bool
"""Whether the request was already handled."""
Expand Down
138 changes: 80 additions & 58 deletions src/apify/storage_clients/_apify/_request_queue_client.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/integration/test_actor_request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ async def test_force_cloud(
) -> None:
request_queue_id = (await apify_named_rq.get_metadata()).id
request_info = await apify_named_rq.add_request(Request.from_url('http://example.com'))
assert request_info.id is not None
request_queue_client = apify_client_async.request_queue(request_queue_id)

request_queue_details = await request_queue_client.get()
Expand Down
29 changes: 13 additions & 16 deletions tests/integration/test_request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,38 +399,35 @@ async def main() -> None:
assert run_result.status == 'SUCCEEDED'


async def test_get_request_by_id(
async def test_get_request_by_unique_key(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
"""Test retrieving specific requests by their ID."""
"""Test retrieving specific requests by their unique_key."""

async def main() -> None:
async with Actor:
rq = await Actor.open_request_queue()
Actor.log.info('Request queue opened')

# Add a request and get its ID
# Add a request and get its unique_key
add_result = await rq.add_request('https://example.com/test')
request_id = add_result.id
Actor.log.info(f'Request added with ID: {request_id}')
request_unique_key = add_result.unique_key
Actor.log.info(f'Request added with unique_key: {request_unique_key}')

# Retrieve the request by ID
retrieved_request = await rq.get_request(request_id)
# Retrieve the request by unique_key
retrieved_request = await rq.get_request(request_unique_key)
assert retrieved_request is not None, f'retrieved_request={retrieved_request}'
assert retrieved_request.url == 'https://example.com/test', f'retrieved_request.url={retrieved_request.url}'
assert retrieved_request.id == request_id, (
f'retrieved_request.id={retrieved_request.id}',
f'request_id={request_id}',
)
Actor.log.info('Request retrieved successfully by ID')
assert retrieved_request.unique_key == request_unique_key, (f'{request_unique_key=}',)
Actor.log.info('Request retrieved successfully by unique_key')

# Test with non-existent ID
non_existent_request = await rq.get_request('non-existent-id')
# Test with non-existent unique_key
non_existent_request = await rq.get_request('non-existent-unique_key')
assert non_existent_request is None, f'non_existent_request={non_existent_request}'
Actor.log.info('Non-existent ID correctly returned None')
Actor.log.info('Non-existent unique_key correctly returned None')

actor = await make_actor(label='rq-get-by-id-test', main_func=main)
actor = await make_actor(label='rq-get-by-unique-key-test', main_func=main)
run_result = await run_actor(actor)
assert run_result.status == 'SUCCEEDED'

Expand Down
2 changes: 0 additions & 2 deletions tests/unit/scrapy/requests/test_to_apify_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def test_with_id_and_unique_key(spider: Spider) -> None:
url='https://example.com',
method='GET',
meta={
'apify_request_id': 'abc123',
'apify_request_unique_key': 'https://example.com',
'userData': {'some_user_data': 'hello'},
},
Expand All @@ -77,7 +76,6 @@ def test_with_id_and_unique_key(spider: Spider) -> None:

assert apify_request.url == 'https://example.com'
assert apify_request.method == 'GET'
assert apify_request.id == 'abc123'
assert apify_request.unique_key == 'https://example.com'

user_data = apify_request.user_data
Expand Down
4 changes: 0 additions & 4 deletions tests/unit/scrapy/requests/test_to_scrapy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def test_without_reconstruction(spider: Spider) -> None:
assert isinstance(scrapy_request, Request)
assert apify_request.url == scrapy_request.url
assert apify_request.method == scrapy_request.method
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')


Expand All @@ -56,7 +55,6 @@ def test_without_reconstruction_with_optional_fields(spider: Spider) -> None:
assert isinstance(scrapy_request, Request)
assert apify_request.url == scrapy_request.url
assert apify_request.method == scrapy_request.method
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')

scrapy_request_headers = scrapy_request.headers.get('authorization')
Expand All @@ -82,7 +80,6 @@ def test_with_reconstruction(spider: Spider) -> None:
assert isinstance(scrapy_request, Request)
assert apify_request.url == scrapy_request.url
assert apify_request.method == scrapy_request.method
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
assert apify_request.user_data == scrapy_request.meta.get('userData')

Expand All @@ -106,7 +103,6 @@ def test_with_reconstruction_with_optional_fields(spider: Spider) -> None:
assert isinstance(scrapy_request, Request)
assert apify_request.url == scrapy_request.url
assert apify_request.method == scrapy_request.method
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')

scrapy_request_headers = scrapy_request.headers.get('authorization')
Expand Down
38 changes: 38 additions & 0 deletions tests/unit/storage_clients/test_apify_request_queue_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from apify.storage_clients._apify._request_queue_client import unique_key_to_request_id


def test_unique_key_to_request_id_length() -> None:
    """Verify that the generated request ID honors the requested length."""
    generated_id = unique_key_to_request_id('exampleKey123', request_id_length=15)
    assert len(generated_id) == 15, 'Request ID should have the correct length.'


def test_unique_key_to_request_id_consistency() -> None:
    """Verify that repeated calls with one unique key yield identical IDs."""
    key = 'consistentKey'
    first = unique_key_to_request_id(key)
    second = unique_key_to_request_id(key)
    assert first == second, 'The same unique key should generate consistent request IDs.'


@pytest.mark.parametrize(
    ('unique_key', 'expected_request_id'),
    [
        # Each case carries its id inline instead of a parallel `ids` list.
        pytest.param('abc', 'ungWv48BzpBQUDe', id='basic_abc'),
        pytest.param('uniqueKey', 'xiWPs083cree7mH', id='keyword_uniqueKey'),
        pytest.param('', '47DEQpj8HBSaTIm', id='empty_string'),
        pytest.param('测试中文', 'lKPdJkdvw8MXEUp', id='non_ascii_characters'),
        pytest.param('test+/=', 'XZRQjhoG0yjfnYD', id='url_unsafe_characters'),
    ],
)
def test_unique_key_to_request_id_matches_known_values(unique_key: str, expected_request_id: str) -> None:
    """Verify deterministic ID generation against precomputed known values."""
    assert unique_key_to_request_id(unique_key) == expected_request_id, (
        f'Unique key "{unique_key}" should produce the expected request ID.'
    )
31 changes: 4 additions & 27 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading