367 changes: 180 additions & 187 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -48,7 +48,7 @@ keywords = [
 python = "^3.9"
 apify-client = ">=1.8.1"
 apify-shared = ">=1.1.2"
-crawlee = ">=0.3.9"
+crawlee = "~0.4.0"
 cryptography = ">=42.0.0"
 httpx = ">=0.27.0"
 lazy-object-proxy = ">=1.10.0"
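A note on the new pin: in Poetry, ~0.4.0 is shorthand for >=0.4.0,<0.5.0, so the SDK now follows only crawlee 0.4.x patch releases instead of accepting anything from 0.3.9 upward. Presumably this keeps the typing-related changes below tied to a single crawlee minor version.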
12 changes: 7 additions & 5 deletions src/apify/scrapy/requests.py
@@ -42,8 +42,10 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     Returns:
         The converted Apify request if the conversion was successful, otherwise None.
     """
-    if not isinstance(cast(Any, scrapy_request), Request):
-        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
+    if not isinstance(scrapy_request, Request):
+        Actor.log.warning(  # type: ignore[unreachable]
+            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
+        )
         return None
 
     call_id = crypto_random_object_id(8)
@@ -53,7 +55,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     if _is_request_produced_by_middleware(scrapy_request):
         unique_key = compute_unique_key(
             url=scrapy_request.url,
-            method=scrapy_request.method,
+            method=scrapy_request.method,  # type: ignore[arg-type]  # str vs literal
             payload=scrapy_request.body,
             use_extended_unique_key=True,
         )
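For readers unfamiliar with the helper: compute_unique_key comes from crawlee, and with use_extended_unique_key=True it folds the HTTP method and payload into the deduplication key rather than hashing the URL alone. The arg-type ignore exists because Scrapy's Request.method is a plain str while crawlee annotates the parameter with an HTTP-method literal. A minimal sketch of the call, with the import path assumed:

```python
# Hedged sketch, not the SDK's code; the import path is assumed.
from crawlee._utils.requests import compute_unique_key

unique_key = compute_unique_key(
    url='https://example.com/api',
    method='POST',  # plain str from Scrapy; crawlee expects a literal HTTP method
    payload=b'{"query": "test"}',
    use_extended_unique_key=True,  # fold method and payload into the key
)
```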
@@ -80,9 +82,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
 
     # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
+        apify_request.headers = HttpHeaders(dict(scrapy_request.headers.to_unicode_dict()))
     else:
-        Actor.log.warning(
+        Actor.log.warning(  # type: ignore[unreachable]
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
         )
 
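Two fixes land in this file. First, the cast(Any, ...) workaround is gone: with scrapy_request annotated as Request, mypy marks the warning branch unreachable, so the runtime guard stays and type: ignore[unreachable] silences the checker instead. Second, to_unicode_dict() is now wrapped in dict(). A sketch of the conversion, assuming (as Scrapy documents) that to_unicode_dict() returns a case-insensitive mapping rather than a plain dict:

```python
from scrapy.http.headers import Headers

headers = Headers({'Authorization': 'Bearer abc123'})

# to_unicode_dict() yields a case-insensitive mapping, which strict
# validation on the crawlee side may reject; dict() flattens it into
# the plain str-to-str mapping that HttpHeaders accepts.
plain = dict(headers.to_unicode_dict())
assert plain == {'Authorization': 'Bearer abc123'}
```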
2 changes: 1 addition & 1 deletion tests/unit/scrapy/pipelines/test_actor_dataset_push.py
@@ -57,7 +57,7 @@ class ItemTestCase:
         expected_exception=None,
     ),
     ItemTestCase(
-        item=None,
+        item=None,  # type: ignore[arg-type]  # that is the point of this test
         item_dict={},
         expected_exception=TypeError,
     ),
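Here the ignore is the whole point, as the inline comment says: the case deliberately feeds a non-dict item into the pipeline to assert that the TypeError path fires, so the annotation mismatch is the behavior under test.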
4 changes: 2 additions & 2 deletions tests/unit/scrapy/requests/test_to_apify_request.py
@@ -38,7 +38,7 @@ def test_handles_headers(spider: Spider) -> None:
     apify_request = to_apify_request(scrapy_request, spider)
 
     assert apify_request is not None
-    assert apify_request.headers == HttpHeaders(scrapy_request_headers.to_unicode_dict())
+    assert apify_request.headers == HttpHeaders(dict(scrapy_request_headers.to_unicode_dict()))
 
 
 def test_without_id_and_unique_key(spider: Spider) -> None:
@@ -90,5 +90,5 @@ def test_with_id_and_unique_key(spider: Spider) -> None:
 def test_invalid_scrapy_request_returns_none(spider: Spider) -> None:
     scrapy_request = 'invalid_request'
 
-    apify_request = to_apify_request(scrapy_request, spider)
+    apify_request = to_apify_request(scrapy_request, spider)  # type: ignore[arg-type]
     assert apify_request is None
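Same idea as the pipeline test above: the ignore lets the test pass a plain string where a Request is expected, exercising the isinstance guard in to_apify_request and the None return behind it.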
10 changes: 8 additions & 2 deletions tests/unit/scrapy/requests/test_to_scrapy_request.py
@@ -58,7 +58,10 @@ def test_without_reconstruction_with_optional_fields(spider: Spider) -> None:
     assert apify_request.method == scrapy_request.method
     assert apify_request.id == scrapy_request.meta.get('apify_request_id')
     assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
-    assert apify_request.headers.get('authorization') == scrapy_request.headers.get('authorization').decode()
+
+    scrapy_request_headers = scrapy_request.headers.get('authorization')
+    assert scrapy_request_headers is not None
+    assert apify_request.headers.get('authorization') == scrapy_request_headers.decode()
     assert apify_request.user_data == scrapy_request.meta.get('userData')
 
 
@@ -105,7 +108,10 @@ def test_with_reconstruction_with_optional_fields(spider: Spider) -> None:
     assert apify_request.method == scrapy_request.method
     assert apify_request.id == scrapy_request.meta.get('apify_request_id')
     assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
-    assert apify_request.headers.get('authorization') == scrapy_request.headers.get('authorization').decode()
+
+    scrapy_request_headers = scrapy_request.headers.get('authorization')
+    assert scrapy_request_headers is not None
+    assert apify_request.headers.get('authorization') == scrapy_request_headers.decode()
     assert apify_request.user_data == scrapy_request.meta.get('userData')
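Both hunks in this file apply the same pattern: Scrapy's Headers.get() returns the raw header value or None, so binding the result to a local and asserting it is not None narrows the type for mypy before .decode() is called. As a bonus, a missing header now fails with a clear assertion instead of an AttributeError on None.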