Skip to content

Commit 4065bd7

Browse files
authored
chore: Add version upper boundary for Crawlee (#330)
- Add a version upper boundary for Crawlee, as 0.5 will break compatibility (mostly due to the changes in the service container). - Also fix (new) type errors.
1 parent 7b87d1d commit 4065bd7

File tree

6 files changed

+199
-198
lines changed

6 files changed

+199
-198
lines changed

poetry.lock

Lines changed: 180 additions & 187 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ keywords = [
4848
python = "^3.9"
4949
apify-client = ">=1.8.1"
5050
apify-shared = ">=1.1.2"
51-
crawlee = ">=0.3.9"
51+
crawlee = "~0.4.0"
5252
cryptography = ">=42.0.0"
5353
httpx = ">=0.27.0"
5454
lazy-object-proxy = ">=1.10.0"

src/apify/scrapy/requests.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
4242
Returns:
4343
The converted Apify request if the conversion was successful, otherwise None.
4444
"""
45-
if not isinstance(cast(Any, scrapy_request), Request):
46-
Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
45+
if not isinstance(scrapy_request, Request):
46+
Actor.log.warning( # type: ignore[unreachable]
47+
'Failed to convert to Apify request: Scrapy request must be a Request instance.'
48+
)
4749
return None
4850

4951
call_id = crypto_random_object_id(8)
@@ -53,7 +55,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
5355
if _is_request_produced_by_middleware(scrapy_request):
5456
unique_key = compute_unique_key(
5557
url=scrapy_request.url,
56-
method=scrapy_request.method,
58+
method=scrapy_request.method, # type: ignore[arg-type] # str vs literal
5759
payload=scrapy_request.body,
5860
use_extended_unique_key=True,
5961
)
@@ -80,9 +82,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
8082

8183
# Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
8284
if isinstance(scrapy_request.headers, Headers):
83-
apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
85+
apify_request.headers = HttpHeaders(dict(scrapy_request.headers.to_unicode_dict()))
8486
else:
85-
Actor.log.warning(
87+
Actor.log.warning( # type: ignore[unreachable]
8688
f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
8789
)
8890

tests/unit/scrapy/pipelines/test_actor_dataset_push.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ItemTestCase:
5757
expected_exception=None,
5858
),
5959
ItemTestCase(
60-
item=None,
60+
item=None, # type: ignore[arg-type] # that is the point of this test
6161
item_dict={},
6262
expected_exception=TypeError,
6363
),

tests/unit/scrapy/requests/test_to_apify_request.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_handles_headers(spider: Spider) -> None:
3838
apify_request = to_apify_request(scrapy_request, spider)
3939

4040
assert apify_request is not None
41-
assert apify_request.headers == HttpHeaders(scrapy_request_headers.to_unicode_dict())
41+
assert apify_request.headers == HttpHeaders(dict(scrapy_request_headers.to_unicode_dict()))
4242

4343

4444
def test_without_id_and_unique_key(spider: Spider) -> None:
@@ -90,5 +90,5 @@ def test_with_id_and_unique_key(spider: Spider) -> None:
9090
def test_invalid_scrapy_request_returns_none(spider: Spider) -> None:
9191
scrapy_request = 'invalid_request'
9292

93-
apify_request = to_apify_request(scrapy_request, spider)
93+
apify_request = to_apify_request(scrapy_request, spider) # type: ignore[arg-type]
9494
assert apify_request is None

tests/unit/scrapy/requests/test_to_scrapy_request.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ def test_without_reconstruction_with_optional_fields(spider: Spider) -> None:
5858
assert apify_request.method == scrapy_request.method
5959
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
6060
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
61-
assert apify_request.headers.get('authorization') == scrapy_request.headers.get('authorization').decode()
61+
62+
scrapy_request_headers = scrapy_request.headers.get('authorization')
63+
assert scrapy_request_headers is not None
64+
assert apify_request.headers.get('authorization') == scrapy_request_headers.decode()
6265
assert apify_request.user_data == scrapy_request.meta.get('userData')
6366

6467

@@ -105,7 +108,10 @@ def test_with_reconstruction_with_optional_fields(spider: Spider) -> None:
105108
assert apify_request.method == scrapy_request.method
106109
assert apify_request.id == scrapy_request.meta.get('apify_request_id')
107110
assert apify_request.unique_key == scrapy_request.meta.get('apify_request_unique_key')
108-
assert apify_request.headers.get('authorization') == scrapy_request.headers.get('authorization').decode()
111+
112+
scrapy_request_headers = scrapy_request.headers.get('authorization')
113+
assert scrapy_request_headers is not None
114+
assert apify_request.headers.get('authorization') == scrapy_request_headers.decode()
109115
assert apify_request.user_data == scrapy_request.meta.get('userData')
110116

111117

0 commit comments

Comments (0)