Skip to content

Commit 382b650

Browse files
committed
Make ruff happy
1 parent 8412a10 commit 382b650

File tree

4 files changed

+114
-70
lines changed

4 files changed

+114
-70
lines changed

src/apify/_actor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from lazy_object_proxy import Proxy
1010
from pydantic import AliasChoices
11-
from typing_extensions import Self
1211

1312
from apify_client import ApifyClientAsync
1413
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
@@ -31,6 +30,8 @@
3130
import logging
3231
from types import TracebackType
3332

33+
from typing_extensions import Self
34+
3435
from crawlee.proxy_configuration import _NewUrlFunction
3536

3637
from apify._models import Webhook

src/apify/storages/_actor_inputs.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@
2323
)
2424

2525

26-
2726
class _RequestDetails(BaseModel):
2827
method: HttpMethod
2928
payload: str = ''
3029
headers: dict[str, str] = Field(default_factory=dict)
31-
user_data: dict[str, str]= Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData)
30+
user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData)
31+
3232

3333
class _RequestsFromUrlInput(_RequestDetails):
3434
requests_from_url: str = Field(alias=ActorInputKeys.startUrls.requestsFromUrl)
@@ -37,6 +37,7 @@ class _RequestsFromUrlInput(_RequestDetails):
3737
class _SimpleUrlInput(_RequestDetails):
3838
url: str
3939

40+
4041
class Input(BaseModel):
4142
model_config = ConfigDict(arbitrary_types_allowed=True)
4243
start_urls: RequestList
@@ -45,11 +46,13 @@ class Input(BaseModel):
4546
async def read(cls, raw_input: dict[str, Any], http_client: BaseHttpClient | None = None) -> Self:
4647
if ActorInputKeys.startUrls in raw_input:
4748
request_list = await _create_request_list(
48-
actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client)
49+
actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client
50+
)
4951
else:
5052
request_list = RequestList()
5153
return cls(start_urls=request_list)
5254

55+
5356
async def _create_request_list(
5457
*, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None
5558
) -> RequestList:
@@ -69,10 +72,13 @@ async def _create_request_list(
6972
if not http_client:
7073
http_client = HttpxHttpClient()
7174
simple_url_requests_inputs = [
72-
_SimpleUrlInput(**request_input) for request_input in actor_start_urls_input
73-
if ActorInputKeys.startUrls.url in request_input]
75+
_SimpleUrlInput(**request_input)
76+
for request_input in actor_start_urls_input
77+
if ActorInputKeys.startUrls.url in request_input
78+
]
7479
remote_url_requests_inputs = [
75-
_RequestsFromUrlInput(**request_input) for request_input in actor_start_urls_input
80+
_RequestsFromUrlInput(**request_input)
81+
for request_input in actor_start_urls_input
7682
if ActorInputKeys.startUrls.requestsFromUrl in request_input
7783
]
7884

@@ -109,12 +115,18 @@ async def _create_requests_from_url(
109115
def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
110116
"""Callback to scrape response body with regexp and create Requests from matches."""
111117
matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
112-
created_requests.extend([Request.from_url(
113-
match.group(0),
114-
method=request_input.method,
115-
payload=request_input.payload.encode('utf-8'),
116-
headers=request_input.headers,
117-
user_data=request_input.user_data) for match in matches])
118+
created_requests.extend(
119+
[
120+
Request.from_url(
121+
match.group(0),
122+
method=request_input.method,
123+
payload=request_input.payload.encode('utf-8'),
124+
headers=request_input.headers,
125+
user_data=request_input.user_data,
126+
)
127+
for match in matches
128+
]
129+
)
118130

119131
remote_url_requests = []
120132
for remote_url_requests_input in remote_url_requests_inputs:
Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,29 @@
1-
2-
31
class _KnownInputKey(str):
42
__slots__ = ('_name',)
3+
54
def __init__(self, name: str) -> None:
65
self._name = name
76

87
def __str__(self) -> str:
98
return self._name
109

11-
def __repr__(self) ->str:
10+
def __repr__(self) -> str:
1211
return str(self)
1312

13+
1414
class _StartUrls(_KnownInputKey):
15-
url='url'
15+
url = 'url'
1616
requestsFromUrl = 'requestsFromUrl' # noqa: N815 # Intentional to respect actual naming of input keys.
17-
method='method'
18-
payload= 'payload'
19-
userData='userData' # noqa: N815 # Intentional to respect actual naming of input keys.
20-
headers='headers'
17+
method = 'method'
18+
payload = 'payload'
19+
userData = 'userData' # noqa: N815 # Intentional to respect actual naming of input keys.
20+
headers = 'headers'
2121

2222

2323
class _ActorInputKeys:
2424
# Helper class to have actor input strings all in one place and easy to use with code completion.
2525
startUrls: _StartUrls = _StartUrls('startUrls') # noqa: N815 # Intentional to respect actual naming of input keys.
2626
# More inputs should be gradually added
2727

28+
2829
ActorInputKeys = _ActorInputKeys()

tests/unit/actor/test_actor_inputs.py

Lines changed: 79 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,22 @@
1919
'optional_input',
2020
[
2121
{},
22-
{ActorInputKeys.startUrls.payload: 'some payload', ActorInputKeys.startUrls.userData:
23-
{'some key': 'some value'}, ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'}},
22+
{
23+
ActorInputKeys.startUrls.payload: 'some payload',
24+
ActorInputKeys.startUrls.userData: {'some key': 'some value'},
25+
ActorInputKeys.startUrls.headers: {'h1': 'v1', 'h2': 'v2'},
26+
},
2427
],
2528
ids=['minimal', 'all_options'],
2629
)
2730
async def test_actor_create_request_list_request_types(
2831
request_method: HttpMethod, optional_input: dict[str, Any]
2932
) -> None:
3033
"""Test proper request list generation from both minimal and full inputs for all method types for simple input."""
31-
minimal_request_dict_input = {ActorInputKeys.startUrls.url: 'https://www.abc.com',
32-
ActorInputKeys.startUrls.method: request_method}
34+
minimal_request_dict_input = {
35+
ActorInputKeys.startUrls.url: 'https://www.abc.com',
36+
ActorInputKeys.startUrls.method: request_method,
37+
}
3338
request_dict_input = {**minimal_request_dict_input, **optional_input}
3439
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [request_dict_input]}
3540

@@ -75,17 +80,25 @@ def read(self) -> bytes:
7580

7681
async def test_actor_create_request_list_from_url_correctly_send_requests() -> None:
7782
"""Test that injected HttpClient's method send_request is called with properly passed arguments."""
78-
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [
79-
{ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'},
80-
{ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'PUT'},
81-
{
82-
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som',
83-
ActorInputKeys.startUrls.method: 'POST',
84-
ActorInputKeys.startUrls.headers: {'key': 'value'},
85-
ActorInputKeys.startUrls.payload: 'some_payload',
86-
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
87-
},
88-
]}
83+
example_actor_input: dict[str, Any] = {
84+
ActorInputKeys.startUrls: [
85+
{
86+
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
87+
ActorInputKeys.startUrls.method: 'GET',
88+
},
89+
{
90+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
91+
ActorInputKeys.startUrls.method: 'PUT',
92+
},
93+
{
94+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som',
95+
ActorInputKeys.startUrls.method: 'POST',
96+
ActorInputKeys.startUrls.headers: {'key': 'value'},
97+
ActorInputKeys.startUrls.payload: 'some_payload',
98+
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
99+
},
100+
]
101+
}
89102

90103
mocked_read_outputs = ('' for url in example_actor_input[ActorInputKeys.startUrls])
91104
http_client = HttpxHttpClient()
@@ -117,11 +130,19 @@ async def test_actor_create_request_list_from_url() -> None:
117130
)
118131
)
119132

120-
example_actor_input:dict[str, Any] = {ActorInputKeys.startUrls:[
121-
{ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt', ActorInputKeys.startUrls.method: 'GET'},
122-
{ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'},
123-
{ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2', ActorInputKeys.startUrls.method: 'GET'},
124-
]}
133+
example_actor_input: dict[str, Any] = {
134+
ActorInputKeys.startUrls: [
135+
{
136+
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
137+
ActorInputKeys.startUrls.method: 'GET',
138+
},
139+
{ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'},
140+
{
141+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
142+
ActorInputKeys.startUrls.method: 'GET',
143+
},
144+
]
145+
}
125146

126147
http_client = HttpxHttpClient()
127148
with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
@@ -133,16 +154,18 @@ async def test_actor_create_request_list_from_url() -> None:
133154
# Check correctly created requests' urls in request list
134155
assert {generated_request.url for generated_request in generated_requests} == expected_urls
135156

136-
async def test_actor_create_request_list_from_url_additional_inputs() -> None:
157+
158+
async def test_actor_create_request_list_from_url_additional_inputs() -> None:
137159
"""Test that all generated request properties are correctly populated from input values."""
138160
expected_simple_url = 'https://www.someurl.com'
139161
example_start_url_input = {
140162
ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt',
141163
ActorInputKeys.startUrls.method: 'POST',
142164
ActorInputKeys.startUrls.headers: {'key': 'value'},
143165
ActorInputKeys.startUrls.payload: 'some_payload',
144-
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'}}
145-
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls:[example_start_url_input]}
166+
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
167+
}
168+
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [example_start_url_input]}
146169
response_bodies = iter((expected_simple_url,))
147170
http_client = HttpxHttpClient()
148171
with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
@@ -162,43 +185,50 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None:
162185
assert request.user_data == expected_user_data
163186

164187

165-
@pytest.mark.parametrize('true_positive', [
166-
'http://www.something.com',
167-
'https://www.something.net',
168-
'http://nowww.cz',
169-
'https://with-hypen.com',
170-
'http://number1.com',
171-
'http://www.number.123.abc',
172-
'http://many.dots.com',
173-
'http://a.com',
174-
'http://www.something.com/somethignelse'
175-
'http://www.something.com/somethignelse.txt',
176-
'http://non-english-chars-áíéåü.com',
177-
'http://www.port.com:1234',
178-
'http://username:[email protected]'
179-
])
188+
@pytest.mark.parametrize(
189+
'true_positive',
190+
[
191+
'http://www.something.com',
192+
'https://www.something.net',
193+
'http://nowww.cz',
194+
'https://with-hypen.com',
195+
'http://number1.com',
196+
'http://www.number.123.abc',
197+
'http://many.dots.com',
198+
'http://a.com',
199+
'http://www.something.com/somethignelse' 'http://www.something.com/somethignelse.txt',
200+
'http://non-english-chars-áíéåü.com',
201+
'http://www.port.com:1234',
202+
'http://username:[email protected]',
203+
],
204+
)
180205
def test_url_no_commas_regex_true_positives(true_positive: str) -> None:
181-
example_string= f'Some text {true_positive} some more text'
206+
example_string = f'Some text {true_positive} some more text'
182207
matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string))
183208
assert len(matches) == 1
184209
assert matches[0].group(0) == true_positive
185210

186-
@pytest.mark.parametrize('false_positive',[
187-
'http://www.a',
188-
'http://a',
189-
'http://a.a',
190-
'http://123.456',
191-
'www.something.com',
192-
'http:www.something.com',
193-
])
211+
212+
@pytest.mark.parametrize(
213+
'false_positive',
214+
[
215+
'http://www.a',
216+
'http://a',
217+
'http://a.a',
218+
'http://123.456',
219+
'www.something.com',
220+
'http:www.something.com',
221+
],
222+
)
194223
def test_url_no_commas_regex_false_positives(false_positive: str) -> None:
195-
example_string= f'Some text {false_positive} some more text'
224+
example_string = f'Some text {false_positive} some more text'
196225
matches = list(re.findall(URL_NO_COMMAS_REGEX, example_string))
197226
assert len(matches) == 0
198227

228+
199229
def test_url_no_commas_regex_multi_line() -> None:
200230
true_positives = ('http://www.something.com', 'http://www.else.com')
201-
example_string= 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives)
231+
example_string = 'Some text {} some more text \n Some new line text {} ...'.format(*true_positives)
202232
matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string))
203233
assert len(matches) == 2
204234
assert {match.group(0) for match in matches} == set(true_positives)

0 commit comments

Comments
 (0)