Skip to content

Commit e3e45bb

Browse files
committed
Remove Input class
It made too many assumptions that users might not want. Users should instead create their own Input helper classes, tailored to their specific input fields and the names of those fields.
1 parent 382b650 commit e3e45bb

File tree

3 files changed

+56
-81
lines changed

3 files changed

+56
-81
lines changed

src/apify/storages/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
22

3-
from ._actor_inputs import Input
3+
from ._actor_inputs import create_request_list
44

5-
__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'Input']
5+
__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'create_request_list']

src/apify/storages/_actor_inputs.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,9 @@
44
import re
55
from asyncio import Task
66
from functools import partial
7-
from typing import TYPE_CHECKING, Any
7+
from typing import Any
88

9-
from pydantic import BaseModel, ConfigDict, Field
10-
11-
if TYPE_CHECKING:
12-
from typing_extensions import Self
9+
from pydantic import BaseModel, Field
1310

1411
from crawlee import Request
1512
from crawlee._types import HttpMethod
@@ -24,7 +21,7 @@
2421

2522

2623
class _RequestDetails(BaseModel):
27-
method: HttpMethod
24+
method: HttpMethod = 'GET'
2825
payload: str = ''
2926
headers: dict[str, str] = Field(default_factory=dict)
3027
user_data: dict[str, str] = Field(default_factory=dict, alias=ActorInputKeys.startUrls.userData)
@@ -38,23 +35,8 @@ class _SimpleUrlInput(_RequestDetails):
3835
url: str
3936

4037

41-
class Input(BaseModel):
42-
model_config = ConfigDict(arbitrary_types_allowed=True)
43-
start_urls: RequestList
44-
45-
@classmethod
46-
async def read(cls, raw_input: dict[str, Any], http_client: BaseHttpClient | None = None) -> Self:
47-
if ActorInputKeys.startUrls in raw_input:
48-
request_list = await _create_request_list(
49-
actor_start_urls_input=raw_input[ActorInputKeys.startUrls], http_client=http_client
50-
)
51-
else:
52-
request_list = RequestList()
53-
return cls(start_urls=request_list)
54-
55-
56-
async def _create_request_list(
57-
*, actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None
38+
async def create_request_list(
39+
actor_start_urls_input: list[dict[str, Any]], http_client: BaseHttpClient | None = None
5840
) -> RequestList:
5941
"""Creates RequestList from Actor input requestListSources.
6042

tests/unit/actor/test_actor_inputs.py

Lines changed: 49 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from crawlee._types import HttpHeaders, HttpMethod
1212
from crawlee.http_clients import HttpResponse, HttpxHttpClient
1313

14-
from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, Input
14+
from apify.storages._actor_inputs import URL_NO_COMMAS_REGEX, ActorInputKeys, create_request_list
1515

1616

1717
@pytest.mark.parametrize('request_method', get_args(HttpMethod))
@@ -36,24 +36,22 @@ async def test_actor_create_request_list_request_types(
3636
ActorInputKeys.startUrls.method: request_method,
3737
}
3838
request_dict_input = {**minimal_request_dict_input, **optional_input}
39-
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [request_dict_input]}
4039

41-
generated_input = await Input.read(example_actor_input)
40+
request_list = await create_request_list([request_dict_input])
41+
assert not await request_list.is_empty()
42+
request = await request_list.fetch_next_request()
43+
assert request is not None
44+
assert await request_list.is_empty()
4245

43-
assert not await generated_input.start_urls.is_empty()
44-
generated_request = await generated_input.start_urls.fetch_next_request()
45-
assert generated_request is not None
46-
assert await generated_input.start_urls.is_empty()
47-
48-
assert generated_request.method == request_dict_input[ActorInputKeys.startUrls.method]
49-
assert generated_request.url == request_dict_input[ActorInputKeys.startUrls.url]
50-
assert generated_request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8')
46+
assert request.method == request_dict_input[ActorInputKeys.startUrls.method]
47+
assert request.url == request_dict_input[ActorInputKeys.startUrls.url]
48+
assert request.payload == request_dict_input.get(ActorInputKeys.startUrls.payload, '').encode('utf-8')
5149
expected_user_data = UserData()
5250
if ActorInputKeys.startUrls.userData in optional_input:
5351
for key, value in optional_input[ActorInputKeys.startUrls.userData].items():
5452
expected_user_data[key] = value
55-
assert generated_request.user_data == expected_user_data
56-
assert generated_request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {})
53+
assert request.user_data == expected_user_data
54+
assert request.headers.root == optional_input.get(ActorInputKeys.startUrls.headers, {})
5755

5856

5957
def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse:
@@ -80,39 +78,37 @@ def read(self) -> bytes:
8078

8179
async def test_actor_create_request_list_from_url_correctly_send_requests() -> None:
8280
"""Test that injected HttpClient's method send_request is called with properly passed arguments."""
83-
example_actor_input: dict[str, Any] = {
84-
ActorInputKeys.startUrls: [
85-
{
86-
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
87-
ActorInputKeys.startUrls.method: 'GET',
88-
},
89-
{
90-
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
91-
ActorInputKeys.startUrls.method: 'PUT',
92-
},
93-
{
94-
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som',
95-
ActorInputKeys.startUrls.method: 'POST',
96-
ActorInputKeys.startUrls.headers: {'key': 'value'},
97-
ActorInputKeys.startUrls.payload: 'some_payload',
98-
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
99-
},
100-
]
101-
}
81+
actor_start_urls_input: list[dict[str, Any]] = [
82+
{
83+
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
84+
ActorInputKeys.startUrls.method: 'GET',
85+
},
86+
{
87+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
88+
ActorInputKeys.startUrls.method: 'PUT',
89+
},
90+
{
91+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.something.som',
92+
ActorInputKeys.startUrls.method: 'POST',
93+
ActorInputKeys.startUrls.headers: {'key': 'value'},
94+
ActorInputKeys.startUrls.payload: 'some_payload',
95+
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
96+
},
97+
]
10298

103-
mocked_read_outputs = ('' for url in example_actor_input[ActorInputKeys.startUrls])
99+
mocked_read_outputs = ('' for url in actor_start_urls_input)
104100
http_client = HttpxHttpClient()
105101
with mock.patch.object(
106102
http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs)
107103
) as mocked_send_request:
108-
await Input.read(example_actor_input, http_client=http_client)
104+
await create_request_list(actor_start_urls_input, http_client=http_client)
109105

110106
expected_calls = [
111107
call(
112108
method='GET',
113109
url=example_input[ActorInputKeys.startUrls.requestsFromUrl],
114110
)
115-
for example_input in example_actor_input[ActorInputKeys.startUrls]
111+
for example_input in actor_start_urls_input
116112
]
117113
mocked_send_request.assert_has_calls(expected_calls)
118114

@@ -130,25 +126,23 @@ async def test_actor_create_request_list_from_url() -> None:
130126
)
131127
)
132128

133-
example_actor_input: dict[str, Any] = {
134-
ActorInputKeys.startUrls: [
135-
{
136-
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
137-
ActorInputKeys.startUrls.method: 'GET',
138-
},
139-
{ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'},
140-
{
141-
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
142-
ActorInputKeys.startUrls.method: 'GET',
143-
},
144-
]
145-
}
129+
actor_start_urls_input = [
130+
{
131+
ActorInputKeys.startUrls.requestsFromUrl: 'https://abc.dev/file.txt',
132+
ActorInputKeys.startUrls.method: 'GET',
133+
},
134+
{ActorInputKeys.startUrls.url: expected_simple_url, ActorInputKeys.startUrls.method: 'GET'},
135+
{
136+
ActorInputKeys.startUrls.requestsFromUrl: 'https://www.abc.dev/file2',
137+
ActorInputKeys.startUrls.method: 'GET',
138+
},
139+
]
146140

147141
http_client = HttpxHttpClient()
148142
with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
149-
generated_input = await Input.read(example_actor_input, http_client=http_client)
143+
request_list = await create_request_list(actor_start_urls_input, http_client=http_client)
150144
generated_requests = []
151-
while request := await generated_input.start_urls.fetch_next_request():
145+
while request := await request_list.fetch_next_request():
152146
generated_requests.append(request)
153147

154148
# Check correctly created requests' urls in request list
@@ -158,29 +152,28 @@ async def test_actor_create_request_list_from_url() -> None:
158152
async def test_actor_create_request_list_from_url_additional_inputs() -> None:
159153
"""Test that all generated request properties are correctly populated from input values."""
160154
expected_simple_url = 'https://www.someurl.com'
161-
example_start_url_input = {
155+
example_start_url_input: dict[str, Any] = {
162156
ActorInputKeys.startUrls.requestsFromUrl: 'https://crawlee.dev/file.txt',
163157
ActorInputKeys.startUrls.method: 'POST',
164158
ActorInputKeys.startUrls.headers: {'key': 'value'},
165159
ActorInputKeys.startUrls.payload: 'some_payload',
166160
ActorInputKeys.startUrls.userData: {'another_key': 'another_value'},
167161
}
168-
example_actor_input: dict[str, Any] = {ActorInputKeys.startUrls: [example_start_url_input]}
162+
169163
response_bodies = iter((expected_simple_url,))
170164
http_client = HttpxHttpClient()
171165
with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
172-
generated_input = await Input.read(example_actor_input, http_client=http_client)
173-
request = await generated_input.start_urls.fetch_next_request()
166+
request_list = await create_request_list([example_start_url_input], http_client=http_client)
167+
request = await request_list.fetch_next_request()
174168

175169
# Check all properties correctly created for request
176-
example_start_url_input = example_actor_input[ActorInputKeys.startUrls][0]
177170
assert request
178171
assert request.url == expected_simple_url
179172
assert request.method == example_start_url_input[ActorInputKeys.startUrls.method]
180173
assert request.headers.root == example_start_url_input[ActorInputKeys.startUrls.headers]
181174
assert request.payload == str(example_start_url_input[ActorInputKeys.startUrls.payload]).encode('utf-8')
182175
expected_user_data = UserData()
183-
for key, value in example_actor_input[ActorInputKeys.startUrls][0][ActorInputKeys.startUrls.userData].items():
176+
for key, value in example_start_url_input[ActorInputKeys.startUrls.userData].items():
184177
expected_user_data[key] = value
185178
assert request.user_data == expected_user_data
186179

0 commit comments

Comments
 (0)