11
11
from crawlee ._types import HttpHeaders , HttpMethod
12
12
from crawlee .http_clients import HttpResponse , HttpxHttpClient
13
13
14
- from apify .storages ._actor_inputs import URL_NO_COMMAS_REGEX , ActorInputKeys , Input
14
+ from apify .storages ._actor_inputs import URL_NO_COMMAS_REGEX , ActorInputKeys , create_request_list
15
15
16
16
17
17
@pytest .mark .parametrize ('request_method' , get_args (HttpMethod ))
@@ -36,24 +36,22 @@ async def test_actor_create_request_list_request_types(
36
36
ActorInputKeys .startUrls .method : request_method ,
37
37
}
38
38
request_dict_input = {** minimal_request_dict_input , ** optional_input }
39
- example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [request_dict_input ]}
40
39
41
- generated_input = await Input .read (example_actor_input )
40
+ request_list = await create_request_list ([request_dict_input ])
41
+ assert not await request_list .is_empty ()
42
+ request = await request_list .fetch_next_request ()
43
+ assert request is not None
44
+ assert await request_list .is_empty ()
42
45
43
- assert not await generated_input .start_urls .is_empty ()
44
- generated_request = await generated_input .start_urls .fetch_next_request ()
45
- assert generated_request is not None
46
- assert await generated_input .start_urls .is_empty ()
47
-
48
- assert generated_request .method == request_dict_input [ActorInputKeys .startUrls .method ]
49
- assert generated_request .url == request_dict_input [ActorInputKeys .startUrls .url ]
50
- assert generated_request .payload == request_dict_input .get (ActorInputKeys .startUrls .payload , '' ).encode ('utf-8' )
46
+ assert request .method == request_dict_input [ActorInputKeys .startUrls .method ]
47
+ assert request .url == request_dict_input [ActorInputKeys .startUrls .url ]
48
+ assert request .payload == request_dict_input .get (ActorInputKeys .startUrls .payload , '' ).encode ('utf-8' )
51
49
expected_user_data = UserData ()
52
50
if ActorInputKeys .startUrls .userData in optional_input :
53
51
for key , value in optional_input [ActorInputKeys .startUrls .userData ].items ():
54
52
expected_user_data [key ] = value
55
- assert generated_request .user_data == expected_user_data
56
- assert generated_request .headers .root == optional_input .get (ActorInputKeys .startUrls .headers , {})
53
+ assert request .user_data == expected_user_data
54
+ assert request .headers .root == optional_input .get (ActorInputKeys .startUrls .headers , {})
57
55
58
56
59
57
def _create_dummy_response (read_output : Iterator [str ]) -> HttpResponse :
@@ -80,39 +78,37 @@ def read(self) -> bytes:
80
78
81
79
async def test_actor_create_request_list_from_url_correctly_send_requests () -> None :
82
80
"""Test that injected HttpClient's method send_request is called with properly passed arguments."""
83
- example_actor_input : dict [str , Any ] = {
84
- ActorInputKeys .startUrls : [
85
- {
86
- ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
87
- ActorInputKeys .startUrls .method : 'GET' ,
88
- },
89
- {
90
- ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
91
- ActorInputKeys .startUrls .method : 'PUT' ,
92
- },
93
- {
94
- ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
95
- ActorInputKeys .startUrls .method : 'POST' ,
96
- ActorInputKeys .startUrls .headers : {'key' : 'value' },
97
- ActorInputKeys .startUrls .payload : 'some_payload' ,
98
- ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
99
- },
100
- ]
101
- }
81
+ actor_start_urls_input : list [dict [str , Any ]] = [
82
+ {
83
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
84
+ ActorInputKeys .startUrls .method : 'GET' ,
85
+ },
86
+ {
87
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
88
+ ActorInputKeys .startUrls .method : 'PUT' ,
89
+ },
90
+ {
91
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
92
+ ActorInputKeys .startUrls .method : 'POST' ,
93
+ ActorInputKeys .startUrls .headers : {'key' : 'value' },
94
+ ActorInputKeys .startUrls .payload : 'some_payload' ,
95
+ ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
96
+ },
97
+ ]
102
98
103
- mocked_read_outputs = ('' for url in example_actor_input [ ActorInputKeys . startUrls ] )
99
+ mocked_read_outputs = ('' for url in actor_start_urls_input )
104
100
http_client = HttpxHttpClient ()
105
101
with mock .patch .object (
106
102
http_client , 'send_request' , return_value = _create_dummy_response (mocked_read_outputs )
107
103
) as mocked_send_request :
108
- await Input . read ( example_actor_input , http_client = http_client )
104
+ await create_request_list ( actor_start_urls_input , http_client = http_client )
109
105
110
106
expected_calls = [
111
107
call (
112
108
method = 'GET' ,
113
109
url = example_input [ActorInputKeys .startUrls .requestsFromUrl ],
114
110
)
115
- for example_input in example_actor_input [ ActorInputKeys . startUrls ]
111
+ for example_input in actor_start_urls_input
116
112
]
117
113
mocked_send_request .assert_has_calls (expected_calls )
118
114
@@ -130,25 +126,23 @@ async def test_actor_create_request_list_from_url() -> None:
130
126
)
131
127
)
132
128
133
- example_actor_input : dict [str , Any ] = {
134
- ActorInputKeys .startUrls : [
135
- {
136
- ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
137
- ActorInputKeys .startUrls .method : 'GET' ,
138
- },
139
- {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
140
- {
141
- ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
142
- ActorInputKeys .startUrls .method : 'GET' ,
143
- },
144
- ]
145
- }
129
+ actor_start_urls_input = [
130
+ {
131
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
132
+ ActorInputKeys .startUrls .method : 'GET' ,
133
+ },
134
+ {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
135
+ {
136
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
137
+ ActorInputKeys .startUrls .method : 'GET' ,
138
+ },
139
+ ]
146
140
147
141
http_client = HttpxHttpClient ()
148
142
with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
149
- generated_input = await Input . read ( example_actor_input , http_client = http_client )
143
+ request_list = await create_request_list ( actor_start_urls_input , http_client = http_client )
150
144
generated_requests = []
151
- while request := await generated_input . start_urls .fetch_next_request ():
145
+ while request := await request_list .fetch_next_request ():
152
146
generated_requests .append (request )
153
147
154
148
# Check correctly created requests' urls in request list
@@ -158,29 +152,28 @@ async def test_actor_create_request_list_from_url() -> None:
158
152
async def test_actor_create_request_list_from_url_additional_inputs () -> None :
159
153
"""Test that all generated request properties are correctly populated from input values."""
160
154
expected_simple_url = 'https://www.someurl.com'
161
- example_start_url_input = {
155
+ example_start_url_input : dict [ str , Any ] = {
162
156
ActorInputKeys .startUrls .requestsFromUrl : 'https://crawlee.dev/file.txt' ,
163
157
ActorInputKeys .startUrls .method : 'POST' ,
164
158
ActorInputKeys .startUrls .headers : {'key' : 'value' },
165
159
ActorInputKeys .startUrls .payload : 'some_payload' ,
166
160
ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
167
161
}
168
- example_actor_input : dict [ str , Any ] = { ActorInputKeys . startUrls : [ example_start_url_input ]}
162
+
169
163
response_bodies = iter ((expected_simple_url ,))
170
164
http_client = HttpxHttpClient ()
171
165
with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
172
- generated_input = await Input . read ( example_actor_input , http_client = http_client )
173
- request = await generated_input . start_urls .fetch_next_request ()
166
+ request_list = await create_request_list ([ example_start_url_input ] , http_client = http_client )
167
+ request = await request_list .fetch_next_request ()
174
168
175
169
# Check all properties correctly created for request
176
- example_start_url_input = example_actor_input [ActorInputKeys .startUrls ][0 ]
177
170
assert request
178
171
assert request .url == expected_simple_url
179
172
assert request .method == example_start_url_input [ActorInputKeys .startUrls .method ]
180
173
assert request .headers .root == example_start_url_input [ActorInputKeys .startUrls .headers ]
181
174
assert request .payload == str (example_start_url_input [ActorInputKeys .startUrls .payload ]).encode ('utf-8' )
182
175
expected_user_data = UserData ()
183
- for key , value in example_actor_input [ ActorInputKeys . startUrls ][ 0 ] [ActorInputKeys .startUrls .userData ].items ():
176
+ for key , value in example_start_url_input [ActorInputKeys .startUrls .userData ].items ():
184
177
expected_user_data [key ] = value
185
178
assert request .user_data == expected_user_data
186
179
0 commit comments