|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -import typing |
4 | 3 | from typing import TYPE_CHECKING |
5 | | -from unittest import mock |
6 | | -from unittest.mock import call |
7 | 4 |
|
8 | 5 | import httpx |
9 | 6 | import pytest |
10 | 7 |
|
11 | 8 | from apify_client import ApifyClientAsync |
12 | 9 | from apify_shared.consts import ApifyEnvVars |
13 | | -from crawlee._request import UserData |
14 | | -from crawlee._types import HttpHeaders, HttpMethod |
15 | | -from crawlee.http_clients import HttpResponse, HttpxHttpClient |
16 | 10 |
|
17 | 11 | from apify import Actor |
18 | 12 |
|
@@ -147,128 +141,3 @@ async def test_proxy_configuration_with_actor_proxy_input( |
147 | 141 | assert len(route.calls) == 2 |
148 | 142 |
|
149 | 143 | await Actor.exit() |
150 | | - |
151 | | - |
@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod))
@pytest.mark.parametrize(
    'optional_input',
    [
        {},
        {'payload': 'some payload', 'userData': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}},
    ],
    ids=['minimal', 'all_options'],
)
async def test_actor_create_request_list_request_types(
    request_method: HttpMethod, optional_input: dict[str, str]
) -> None:
    """Check that a single plain-URL start entry becomes exactly one matching request.

    Runs for every HTTP method in `HttpMethod`, once with only `url` and `method` set and once with
    `payload`, `userData` and `headers` supplied as well.
    """
    start_url_entry = {'url': 'https://www.abc.com', 'method': request_method, **optional_input}

    # Build everything we expect to see before touching the request list.
    expected_payload = start_url_entry.get('payload', '').encode('utf-8')
    expected_headers = HttpHeaders(root=optional_input.get('headers', {}))
    expected_user_data = UserData()
    for key, value in optional_input.get('userData', {}).items():
        expected_user_data[key] = value

    request_list = await Actor.create_request_list(actor_start_urls_input=[start_url_entry])

    # Exactly one request must be produced: non-empty before the fetch, empty right after it.
    assert not await request_list.is_empty()
    request = await request_list.fetch_next_request()
    assert await request_list.is_empty()

    assert request.method == start_url_entry['method']
    assert request.url == start_url_entry['url']
    assert request.payload == expected_payload
    assert request.user_data == expected_user_data
    assert request.headers == expected_headers
187 | | - |
188 | | - |
def _create_dummy_response(read_output: typing.Iterable[str]) -> HttpResponse:
    """Create a dummy response whose `read()` walks through `read_output`.

    Args:
        read_output: Strings to serve as response bodies, one per `read()` call. Any iterable is accepted;
            an iterator (e.g. a generator) is consumed in place.

    Returns:
        An `HttpResponse` with status code 200, an empty HTTP version string and empty headers. Each call
        to `read()` returns the next item of `read_output` encoded as UTF-8, and raises `StopIteration`
        once the items are exhausted.
    """
    # `next()` only works on iterators. Normalizing once here lets callers pass a plain list or tuple too,
    # while an iterator argument is returned unchanged by `iter()` and keeps its current position.
    body_iterator = iter(read_output)

    class DummyResponse(HttpResponse):
        @property
        def http_version(self) -> str:
            return ''

        @property
        def status_code(self) -> int:
            return 200

        @property
        def headers(self) -> HttpHeaders:
            return HttpHeaders()

        def read(self) -> bytes:
            return next(body_iterator).encode('utf-8')

    return DummyResponse()
209 | | - |
210 | | - |
async def test_actor_create_request_list_from_url_correctly_send_requests() -> None:
    """Verify that the injected HTTP client's `send_request` receives the arguments taken from the input."""
    start_urls_input = [
        {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
        {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'PUT'},
        {
            'requestsFromUrl': 'https://www.something.som',
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'userData': 'irrelevant',
        },
    ]

    # One empty body per input entry; the content does not matter here, only the outgoing calls do.
    empty_bodies = ('' for _ in start_urls_input)
    dummy_response = _create_dummy_response(empty_bodies)

    client = HttpxHttpClient()
    with mock.patch.object(client, 'send_request', return_value=dummy_response) as send_request_mock:
        await Actor.create_request_list(actor_start_urls_input=start_urls_input, http_client=client)

    # Missing headers default to an empty dict and a missing payload to empty bytes.
    expected_calls = [
        call(
            method=entry['method'],
            url=entry['requestsFromUrl'],
            headers=entry.get('headers', {}),
            payload=entry.get('payload', '').encode('utf-8'),
        )
        for entry in start_urls_input
    ]
    send_request_mock.assert_has_calls(expected_calls)
242 | | - |
243 | | - |
async def test_actor_create_request_list_from_url() -> None:
    """Test that create_request_list reads urls both from remote url sources and from simple input.

    The two `requestsFromUrl` entries are served by a mocked `send_request` whose response bodies embed
    the expected urls in surrounding text. The second body also contains `www.falsepositive` and
    `www.false_positive.com`, which are not part of the expected set, so the final equality check fails
    if they end up in the request list.
    """
    expected_simple_url = 'https://www.someurl.com'
    expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
    expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'}
    expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url}
    # Set iteration order is arbitrary, but every url lands in some placeholder, which is all the test needs.
    response_bodies = iter(
        (
            'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1),
            'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2),
        )
    )

    example_start_urls_input = [
        {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
        {'url': expected_simple_url, 'method': 'GET'},
        {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'GET'},
    ]

    http_client = HttpxHttpClient()
    with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
        generated_request_list = await Actor.create_request_list(
            actor_start_urls_input=example_start_urls_input, http_client=http_client
        )
        # Drain the list while the mock is still active and keep only the urls, which is all we compare.
        generated_urls = set()
        while request := await generated_request_list.fetch_next_request():
            generated_urls.add(request.url)

    # Check correctly created requests' urls in request list
    assert generated_urls == expected_urls
0 commit comments