|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 |
| -import typing |
4 | 3 | from typing import TYPE_CHECKING
|
5 |
| -from unittest import mock |
6 |
| -from unittest.mock import call |
7 | 4 |
|
8 | 5 | import httpx
|
9 | 6 | import pytest
|
10 | 7 |
|
11 | 8 | from apify_client import ApifyClientAsync
|
12 | 9 | from apify_shared.consts import ApifyEnvVars
|
13 |
| -from crawlee._request import UserData |
14 |
| -from crawlee._types import HttpHeaders, HttpMethod |
15 |
| -from crawlee.http_clients import HttpResponse, HttpxHttpClient |
16 | 10 |
|
17 | 11 | from apify import Actor
|
18 | 12 |
|
@@ -147,128 +141,3 @@ async def test_proxy_configuration_with_actor_proxy_input(
|
147 | 141 | assert len(route.calls) == 2
|
148 | 142 |
|
149 | 143 | await Actor.exit()
|
150 |
| - |
151 |
| - |
152 |
@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod))
@pytest.mark.parametrize(
    'optional_input',
    [
        {},
        {'payload': 'some payload', 'userData': {'some key': 'some value'}, 'headers': {'h1': 'v1', 'h2': 'v2'}},
    ],
    ids=['minimal', 'all_options'],
)
async def test_actor_create_request_list_request_types(
    request_method: HttpMethod, optional_input: dict[str, str]
) -> None:
    """Test proper request list generation from both minimal and full inputs for all method types for simple input."""
    minimal_request_dict_input = {'url': 'https://www.abc.com', 'method': request_method}
    request_dict_input = {**minimal_request_dict_input, **optional_input}
    example_start_urls_input = [
        request_dict_input,
    ]

    generated_request_list = await Actor.create_request_list(actor_start_urls_input=example_start_urls_input)

    assert not await generated_request_list.is_empty()
    generated_request = await generated_request_list.fetch_next_request()
    # fetch_next_request() may return None; fail here with a clear assertion instead of
    # an AttributeError on the attribute checks below.
    assert generated_request is not None
    assert await generated_request_list.is_empty()

    assert generated_request.method == request_dict_input['method']
    assert generated_request.url == request_dict_input['url']
    # A missing 'payload' key is expected to normalize to empty bytes.
    assert generated_request.payload == request_dict_input.get('payload', '').encode('utf-8')
    expected_user_data = UserData()
    if 'userData' in optional_input:
        for key, value in optional_input['userData'].items():
            expected_user_data[key] = value
    assert generated_request.user_data == expected_user_data
    # A missing 'headers' key is expected to normalize to empty headers.
    expected_headers = HttpHeaders(root=optional_input.get('headers', {}))
    assert generated_request.headers == expected_headers
187 |
| - |
188 |
| - |
189 |
def _create_dummy_response(read_output: typing.Iterator[str]) -> HttpResponse:
    """Create dummy_response that will iterate through read_output when called like dummy_response.read().

    Args:
        read_output: Iterator of response bodies; each `read()` call consumes and returns the next
            item, UTF-8 encoded. (Annotated `Iterator`, not `Iterable`, because the body calls
            `next()` directly on it.)
    """

    class DummyResponse(HttpResponse):
        @property
        def http_version(self) -> str:
            return ''

        @property
        def status_code(self) -> int:
            # Always report success so callers proceed to read the body.
            return 200

        @property
        def headers(self) -> HttpHeaders:
            return HttpHeaders()

        def read(self) -> bytes:
            # Consume the next prepared body; raises StopIteration once exhausted,
            # which surfaces as a test failure if read() is called too many times.
            return next(read_output).encode('utf-8')

    return DummyResponse()
209 |
| - |
210 |
| - |
211 |
async def test_actor_create_request_list_from_url_correctly_send_requests() -> None:
    """Test that injected HttpClient's method send_request is called with properly passed arguments."""
    start_urls = [
        {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
        {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'PUT'},
        {
            'requestsFromUrl': 'https://www.something.som',
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'userData': 'irrelevant',
        },
    ]
    # One empty body per input entry; the dummy response yields them on successive read() calls.
    empty_bodies = ('' for _ in start_urls)
    client = HttpxHttpClient()

    with mock.patch.object(
        client, 'send_request', return_value=_create_dummy_response(empty_bodies)
    ) as send_mock:
        await Actor.create_request_list(actor_start_urls_input=start_urls, http_client=client)

        # Each input entry must translate into exactly one send_request call carrying its
        # method, url, headers (default {}) and UTF-8 encoded payload (default empty).
        send_mock.assert_has_calls(
            [
                call(
                    method=entry['method'],
                    url=entry['requestsFromUrl'],
                    headers=entry.get('headers', {}),
                    payload=entry.get('payload', '').encode('utf-8'),
                )
                for entry in start_urls
            ]
        )
242 |
| - |
243 |
| - |
244 |
async def test_actor_create_request_list_from_url() -> None:
    """Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
    expected_simple_url = 'https://www.someurl.com'
    expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
    expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'}
    expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url}
    # Remote sources answer with free-form text; embedded scheme-prefixed URLs should be
    # extracted, while scheme-less look-alikes ('www.falsepositive', 'www.false_positive.com')
    # must be ignored.
    response_bodies = iter(
        (
            'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1),
            'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2),
        )
    )

    example_start_urls_input = [
        {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
        {'url': expected_simple_url, 'method': 'GET'},
        {'requestsFromUrl': 'https://www.crawlee.dev/file2', 'method': 'GET'},
    ]

    http_client = HttpxHttpClient()
    with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
        generated_request_list = await Actor.create_request_list(
            actor_start_urls_input=example_start_urls_input, http_client=http_client
        )
        generated_requests = []
        # Drain the request list; fetch_next_request() returns None when exhausted.
        # (Removed a leftover debug print(request) from this loop.)
        while request := await generated_request_list.fetch_next_request():
            generated_requests.append(request)

    # Check correctly created requests' urls in request list
    assert {generated_request.url for generated_request in generated_requests} == expected_urls
0 commit comments