Skip to content

Commit 318c9c8

Browse files
committed
Addressing review comments 2
1 parent 3f33145 commit 318c9c8

File tree

2 files changed

+62
-78
lines changed

2 files changed

+62
-78
lines changed

src/apify/storages/_request_list.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,25 @@ async def open(
5151
) -> RequestList:
5252
"""Creates RequestList from Actor input requestListSources.
5353
54-
name is name of the returned RequestList
55-
request_list_sources_input can contain list dicts with either url or requestsFromUrl key
56-
http_client is client that will be used to send get request to url defined in requestsFromUrl
57-
58-
Example request_list_sources_input:
59-
[
60-
# Gather urls from response body.
61-
{'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
62-
# Directly include this url.
63-
{'url': 'https://crawlee.dev', 'method': 'GET'}
64-
]
54+
Args:
55+
name: Name of the returned RequestList.
56+
request_list_sources_input: List of dicts with either url key or requestsFromUrl key.
57+
http_client: Client that will be used to send get request to urls defined by value of requestsFromUrl keys.
58+
59+
Returns:
60+
RequestList created from request_list_sources_input.
61+
62+
### Usage
63+
64+
```python
65+
example_input = [
66+
# Gather urls from response body.
67+
{'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
68+
# Directly include this url.
69+
{'url': 'https://crawlee.dev', 'method': 'GET'}
70+
]
71+
request_list = await RequestList.open(request_list_sources_input=example_input)
72+
```
6573
"""
6674
request_list_sources_input = request_list_sources_input or []
6775
return await RequestList._create_request_list(name, request_list_sources_input, http_client)
@@ -73,7 +81,7 @@ async def _create_request_list(
7381
if not http_client:
7482
http_client = HttpxHttpClient()
7583

76-
ulr_inputs = url_input_adapter.validate_python(request_list_sources_input) # instance of list[Union[...]]
84+
ulr_inputs = url_input_adapter.validate_python(request_list_sources_input)
7785

7886
simple_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _SimpleUrlInput]
7987
remote_url_inputs = [url_input for url_input in ulr_inputs if type(url_input) is _RequestsFromUrlInput]

tests/unit/actor/test_request_list.py

Lines changed: 42 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from __future__ import annotations
22

33
import re
4-
from typing import Any, Iterator, get_args
5-
from unittest import mock
6-
from unittest.mock import call
4+
from dataclasses import dataclass
5+
from typing import Any, get_args
76

87
import pytest
8+
import respx
9+
from httpx import Response
910

1011
from crawlee._request import UserData
11-
from crawlee._types import HttpHeaders, HttpMethod
12-
from crawlee.http_clients import HttpResponse, HttpxHttpClient
12+
from crawlee._types import HttpMethod
1313

1414
from apify.storages._request_list import URL_NO_COMMAS_REGEX, RequestList
1515

@@ -52,30 +52,9 @@ async def test_request_list_open_request_types(request_method: HttpMethod, optio
5252
assert request.headers.root == optional_input.get('headers', {})
5353

5454

55-
def _create_dummy_response(read_output: Iterator[str]) -> HttpResponse:
56-
"""Create dummy_response that will iterate through read_output when called like dummy_response.read()"""
57-
58-
class DummyResponse(HttpResponse):
59-
@property
60-
def http_version(self) -> str:
61-
return ''
62-
63-
@property
64-
def status_code(self) -> int:
65-
return 200
66-
67-
@property
68-
def headers(self) -> HttpHeaders:
69-
return HttpHeaders()
70-
71-
def read(self) -> bytes:
72-
return next(read_output).encode('utf-8')
73-
74-
return DummyResponse()
75-
76-
77-
async def test__request_list_open_from_url_correctly_send_requests() -> None:
78-
"""Test that injected HttpClient's method send_request is called with properly passed arguments."""
55+
@respx.mock
56+
async def test_request_list_open_from_url_correctly_send_requests() -> None:
57+
"""Test that requests are sent to expected urls."""
7958
request_list_sources_input: list[dict[str, Any]] = [
8059
{
8160
'requestsFromUrl': 'https://abc.dev/file.txt',
@@ -94,65 +73,65 @@ async def test__request_list_open_from_url_correctly_send_requests() -> None:
9473
},
9574
]
9675

97-
mocked_read_outputs = ('' for url in request_list_sources_input)
76+
routes = [respx.get(entry['requestsFromUrl']) for entry in request_list_sources_input]
9877

99-
mocked_http_client = mock.Mock(spec_set=HttpxHttpClient)
100-
with mock.patch.object(
101-
mocked_http_client, 'send_request', return_value=_create_dummy_response(mocked_read_outputs)
102-
) as mocked_send_request:
103-
await RequestList.open(request_list_sources_input=request_list_sources_input, http_client=mocked_http_client)
78+
await RequestList.open(request_list_sources_input=request_list_sources_input)
10479

105-
expected_calls = [
106-
call(
107-
method='GET',
108-
url=example_input['requestsFromUrl'],
109-
)
110-
for example_input in request_list_sources_input
111-
]
112-
mocked_send_request.assert_has_calls(expected_calls)
80+
for route in routes:
81+
assert route.called
11382

11483

84+
@respx.mock
11585
async def test_request_list_open_from_url() -> None:
11686
"""Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
11787
expected_simple_url = 'https://www.someurl.com'
11888
expected_remote_urls_1 = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
11989
expected_remote_urls_2 = {'http://www.ok.com', 'https://www.true-positive.com'}
12090
expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url}
121-
response_bodies = iter(
122-
(
91+
92+
@dataclass
93+
class MockedUrlInfo:
94+
url: str
95+
response_text: str
96+
97+
mocked_urls = (
98+
MockedUrlInfo(
99+
'https://abc.dev/file.txt',
123100
'blablabla{} more blablabla{} , even more blablabla. {} '.format(*expected_remote_urls_1),
124-
'some stuff{} more stuff{} www.falsepositive www.false_positive.com'.format(*expected_remote_urls_2),
125-
)
101+
),
102+
MockedUrlInfo(
103+
'https://www.abc.dev/file2',
104+
'some stuff{} more stuff{} www.false_positive.com'.format(*expected_remote_urls_2),
105+
),
126106
)
127107

128108
request_list_sources_input = [
129109
{
130-
'requestsFromUrl': 'https://abc.dev/file.txt',
110+
'requestsFromUrl': mocked_urls[0].url,
131111
'method': 'GET',
132112
},
133113
{'url': expected_simple_url, 'method': 'GET'},
134114
{
135-
'requestsFromUrl': 'https://www.abc.dev/file2',
115+
'requestsFromUrl': mocked_urls[1].url,
136116
'method': 'GET',
137117
},
138118
]
119+
for mocked_url in mocked_urls:
120+
respx.get(mocked_url.url).mock(return_value=Response(200, text=mocked_url.response_text))
139121

140-
mocked_http_client = mock.Mock(spec_set=HttpxHttpClient)
141-
with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
142-
request_list = await RequestList.open(
143-
request_list_sources_input=request_list_sources_input, http_client=mocked_http_client
144-
)
145-
generated_requests = []
146-
while request := await request_list.fetch_next_request():
147-
generated_requests.append(request)
122+
request_list = await RequestList.open(request_list_sources_input=request_list_sources_input)
123+
generated_requests = []
124+
while request := await request_list.fetch_next_request():
125+
generated_requests.append(request)
148126

149127
# Check correctly created requests' urls in request list
150128
assert {generated_request.url for generated_request in generated_requests} == expected_urls
151129

152130

131+
@respx.mock
153132
async def test_request_list_open_from_url_additional_inputs() -> None:
154133
"""Test that all generated request properties are correctly populated from input values."""
155-
expected_simple_url = 'https://www.someurl.com'
134+
expected_url = 'https://www.someurl.com'
156135
example_start_url_input: dict[str, Any] = {
157136
'requestsFromUrl': 'https://crawlee.dev/file.txt',
158137
'method': 'POST',
@@ -161,17 +140,14 @@ async def test_request_list_open_from_url_additional_inputs() -> None:
161140
'userData': {'another_key': 'another_value'},
162141
}
163142

164-
response_bodies = iter((expected_simple_url,))
165-
mocked_http_client = mock.Mock(spec_set=HttpxHttpClient)
166-
with mock.patch.object(mocked_http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
167-
request_list = await RequestList.open(
168-
request_list_sources_input=[example_start_url_input], http_client=mocked_http_client
169-
)
170-
request = await request_list.fetch_next_request()
143+
respx.get(example_start_url_input['requestsFromUrl']).mock(return_value=Response(200, text=expected_url))
144+
145+
request_list = await RequestList.open(request_list_sources_input=[example_start_url_input])
146+
request = await request_list.fetch_next_request()
171147

172148
# Check all properties correctly created for request
173149
assert request
174-
assert request.url == expected_simple_url
150+
assert request.url == expected_url
175151
assert request.method == example_start_url_input['method']
176152
assert request.headers.root == example_start_url_input['headers']
177153
assert request.payload == str(example_start_url_input['payload']).encode('utf-8')

0 commit comments

Comments (0)