Skip to content

Commit 234b6fb

Browse files
committed
Add tests for regexp.
Add test for checking all generated request properties.
1 parent 0623ade commit 234b6fb

File tree

2 files changed

+69
-6
lines changed

2 files changed

+69
-6
lines changed

src/apify/_actor_inputs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import asyncio
44
import re
55
from asyncio import Task
6+
from functools import partial
67
from typing import Any
78

8-
from functools import partial
99
from pydantic import BaseModel, Field
1010

1111
from crawlee import Request

tests/unit/actor/test_actor_create_request_list.py

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import re
34
import typing
45
from unittest import mock
56
from unittest.mock import call
@@ -11,6 +12,7 @@
1112
from crawlee.http_clients import HttpResponse, HttpxHttpClient
1213

1314
from apify import Actor
15+
from apify._actor_inputs import URL_NO_COMMAS_REGEX
1416

1517

1618
@pytest.mark.parametrize('request_method', typing.get_args(HttpMethod))
@@ -47,8 +49,7 @@ async def test_actor_create_request_list_request_types(
4749
for key, value in optional_input['user_data'].items():
4850
expected_user_data[key] = value
4951
assert generated_request.user_data == expected_user_data
50-
expected_headers = HttpHeaders(root=optional_input.get('headers', {}))
51-
assert generated_request.headers == expected_headers
52+
assert generated_request.headers.root == optional_input.get('headers', {})
5253

5354

5455
def _create_dummy_response(read_output: typing.Iterator[str]) -> HttpResponse:
@@ -130,12 +131,74 @@ async def test_actor_create_request_list_from_url() -> None:
130131
)
131132
generated_requests = []
132133
while request := await generated_request_list.fetch_next_request():
133-
print(request)
134134
generated_requests.append(request)
135135

136136
# Check correctly created requests' urls in request list
137137
assert {generated_request.url for generated_request in generated_requests} == expected_urls
138138

139139
async def test_actor_create_request_list_from_url_additional_inputs() -> None:
    """Test that all generated request properties are correctly populated from input values.

    A single `requests_from_url` input entry carries a method, headers, a payload and user data.
    The request generated from the URL found in the (mocked) response must expose all of them.
    """
    expected_simple_url = 'https://www.someurl.com'
    example_start_urls_input = [
        {
            'requests_from_url': 'https://crawlee.dev/file.txt',
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'user_data': {'another_key': 'another_value'},
        },
    ]
    response_bodies = iter((expected_simple_url,))
    http_client = HttpxHttpClient()
    # Replace `send_request` with a mock so the test is served the dummy response built above.
    with mock.patch.object(http_client, 'send_request', return_value=_create_dummy_response(response_bodies)):
        generated_request_list = await Actor.create_request_list(
            actor_start_urls_input=example_start_urls_input, http_client=http_client
        )
        request = await generated_request_list.fetch_next_request()

    # `fetch_next_request` yields a falsy value when the list is exhausted; fail with a clear
    # message here instead of an AttributeError on the first property access below.
    assert request is not None, 'No request was generated from the input'

    # Check all properties correctly created for request
    input_entry = example_start_urls_input[0]
    assert request.url == expected_simple_url
    assert request.method == input_entry['method']
    assert request.headers.root == input_entry['headers']
    # The payload is given as `str` in the input and compared against its UTF-8 encoded bytes.
    assert request.payload == input_entry['payload'].encode('utf-8')
    expected_user_data = UserData()
    for key, value in input_entry['user_data'].items():
        expected_user_data[key] = value
    assert request.user_data == expected_user_data
165+
166+
167+
@pytest.mark.parametrize(
    'true_positive',
    [
        'http://www.something.com',
        'https://www.something.net',
        'http://nowww.cz',
        'https://with-hypen.com',
        'http://number1.com',
        'http://www.number.123',
        'http://many.dots.com',
        'http://a.com',
        # The trailing comma matters: without it Python concatenates this literal with the next
        # one and the two cases silently collapse into a single combined string.
        'http://www.something.com/somethignelse',
        'http://www.something.com/somethignelse.txt',
        # 'http://non-english-chars-á.com'  # `re` module not suitable, `regex` can do this with \p{L}.
        # Do we want this?
    ],
)
def test_url_no_commas_regex_true_positives(true_positive: str) -> None:
    """Check that a URL embedded in surrounding text is matched exactly once and in full."""
    example_string = f'Some text {true_positive} some more text'
    matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string))
    assert len(matches) == 1
    assert matches[0].group(0) == true_positive
185+
186+
@pytest.mark.parametrize(
    'false_positive',
    [
        'http://www.a',
        'http://a',
        'http://a.a',
        'http://123.456',
        'www.something.com',
        'http:www.something.com',
    ],
)
def test_url_no_commas_regex_false_positives(false_positive: str) -> None:
    """Check that strings which are not valid URLs produce no match inside surrounding text."""
    surrounding_text = f'Some text {false_positive} some more text'
    found = re.findall(URL_NO_COMMAS_REGEX, surrounding_text)
    # `re.findall` already returns a list; an empty one means nothing matched.
    assert not found
198+
199+
def test_url_no_commas_regex_multi_line() -> None:
    """Check that URLs placed on separate lines of one text are each matched."""
    expected_urls = ('http://www.something.com', 'http://www.else.com')
    template = 'Some text {} some more text \n Some new line text {} ...'
    text = template.format(*expected_urls)
    matched_urls = [found.group(0) for found in re.finditer(URL_NO_COMMAS_REGEX, text)]
    # Exactly two matches, and together they are the two URLs put into the text.
    assert len(matched_urls) == 2
    assert set(matched_urls) == set(expected_urls)

0 commit comments

Comments
 (0)