1919 'optional_input' ,
2020 [
2121 {},
22- {ActorInputKeys .startUrls .payload : 'some payload' , ActorInputKeys .startUrls .userData :
23- {'some key' : 'some value' }, ActorInputKeys .startUrls .headers : {'h1' : 'v1' , 'h2' : 'v2' }},
22+ {
23+ ActorInputKeys .startUrls .payload : 'some payload' ,
24+ ActorInputKeys .startUrls .userData : {'some key' : 'some value' },
25+ ActorInputKeys .startUrls .headers : {'h1' : 'v1' , 'h2' : 'v2' },
26+ },
2427 ],
2528 ids = ['minimal' , 'all_options' ],
2629)
2730async def test_actor_create_request_list_request_types (
2831 request_method : HttpMethod , optional_input : dict [str , Any ]
2932) -> None :
3033 """Test proper request list generation from both minimal and full inputs for all method types for simple input."""
31- minimal_request_dict_input = {ActorInputKeys .startUrls .url : 'https://www.abc.com' ,
32- ActorInputKeys .startUrls .method : request_method }
34+ minimal_request_dict_input = {
35+ ActorInputKeys .startUrls .url : 'https://www.abc.com' ,
36+ ActorInputKeys .startUrls .method : request_method ,
37+ }
3338 request_dict_input = {** minimal_request_dict_input , ** optional_input }
3439 example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [request_dict_input ]}
3540
@@ -75,17 +80,25 @@ def read(self) -> bytes:
7580
7681async def test_actor_create_request_list_from_url_correctly_send_requests () -> None :
7782 """Test that injected HttpClient's method send_request is called with properly passed arguments."""
78- example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [
79- {ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' , ActorInputKeys .startUrls .method : 'GET' },
80- {ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' , ActorInputKeys .startUrls .method : 'PUT' },
81- {
82- ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
83- ActorInputKeys .startUrls .method : 'POST' ,
84- ActorInputKeys .startUrls .headers : {'key' : 'value' },
85- ActorInputKeys .startUrls .payload : 'some_payload' ,
86- ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
87- },
88- ]}
83+ example_actor_input : dict [str , Any ] = {
84+ ActorInputKeys .startUrls : [
85+ {
86+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
87+ ActorInputKeys .startUrls .method : 'GET' ,
88+ },
89+ {
90+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
91+ ActorInputKeys .startUrls .method : 'PUT' ,
92+ },
93+ {
94+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
95+ ActorInputKeys .startUrls .method : 'POST' ,
96+ ActorInputKeys .startUrls .headers : {'key' : 'value' },
97+ ActorInputKeys .startUrls .payload : 'some_payload' ,
98+ ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
99+ },
100+ ]
101+ }
89102
90103 mocked_read_outputs = ('' for url in example_actor_input [ActorInputKeys .startUrls ])
91104 http_client = HttpxHttpClient ()
@@ -117,11 +130,19 @@ async def test_actor_create_request_list_from_url() -> None:
117130 )
118131 )
119132
120- example_actor_input :dict [str , Any ] = {ActorInputKeys .startUrls :[
121- {ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' , ActorInputKeys .startUrls .method : 'GET' },
122- {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
123- {ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' , ActorInputKeys .startUrls .method : 'GET' },
124- ]}
133+ example_actor_input : dict [str , Any ] = {
134+ ActorInputKeys .startUrls : [
135+ {
136+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
137+ ActorInputKeys .startUrls .method : 'GET' ,
138+ },
139+ {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
140+ {
141+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
142+ ActorInputKeys .startUrls .method : 'GET' ,
143+ },
144+ ]
145+ }
125146
126147 http_client = HttpxHttpClient ()
127148 with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
@@ -133,16 +154,18 @@ async def test_actor_create_request_list_from_url() -> None:
133154 # Check correctly created requests' urls in request list
134155 assert {generated_request .url for generated_request in generated_requests } == expected_urls
135156
136- async def test_actor_create_request_list_from_url_additional_inputs () -> None :
157+
158+ async def test_actor_create_request_list_from_url_additional_inputs () -> None :
137159 """Test that all generated request properties are correctly populated from input values."""
138160 expected_simple_url = 'https://www.someurl.com'
139161 example_start_url_input = {
140162 ActorInputKeys .startUrls .requestsFromUrl : 'https://crawlee.dev/file.txt' ,
141163 ActorInputKeys .startUrls .method : 'POST' ,
142164 ActorInputKeys .startUrls .headers : {'key' : 'value' },
143165 ActorInputKeys .startUrls .payload : 'some_payload' ,
144- ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' }}
145- example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls :[example_start_url_input ]}
166+ ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
167+ }
168+ example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [example_start_url_input ]}
146169 response_bodies = iter ((expected_simple_url ,))
147170 http_client = HttpxHttpClient ()
148171 with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
@@ -162,43 +185,50 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None:
162185 assert request .user_data == expected_user_data
163186
164187
@pytest.mark.parametrize(
    'true_positive',
    [
        'http://www.something.com',
        'https://www.something.net',
        'http://nowww.cz',
        'https://with-hypen.com',
        'http://number1.com',
        'http://www.number.123.abc',
        'http://many.dots.com',
        'http://a.com',
        # Two separate cases: without the comma between them Python concatenates the adjacent
        # literals into one string, so neither URL would be exercised on its own.
        'http://www.something.com/somethignelse',
        'http://www.something.com/somethignelse.txt',
        'http://non-english-chars-áíéåü.com',
        'http://www.port.com:1234',
        # NOTE(review): the user:password@host part of this case was mangled by e-mail obfuscation
        # in the copy this was restored from; the host below is a reconstruction - confirm upstream.
        'http://username:password@something.else.com',
    ],
)
def test_url_no_commas_regex_true_positives(true_positive: str) -> None:
    """Test that a URL embedded in surrounding text yields exactly one match equal to the whole URL."""
    example_string = f'Some text {true_positive} some more text'
    matches = list(re.finditer(URL_NO_COMMAS_REGEX, example_string))
    # Exactly one hit, and it must span the complete URL rather than a prefix of it.
    assert len(matches) == 1
    assert matches[0].group(0) == true_positive
185210
@pytest.mark.parametrize(
    'false_positive',
    [
        'http://www.a',
        'http://a',
        'http://a.a',
        'http://123.456',
        'www.something.com',
        'http:www.something.com',
    ],
)
def test_url_no_commas_regex_false_positives(false_positive: str) -> None:
    """Test that a URL-like string embedded in surrounding text produces no regex match at all."""
    surrounding_text = f'Some text {false_positive} some more text'
    found = re.findall(URL_NO_COMMAS_REGEX, surrounding_text)
    # re.findall already returns a list; an empty one means nothing was matched.
    assert not found
198227
228+
def test_url_no_commas_regex_multi_line() -> None:
    """Test that URLs placed on two different lines of one string are both found, and nothing else."""
    expected_urls = ('http://www.something.com', 'http://www.else.com')
    text = 'Some text {} some more text \n Some new line text {} ...'.format(*expected_urls)
    found_urls = [match.group(0) for match in re.finditer(URL_NO_COMMAS_REGEX, text)]
    # Count first, so a duplicated match cannot hide behind the set comparison below.
    assert len(found_urls) == 2
    assert set(found_urls) == set(expected_urls)
0 commit comments