19
19
'optional_input' ,
20
20
[
21
21
{},
22
- {ActorInputKeys .startUrls .payload : 'some payload' , ActorInputKeys .startUrls .userData :
23
- {'some key' : 'some value' }, ActorInputKeys .startUrls .headers : {'h1' : 'v1' , 'h2' : 'v2' }},
22
+ {
23
+ ActorInputKeys .startUrls .payload : 'some payload' ,
24
+ ActorInputKeys .startUrls .userData : {'some key' : 'some value' },
25
+ ActorInputKeys .startUrls .headers : {'h1' : 'v1' , 'h2' : 'v2' },
26
+ },
24
27
],
25
28
ids = ['minimal' , 'all_options' ],
26
29
)
27
30
async def test_actor_create_request_list_request_types (
28
31
request_method : HttpMethod , optional_input : dict [str , Any ]
29
32
) -> None :
30
33
"""Test proper request list generation from both minimal and full inputs for all method types for simple input."""
31
- minimal_request_dict_input = {ActorInputKeys .startUrls .url : 'https://www.abc.com' ,
32
- ActorInputKeys .startUrls .method : request_method }
34
+ minimal_request_dict_input = {
35
+ ActorInputKeys .startUrls .url : 'https://www.abc.com' ,
36
+ ActorInputKeys .startUrls .method : request_method ,
37
+ }
33
38
request_dict_input = {** minimal_request_dict_input , ** optional_input }
34
39
example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [request_dict_input ]}
35
40
@@ -75,17 +80,25 @@ def read(self) -> bytes:
75
80
76
81
async def test_actor_create_request_list_from_url_correctly_send_requests () -> None :
77
82
"""Test that injected HttpClient's method send_request is called with properly passed arguments."""
78
- example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [
79
- {ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' , ActorInputKeys .startUrls .method : 'GET' },
80
- {ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' , ActorInputKeys .startUrls .method : 'PUT' },
81
- {
82
- ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
83
- ActorInputKeys .startUrls .method : 'POST' ,
84
- ActorInputKeys .startUrls .headers : {'key' : 'value' },
85
- ActorInputKeys .startUrls .payload : 'some_payload' ,
86
- ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
87
- },
88
- ]}
83
+ example_actor_input : dict [str , Any ] = {
84
+ ActorInputKeys .startUrls : [
85
+ {
86
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
87
+ ActorInputKeys .startUrls .method : 'GET' ,
88
+ },
89
+ {
90
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
91
+ ActorInputKeys .startUrls .method : 'PUT' ,
92
+ },
93
+ {
94
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.something.som' ,
95
+ ActorInputKeys .startUrls .method : 'POST' ,
96
+ ActorInputKeys .startUrls .headers : {'key' : 'value' },
97
+ ActorInputKeys .startUrls .payload : 'some_payload' ,
98
+ ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
99
+ },
100
+ ]
101
+ }
89
102
90
103
mocked_read_outputs = ('' for url in example_actor_input [ActorInputKeys .startUrls ])
91
104
http_client = HttpxHttpClient ()
@@ -117,11 +130,19 @@ async def test_actor_create_request_list_from_url() -> None:
117
130
)
118
131
)
119
132
120
- example_actor_input :dict [str , Any ] = {ActorInputKeys .startUrls :[
121
- {ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' , ActorInputKeys .startUrls .method : 'GET' },
122
- {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
123
- {ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' , ActorInputKeys .startUrls .method : 'GET' },
124
- ]}
133
+ example_actor_input : dict [str , Any ] = {
134
+ ActorInputKeys .startUrls : [
135
+ {
136
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://abc.dev/file.txt' ,
137
+ ActorInputKeys .startUrls .method : 'GET' ,
138
+ },
139
+ {ActorInputKeys .startUrls .url : expected_simple_url , ActorInputKeys .startUrls .method : 'GET' },
140
+ {
141
+ ActorInputKeys .startUrls .requestsFromUrl : 'https://www.abc.dev/file2' ,
142
+ ActorInputKeys .startUrls .method : 'GET' ,
143
+ },
144
+ ]
145
+ }
125
146
126
147
http_client = HttpxHttpClient ()
127
148
with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
@@ -133,16 +154,18 @@ async def test_actor_create_request_list_from_url() -> None:
133
154
# Check correctly created requests' urls in request list
134
155
assert {generated_request .url for generated_request in generated_requests } == expected_urls
135
156
136
- async def test_actor_create_request_list_from_url_additional_inputs () -> None :
157
+
158
+ async def test_actor_create_request_list_from_url_additional_inputs () -> None :
137
159
"""Test that all generated request properties are correctly populated from input values."""
138
160
expected_simple_url = 'https://www.someurl.com'
139
161
example_start_url_input = {
140
162
ActorInputKeys .startUrls .requestsFromUrl : 'https://crawlee.dev/file.txt' ,
141
163
ActorInputKeys .startUrls .method : 'POST' ,
142
164
ActorInputKeys .startUrls .headers : {'key' : 'value' },
143
165
ActorInputKeys .startUrls .payload : 'some_payload' ,
144
- ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' }}
145
- example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls :[example_start_url_input ]}
166
+ ActorInputKeys .startUrls .userData : {'another_key' : 'another_value' },
167
+ }
168
+ example_actor_input : dict [str , Any ] = {ActorInputKeys .startUrls : [example_start_url_input ]}
146
169
response_bodies = iter ((expected_simple_url ,))
147
170
http_client = HttpxHttpClient ()
148
171
with mock .patch .object (http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
@@ -162,43 +185,50 @@ async def test_actor_create_request_list_from_url_additional_inputs() -> None:
162
185
assert request .user_data == expected_user_data
163
186
164
187
165
- @pytest .mark .parametrize ('true_positive' , [
166
- 'http://www.something.com' ,
167
- 'https://www.something.net' ,
168
- 'http://nowww.cz' ,
169
- 'https://with-hypen.com' ,
170
- 'http://number1.com' ,
171
- 'http://www.number.123.abc' ,
172
- 'http://many.dots.com' ,
173
- 'http://a.com' ,
174
- 'http://www.something.com/somethignelse'
175
- 'http://www.something.com/somethignelse.txt' ,
176
- 'http://non-english-chars-áíéåü.com' ,
177
- 'http://www.port.com:1234' ,
178
- 'http://username:password@something.else.com'
179
- ])
188
+ @pytest .mark .parametrize (
189
+ 'true_positive' ,
190
+ [
191
+ 'http://www.something.com' ,
192
+ 'https://www.something.net' ,
193
+ 'http://nowww.cz' ,
194
+ 'https://with-hypen.com' ,
195
+ 'http://number1.com' ,
196
+ 'http://www.number.123.abc' ,
197
+ 'http://many.dots.com' ,
198
+ 'http://a.com' ,
199
+ 'http://www.something.com/somethignelse' , 'http://www.something.com/somethignelse.txt' ,
200
+ 'http://non-english-chars-áíéåü.com' ,
201
+ 'http://www.port.com:1234' ,
202
+ 'http://username:password@something.else.com' ,
203
+ ],
204
+ )
180
205
def test_url_no_commas_regex_true_positives (true_positive : str ) -> None :
181
- example_string = f'Some text { true_positive } some more text'
206
+ example_string = f'Some text { true_positive } some more text'
182
207
matches = list (re .finditer (URL_NO_COMMAS_REGEX , example_string ))
183
208
assert len (matches ) == 1
184
209
assert matches [0 ].group (0 ) == true_positive
185
210
186
- @pytest .mark .parametrize ('false_positive' ,[
187
- 'http://www.a' ,
188
- 'http://a' ,
189
- 'http://a.a' ,
190
- 'http://123.456' ,
191
- 'www.something.com' ,
192
- 'http:www.something.com' ,
193
- ])
211
+
212
+ @pytest .mark .parametrize (
213
+ 'false_positive' ,
214
+ [
215
+ 'http://www.a' ,
216
+ 'http://a' ,
217
+ 'http://a.a' ,
218
+ 'http://123.456' ,
219
+ 'www.something.com' ,
220
+ 'http:www.something.com' ,
221
+ ],
222
+ )
194
223
def test_url_no_commas_regex_false_positives (false_positive : str ) -> None :
195
- example_string = f'Some text { false_positive } some more text'
224
+ example_string = f'Some text { false_positive } some more text'
196
225
matches = list (re .findall (URL_NO_COMMAS_REGEX , example_string ))
197
226
assert len (matches ) == 0
198
227
228
+
199
229
def test_url_no_commas_regex_multi_line () -> None :
200
230
true_positives = ('http://www.something.com' , 'http://www.else.com' )
201
- example_string = 'Some text {} some more text \n Some new line text {} ...' .format (* true_positives )
231
+ example_string = 'Some text {} some more text \n Some new line text {} ...' .format (* true_positives )
202
232
matches = list (re .finditer (URL_NO_COMMAS_REGEX , example_string ))
203
233
assert len (matches ) == 2
204
234
assert {match .group (0 ) for match in matches } == set (true_positives )
0 commit comments