1
1
from __future__ import annotations
2
2
3
3
import re
4
- from typing import Any , Iterator , get_args
5
- from unittest import mock
6
- from unittest .mock import call
4
+ from dataclasses import dataclass
5
+ from typing import Any , get_args
7
6
8
7
import pytest
8
+ import respx
9
+ from httpx import Response
9
10
10
11
from crawlee ._request import UserData
11
- from crawlee ._types import HttpHeaders , HttpMethod
12
- from crawlee .http_clients import HttpResponse , HttpxHttpClient
12
+ from crawlee ._types import HttpMethod
13
13
14
14
from apify .storages ._request_list import URL_NO_COMMAS_REGEX , RequestList
15
15
@@ -52,30 +52,9 @@ async def test_request_list_open_request_types(request_method: HttpMethod, optio
52
52
assert request .headers .root == optional_input .get ('headers' , {})
53
53
54
54
55
- def _create_dummy_response (read_output : Iterator [str ]) -> HttpResponse :
56
- """Create dummy_response that will iterate through read_output when called like dummy_response.read()"""
57
-
58
- class DummyResponse (HttpResponse ):
59
- @property
60
- def http_version (self ) -> str :
61
- return ''
62
-
63
- @property
64
- def status_code (self ) -> int :
65
- return 200
66
-
67
- @property
68
- def headers (self ) -> HttpHeaders :
69
- return HttpHeaders ()
70
-
71
- def read (self ) -> bytes :
72
- return next (read_output ).encode ('utf-8' )
73
-
74
- return DummyResponse ()
75
-
76
-
77
- async def test__request_list_open_from_url_correctly_send_requests () -> None :
78
- """Test that injected HttpClient's method send_request is called with properly passed arguments."""
55
+ @respx .mock
56
+ async def test_request_list_open_from_url_correctly_send_requests () -> None :
57
+ """Test that requests are sent to expected urls."""
79
58
request_list_sources_input : list [dict [str , Any ]] = [
80
59
{
81
60
'requestsFromUrl' : 'https://abc.dev/file.txt' ,
@@ -94,65 +73,65 @@ async def test__request_list_open_from_url_correctly_send_requests() -> None:
94
73
},
95
74
]
96
75
97
- mocked_read_outputs = ( '' for url in request_list_sources_input )
76
+ routes = [ respx . get ( entry [ 'requestsFromUrl' ]) for entry in request_list_sources_input ]
98
77
99
- mocked_http_client = mock .Mock (spec_set = HttpxHttpClient )
100
- with mock .patch .object (
101
- mocked_http_client , 'send_request' , return_value = _create_dummy_response (mocked_read_outputs )
102
- ) as mocked_send_request :
103
- await RequestList .open (request_list_sources_input = request_list_sources_input , http_client = mocked_http_client )
78
+ await RequestList .open (request_list_sources_input = request_list_sources_input )
104
79
105
- expected_calls = [
106
- call (
107
- method = 'GET' ,
108
- url = example_input ['requestsFromUrl' ],
109
- )
110
- for example_input in request_list_sources_input
111
- ]
112
- mocked_send_request .assert_has_calls (expected_calls )
80
+ for route in routes :
81
+ assert route .called
113
82
114
83
84
@respx.mock
async def test_request_list_open_from_url() -> None:
    """Check that `RequestList.open` combines URLs found in remote sources with directly listed URLs."""
    plain_url = 'https://www.someurl.com'
    first_remote_urls = {'http://www.something.com', 'https://www.somethingelse.com', 'http://www.bla.net'}
    second_remote_urls = {'http://www.ok.com', 'https://www.true-positive.com'}

    @dataclass
    class RemoteSource:
        # Address of the mocked remote file.
        url: str
        # Body served for that address.
        response_text: str

    first_source = RemoteSource(
        url='https://abc.dev/file.txt',
        response_text='blablabla{} more blablabla{} , even more blablabla. {} '.format(*first_remote_urls),
    )
    # This body also carries `www.false_positive.com`, which is not part of the expected URL set below.
    second_source = RemoteSource(
        url='https://www.abc.dev/file2',
        response_text='some stuff{} more stuff{} www.false_positive.com'.format(*second_remote_urls),
    )

    # Serve the prepared bodies for GET requests to the remote source addresses.
    for source in (first_source, second_source):
        respx.get(source.url).mock(return_value=Response(200, text=source.response_text))

    request_list = await RequestList.open(
        request_list_sources_input=[
            {'requestsFromUrl': first_source.url, 'method': 'GET'},
            {'url': plain_url, 'method': 'GET'},
            {'requestsFromUrl': second_source.url, 'method': 'GET'},
        ]
    )

    # Drain the list; only the URLs of the produced requests matter for the comparison.
    fetched_urls = set()
    while fetched := await request_list.fetch_next_request():
        fetched_urls.add(fetched.url)

    # Check correctly created requests' urls in request list
    assert fetched_urls == first_remote_urls | second_remote_urls | {plain_url}
151
129
152
130
131
+ @respx .mock
153
132
async def test_request_list_open_from_url_additional_inputs () -> None :
154
133
"""Test that all generated request properties are correctly populated from input values."""
155
- expected_simple_url = 'https://www.someurl.com'
134
+ expected_url = 'https://www.someurl.com'
156
135
example_start_url_input : dict [str , Any ] = {
157
136
'requestsFromUrl' : 'https://crawlee.dev/file.txt' ,
158
137
'method' : 'POST' ,
@@ -161,17 +140,14 @@ async def test_request_list_open_from_url_additional_inputs() -> None:
161
140
'userData' : {'another_key' : 'another_value' },
162
141
}
163
142
164
- response_bodies = iter ((expected_simple_url ,))
165
- mocked_http_client = mock .Mock (spec_set = HttpxHttpClient )
166
- with mock .patch .object (mocked_http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
167
- request_list = await RequestList .open (
168
- request_list_sources_input = [example_start_url_input ], http_client = mocked_http_client
169
- )
170
- request = await request_list .fetch_next_request ()
143
+ respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
144
+
145
+ request_list = await RequestList .open (request_list_sources_input = [example_start_url_input ])
146
+ request = await request_list .fetch_next_request ()
171
147
172
148
# Check all properties correctly created for request
173
149
assert request
174
- assert request .url == expected_simple_url
150
+ assert request .url == expected_url
175
151
assert request .method == example_start_url_input ['method' ]
176
152
assert request .headers .root == example_start_url_input ['headers' ]
177
153
assert request .payload == str (example_start_url_input ['payload' ]).encode ('utf-8' )
0 commit comments