2
2
3
3
import re
4
4
from dataclasses import dataclass
5
- from typing import Any , get_args
5
+ from typing import TYPE_CHECKING , Any , get_args
6
+ from unittest .mock import Mock
6
7
7
8
import pytest
8
- import respx
9
- from httpx import Response
9
+ from yarl import URL
10
10
11
11
from crawlee ._request import UserData
12
12
from crawlee ._types import HttpMethod
13
13
14
14
from apify .storages ._request_list import URL_NO_COMMAS_REGEX , RequestList
15
15
16
+ if TYPE_CHECKING :
17
+ from pytest_httpserver import HTTPServer
18
+ from werkzeug import Request , Response
19
+
16
20
17
21
@pytest .mark .parametrize (
18
22
argnames = 'request_method' ,
@@ -67,37 +71,48 @@ async def test_request_list_open_request_types(
67
71
assert request .headers .root == optional_input .get ('headers' , {})
68
72
69
73
70
async def test_request_list_open_from_url_correctly_send_requests(httpserver: HTTPServer) -> None:
    """Test that requests are sent to expected urls."""
    request_list_sources_input: list[dict[str, Any]] = [
        {
            'requestsFromUrl': httpserver.url_for('/file.txt'),
            'method': 'GET',
        },
        {
            'requestsFromUrl': httpserver.url_for('/file2'),
            'method': 'PUT',
        },
        {
            'requestsFromUrl': httpserver.url_for('/something'),
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'userData': {'another_key': 'another_value'},
        },
    ]

    # One Mock per source URL; the post hook below calls it when the local
    # server handles a request for that URL, so `.called` records the hit.
    routes: dict[str, Mock] = {}

    def request_handler(request: Request, response: Response) -> Response:
        # Mark the requested URL as visited and pass the response through unchanged.
        routes[request.url]()
        return response

    for entry in request_list_sources_input:
        # Handlers are registered by path, while `routes` is keyed by the full URL.
        path = str(URL(entry['requestsFromUrl']).path)
        httpserver.expect_oneshot_request(path).with_post_hook(request_handler).respond_with_data(status=200)
        routes[entry['requestsFromUrl']] = Mock()

    await RequestList.open(request_list_sources_input=request_list_sources_input)

    assert len(routes) == len(request_list_sources_input)

    for entity in request_list_sources_input:
        entity_url = entity['requestsFromUrl']
        assert entity_url in routes
        assert routes[entity_url].called
98
113
99
- @ respx . mock
100
- async def test_request_list_open_from_url () -> None :
114
+
115
+ async def test_request_list_open_from_url (httpserver : HTTPServer ) -> None :
101
116
"""Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
102
117
expected_simple_url = 'https://www.someurl.com'
103
118
expected_remote_urls_1 = {'http://www.something.com' , 'https://www.somethingelse.com' , 'http://www.bla.net' }
@@ -111,11 +126,11 @@ class MockedUrlInfo:
111
126
112
127
mocked_urls = (
113
128
MockedUrlInfo (
114
- 'https://abc.dev/ file.txt' ,
129
+ httpserver . url_for ( '/ file.txt') ,
115
130
'blablabla{} more blablabla{} , even more blablabla. {} ' .format (* expected_remote_urls_1 ),
116
131
),
117
132
MockedUrlInfo (
118
- 'https://www.abc.dev/ file2' ,
133
+ httpserver . url_for ( '/ file2') ,
119
134
'some stuff{} more stuff{} www.false_positive.com' .format (* expected_remote_urls_2 ),
120
135
),
121
136
)
@@ -132,7 +147,8 @@ class MockedUrlInfo:
132
147
},
133
148
]
134
149
for mocked_url in mocked_urls :
135
- respx .get (mocked_url .url ).mock (return_value = Response (200 , text = mocked_url .response_text ))
150
+ path = str (URL (mocked_url .url ).path )
151
+ httpserver .expect_oneshot_request (path ).respond_with_data (status = 200 , response_data = mocked_url .response_text )
136
152
137
153
request_list = await RequestList .open (request_list_sources_input = request_list_sources_input )
138
154
generated_requests = []
@@ -143,23 +159,20 @@ class MockedUrlInfo:
143
159
assert {generated_request .url for generated_request in generated_requests } == expected_urls
144
160
145
161
146
- @respx .mock
147
- async def test_request_list_open_from_url_additional_inputs () -> None :
162
+ async def test_request_list_open_from_url_additional_inputs (httpserver : HTTPServer ) -> None :
148
163
"""Test that all generated request properties are correctly populated from input values."""
149
164
expected_url = 'https://www.someurl.com'
150
165
example_start_url_input : dict [str , Any ] = {
151
- 'requestsFromUrl' : 'https://crawlee.dev/ file.txt' ,
166
+ 'requestsFromUrl' : httpserver . url_for ( '/ file.txt') ,
152
167
'method' : 'POST' ,
153
168
'headers' : {'key' : 'value' },
154
169
'payload' : 'some_payload' ,
155
170
'userData' : {'another_key' : 'another_value' },
156
171
}
157
-
158
- respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
172
+ httpserver .expect_oneshot_request ('/file.txt' ).respond_with_data (status = 200 , response_data = expected_url )
159
173
160
174
request_list = await RequestList .open (request_list_sources_input = [example_start_url_input ])
161
175
request = await request_list .fetch_next_request ()
162
-
163
176
# Check all properties correctly created for request
164
177
assert request
165
178
assert request .url == expected_url
0 commit comments