2
2
3
3
import re
4
4
from dataclasses import dataclass
5
- from typing import Any , get_args
5
+ from typing import TYPE_CHECKING , Any , get_args
6
+ from unittest .mock import Mock
6
7
7
8
import pytest
8
- import respx
9
- from httpx import Response
9
+ from yarl import URL
10
10
11
11
from crawlee ._request import UserData
12
12
from crawlee ._types import HttpMethod
13
13
14
14
from apify .request_loaders import ApifyRequestList
15
15
from apify .request_loaders ._apify_request_list import URL_NO_COMMAS_REGEX
16
16
17
+ if TYPE_CHECKING :
18
+ from pytest_httpserver import HTTPServer
19
+ from werkzeug import Request , Response
20
+
17
21
18
22
@pytest .mark .parametrize (
19
23
argnames = 'request_method' ,
@@ -68,37 +72,48 @@ async def test_request_list_open_request_types(
68
72
assert request .headers .root == optional_input .get ('headers' , {})
69
73
70
74
71
- @respx .mock
72
- async def test_request_list_open_from_url_correctly_send_requests () -> None :
75
+ async def test_request_list_open_from_url_correctly_send_requests (httpserver : HTTPServer ) -> None :
73
76
"""Test that requests are sent to expected urls."""
74
77
request_list_sources_input : list [dict [str , Any ]] = [
75
78
{
76
- 'requestsFromUrl' : 'https://abc.dev/ file.txt' ,
79
+ 'requestsFromUrl' : httpserver . url_for ( '/ file.txt') ,
77
80
'method' : 'GET' ,
78
81
},
79
82
{
80
- 'requestsFromUrl' : 'https://www.abc.dev/ file2' ,
83
+ 'requestsFromUrl' : httpserver . url_for ( '/ file2') ,
81
84
'method' : 'PUT' ,
82
85
},
83
86
{
84
- 'requestsFromUrl' : 'https://www. something.som' ,
87
+ 'requestsFromUrl' : httpserver . url_for ( '/ something' ) ,
85
88
'method' : 'POST' ,
86
89
'headers' : {'key' : 'value' },
87
90
'payload' : 'some_payload' ,
88
91
'userData' : {'another_key' : 'another_value' },
89
92
},
90
93
]
91
94
92
- routes = [respx .get (entry ['requestsFromUrl' ]) for entry in request_list_sources_input ]
95
+ routes : dict [str , Mock ] = {}
96
+
97
+ def request_handler (request : Request , response : Response ) -> Response :
98
+ routes [request .url ]()
99
+ return response
100
+
101
+ for entry in request_list_sources_input :
102
+ path = str (URL (entry ['requestsFromUrl' ]).path )
103
+ httpserver .expect_oneshot_request (path ).with_post_hook (request_handler ).respond_with_data (status = 200 )
104
+ routes [entry ['requestsFromUrl' ]] = Mock ()
93
105
94
106
await ApifyRequestList .open (request_list_sources_input = request_list_sources_input )
95
107
96
- for route in routes :
97
- assert route .called
108
+ assert len (routes ) == len (request_list_sources_input )
98
109
110
+ for entity in request_list_sources_input :
111
+ entity_url = entity ['requestsFromUrl' ]
112
+ assert entity_url in routes
113
+ assert routes [entity_url ].called
99
114
100
- @ respx . mock
101
- async def test_request_list_open_from_url () -> None :
115
+
116
+ async def test_request_list_open_from_url (httpserver : HTTPServer ) -> None :
102
117
"""Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
103
118
expected_simple_url = 'https://www.someurl.com'
104
119
expected_remote_urls_1 = {'http://www.something.com' , 'https://www.somethingelse.com' , 'http://www.bla.net' }
@@ -112,11 +127,11 @@ class MockedUrlInfo:
112
127
113
128
mocked_urls = (
114
129
MockedUrlInfo (
115
- 'https://abc.dev/ file.txt' ,
130
+ httpserver . url_for ( '/ file.txt') ,
116
131
'blablabla{} more blablabla{} , even more blablabla. {} ' .format (* expected_remote_urls_1 ),
117
132
),
118
133
MockedUrlInfo (
119
- 'https://www.abc.dev/ file2' ,
134
+ httpserver . url_for ( '/ file2') ,
120
135
'some stuff{} more stuff{} www.false_positive.com' .format (* expected_remote_urls_2 ),
121
136
),
122
137
)
@@ -133,7 +148,8 @@ class MockedUrlInfo:
133
148
},
134
149
]
135
150
for mocked_url in mocked_urls :
136
- respx .get (mocked_url .url ).mock (return_value = Response (200 , text = mocked_url .response_text ))
151
+ path = str (URL (mocked_url .url ).path )
152
+ httpserver .expect_oneshot_request (path ).respond_with_data (status = 200 , response_data = mocked_url .response_text )
137
153
138
154
request_list = await ApifyRequestList .open (request_list_sources_input = request_list_sources_input )
139
155
generated_requests = []
@@ -144,23 +160,20 @@ class MockedUrlInfo:
144
160
assert {generated_request .url for generated_request in generated_requests } == expected_urls
145
161
146
162
147
- @respx .mock
148
- async def test_request_list_open_from_url_additional_inputs () -> None :
163
+ async def test_request_list_open_from_url_additional_inputs (httpserver : HTTPServer ) -> None :
149
164
"""Test that all generated request properties are correctly populated from input values."""
150
165
expected_url = 'https://www.someurl.com'
151
166
example_start_url_input : dict [str , Any ] = {
152
- 'requestsFromUrl' : 'https://crawlee.dev/ file.txt' ,
167
+ 'requestsFromUrl' : httpserver . url_for ( '/ file.txt') ,
153
168
'method' : 'POST' ,
154
169
'headers' : {'key' : 'value' },
155
170
'payload' : 'some_payload' ,
156
171
'userData' : {'another_key' : 'another_value' },
157
172
}
158
-
159
- respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
173
+ httpserver .expect_oneshot_request ('/file.txt' ).respond_with_data (status = 200 , response_data = expected_url )
160
174
161
175
request_list = await ApifyRequestList .open (request_list_sources_input = [example_start_url_input ])
162
176
request = await request_list .fetch_next_request ()
163
-
164
177
# Check all properties correctly created for request
165
178
assert request
166
179
assert request .url == expected_url
0 commit comments