22
33import re
44from dataclasses import dataclass
5- from typing import Any , get_args
5+ from typing import TYPE_CHECKING , Any , get_args
6+ from unittest .mock import Mock
67
78import pytest
8- import respx
9- from httpx import Response
9+ from yarl import URL
1010
1111from crawlee ._request import UserData
1212from crawlee ._types import HttpMethod
1313
1414from apify .storages ._request_list import URL_NO_COMMAS_REGEX , RequestList
1515
16+ if TYPE_CHECKING :
17+ from pytest_httpserver import HTTPServer
18+ from werkzeug import Request , Response
19+
1620
1721@pytest .mark .parametrize (
1822 argnames = 'request_method' ,
@@ -67,37 +71,48 @@ async def test_request_list_open_request_types(
6771 assert request .headers .root == optional_input .get ('headers' , {})
6872
6973
async def test_request_list_open_from_url_correctly_send_requests(httpserver: HTTPServer) -> None:
    """Test that requests are sent to expected urls.

    Every `requestsFromUrl` source points at the local `httpserver` fixture. Each served path gets its own `Mock`,
    which the hook registered through `with_post_hook` calls, so the test can verify afterwards that every source
    URL was actually fetched.
    """
    request_list_sources_input: list[dict[str, Any]] = [
        {
            'requestsFromUrl': httpserver.url_for('/file.txt'),
            'method': 'GET',
        },
        {
            'requestsFromUrl': httpserver.url_for('/file2'),
            'method': 'PUT',
        },
        {
            'requestsFromUrl': httpserver.url_for('/something'),
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'userData': {'another_key': 'another_value'},
        },
    ]

    # One call tracker per served path. The trackers are keyed by path (the exact string registered with
    # `expect_oneshot_request`) rather than by the full URL: `request.url` is rebuilt on the server side, so it is
    # not guaranteed to equal the `url_for(...)` string byte-for-byte.
    routes: dict[str, Mock] = {}

    def request_handler(request: Request, response: Response) -> Response:
        """Mark the route matching the request path as called and pass the response through unchanged."""
        routes[request.path]()
        return response

    for entry in request_list_sources_input:
        path = str(URL(entry['requestsFromUrl']).path)
        routes[path] = Mock()
        httpserver.expect_oneshot_request(path).with_post_hook(request_handler).respond_with_data(status=200)

    await RequestList.open(request_list_sources_input=request_list_sources_input)

    # Guards against two sources accidentally sharing a path, which would collapse into a single tracker.
    assert len(routes) == len(request_list_sources_input)

    for path, route in routes.items():
        assert route.called, f'No request was received for {path}'
98113
99- @ respx . mock
100- async def test_request_list_open_from_url () -> None :
114+
115+ async def test_request_list_open_from_url (httpserver : HTTPServer ) -> None :
101116 """Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
102117 expected_simple_url = 'https://www.someurl.com'
103118 expected_remote_urls_1 = {'http://www.something.com' , 'https://www.somethingelse.com' , 'http://www.bla.net' }
@@ -111,11 +126,11 @@ class MockedUrlInfo:
111126
112127 mocked_urls = (
113128 MockedUrlInfo (
114- 'https://abc.dev/ file.txt' ,
129+ httpserver . url_for ( '/ file.txt') ,
115130 'blablabla{} more blablabla{} , even more blablabla. {} ' .format (* expected_remote_urls_1 ),
116131 ),
117132 MockedUrlInfo (
118- 'https://www.abc.dev/ file2' ,
133+ httpserver . url_for ( '/ file2') ,
119134 'some stuff{} more stuff{} www.false_positive.com' .format (* expected_remote_urls_2 ),
120135 ),
121136 )
@@ -132,7 +147,8 @@ class MockedUrlInfo:
132147 },
133148 ]
134149 for mocked_url in mocked_urls :
135- respx .get (mocked_url .url ).mock (return_value = Response (200 , text = mocked_url .response_text ))
150+ path = str (URL (mocked_url .url ).path )
151+ httpserver .expect_oneshot_request (path ).respond_with_data (status = 200 , response_data = mocked_url .response_text )
136152
137153 request_list = await RequestList .open (request_list_sources_input = request_list_sources_input )
138154 generated_requests = []
@@ -143,23 +159,20 @@ class MockedUrlInfo:
143159 assert {generated_request .url for generated_request in generated_requests } == expected_urls
144160
145161
146- @respx .mock
147- async def test_request_list_open_from_url_additional_inputs () -> None :
162+ async def test_request_list_open_from_url_additional_inputs (httpserver : HTTPServer ) -> None :
148163 """Test that all generated request properties are correctly populated from input values."""
149164 expected_url = 'https://www.someurl.com'
150165 example_start_url_input : dict [str , Any ] = {
151- 'requestsFromUrl' : 'https://crawlee.dev/ file.txt' ,
166+ 'requestsFromUrl' : httpserver . url_for ( '/ file.txt') ,
152167 'method' : 'POST' ,
153168 'headers' : {'key' : 'value' },
154169 'payload' : 'some_payload' ,
155170 'userData' : {'another_key' : 'another_value' },
156171 }
157-
158- respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
172+ httpserver .expect_oneshot_request ('/file.txt' ).respond_with_data (status = 200 , response_data = expected_url )
159173
160174 request_list = await RequestList .open (request_list_sources_input = [example_start_url_input ])
161175 request = await request_list .fetch_next_request ()
162-
163176 # Check all properties correctly created for request
164177 assert request
165178 assert request .url == expected_url
0 commit comments