22
33import re
44from dataclasses import dataclass
5- from typing import Any , get_args
5+ from typing import TYPE_CHECKING , Any , get_args
6+ from unittest .mock import Mock
67
78import pytest
8- import respx
9- from httpx import Response
9+ from yarl import URL
1010
1111from crawlee ._request import UserData
1212from crawlee ._types import HttpMethod
1313
1414from apify .request_loaders import ApifyRequestList
1515from apify .request_loaders ._apify_request_list import URL_NO_COMMAS_REGEX
1616
17+ if TYPE_CHECKING :
18+ from pytest_httpserver import HTTPServer
19+ from werkzeug import Request , Response
20+
1721
1822@pytest .mark .parametrize (
1923 argnames = 'request_method' ,
@@ -68,37 +72,48 @@ async def test_request_list_open_request_types(
6872 assert request .headers .root == optional_input .get ('headers' , {})
6973
7074
async def test_request_list_open_from_url_correctly_send_requests(httpserver: HTTPServer) -> None:
    """Test that every `requestsFromUrl` source is downloaded from its expected URL.

    Each source URL points at the local `httpserver` fixture. A one-shot GET handler is
    registered per source, and a post hook marks the matching `Mock` as called so the test
    can verify afterwards that every source was actually requested.
    """
    request_list_sources_input: list[dict[str, Any]] = [
        {
            'requestsFromUrl': httpserver.url_for('/file.txt'),
            'method': 'GET',
        },
        {
            'requestsFromUrl': httpserver.url_for('/file2'),
            'method': 'PUT',
        },
        {
            'requestsFromUrl': httpserver.url_for('/something'),
            'method': 'POST',
            'headers': {'key': 'value'},
            'payload': 'some_payload',
            'userData': {'another_key': 'another_value'},
        },
    ]

    # One mock per source URL; the post hook below calls it when the server handles that URL.
    routes: dict[str, Mock] = {}

    def request_handler(request: Request, response: Response) -> Response:
        """Record that the requested URL was hit and pass the response through unchanged."""
        routes[request.url]()
        return response

    for entry in request_list_sources_input:
        source_url = entry['requestsFromUrl']
        routes[source_url] = Mock()
        path = str(URL(source_url).path)
        # Match GET only: the entry's 'method' is not the method used to download the source
        # file (the previous respx-based version of this test registered GET routes for all
        # entries). Without `method=`, the handler would accept any HTTP method.
        httpserver.expect_oneshot_request(path, method='GET').with_post_hook(request_handler).respond_with_data(
            status=200
        )

    await ApifyRequestList.open(request_list_sources_input=request_list_sources_input)

    # Guards against two sources collapsing into a single dictionary key.
    assert len(routes) == len(request_list_sources_input)

    for entry in request_list_sources_input:
        source_url = entry['requestsFromUrl']
        assert source_url in routes
        assert routes[source_url].called
99114
100- @ respx . mock
101- async def test_request_list_open_from_url () -> None :
115+
116+ async def test_request_list_open_from_url (httpserver : HTTPServer ) -> None :
102117 """Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
103118 expected_simple_url = 'https://www.someurl.com'
104119 expected_remote_urls_1 = {'http://www.something.com' , 'https://www.somethingelse.com' , 'http://www.bla.net' }
@@ -112,11 +127,11 @@ class MockedUrlInfo:
112127
113128 mocked_urls = (
114129 MockedUrlInfo (
115- 'https://abc.dev/ file.txt' ,
130+ httpserver . url_for ( '/ file.txt') ,
116131 'blablabla{} more blablabla{} , even more blablabla. {} ' .format (* expected_remote_urls_1 ),
117132 ),
118133 MockedUrlInfo (
119- 'https://www.abc.dev/ file2' ,
134+ httpserver . url_for ( '/ file2') ,
120135 'some stuff{} more stuff{} www.false_positive.com' .format (* expected_remote_urls_2 ),
121136 ),
122137 )
@@ -133,7 +148,8 @@ class MockedUrlInfo:
133148 },
134149 ]
135150 for mocked_url in mocked_urls :
136- respx .get (mocked_url .url ).mock (return_value = Response (200 , text = mocked_url .response_text ))
151+ path = str (URL (mocked_url .url ).path )
152+ httpserver .expect_oneshot_request (path ).respond_with_data (status = 200 , response_data = mocked_url .response_text )
137153
138154 request_list = await ApifyRequestList .open (request_list_sources_input = request_list_sources_input )
139155 generated_requests = []
@@ -144,23 +160,20 @@ class MockedUrlInfo:
144160 assert {generated_request .url for generated_request in generated_requests } == expected_urls
145161
146162
147- @respx .mock
148- async def test_request_list_open_from_url_additional_inputs () -> None :
163+ async def test_request_list_open_from_url_additional_inputs (httpserver : HTTPServer ) -> None :
149164 """Test that all generated request properties are correctly populated from input values."""
150165 expected_url = 'https://www.someurl.com'
151166 example_start_url_input : dict [str , Any ] = {
152- 'requestsFromUrl' : 'https://crawlee.dev/ file.txt' ,
167+ 'requestsFromUrl' : httpserver . url_for ( '/ file.txt') ,
153168 'method' : 'POST' ,
154169 'headers' : {'key' : 'value' },
155170 'payload' : 'some_payload' ,
156171 'userData' : {'another_key' : 'another_value' },
157172 }
158-
159- respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
173+ httpserver .expect_oneshot_request ('/file.txt' ).respond_with_data (status = 200 , response_data = expected_url )
160174
161175 request_list = await ApifyRequestList .open (request_list_sources_input = [example_start_url_input ])
162176 request = await request_list .fetch_next_request ()
163-
164177 # Check all properties correctly created for request
165178 assert request
166179 assert request .url == expected_url
0 commit comments