11from __future__ import annotations
22
33import re
4- from typing import Any , Iterator , get_args
5- from unittest import mock
6- from unittest .mock import call
4+ from dataclasses import dataclass
5+ from typing import Any , get_args
76
87import pytest
8+ import respx
9+ from httpx import Response
910
1011from crawlee ._request import UserData
11- from crawlee ._types import HttpHeaders , HttpMethod
12- from crawlee .http_clients import HttpResponse , HttpxHttpClient
12+ from crawlee ._types import HttpMethod
1313
1414from apify .storages ._request_list import URL_NO_COMMAS_REGEX , RequestList
1515
@@ -52,30 +52,9 @@ async def test_request_list_open_request_types(request_method: HttpMethod, optio
5252 assert request .headers .root == optional_input .get ('headers' , {})
5353
5454
55- def _create_dummy_response (read_output : Iterator [str ]) -> HttpResponse :
56- """Create dummy_response that will iterate through read_output when called like dummy_response.read()"""
57-
58- class DummyResponse (HttpResponse ):
59- @property
60- def http_version (self ) -> str :
61- return ''
62-
63- @property
64- def status_code (self ) -> int :
65- return 200
66-
67- @property
68- def headers (self ) -> HttpHeaders :
69- return HttpHeaders ()
70-
71- def read (self ) -> bytes :
72- return next (read_output ).encode ('utf-8' )
73-
74- return DummyResponse ()
75-
76-
77- async def test__request_list_open_from_url_correctly_send_requests () -> None :
78- """Test that injected HttpClient's method send_request is called with properly passed arguments."""
55+ @respx .mock
56+ async def test_request_list_open_from_url_correctly_send_requests () -> None :
57+ """Test that requests are sent to expected urls."""
7958 request_list_sources_input : list [dict [str , Any ]] = [
8059 {
8160 'requestsFromUrl' : 'https://abc.dev/file.txt' ,
@@ -94,65 +73,65 @@ async def test__request_list_open_from_url_correctly_send_requests() -> None:
9473 },
9574 ]
9675
97- mocked_read_outputs = ( '' for url in request_list_sources_input )
76+ routes = [ respx . get ( entry [ 'requestsFromUrl' ]) for entry in request_list_sources_input ]
9877
99- mocked_http_client = mock .Mock (spec_set = HttpxHttpClient )
100- with mock .patch .object (
101- mocked_http_client , 'send_request' , return_value = _create_dummy_response (mocked_read_outputs )
102- ) as mocked_send_request :
103- await RequestList .open (request_list_sources_input = request_list_sources_input , http_client = mocked_http_client )
78+ await RequestList .open (request_list_sources_input = request_list_sources_input )
10479
105- expected_calls = [
106- call (
107- method = 'GET' ,
108- url = example_input ['requestsFromUrl' ],
109- )
110- for example_input in request_list_sources_input
111- ]
112- mocked_send_request .assert_has_calls (expected_calls )
80+ for route in routes :
81+ assert route .called
11382
11483
84+ @respx .mock
11585async def test_request_list_open_from_url () -> None :
11686 """Test that create_request_list is correctly reading urls from remote url sources and also from simple input."""
11787 expected_simple_url = 'https://www.someurl.com'
11888 expected_remote_urls_1 = {'http://www.something.com' , 'https://www.somethingelse.com' , 'http://www.bla.net' }
11989 expected_remote_urls_2 = {'http://www.ok.com' , 'https://www.true-positive.com' }
12090 expected_urls = expected_remote_urls_1 | expected_remote_urls_2 | {expected_simple_url }
121- response_bodies = iter (
122- (
91+
92+ @dataclass
93+ class MockedUrlInfo :
94+ url : str
95+ response_text : str
96+
97+ mocked_urls = (
98+ MockedUrlInfo (
99+ 'https://abc.dev/file.txt' ,
123100 'blablabla{} more blablabla{} , even more blablabla. {} ' .format (* expected_remote_urls_1 ),
124- 'some stuff{} more stuff{} www.falsepositive www.false_positive.com' .format (* expected_remote_urls_2 ),
125- )
101+ ),
102+ MockedUrlInfo (
103+ 'https://www.abc.dev/file2' ,
104+ 'some stuff{} more stuff{} www.false_positive.com' .format (* expected_remote_urls_2 ),
105+ ),
126106 )
127107
128108 request_list_sources_input = [
129109 {
130- 'requestsFromUrl' : 'https://abc.dev/file.txt' ,
110+ 'requestsFromUrl' : mocked_urls [ 0 ]. url ,
131111 'method' : 'GET' ,
132112 },
133113 {'url' : expected_simple_url , 'method' : 'GET' },
134114 {
135- 'requestsFromUrl' : 'https://www.abc.dev/file2' ,
115+ 'requestsFromUrl' : mocked_urls [ 1 ]. url ,
136116 'method' : 'GET' ,
137117 },
138118 ]
119+ for mocked_url in mocked_urls :
120+ respx .get (mocked_url .url ).mock (return_value = Response (200 , text = mocked_url .response_text ))
139121
140- mocked_http_client = mock .Mock (spec_set = HttpxHttpClient )
141- with mock .patch .object (mocked_http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
142- request_list = await RequestList .open (
143- request_list_sources_input = request_list_sources_input , http_client = mocked_http_client
144- )
145- generated_requests = []
146- while request := await request_list .fetch_next_request ():
147- generated_requests .append (request )
122+ request_list = await RequestList .open (request_list_sources_input = request_list_sources_input )
123+ generated_requests = []
124+ while request := await request_list .fetch_next_request ():
125+ generated_requests .append (request )
148126
149127 # Check correctly created requests' urls in request list
150128 assert {generated_request .url for generated_request in generated_requests } == expected_urls
151129
152130
131+ @respx .mock
153132async def test_request_list_open_from_url_additional_inputs () -> None :
154133 """Test that all generated request properties are correctly populated from input values."""
155- expected_simple_url = 'https://www.someurl.com'
134+ expected_url = 'https://www.someurl.com'
156135 example_start_url_input : dict [str , Any ] = {
157136 'requestsFromUrl' : 'https://crawlee.dev/file.txt' ,
158137 'method' : 'POST' ,
@@ -161,17 +140,14 @@ async def test_request_list_open_from_url_additional_inputs() -> None:
161140 'userData' : {'another_key' : 'another_value' },
162141 }
163142
164- response_bodies = iter ((expected_simple_url ,))
165- mocked_http_client = mock .Mock (spec_set = HttpxHttpClient )
166- with mock .patch .object (mocked_http_client , 'send_request' , return_value = _create_dummy_response (response_bodies )):
167- request_list = await RequestList .open (
168- request_list_sources_input = [example_start_url_input ], http_client = mocked_http_client
169- )
170- request = await request_list .fetch_next_request ()
143+ respx .get (example_start_url_input ['requestsFromUrl' ]).mock (return_value = Response (200 , text = expected_url ))
144+
145+ request_list = await RequestList .open (request_list_sources_input = [example_start_url_input ])
146+ request = await request_list .fetch_next_request ()
171147
172148 # Check all properties correctly created for request
173149 assert request
174- assert request .url == expected_simple_url
150+ assert request .url == expected_url
175151 assert request .method == example_start_url_input ['method' ]
176152 assert request .headers .root == example_start_url_input ['headers' ]
177153 assert request .payload == str (example_start_url_input ['payload' ]).encode ('utf-8' )
0 commit comments