2
2
3
3
import pytest
4
4
5
- from crawlee import service_locator
5
+ from crawlee import Request , service_locator
6
+ from crawlee ._types import BasicCrawlingContext
6
7
from crawlee .configuration import Configuration as CrawleeConfiguration
7
8
from crawlee .crawlers import BasicCrawler
8
9
from crawlee .errors import ServiceConflictError
@@ -77,14 +78,14 @@ async def test_existing_apify_config_throws_error_when_set_in_actor() -> None:
77
78
78
79
79
80
async def test_setting_config_after_actor_raises_exception () -> None :
80
- """Test that passing setting configuration in service locator after actor wa created raises an exception."""
81
+ """Test that setting configuration in service locator after actor was created raises an exception."""
81
82
async with Actor ():
82
83
with pytest .raises (ServiceConflictError ):
83
84
service_locator .set_configuration (ApifyConfiguration ())
84
85
85
86
86
87
async def test_actor_using_input_configuration () -> None :
87
- """Test that passing setting configuration in service locator after actor wa created raises an exception."""
88
+ """Test that an Actor can be created with an explicitly passed configuration."""
88
89
apify_config = ApifyConfiguration ()
89
90
async with Actor (configuration = apify_config ):
90
91
pass
@@ -111,19 +112,37 @@ async def test_crawler_implicit_configuration() -> None:
111
112
assert Actor .config is service_locator .get_configuration () is crawler ._service_locator .get_configuration ()
112
113
113
114
114
- async def test_crawlers_own_configuration () -> None :
115
+ async def test_crawlers_own_configuration (tmp_path : Path ) -> None :
115
116
"""Test that crawlers can use own configurations without crashing."""
116
117
config_actor = ApifyConfiguration ()
117
- apify_crawler_1 = ApifyConfiguration ()
118
- apify_crawler_2 = ApifyConfiguration ()
118
+ dir_1 = tmp_path / 'dir_1'
119
+ dir_2 = tmp_path / 'dir_2'
120
+ config_crawler_1 = ApifyConfiguration ()
121
+ config_actor .storage_dir = str (dir_1 )
122
+ config_crawler_2 = ApifyConfiguration ()
123
+ config_crawler_2 .storage_dir = str (dir_2 )
119
124
120
125
async with Actor (configuration = config_actor ):
121
- crawler_1 = BasicCrawler (configuration = apify_crawler_1 )
122
- crawler_2 = BasicCrawler (configuration = apify_crawler_2 )
126
+
127
+ async def request_handler (context : BasicCrawlingContext ) -> None :
128
+ Actor .log .info (f'Processing: { context .request .url } ' )
129
+
130
+ crawler_1 = BasicCrawler (configuration = config_crawler_1 , request_handler = request_handler )
131
+ crawler_2 = BasicCrawler (configuration = config_crawler_2 , request_handler = request_handler )
132
+ await crawler_1 .add_requests ([Request .from_url (url = 'http://example.com/1' )])
133
+ await crawler_2 .add_requests (
134
+ [Request .from_url (url = 'http://example.com/2' ), Request .from_url (url = 'http://example.com/3' )]
135
+ )
136
+
137
+ await crawler_1 .run ()
138
+ await crawler_2 .run ()
123
139
124
140
assert service_locator .get_configuration () is config_actor
125
- assert crawler_1 ._service_locator .get_configuration () is apify_crawler_1
126
- assert crawler_2 ._service_locator .get_configuration () is apify_crawler_2
141
+ assert crawler_1 ._service_locator .get_configuration () is config_crawler_1
142
+ assert crawler_2 ._service_locator .get_configuration () is config_crawler_2
143
+
144
+ assert crawler_1 .statistics .state .requests_total == 1
145
+ assert crawler_2 .statistics .state .requests_total == 2
127
146
128
147
129
148
async def test_crawler_global_configuration () -> None :
@@ -163,8 +182,8 @@ async def test_storage_retrieved_is_different_with_different_config(tmp_path: Pa
163
182
164
183
165
184
async def test_storage_retrieved_is_same_with_equivalent_config () -> None :
166
- """Test that retrieving storage depends on used configuration. If two same configuration(even if they are different
167
- instances) are used it returns same storage."""
185
+ """Test that retrieving storage depends on used configuration. If two equivalent configurations (even if they are
186
+ different instances) are used, it returns the same storage."""
168
187
config_actor = ApifyConfiguration ()
169
188
apify_crawler = ApifyConfiguration ()
170
189
0 commit comments