@@ -78,7 +78,10 @@ def test_finds_rel_next(self):
7878 result .text = '<html><body><a href="/page2" rel="next">Next</a></body></html>'
7979 result .final_url = "https://example.com/"
8080
81- # Mock parser
81+ # Mock css_list to return href
82+ result .extractor .css_list .return_value = ["/page2" ]
83+
84+ # Mock parser for fallback
8285 anchor = MagicMock ()
8386 anchor .html = '<a href="/page2" rel="next">Next</a>'
8487 anchor .attributes = {"href" : "/page2" }
@@ -92,6 +95,10 @@ def test_finds_next_class(self):
9295 result .text = '<html><a href="/page2" class="pagination-next">Next</a></html>'
9396 result .final_url = "https://example.com/"
9497
98+ # Mock css_list to return href
99+ result .extractor .css_list .return_value = ["/page2" ]
100+
101+ # Mock parser for fallback
95102 anchor = MagicMock ()
96103 anchor .html = '<a href="/page2" class="pagination-next">Next</a>'
97104 anchor .attributes = {"href" : "/page2" }
@@ -105,6 +112,10 @@ def test_finds_next_text(self):
105112 result .text = '<html><a href="/page2">next</a></html>'
106113 result .final_url = "https://example.com/"
107114
115+ # Mock css_list to return empty for common selectors, fall back to regex
116+ result .extractor .css_list .return_value = []
117+
118+ # Mock parser for fallback
108119 anchor = MagicMock ()
109120 anchor .html = '<a href="/page2">next</a>'
110121 anchor .attributes = {"href" : "/page2" }
@@ -118,6 +129,10 @@ def test_handles_absolute_url(self):
118129 result .text = '<html><a href="https://other.com/page2" rel="next">Next</a></html>'
119130 result .final_url = "https://example.com/"
120131
132+ # Mock css_list to return absolute href
133+ result .extractor .css_list .return_value = ["https://other.com/page2" ]
134+
135+ # Mock parser for fallback
121136 anchor = MagicMock ()
122137 anchor .html = '<a href="https://other.com/page2" rel="next">Next</a>'
123138 anchor .attributes = {"href" : "https://other.com/page2" }
@@ -131,6 +146,10 @@ def test_custom_patterns(self):
131146 result .text = '<html><a href="/page2" data-action="load-more">More</a></html>'
132147 result .final_url = "https://example.com/"
133148
149+ # Mock css_list to return empty for common selectors
150+ result .extractor .css_list .return_value = []
151+
152+ # Mock parser for fallback
134153 anchor = MagicMock ()
135154 anchor .html = '<a href="/page2" data-action="load-more">More</a>'
136155 anchor .attributes = {"href" : "/page2" }
@@ -144,6 +163,10 @@ def test_no_matching_links(self):
144163 result .text = '<html><a href="/about">About</a></html>'
145164 result .final_url = "https://example.com/"
146165
166+ # Mock css_list to return empty
167+ result .extractor .css_list .return_value = []
168+
169+ # Mock parser for fallback
147170 anchor = MagicMock ()
148171 anchor .html = '<a href="/about">About</a>'
149172 anchor .attributes = {"href" : "/about" }
@@ -171,32 +194,24 @@ def test_max_pages_default(self):
171194 assert gen is not None
172195
173196 @patch ('easyscrape.pagination.scrape' )
174- @patch ('easyscrape.pagination.Session' )
175- def test_yields_results (self , mock_session , mock_scrape ):
176- mock_ctx = MagicMock ()
177- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
178- mock_session .return_value .__exit__ = MagicMock (return_value = False )
179-
197+ def test_yields_results (self , mock_scrape ):
180198 # First result with no next link
181199 result1 = MagicMock ()
182200 result1 .text = "<html></html>"
183201 result1 .final_url = "https://example.com/"
202+ result1 .extractor .css_list .return_value = []
184203 result1 .extractor .parser .css .return_value = []
185204 mock_scrape .return_value = result1
186205
187206 results = list (paginate ("https://example.com" , max_pages = 1 ))
188207 assert len (results ) == 1
189208
190209 @patch ('easyscrape.pagination.scrape' )
191- @patch ('easyscrape.pagination.Session' )
192- def test_stop_if_callback (self , mock_session , mock_scrape ):
193- mock_ctx = MagicMock ()
194- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
195- mock_session .return_value .__exit__ = MagicMock (return_value = False )
196-
210+ def test_stop_if_callback (self , mock_scrape ):
197211 result = MagicMock ()
198212 result .text = "<html></html>"
199213 result .final_url = "https://example.com/"
214+ result .extractor .css_list .return_value = []
200215 result .extractor .parser .css .return_value = []
201216 mock_scrape .return_value = result
202217
@@ -207,21 +222,16 @@ def stop_always(r):
207222 assert len (results ) == 1
208223
209224 @patch ('easyscrape.pagination.scrape' )
210- @patch ('easyscrape.pagination.Session' )
211- def test_uses_next_selector (self , mock_session , mock_scrape ):
212- mock_ctx = MagicMock ()
213- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
214- mock_session .return_value .__exit__ = MagicMock (return_value = False )
215-
225+ def test_uses_next_selector (self , mock_scrape ):
216226 result1 = MagicMock ()
217227 result1 .text = '<html><a class="next" href="/page2">Next</a></html>'
218228 result1 .final_url = "https://example.com/page1"
219- result1 .css . return_value = "/page2"
229+ result1 .extractor . css_list . return_value = [ "/page2" ]
220230
221231 result2 = MagicMock ()
222232 result2 .text = '<html></html>'
223233 result2 .final_url = "https://example.com/page2"
224- result2 .css . return_value = None
234+ result2 .extractor . css_list . return_value = []
225235
226236 mock_scrape .side_effect = [result1 , result2 ]
227237
@@ -246,12 +256,7 @@ def test_default_params(self):
246256 assert gen is not None
247257
248258 @patch ('easyscrape.pagination.scrape' )
249- @patch ('easyscrape.pagination.Session' )
250- def test_iterates_page_range (self , mock_session , mock_scrape ):
251- mock_ctx = MagicMock ()
252- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
253- mock_session .return_value .__exit__ = MagicMock (return_value = False )
254-
259+ def test_iterates_page_range (self , mock_scrape ):
255260 mock_scrape .return_value = MagicMock ()
256261
257262 results = list (paginate_param (
@@ -264,12 +269,7 @@ def test_iterates_page_range(self, mock_session, mock_scrape):
264269 assert mock_scrape .call_count == 3
265270
266271 @patch ('easyscrape.pagination.scrape' )
267- @patch ('easyscrape.pagination.Session' )
268- def test_stop_if_callback (self , mock_session , mock_scrape ):
269- mock_ctx = MagicMock ()
270- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
271- mock_session .return_value .__exit__ = MagicMock (return_value = False )
272-
272+ def test_stop_if_callback (self , mock_scrape ):
273273 mock_scrape .return_value = MagicMock ()
274274
275275 call_count = 0
@@ -287,12 +287,7 @@ def stop_after_two(r):
287287 assert len (results ) == 2
288288
289289 @patch ('easyscrape.pagination.scrape' )
290- @patch ('easyscrape.pagination.Session' )
291- def test_handles_scrape_exception (self , mock_session , mock_scrape ):
292- mock_ctx = MagicMock ()
293- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
294- mock_session .return_value .__exit__ = MagicMock (return_value = False )
295-
290+ def test_handles_scrape_exception (self , mock_scrape ):
296291 mock_scrape .side_effect = Exception ("Network error" )
297292
298293 results = list (paginate_param ("https://example.com" , start = 1 , end = 5 ))
@@ -312,12 +307,7 @@ def test_default_params(self):
312307 assert gen is not None
313308
314309 @patch ('easyscrape.pagination.scrape' )
315- @patch ('easyscrape.pagination.Session' )
316- def test_iterates_offsets (self , mock_session , mock_scrape ):
317- mock_ctx = MagicMock ()
318- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
319- mock_session .return_value .__exit__ = MagicMock (return_value = False )
320-
310+ def test_iterates_offsets (self , mock_scrape ):
321311 mock_scrape .return_value = MagicMock ()
322312
323313 results = list (paginate_offset (
@@ -330,12 +320,7 @@ def test_iterates_offsets(self, mock_session, mock_scrape):
330320 assert len (results ) == 3
331321
332322 @patch ('easyscrape.pagination.scrape' )
333- @patch ('easyscrape.pagination.Session' )
334- def test_stop_if_callback (self , mock_session , mock_scrape ):
335- mock_ctx = MagicMock ()
336- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
337- mock_session .return_value .__exit__ = MagicMock (return_value = False )
338-
323+ def test_stop_if_callback (self , mock_scrape ):
339324 mock_scrape .return_value = MagicMock ()
340325
341326 def stop_always (r ):
@@ -349,12 +334,7 @@ def stop_always(r):
349334 assert len (results ) == 1
350335
351336 @patch ('easyscrape.pagination.scrape' )
352- @patch ('easyscrape.pagination.Session' )
353- def test_custom_param_name (self , mock_session , mock_scrape ):
354- mock_ctx = MagicMock ()
355- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
356- mock_session .return_value .__exit__ = MagicMock (return_value = False )
357-
337+ def test_custom_param_name (self , mock_scrape ):
358338 mock_scrape .return_value = MagicMock ()
359339
360340 list (paginate_offset (
@@ -379,12 +359,7 @@ def test_returns_generator(self):
379359 assert hasattr (gen , "__next__" )
380360
381361 @patch ('easyscrape.pagination.scrape' )
382- @patch ('easyscrape.pagination.Session' )
383- def test_respects_max_pages (self , mock_session , mock_scrape ):
384- mock_ctx = MagicMock ()
385- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
386- mock_session .return_value .__exit__ = MagicMock (return_value = False )
387-
362+ def test_respects_max_pages (self , mock_scrape ):
388363 result = MagicMock ()
389364 result .final_url = "https://example.com/"
390365 result .links .return_value = []
@@ -394,12 +369,7 @@ def test_respects_max_pages(self, mock_session, mock_scrape):
394369 assert len (results ) == 1
395370
396371 @patch ('easyscrape.pagination.scrape' )
397- @patch ('easyscrape.pagination.Session' )
398- def test_follows_links (self , mock_session , mock_scrape ):
399- mock_ctx = MagicMock ()
400- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
401- mock_session .return_value .__exit__ = MagicMock (return_value = False )
402-
372+ def test_follows_links (self , mock_scrape ):
403373 result1 = MagicMock ()
404374 result1 .final_url = "https://example.com/"
405375 result1 .links .return_value = ["https://example.com/page2" ]
@@ -414,12 +384,7 @@ def test_follows_links(self, mock_session, mock_scrape):
414384 assert len (results ) == 2
415385
416386 @patch ('easyscrape.pagination.scrape' )
417- @patch ('easyscrape.pagination.Session' )
418- def test_same_domain_filter (self , mock_session , mock_scrape ):
419- mock_ctx = MagicMock ()
420- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
421- mock_session .return_value .__exit__ = MagicMock (return_value = False )
422-
387+ def test_same_domain_filter (self , mock_scrape ):
423388 result = MagicMock ()
424389 result .final_url = "https://example.com/"
425390 result .links .return_value = [
@@ -438,12 +403,7 @@ def test_same_domain_filter(self, mock_session, mock_scrape):
438403 assert len (results ) == 2
439404
440405 @patch ('easyscrape.pagination.scrape' )
441- @patch ('easyscrape.pagination.Session' )
442- def test_stop_if_callback (self , mock_session , mock_scrape ):
443- mock_ctx = MagicMock ()
444- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
445- mock_session .return_value .__exit__ = MagicMock (return_value = False )
446-
406+ def test_stop_if_callback (self , mock_scrape ):
447407 result = MagicMock ()
448408 result .final_url = "https://example.com/"
449409 result .links .return_value = ["https://example.com/page2" ]
@@ -456,12 +416,7 @@ def stop_always(r):
456416 assert len (results ) == 1
457417
458418 @patch ('easyscrape.pagination.scrape' )
459- @patch ('easyscrape.pagination.Session' )
460- def test_deduplicates_urls (self , mock_session , mock_scrape ):
461- mock_ctx = MagicMock ()
462- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
463- mock_session .return_value .__exit__ = MagicMock (return_value = False )
464-
419+ def test_deduplicates_urls (self , mock_scrape ):
465420 result = MagicMock ()
466421 result .final_url = "https://example.com/"
467422 # Returns the same URL multiple times
@@ -477,18 +432,9 @@ def test_deduplicates_urls(self, mock_session, mock_scrape):
477432 assert len (results ) == 1
478433
479434 @patch ('easyscrape.pagination.scrape' )
480- @patch ('easyscrape.pagination.Session' )
481- def test_handles_scrape_exception (self , mock_session , mock_scrape ):
482- mock_ctx = MagicMock ()
483- mock_session .return_value .__enter__ = MagicMock (return_value = mock_ctx )
484- mock_session .return_value .__exit__ = MagicMock (return_value = False )
485-
435+ def test_handles_scrape_exception (self , mock_scrape ):
486436 mock_scrape .side_effect = Exception ("Network error" )
487437
488438 results = list (crawl ("https://example.com" , max_pages = 5 ))
489439 # Should handle exception and continue (return empty)
490440 assert len (results ) == 0
491-
492-
493-
494-
0 commit comments