Skip to content

Commit eee6b68

Browse files
authored
Refactor test pagination to use css_list mocks
Updated tests to mock css_list return values for pagination scenarios.
1 parent ae68323 commit eee6b68

File tree

1 file changed

+43
-97
lines changed

1 file changed

+43
-97
lines changed

tests/test_pagination.py

Lines changed: 43 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,10 @@ def test_finds_rel_next(self):
7878
result.text = '<html><body><a href="/page2" rel="next">Next</a></body></html>'
7979
result.final_url = "https://example.com/"
8080

81-
# Mock parser
81+
# Mock css_list to return href
82+
result.extractor.css_list.return_value = ["/page2"]
83+
84+
# Mock parser for fallback
8285
anchor = MagicMock()
8386
anchor.html = '<a href="/page2" rel="next">Next</a>'
8487
anchor.attributes = {"href": "/page2"}
@@ -92,6 +95,10 @@ def test_finds_next_class(self):
9295
result.text = '<html><a href="/page2" class="pagination-next">Next</a></html>'
9396
result.final_url = "https://example.com/"
9497

98+
# Mock css_list to return href
99+
result.extractor.css_list.return_value = ["/page2"]
100+
101+
# Mock parser for fallback
95102
anchor = MagicMock()
96103
anchor.html = '<a href="/page2" class="pagination-next">Next</a>'
97104
anchor.attributes = {"href": "/page2"}
@@ -105,6 +112,10 @@ def test_finds_next_text(self):
105112
result.text = '<html><a href="/page2">next</a></html>'
106113
result.final_url = "https://example.com/"
107114

115+
# Mock css_list to return empty for common selectors, fall back to regex
116+
result.extractor.css_list.return_value = []
117+
118+
# Mock parser for fallback
108119
anchor = MagicMock()
109120
anchor.html = '<a href="/page2">next</a>'
110121
anchor.attributes = {"href": "/page2"}
@@ -118,6 +129,10 @@ def test_handles_absolute_url(self):
118129
result.text = '<html><a href="https://other.com/page2" rel="next">Next</a></html>'
119130
result.final_url = "https://example.com/"
120131

132+
# Mock css_list to return absolute href
133+
result.extractor.css_list.return_value = ["https://other.com/page2"]
134+
135+
# Mock parser for fallback
121136
anchor = MagicMock()
122137
anchor.html = '<a href="https://other.com/page2" rel="next">Next</a>'
123138
anchor.attributes = {"href": "https://other.com/page2"}
@@ -131,6 +146,10 @@ def test_custom_patterns(self):
131146
result.text = '<html><a href="/page2" data-action="load-more">More</a></html>'
132147
result.final_url = "https://example.com/"
133148

149+
# Mock css_list to return empty for common selectors
150+
result.extractor.css_list.return_value = []
151+
152+
# Mock parser for fallback
134153
anchor = MagicMock()
135154
anchor.html = '<a href="/page2" data-action="load-more">More</a>'
136155
anchor.attributes = {"href": "/page2"}
@@ -144,6 +163,10 @@ def test_no_matching_links(self):
144163
result.text = '<html><a href="/about">About</a></html>'
145164
result.final_url = "https://example.com/"
146165

166+
# Mock css_list to return empty
167+
result.extractor.css_list.return_value = []
168+
169+
# Mock parser for fallback
147170
anchor = MagicMock()
148171
anchor.html = '<a href="/about">About</a>'
149172
anchor.attributes = {"href": "/about"}
@@ -171,32 +194,24 @@ def test_max_pages_default(self):
171194
assert gen is not None
172195

173196
@patch('easyscrape.pagination.scrape')
174-
@patch('easyscrape.pagination.Session')
175-
def test_yields_results(self, mock_session, mock_scrape):
176-
mock_ctx = MagicMock()
177-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
178-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
179-
197+
def test_yields_results(self, mock_scrape):
180198
# First result with no next link
181199
result1 = MagicMock()
182200
result1.text = "<html></html>"
183201
result1.final_url = "https://example.com/"
202+
result1.extractor.css_list.return_value = []
184203
result1.extractor.parser.css.return_value = []
185204
mock_scrape.return_value = result1
186205

187206
results = list(paginate("https://example.com", max_pages=1))
188207
assert len(results) == 1
189208

190209
@patch('easyscrape.pagination.scrape')
191-
@patch('easyscrape.pagination.Session')
192-
def test_stop_if_callback(self, mock_session, mock_scrape):
193-
mock_ctx = MagicMock()
194-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
195-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
196-
210+
def test_stop_if_callback(self, mock_scrape):
197211
result = MagicMock()
198212
result.text = "<html></html>"
199213
result.final_url = "https://example.com/"
214+
result.extractor.css_list.return_value = []
200215
result.extractor.parser.css.return_value = []
201216
mock_scrape.return_value = result
202217

@@ -207,21 +222,16 @@ def stop_always(r):
207222
assert len(results) == 1
208223

209224
@patch('easyscrape.pagination.scrape')
210-
@patch('easyscrape.pagination.Session')
211-
def test_uses_next_selector(self, mock_session, mock_scrape):
212-
mock_ctx = MagicMock()
213-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
214-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
215-
225+
def test_uses_next_selector(self, mock_scrape):
216226
result1 = MagicMock()
217227
result1.text = '<html><a class="next" href="/page2">Next</a></html>'
218228
result1.final_url = "https://example.com/page1"
219-
result1.css.return_value = "/page2"
229+
result1.extractor.css_list.return_value = ["/page2"]
220230

221231
result2 = MagicMock()
222232
result2.text = '<html></html>'
223233
result2.final_url = "https://example.com/page2"
224-
result2.css.return_value = None
234+
result2.extractor.css_list.return_value = []
225235

226236
mock_scrape.side_effect = [result1, result2]
227237

@@ -246,12 +256,7 @@ def test_default_params(self):
246256
assert gen is not None
247257

248258
@patch('easyscrape.pagination.scrape')
249-
@patch('easyscrape.pagination.Session')
250-
def test_iterates_page_range(self, mock_session, mock_scrape):
251-
mock_ctx = MagicMock()
252-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
253-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
254-
259+
def test_iterates_page_range(self, mock_scrape):
255260
mock_scrape.return_value = MagicMock()
256261

257262
results = list(paginate_param(
@@ -264,12 +269,7 @@ def test_iterates_page_range(self, mock_session, mock_scrape):
264269
assert mock_scrape.call_count == 3
265270

266271
@patch('easyscrape.pagination.scrape')
267-
@patch('easyscrape.pagination.Session')
268-
def test_stop_if_callback(self, mock_session, mock_scrape):
269-
mock_ctx = MagicMock()
270-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
271-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
272-
272+
def test_stop_if_callback(self, mock_scrape):
273273
mock_scrape.return_value = MagicMock()
274274

275275
call_count = 0
@@ -287,12 +287,7 @@ def stop_after_two(r):
287287
assert len(results) == 2
288288

289289
@patch('easyscrape.pagination.scrape')
290-
@patch('easyscrape.pagination.Session')
291-
def test_handles_scrape_exception(self, mock_session, mock_scrape):
292-
mock_ctx = MagicMock()
293-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
294-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
295-
290+
def test_handles_scrape_exception(self, mock_scrape):
296291
mock_scrape.side_effect = Exception("Network error")
297292

298293
results = list(paginate_param("https://example.com", start=1, end=5))
@@ -312,12 +307,7 @@ def test_default_params(self):
312307
assert gen is not None
313308

314309
@patch('easyscrape.pagination.scrape')
315-
@patch('easyscrape.pagination.Session')
316-
def test_iterates_offsets(self, mock_session, mock_scrape):
317-
mock_ctx = MagicMock()
318-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
319-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
320-
310+
def test_iterates_offsets(self, mock_scrape):
321311
mock_scrape.return_value = MagicMock()
322312

323313
results = list(paginate_offset(
@@ -330,12 +320,7 @@ def test_iterates_offsets(self, mock_session, mock_scrape):
330320
assert len(results) == 3
331321

332322
@patch('easyscrape.pagination.scrape')
333-
@patch('easyscrape.pagination.Session')
334-
def test_stop_if_callback(self, mock_session, mock_scrape):
335-
mock_ctx = MagicMock()
336-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
337-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
338-
323+
def test_stop_if_callback(self, mock_scrape):
339324
mock_scrape.return_value = MagicMock()
340325

341326
def stop_always(r):
@@ -349,12 +334,7 @@ def stop_always(r):
349334
assert len(results) == 1
350335

351336
@patch('easyscrape.pagination.scrape')
352-
@patch('easyscrape.pagination.Session')
353-
def test_custom_param_name(self, mock_session, mock_scrape):
354-
mock_ctx = MagicMock()
355-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
356-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
357-
337+
def test_custom_param_name(self, mock_scrape):
358338
mock_scrape.return_value = MagicMock()
359339

360340
list(paginate_offset(
@@ -379,12 +359,7 @@ def test_returns_generator(self):
379359
assert hasattr(gen, "__next__")
380360

381361
@patch('easyscrape.pagination.scrape')
382-
@patch('easyscrape.pagination.Session')
383-
def test_respects_max_pages(self, mock_session, mock_scrape):
384-
mock_ctx = MagicMock()
385-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
386-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
387-
362+
def test_respects_max_pages(self, mock_scrape):
388363
result = MagicMock()
389364
result.final_url = "https://example.com/"
390365
result.links.return_value = []
@@ -394,12 +369,7 @@ def test_respects_max_pages(self, mock_session, mock_scrape):
394369
assert len(results) == 1
395370

396371
@patch('easyscrape.pagination.scrape')
397-
@patch('easyscrape.pagination.Session')
398-
def test_follows_links(self, mock_session, mock_scrape):
399-
mock_ctx = MagicMock()
400-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
401-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
402-
372+
def test_follows_links(self, mock_scrape):
403373
result1 = MagicMock()
404374
result1.final_url = "https://example.com/"
405375
result1.links.return_value = ["https://example.com/page2"]
@@ -414,12 +384,7 @@ def test_follows_links(self, mock_session, mock_scrape):
414384
assert len(results) == 2
415385

416386
@patch('easyscrape.pagination.scrape')
417-
@patch('easyscrape.pagination.Session')
418-
def test_same_domain_filter(self, mock_session, mock_scrape):
419-
mock_ctx = MagicMock()
420-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
421-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
422-
387+
def test_same_domain_filter(self, mock_scrape):
423388
result = MagicMock()
424389
result.final_url = "https://example.com/"
425390
result.links.return_value = [
@@ -438,12 +403,7 @@ def test_same_domain_filter(self, mock_session, mock_scrape):
438403
assert len(results) == 2
439404

440405
@patch('easyscrape.pagination.scrape')
441-
@patch('easyscrape.pagination.Session')
442-
def test_stop_if_callback(self, mock_session, mock_scrape):
443-
mock_ctx = MagicMock()
444-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
445-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
446-
406+
def test_stop_if_callback(self, mock_scrape):
447407
result = MagicMock()
448408
result.final_url = "https://example.com/"
449409
result.links.return_value = ["https://example.com/page2"]
@@ -456,12 +416,7 @@ def stop_always(r):
456416
assert len(results) == 1
457417

458418
@patch('easyscrape.pagination.scrape')
459-
@patch('easyscrape.pagination.Session')
460-
def test_deduplicates_urls(self, mock_session, mock_scrape):
461-
mock_ctx = MagicMock()
462-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
463-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
464-
419+
def test_deduplicates_urls(self, mock_scrape):
465420
result = MagicMock()
466421
result.final_url = "https://example.com/"
467422
# Returns the same URL multiple times
@@ -477,18 +432,9 @@ def test_deduplicates_urls(self, mock_session, mock_scrape):
477432
assert len(results) == 1
478433

479434
@patch('easyscrape.pagination.scrape')
480-
@patch('easyscrape.pagination.Session')
481-
def test_handles_scrape_exception(self, mock_session, mock_scrape):
482-
mock_ctx = MagicMock()
483-
mock_session.return_value.__enter__ = MagicMock(return_value=mock_ctx)
484-
mock_session.return_value.__exit__ = MagicMock(return_value=False)
485-
435+
def test_handles_scrape_exception(self, mock_scrape):
486436
mock_scrape.side_effect = Exception("Network error")
487437

488438
results = list(crawl("https://example.com", max_pages=5))
489439
# Should handle exception and continue (return empty)
490440
assert len(results) == 0
491-
492-
493-
494-

0 commit comments

Comments (0)