@@ -36,7 +36,7 @@ def mock_url_large(self):
36
36
"""Returns a mock URL object with large text content"""
37
37
url = Mock ()
38
38
url .id = 2
39
- url .scraped_text = "X" * 12000 # Exceeds default max size
39
+ url .scraped_text = "X" * 10010 # Exceeds default max size
40
40
url .scraped_title = "Large Content Page"
41
41
url .url = "https://example.com/large-page"
42
42
return url
@@ -191,7 +191,7 @@ def test_iter_url_batches_mix_normal_and_oversized(self, processor):
191
191
# Normal URL
192
192
url1 = Mock (id = 1 , scraped_text = "X" * 2000 , scraped_title = "Title 1" , url = "https://example.com/1" )
193
193
# Oversized URL
194
- url2 = Mock (id = 2 , scraped_text = "X" * 15000 , scraped_title = "Title 2" , url = "https://example.com/2" )
194
+ url2 = Mock (id = 2 , scraped_text = "X" * 11000 , scraped_title = "Title 2" , url = "https://example.com/2" )
195
195
# Another normal URL
196
196
url3 = Mock (id = 3 , scraped_text = "X" * 3000 , scraped_title = "Title 3" , url = "https://example.com/3" )
197
197
@@ -247,10 +247,21 @@ def test_iter_url_batches_boundary_cases(self, processor):
247
247
248
248
def test_iterator_is_closed (self , processor ):
249
249
"""Test that the URL iterator is properly closed"""
250
- # Create a mock iterator with a close method
251
- mock_iterator = MagicMock ()
252
- mock_iterator .__iter__ = lambda self : iter ([])
253
- mock_iterator .close = MagicMock ()
250
+
251
+ # Create a proper iterator class with close method
252
+ class MockIterator :
253
+ def __iter__ (self ):
254
+ return self
255
+
256
+ def __next__ (self ):
257
+ raise StopIteration () # Empty iterator
258
+
259
+ def close (self ):
260
+ pass # Do nothing but allow tracking
261
+
262
+ # Create the iterator and spy on close
263
+ mock_iterator = MockIterator ()
264
+ mock_iterator .close = MagicMock () # Replace with mockable version
254
265
255
266
# Create a mock QuerySet that returns our mock_iterator
256
267
mock_queryset = MagicMock (spec = QuerySet )
@@ -265,14 +276,26 @@ def test_iterator_is_closed(self, processor):
265
276
def test_iterator_error_handling (self , processor ):
266
277
"""Test that errors during iteration are handled properly"""
267
278
268
- # Create a mock iterator that raises an exception
269
- def failing_iterator () :
270
- yield Mock ( id = 1 , scraped_text = "Text" , scraped_title = "Title" , url = "https://example.com" )
271
- raise ValueError ( "Test exception" )
279
+ # Create a proper iterator that raises after first item
280
+ class FailingIterator :
281
+ def __init__ ( self ):
282
+ self . has_yielded = False
272
283
273
- mock_iterator = MagicMock ()
274
- mock_iterator .__iter__ = lambda self : failing_iterator ()
275
- mock_iterator .close = MagicMock ()
284
+ def __iter__ (self ):
285
+ return self
286
+
287
+ def __next__ (self ):
288
+ if not self .has_yielded :
289
+ self .has_yielded = True
290
+ return Mock (id = 1 , scraped_text = "Text" , scraped_title = "Title" , url = "https://example.com" )
291
+ raise ValueError ("Test exception" )
292
+
293
+ def close (self ):
294
+ pass # Do nothing but allow tracking
295
+
296
+ # Create the iterator and spy on close
297
+ mock_iterator = FailingIterator ()
298
+ mock_iterator .close = MagicMock () # Replace with mockable version
276
299
277
300
mock_queryset = MagicMock (spec = QuerySet )
278
301
mock_queryset .iterator .return_value = mock_iterator
@@ -370,14 +393,18 @@ def test_url_with_none_text(self):
370
393
371
394
# Should create a batch with the None text converted to an empty string
372
395
assert len (batches ) == 1
373
- assert batches [0 ][0 ]["text" ] is None
396
+ assert batches [0 ][0 ]["text" ] is not None
397
+ assert batches [0 ][0 ]["text" ] == ""
374
398
375
399
def test_missing_required_fields (self ):
376
400
"""Test handling of URLs missing required fields"""
377
401
processor = BatchProcessor ()
378
402
379
403
# URL missing scraped_text
380
- incomplete_url = Mock (id = 1 , scraped_title = "Missing Text" , url = "https://example.com/incomplete" )
404
+ incomplete_url = Mock (spec = ["id" , "url" , "scraped_title" ])
405
+ incomplete_url .id = 1
406
+ incomplete_url .scraped_title = "Missing Text"
407
+ incomplete_url .url = "https://example.com/incomplete"
381
408
# No attribute for scraped_text
382
409
383
410
mock_queryset = MagicMock (spec = QuerySet )
0 commit comments