@@ -426,27 +426,31 @@ def __init__(self):
426426 extractor .pipeline = cache
427427 extractor .key = key
428428 extractor .cwd = str (tmpdir )
429+ extractor .restore_attrs = Mock ()
429430
430431 # Track fetched chunks across tests
431432 fetched_chunks = []
432433
def create_mock_chunks(item_data, chunk_size=4):
    """Build a mock archive item whose chunks cover ``item_data``.

    ``item_data`` is cut into consecutive pieces of ``chunk_size`` bytes
    (the last piece may be shorter). Each piece is hashed with
    ``key.id_hash`` and stored in ``cache.objects`` under that id, so the
    data can be looked up by chunk id later.

    Args:
        item_data: Bytes representing the archived file content.
        chunk_size: Length of each chunk in bytes; must be positive.

    Returns:
        A ``(item, target_path)`` tuple: the mock item and the path of
        ``test.txt`` inside ``tmpdir`` as a string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    chunks = []
    for offset in range(0, len(item_data), chunk_size):
        chunk_data = item_data[offset : offset + chunk_size]
        chunk_id = key.id_hash(chunk_data)
        chunks.append(Mock(id=chunk_id, size=len(chunk_data)))
        cache.objects[chunk_id] = chunk_data

    # "__contains__" must be listed in the spec, otherwise Mock refuses the
    # magic-method assignment below. "get" is allowed by the spec but left
    # unconfigured, so calling it returns a plain Mock.
    item = Mock(spec=["chunks", "size", "__contains__", "get"])
    item.chunks = chunks  # Use actual list for chunks
    item.size = len(item_data)
    # Only `"size" in item` evaluates to True.
    item.__contains__ = lambda self, name: name == "size"

    return item, str(tmpdir.join("test.txt"))
449+
def mock_fetch_many(chunk_ids, is_preloaded=True, ro_type=None):
    """Record the requested chunk ids and return their data from the cache.

    Args:
        chunk_ids: Iterable of chunk ids to fetch.
        is_preloaded: Accepted for signature compatibility; ignored here.
        ro_type: Accepted for signature compatibility; ignored here.

    Returns:
        An iterator over ``cache.objects[chunk_id]`` for each requested id,
        in request order.

    Raises:
        KeyError: If a requested id is not present in ``cache.objects``.
    """
    # Materialize first: a one-shot iterable (e.g. a generator) would
    # otherwise be exhausted by the bookkeeping and yield no data below.
    chunk_ids = list(chunk_ids)
    fetched_chunks.extend(chunk_ids)
    return iter([cache.objects[chunk_id] for chunk_id in chunk_ids])
450454
451455 def clear_fetched_chunks ():
452456 """Helper function to clear tracked chunks between tests"""
@@ -462,99 +466,85 @@ def get_fetched_chunks():
462466
463467
@pytest.mark.parametrize(
    "name, item_data, fs_data, expected_fetched_chunks",
    [
        (
            "no_changes",
            b"1111",  # One complete chunk, no changes needed
            b"1111",  # Identical content
            0,  # No chunks should be fetched
        ),
        (
            "single_chunk_change",
            b"11112222",  # Two chunks
            b"1111XXXX",  # Second chunk different
            1,  # Only second chunk should be fetched
        ),
        (
            "cross_boundary_change",
            b"11112222",  # Two chunks
            b"111XX22",  # Change crosses chunk boundary
            2,  # Both chunks need update
        ),
        (
            "exact_multiple_chunks",
            b"11112222333",  # Three chunks (last one partial)
            b"1111XXXX333",  # Middle chunk different
            1,  # Only middle chunk fetched
        ),
        (
            "first_chunk_change",
            b"11112222",  # Two chunks
            b"XXXX2222",  # First chunk different
            1,  # Only first chunk should be fetched
        ),
        (
            "all_chunks_different",
            b"11112222",  # Two chunks
            b"XXXXYYYY",  # Both chunks different
            2,  # Both chunks should be fetched
        ),
        (
            "partial_last_chunk",
            b"111122",  # One full chunk + partial
            b"1111XX",  # Partial chunk different
            1,  # Only second chunk should be fetched
        ),
    ],
)
def test_compare_and_extract_chunks(setup_extractor, name, item_data, fs_data, expected_fetched_chunks):
    """Test chunk comparison and extraction.

    Writes ``fs_data`` to the target file, runs
    ``compare_and_extract_chunks`` against a mock item built from
    ``item_data`` and then checks that:

    * the call reports success,
    * exactly ``expected_fetched_chunks`` chunks were fetched,
    * when a single chunk was fetched, it is the first chunk that differs,
    * the file content afterwards equals ``item_data``.

    ``name`` only labels the parametrized case.
    """
    extractor, key, cache, tmpdir, create_mock_chunks, get_fetched_chunks, clear_fetched_chunks = setup_extractor
    clear_fetched_chunks()

    chunk_size = 4
    item, target_path = create_mock_chunks(item_data, chunk_size=chunk_size)

    original_chunk_ids = [chunk.id for chunk in item.chunks]

    # Write initial file state
    with open(target_path, "wb") as f:
        f.write(fs_data)

    # Hand the stat result of the file just written to the extractor.
    st = os.stat(target_path)
    result = extractor.compare_and_extract_chunks(item, target_path, st=st)
    assert result

    # Verify only the expected chunks were fetched
    fetched_chunks = get_fetched_chunks()
    assert (
        len(fetched_chunks) == expected_fetched_chunks
    ), f"Expected {expected_fetched_chunks} chunks to be fetched, got {len(fetched_chunks)}"

    # For single chunk changes, verify it's the correct chunk
    if expected_fetched_chunks == 1:
        item_chunks = [item_data[i : i + chunk_size] for i in range(0, len(item_data), chunk_size)]
        fs_chunks = [fs_data[i : i + chunk_size] for i in range(0, len(fs_data), chunk_size)]

        # Find which chunks should have changed by comparing item_data with fs_data
        changed = [
            index
            for index, (item_chunk, fs_chunk) in enumerate(zip(item_chunks, fs_chunks))
            if item_chunk != fs_chunk
        ]
        # Without this guard the position check would be skipped silently.
        assert changed, "Case expects one fetched chunk but no chunk differs"
        assert (
            fetched_chunks[0] == original_chunk_ids[changed[0]]
        ), f"Wrong chunk fetched. Expected chunk at position {changed[0]}"

    # Verify final content
    with open(target_path, "rb") as f:
        assert f.read() == item_data
0 commit comments