@@ -727,67 +727,62 @@ def compare_and_extract_chunks(self, item, fs_path, st=None, *, pi=None, sparse=
         try:
             # First pass: Build fs chunks list
             fs_chunks = []
-            offset = 0
             with backup_io("open"):
                 fs_file = open(fs_path, "rb")
             with fs_file:
                 for chunk in item.chunks:
-                    with backup_io("seek"):
-                        fs_file.seek(offset)
                     with backup_io("read"):
                         data = fs_file.read(chunk.size)
-                    if len(data) != chunk.size:
-                        fs_chunks.append(None)
-                    else:
-                        fs_chunks.append(ChunkListEntry(id=self.key.id_hash(data), size=chunk.size))
-                    offset += chunk.size
+
+                    fs_chunks.append(ChunkListEntry(id=self.key.id_hash(data), size=len(data)))

             # Compare chunks and collect needed chunk IDs
             needed_chunks = []
             for fs_chunk, item_chunk in zip(fs_chunks, item.chunks):
-                if fs_chunk is None or fs_chunk.id != item_chunk.id:
+                if fs_chunk != item_chunk:
                     needed_chunks.append(item_chunk)

             if not needed_chunks:
                 return True

             # Fetch all needed chunks and iterate through ALL of them
-            chunk_data_iter = self.pipeline.fetch_many(
-                [chunk.id for chunk in needed_chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM
-            )
+            chunk_data_iter = self.pipeline.fetch_many([chunk.id for chunk in needed_chunks], ro_type=ROBJ_FILE_STREAM)

-            # Second pass: Update file and consume EVERY chunk from the iterator
+            # Second pass: Update file
             offset = 0
-            item_chunk_size = 0
             with backup_io("open"):
                 fs_file = open(fs_path, "rb+")
             with fs_file:
                 for fs_chunk, item_chunk in zip(fs_chunks, item.chunks):
-                    with backup_io("seek"):
-                        fs_file.seek(offset)
-                    if fs_chunk is not None and fs_chunk.id == item_chunk.id:
+                    if fs_chunk == item_chunk:
+                        with backup_io("seek"):
+                            fs_file.seek(offset)
                         offset += item_chunk.size
-                        item_chunk_size += item_chunk.size
                     else:
                         chunk_data = next(chunk_data_iter)
-                        if pi:
-                            pi.show(increase=len(chunk_data), info=[remove_surrogates(item.path)])
+                        # An explicit seek is needed before writing in "rb+" mode to ensure the correct
+                        # file position; without it, chunks were written at the wrong offsets and the
+                        # file contents got corrupted.
+                        with backup_io("seek"):
+                            fs_file.seek(offset)
+
                         with backup_io("write"):
                             if sparse and not chunk_data.strip(b"\0"):
-                                fs_file.seek(len(chunk_data), 1)  # Seek over sparse section
+                                fs_file.write(b"\0" * len(chunk_data))  # Write zeros to overwrite old content
                                 offset += len(chunk_data)
                             else:
                                 fs_file.write(chunk_data)
                                 offset += len(chunk_data)
-                                item_chunk_size += len(chunk_data)
+                        if pi:  # Moved outside the write branches to track progress in both cases
+                            pi.show(increase=len(chunk_data), info=[remove_surrogates(item.path)])
+
                 with backup_io("truncate_and_attrs"):
                     fs_file.truncate(item.size)
                     fs_file.flush()
                     self.restore_attrs(fs_path, item, fd=fs_file.fileno())

             # Size verification like extract_item
-            if "size" in item and item.size != item_chunk_size:
-                raise BackupError(f"Size inconsistency detected: size {item.size}, chunks size {item_chunk_size}")
+            if "size" in item and item.size != offset:
+                raise BackupError(f"Size inconsistency detected: size {item.size}, chunks size {offset}")

             # Damaged chunks check like extract_item
             if "chunks_healthy" in item and not item.chunks_healthy:
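
The first pass above boils down to re-chunking the local file along the archived chunk boundaries, hashing each piece, and keeping every archived chunk whose on-disk counterpart differs. A minimal standalone sketch of that idea, not borg's actual API: sha256 stands in for the keyed self.key.id_hash, and chunks are modeled as plain (id, size) named tuples.

import hashlib
from collections import namedtuple

ChunkListEntry = namedtuple("ChunkListEntry", "id size")

def chunks_needing_fetch(fs_path, item_chunks):
    # Hash the local file chunk-by-chunk along the archived boundaries.
    # A short read at EOF yields a smaller size, so a truncated file
    # fails the equality check below just like a content mismatch does.
    fs_chunks = []
    with open(fs_path, "rb") as f:
        for chunk in item_chunks:
            data = f.read(chunk.size)
            # sha256 is a stand-in for borg's keyed id_hash
            fs_chunks.append(ChunkListEntry(hashlib.sha256(data).digest(), len(data)))
    return [ic for fc, ic in zip(fs_chunks, item_chunks) if fc != ic]

Comparing whole entries (id and size) rather than ids alone lets the short-read case fall out naturally, which appears to be why the PR can drop the None bookkeeping from the first pass.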
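The explicit seek before each write matters because unchanged chunks are skipped without any read or write, so the handle's own position no longer tracks the target offset. A minimal sketch of the pattern under the same fixed-boundary assumption; patch_file is a hypothetical helper, not code from this PR.

def patch_file(fs_path, plan, new_size):
    # plan is a list of (chunk_size, new_data) pairs: new_data is None
    # when the on-disk chunk already matches and only the offset needs
    # to advance; otherwise new_data replaces the chunk at that offset.
    offset = 0
    with open(fs_path, "rb+") as f:
        for chunk_size, new_data in plan:
            if new_data is None:
                offset += chunk_size  # unchanged chunk: skip it entirely
            else:
                # Seek explicitly before every write: after skipped chunks
                # the file position is stale, and a write at the current
                # position would land in the wrong place and corrupt the file.
                f.seek(offset)
                f.write(new_data)
                offset += len(new_data)
        f.truncate(new_size)  # drop any stale tail if the file shrank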