From 54203c99951e72796ae9258c298bedfb1b110826 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 22 Nov 2025 03:58:35 +0100 Subject: [PATCH 1/5] Fix ChunkerFixed sparse handling and update tests - Update fixed_test.py expectations for non-sparse chunking. - Enable `sparse=True` in interaction_test.py and reader_test.py where zero detection is required. - Catch `ValueError` in _build_fmap to support `BytesIO` seeking. --- src/borg/chunkers/reader.pyx | 5 ++- src/borg/testsuite/chunkers/fixed_test.py | 35 ++++++++++++++++++- .../testsuite/chunkers/interaction_test.py | 2 +- src/borg/testsuite/chunkers/reader_test.py | 4 +-- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/borg/chunkers/reader.pyx b/src/borg/chunkers/reader.pyx index 26258914b9..7c8a09df04 100644 --- a/src/borg/chunkers/reader.pyx +++ b/src/borg/chunkers/reader.pyx @@ -137,7 +137,7 @@ class FileFMAPReader: if self.try_sparse: try: fmap = list(sparsemap(self.fd, self.fh)) - except OSError as err: + except (OSError, ValueError) as err: # seeking did not work pass @@ -170,6 +170,9 @@ class FileFMAPReader: # read block from the range data = dread(offset, wanted, self.fd, self.fh) got = len(data) + # Detect zero-filled blocks regardless of sparse mode. + # Zero detection is important to avoid reading/storing allocated zeros + # even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA. if zeros.startswith(data): data = None allocation = CH_ALLOC diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py index b8598a9266..8fac894f19 100644 --- a/src/borg/testsuite/chunkers/fixed_test.py +++ b/src/borg/testsuite/chunkers/fixed_test.py @@ -40,7 +40,40 @@ def get_chunks(fname, sparse, header_size): fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) - get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size) + expected_content = make_content(sparse_map, header_size=header_size) + + # ChunkerFixed splits everything into fixed-size chunks (except maybe the header) + # We need to split the expected content similarly. + expected = [] + + # Handle header if present (it's the first item if header_size > 0) + if header_size > 0: + header = expected_content.pop(0) + expected.append(header) + + # Flatten the rest and split into 4096 chunks + current_chunk_size = 4096 + for item in expected_content: + if isinstance(item, int): + # Hole + count = item + while count > 0: + size = min(count, current_chunk_size) + expected.append(size) + count -= size + else: + # Data + data = item + while len(data) > 0: + size = min(len(data), current_chunk_size) + expected.append(data[:size]) + data = data[size:] + + if not sparse: + # if the chunker is not sparse-aware, it will read holes as zeros + expected = [b"\0" * x if isinstance(x, int) else x for x in expected] + + assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected @pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1") diff --git a/src/borg/testsuite/chunkers/interaction_test.py b/src/borg/testsuite/chunkers/interaction_test.py index 45c175299d..4417dc423f 100644 --- a/src/borg/testsuite/chunkers/interaction_test.py +++ b/src/borg/testsuite/chunkers/interaction_test.py @@ -29,7 +29,7 @@ def test_reader_chunker_interaction(chunker_params): random_data = os.urandom(data_size // 3) + b"\0" * (data_size // 3) + os.urandom(data_size // 3) # Chunk the data - chunker = get_chunker(*chunker_params) + chunker = get_chunker(*chunker_params, sparse=True) data_file = BytesIO(random_data) chunks = list(chunker.chunkify(data_file)) diff --git a/src/borg/testsuite/chunkers/reader_test.py b/src/borg/testsuite/chunkers/reader_test.py index ff56350ee1..f269d95cbb 100644 --- a/src/borg/testsuite/chunkers/reader_test.py +++ b/src/borg/testsuite/chunkers/reader_test.py @@ -170,7 +170,7 @@ def blockify(self): ) def test_filefmapreader_basic(file_content, read_size, expected_chunks): """Test basic functionality of FileFMAPReader with different file contents.""" - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify()) @@ -252,7 +252,7 @@ def test_filefmapreader_allocation_types(zeros_length, read_size, expected_alloc # Create a file with all zeros file_content = b"\0" * zeros_length - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify()) From 6d92d585fcdf1e354139548b6bfe8ce353ff5862 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 26 Nov 2025 12:07:29 +0100 Subject: [PATCH 2/5] comment / use BS in test --- src/borg/chunkers/reader.pyx | 5 ++++- src/borg/testsuite/chunkers/fixed_test.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/borg/chunkers/reader.pyx b/src/borg/chunkers/reader.pyx index 7c8a09df04..9169ed3262 100644 --- a/src/borg/chunkers/reader.pyx +++ b/src/borg/chunkers/reader.pyx @@ -138,7 +138,10 @@ class FileFMAPReader: try: fmap = list(sparsemap(self.fd, self.fh)) except (OSError, ValueError) as err: - # seeking did not work + # Building a sparse map failed: + # - OSError: low-level lseek with SEEK_HOLE/SEEK_DATA not supported by FS/OS. + # - ValueError: high-level file objects (e.g. io.BytesIO or some fd wrappers) + # don't accept SEEK_HOLE/SEEK_DATA as a valid "whence" and raise ValueError. pass if fmap is None: diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py index 8fac894f19..80782d4d00 100644 --- a/src/borg/testsuite/chunkers/fixed_test.py +++ b/src/borg/testsuite/chunkers/fixed_test.py @@ -34,7 +34,7 @@ ) def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): def get_chunks(fname, sparse, header_size): - chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse) + chunker = ChunkerFixed(BS, header_size=header_size, sparse=sparse) with open(fname, "rb") as fd: return cf(chunker.chunkify(fd)) From 1ec1e84ec0849eba1d9948db7c96b2fc545cf7c4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 26 Nov 2025 12:11:59 +0100 Subject: [PATCH 3/5] simplify fixed test --- src/borg/testsuite/chunkers/fixed_test.py | 35 ++--------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py index 80782d4d00..a06d6bdb14 100644 --- a/src/borg/testsuite/chunkers/fixed_test.py +++ b/src/borg/testsuite/chunkers/fixed_test.py @@ -38,42 +38,11 @@ def get_chunks(fname, sparse, header_size): with open(fname, "rb") as fd: return cf(chunker.chunkify(fd)) + # this only works if sparse map blocks are same size as fixed chunker blocks fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) expected_content = make_content(sparse_map, header_size=header_size) - - # ChunkerFixed splits everything into fixed-size chunks (except maybe the header) - # We need to split the expected content similarly. - expected = [] - - # Handle header if present (it's the first item if header_size > 0) - if header_size > 0: - header = expected_content.pop(0) - expected.append(header) - - # Flatten the rest and split into 4096 chunks - current_chunk_size = 4096 - for item in expected_content: - if isinstance(item, int): - # Hole - count = item - while count > 0: - size = min(count, current_chunk_size) - expected.append(size) - count -= size - else: - # Data - data = item - while len(data) > 0: - size = min(len(data), current_chunk_size) - expected.append(data[:size]) - data = data[size:] - - if not sparse: - # if the chunker is not sparse-aware, it will read holes as zeros - expected = [b"\0" * x if isinstance(x, int) else x for x in expected] - - assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected + assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected_content @pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1") From 7fe093adad6817d5a80681a7425f98aeb512a84e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 26 Nov 2025 12:20:32 +0100 Subject: [PATCH 4/5] don't use sparse with BytesIO doesn't work anyway. --- src/borg/testsuite/chunkers/interaction_test.py | 2 +- src/borg/testsuite/chunkers/reader_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/borg/testsuite/chunkers/interaction_test.py b/src/borg/testsuite/chunkers/interaction_test.py index 4417dc423f..45c175299d 100644 --- a/src/borg/testsuite/chunkers/interaction_test.py +++ b/src/borg/testsuite/chunkers/interaction_test.py @@ -29,7 +29,7 @@ def test_reader_chunker_interaction(chunker_params): random_data = os.urandom(data_size // 3) + b"\0" * (data_size // 3) + os.urandom(data_size // 3) # Chunk the data - chunker = get_chunker(*chunker_params, sparse=True) + chunker = get_chunker(*chunker_params) data_file = BytesIO(random_data) chunks = list(chunker.chunkify(data_file)) diff --git a/src/borg/testsuite/chunkers/reader_test.py b/src/borg/testsuite/chunkers/reader_test.py index f269d95cbb..ff56350ee1 100644 --- a/src/borg/testsuite/chunkers/reader_test.py +++ b/src/borg/testsuite/chunkers/reader_test.py @@ -170,7 +170,7 @@ def blockify(self): ) def test_filefmapreader_basic(file_content, read_size, expected_chunks): """Test basic functionality of FileFMAPReader with different file contents.""" - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify()) @@ -252,7 +252,7 @@ def test_filefmapreader_allocation_types(zeros_length, read_size, expected_alloc # Create a file with all zeros file_content = b"\0" * zeros_length - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify()) From 031b779afbe32124035e2216b9e2b04059d56286 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 26 Nov 2025 12:41:17 +0100 Subject: [PATCH 5/5] maps: always use BS sized blocks the fixed chunker also cuts BS blocks. --- src/borg/testsuite/chunkers/__init__.py | 32 +++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/borg/testsuite/chunkers/__init__.py b/src/borg/testsuite/chunkers/__init__.py index 2e01e98e33..ce9ff11e21 100644 --- a/src/borg/testsuite/chunkers/__init__.py +++ b/src/borg/testsuite/chunkers/__init__.py @@ -77,14 +77,38 @@ def fs_supports_sparse(): BS = 4096 # filesystem block size # Some sparse files. X = content blocks, _ = sparse blocks. +# Block size must always be BS. + # X__XXX____ -map_sparse1 = [(0 * BS, 1 * BS, True), (1 * BS, 2 * BS, False), (3 * BS, 3 * BS, True), (6 * BS, 4 * BS, False)] +map_sparse1 = [ + (0, BS, True), + (1 * BS, BS, False), + (2 * BS, BS, False), + (3 * BS, BS, True), + (4 * BS, BS, True), + (5 * BS, BS, True), + (6 * BS, BS, False), + (7 * BS, BS, False), + (8 * BS, BS, False), + (9 * BS, BS, False), +] # _XX___XXXX -map_sparse2 = [(0 * BS, 1 * BS, False), (1 * BS, 2 * BS, True), (3 * BS, 3 * BS, False), (6 * BS, 4 * BS, True)] +map_sparse2 = [ + (0, BS, False), + (1 * BS, BS, True), + (2 * BS, BS, True), + (3 * BS, BS, False), + (4 * BS, BS, False), + (5 * BS, BS, False), + (6 * BS, BS, True), + (7 * BS, BS, True), + (8 * BS, BS, True), + (9 * BS, BS, True), +] # XXX -map_notsparse = [(0 * BS, 3 * BS, True)] +map_notsparse = [(0, BS, True), (BS, BS, True), (2 * BS, BS, True)] # ___ -map_onlysparse = [(0 * BS, 3 * BS, False)] +map_onlysparse = [(0, BS, False), (BS, BS, False), (2 * BS, BS, False)]