
Commit 3d65cb3

transfer: verify chunks we get using assert_id, fixes #7383
This needs to decompress and hash the chunk data, but better to play it safe. At least we can still avoid the (re-)compression in borg transfer, which is often much more expensive than decompression.
1 parent 8dc52b2 commit 3d65cb3
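
Conceptually, the added verification recomputes the chunk id from the decompressed plaintext and compares it with the id the chunk was fetched under, while the compressed payload is forwarded unchanged. A minimal standalone sketch (not borg's actual code; zlib and blake2b stand in for borg's compressors and the repo's keyed chunk-id hash):

    import hashlib
    import zlib

    def verify_then_forward(chunk_id: bytes, data_compressed: bytes) -> bytes:
        """Sketch: verify a fetched chunk without recompressing it."""
        data = zlib.decompress(data_compressed)  # decompression is unavoidable: the id is a hash of the plaintext
        computed = hashlib.blake2b(data, digest_size=32).digest()  # stand-in for the keyed id hash
        if computed != chunk_id:
            raise ValueError("chunk id mismatch, refusing to transfer corrupt chunk")
        return data_compressed  # forward the original compressed payload, no recompression needed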

3 files changed, +43 −20 lines changed

src/borg/archiver/transfer_cmd.py

Lines changed: 5 additions & 2 deletions
@@ -106,8 +106,11 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non
                     if refcount == 0:  # target repo does not yet have this chunk
                         if not dry_run:
                             cdata = other_repository.get(chunk_id)
-                            # keep compressed payload same, avoid decompression / recompression
-                            meta, data = other_manifest.repo_objs.parse(chunk_id, cdata, decompress=False)
+                            # keep compressed payload same, verify via assert_id (that will
+                            # decompress, but avoid needing to compress it again):
+                            meta, data = other_manifest.repo_objs.parse(
+                                chunk_id, cdata, decompress=True, want_compressed=True
+                            )
                             meta, data = upgrader.upgrade_compressed_chunk(meta, data)
                             chunk_entry = cache.add_chunk(
                                 chunk_id,
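
With the new parse() signature (see src/borg/repoobj.py below), the quick, non-verifying path would be decompress=False, want_compressed=True; the hunk above switches borg transfer to the verifying path instead. Roughly, as a sketch reusing the names from the hunk:

    # quick path: decrypt only, return the compressed payload unverified
    meta, data = other_manifest.repo_objs.parse(chunk_id, cdata, decompress=False, want_compressed=True)

    # verifying path (used here): decompress internally, check the plaintext against
    # chunk_id via assert_id(), but still return the compressed payload for reuse
    meta, data = other_manifest.repo_objs.parse(chunk_id, cdata, decompress=True, want_compressed=True)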

src/borg/repoobj.py

Lines changed: 35 additions & 17 deletions
@@ -70,7 +70,20 @@ def parse_meta(self, id: bytes, cdata: bytes) -> dict:
         meta = msgpack.unpackb(meta_packed)
         return meta

-    def parse(self, id: bytes, cdata: bytes, decompress: bool = True) -> tuple[dict, bytes]:
+    def parse(
+        self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False
+    ) -> tuple[dict, bytes]:
+        """
+        Parse a repo object into metadata and data (decrypt it, maybe decompress, maybe verify if the chunk plaintext
+        corresponds to the chunk id via assert_id()).
+
+        Tweaking options (default is usually fine):
+        - decompress=True, want_compressed=False: slow, verifying. returns decompressed data (default).
+        - decompress=True, want_compressed=True: slow, verifying. returns compressed data (caller wants to reuse it).
+        - decompress=False, want_compressed=True: quick, not verifying. returns compressed data (caller wants to reuse).
+        - decompress=False, want_compressed=False: invalid
+        """
+        assert not (not decompress and not want_compressed), "invalid parameter combination!"
         assert isinstance(id, bytes)
         assert isinstance(cdata, bytes)
         obj = memoryview(cdata)
@@ -81,24 +94,26 @@ def parse(self, id: bytes, cdata: bytes, decompress: bool = True) -> tuple[dict,
         meta_encrypted = obj[offs : offs + len_meta_encrypted]
         offs += len_meta_encrypted
         meta_packed = self.key.decrypt(id, meta_encrypted)
-        meta = msgpack.unpackb(meta_packed)
+        meta_compressed = msgpack.unpackb(meta_packed)  # means: before adding more metadata in decompress block
         data_encrypted = obj[offs:]
-        data_compressed = self.key.decrypt(id, data_encrypted)
+        data_compressed = self.key.decrypt(id, data_encrypted)  # does not include the type/level bytes
         if decompress:
-            ctype = meta["ctype"]
-            clevel = meta["clevel"]
-            csize = meta["csize"]  # always the overall size
+            ctype = meta_compressed["ctype"]
+            clevel = meta_compressed["clevel"]
+            csize = meta_compressed["csize"]  # always the overall size
             assert csize == len(data_compressed)
-            psize = meta.get("psize", csize)  # obfuscation: psize (payload size) is potentially less than csize.
+            psize = meta_compressed.get(
+                "psize", csize
+            )  # obfuscation: psize (payload size) is potentially less than csize.
             assert psize <= csize
             compr_hdr = bytes((ctype, clevel))
             compressor_cls, compression_level = Compressor.detect(compr_hdr)
             compressor = compressor_cls(level=compression_level)
-            meta, data = compressor.decompress(meta, data_compressed[:psize])
+            meta, data = compressor.decompress(dict(meta_compressed), data_compressed[:psize])
             self.key.assert_id(id, data)
         else:
-            data = data_compressed  # does not include the type/level bytes
-        return meta, data
+            meta, data = None, None
+        return meta_compressed if want_compressed else meta, data_compressed if want_compressed else data


 class RepoObj1:  # legacy
@@ -140,19 +155,22 @@ def format(
     def parse_meta(self, id: bytes, cdata: bytes) -> dict:
         raise NotImplementedError("parse_meta is not available for RepoObj1")

-    def parse(self, id: bytes, cdata: bytes, decompress: bool = True) -> tuple[dict, bytes]:
+    def parse(
+        self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False
+    ) -> tuple[dict, bytes]:
+        assert not (not decompress and not want_compressed), "invalid parameter combination!"
         assert isinstance(id, bytes)
         assert isinstance(cdata, bytes)
         data_compressed = self.key.decrypt(id, cdata)
         compressor_cls, compression_level = Compressor.detect(data_compressed[:2])
         compressor = compressor_cls(level=compression_level, legacy_mode=True)
+        meta_compressed = {}
+        meta_compressed["ctype"] = compressor.ID
+        meta_compressed["clevel"] = compressor.level
+        meta_compressed["csize"] = len(data_compressed)
         if decompress:
             meta, data = compressor.decompress(None, data_compressed)
             self.key.assert_id(id, data)
         else:
-            meta = {}
-            meta["ctype"] = compressor.ID
-            meta["clevel"] = compressor.level
-            meta["csize"] = len(data_compressed)
-            data = data_compressed
-        return meta, data
+            meta, data = None, None
+        return meta_compressed if want_compressed else meta, data_compressed if want_compressed else data
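
Taken together, the docstring above allows three parameter combinations. A usage sketch (repo_objs is assumed to be an existing RepoObj or RepoObj1 instance, id/cdata an already-stored chunk):

    meta, data = repo_objs.parse(id, cdata)  # default: verify via assert_id, return decompressed data
    meta_c, data_c = repo_objs.parse(id, cdata, decompress=True, want_compressed=True)  # verify, return compressed data
    meta_c, data_c = repo_objs.parse(id, cdata, decompress=False, want_compressed=True)  # quick, unverified, compressed data
    # decompress=False together with want_compressed=False trips the assert at the top of parse()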

src/borg/testsuite/repoobj.py

Lines changed: 3 additions & 1 deletion
@@ -68,7 +68,9 @@ def test_borg1_borg2_transition(key):
     repo_objs1 = RepoObj1(key)
     id = repo_objs1.id_hash(data)
     borg1_cdata = repo_objs1.format(id, meta, data)
-    meta1, compr_data1 = repo_objs1.parse(id, borg1_cdata, decompress=False)  # borg transfer avoids (de)compression
+    meta1, compr_data1 = repo_objs1.parse(
+        id, borg1_cdata, decompress=True, want_compressed=True
+    )  # avoid re-compression
     # in borg 1, we can only get this metadata after decrypting the whole chunk (and we do not have "size" here):
     assert meta1["ctype"] == LZ4.ID  # default compression
     assert meta1["clevel"] == 0xFF  # lz4 does not know levels (yet?)
