Skip to content

Commit f3ac2e0

Browse files
Merge pull request #9392 from mr-raj12/fix-syncfile-seek-tell
cache: add seek()/tell() to SyncFile, use SaveFile in _write_files_cache, fixes #9390
2 parents 7092d77 + 2357071 commit f3ac2e0

File tree

4 files changed

+88
-29
lines changed

4 files changed

+88
-29
lines changed

src/borg/cache.py

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -582,36 +582,39 @@ def _write_files_cache(self, files):
582582
discard_after = min(newest_cmtime, start_backup_time)
583583
ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
584584
files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
585-
# TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
586-
with IntegrityCheckedFile(path=str(self.path / self.files_cache_name()), write=True) as fd:
587-
entries = 0
588-
age_discarded = 0
589-
race_discarded = 0
590-
for path_hash, entry in files.items():
591-
entry = self.decompress_entry(entry)
592-
if entry.age == 0: # current entries
593-
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
594-
# Only keep files seen in this backup that old enough not to suffer race conditions relating
595-
# to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
596-
keep = True
597-
else:
598-
keep = False
599-
race_discarded += 1
600-
else: # old entries
601-
if entry.age < ttl:
602-
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
603-
keep = True
604-
else:
605-
keep = False
606-
age_discarded += 1
607-
if keep:
608-
msgpack.pack((path_hash, entry), fd)
609-
entries += 1
585+
cache_path = str(self.path / self.files_cache_name())
586+
with SaveFile(cache_path, binary=True) as sync_file:
587+
with IntegrityCheckedFile(path=cache_path, write=True, override_fd=sync_file) as fd:
588+
entries = 0
589+
age_discarded = 0
590+
race_discarded = 0
591+
for path_hash, entry in files.items():
592+
entry = self.decompress_entry(entry)
593+
if entry.age == 0: # current entries
594+
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
595+
# Only keep files seen in this backup that are old enough not to suffer race conditions
596+
# relating to filesystem snapshots and ctime/mtime granularity or being modified
597+
# while we read them.
598+
keep = True
599+
else:
600+
keep = False
601+
race_discarded += 1
602+
else: # old entries
603+
if entry.age < ttl:
604+
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
605+
keep = True
606+
else:
607+
keep = False
608+
age_discarded += 1
609+
if keep:
610+
msgpack.pack((path_hash, entry), fd)
611+
entries += 1
612+
integrity_data = fd.integrity_data
610613
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
611614
t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
612615
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
613616
files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
614-
return fd.integrity_data
617+
return integrity_data
615618

616619
def file_known_and_unchanged(self, hashed_path, path_hash, st):
617620
"""

src/borg/platform/base.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import errno
2+
import io
23
import os
34
import socket
45
import unicodedata
@@ -163,7 +164,7 @@ def __init__(self, path, *, fd=None, binary=False):
163164
that corresponds to path (like from os.open(path, ...) or os.mkstemp(...))
164165
:param binary: whether to open in binary mode, default is False.
165166
"""
166-
mode = "xb" if binary else "x" # x -> raise FileExists exception in open() if file exists already
167+
mode = "x+b" if binary else "x+" # x -> open() raises FileExistsError if the file already exists; + -> also allow reading (needed for read()/seek())
167168
self.path = path
168169
if fd is None:
169170
self.f = open(str(path), mode=mode) # Python file object
@@ -180,6 +181,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
180181
def write(self, data):
181182
self.f.write(data)
182183

184+
def read(self, *args, **kwargs):
185+
return self.f.read(*args, **kwargs)
186+
187+
def seek(self, offset, whence=io.SEEK_SET):
188+
return self.f.seek(offset, whence)
189+
190+
def tell(self):
191+
return self.f.tell()
192+
183193
def sync(self):
184194
"""
185195
Synchronize file contents. Everything written prior to sync() must become durable before anything written
@@ -195,6 +205,8 @@ def sync(self):
195205

196206
def close(self):
197207
"""sync() and close."""
208+
if self.f.closed:
209+
return
198210
from .. import platform
199211

200212
dirname = None

src/borg/testsuite/crypto/file_integrity_test.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22

3-
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError
3+
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError, IntegrityCheckedFile
4+
from ...platform import SyncFile
45

56

67
class TestReadIntegrityFile:
@@ -130,3 +131,19 @@ def test_part_independence(self, integrity_protected_file, partial_read):
130131
if not partial_read:
131132
fd.read()
132133
# But overall it explodes with the final digest. Neat, eh?
134+
135+
136+
class TestIntegrityCheckedFileWithSyncFile:
137+
def test_write_and_verify_with_syncfile(self, tmp_path):
138+
"""IntegrityCheckedFile works correctly with SyncFile as override_fd."""
139+
path = str(tmp_path / "testfile")
140+
with SyncFile(path, binary=True) as sf:
141+
with IntegrityCheckedFile(path=path, write=True, override_fd=sf) as fd:
142+
fd.write(b"test data for integrity check")
143+
integrity_data = fd.integrity_data
144+
145+
assert integrity_data is not None
146+
147+
# verify the written data can be read back with integrity check
148+
with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd:
149+
assert fd.read() == b"test data for integrity check"

src/borg/testsuite/platform/all_test.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from ...platform import swidth
1+
import io
2+
3+
from ...platform import swidth, SyncFile
24

35

46
def test_swidth_ascii():
@@ -11,3 +13,28 @@ def test_swidth_cjk():
1113

1214
def test_swidth_mixed():
1315
assert swidth("borgバックアップ") == 4 + 6 * 2
16+
17+
18+
def test_syncfile_seek_tell(tmp_path):
19+
"""SyncFile exposes seek() and tell() from the underlying file object."""
20+
path = tmp_path / "testfile"
21+
with SyncFile(path, binary=True) as sf:
22+
sf.write(b"hello world")
23+
assert sf.tell() == 11
24+
sf.seek(0, io.SEEK_SET)
25+
assert sf.tell() == 0
26+
sf.seek(0, io.SEEK_END)
27+
assert sf.tell() == 11
28+
sf.seek(5, io.SEEK_SET)
29+
assert sf.tell() == 5
30+
assert sf.read() == b" world"
31+
assert path.read_bytes() == b"hello world"
32+
33+
34+
def test_syncfile_close_idempotent(tmp_path):
35+
"""Calling SyncFile.close() twice does not raise."""
36+
path = tmp_path / "testfile"
37+
sf = SyncFile(path, binary=True)
38+
sf.write(b"data")
39+
sf.close()
40+
sf.close() # must not raise

0 commit comments

Comments
 (0)