Skip to content

Commit 9a43efa

Browse files
extract --continue: optimize processing of already existing dirs
If an already existing filesystem directory has the correct (as archived) mtime, it was already extracted in a previous borg extract run, so we do not need to (and should not) call restore_attrs for it again. If the directory exists but does not have the correct mtime, restore_attrs will be called, its attributes will be extracted, and its mtime will be set to the correct value.
1 parent 98d189d commit 9a43efa

File tree

2 files changed

+46
-22
lines changed

2 files changed

+46
-22
lines changed

src/borg/archive.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -782,12 +782,17 @@ def extract_item(
782782

783783
def same_item(item, st):
784784
"""Is the archived item the same as the filesystem item at the same path with stat st?"""
785-
if not stat.S_ISREG(st.st_mode):
786-
# we only "optimize" for regular files.
785+
is_file = stat.S_ISREG(st.st_mode)
786+
is_dir = stat.S_ISDIR(st.st_mode)
787+
if not (is_file or is_dir):
788+
# we only "optimize" for regular files and directories.
787789
# other file types are less frequent and have no content extraction we could "optimize away".
788790
return False
789-
if item.mode != st.st_mode or item.size != st.st_size:
790-
# the size check catches incomplete previous file extraction
791+
if item.mode != st.st_mode:
792+
# we want to extract a different type of file (or one with different permission bits) than what is present in the filesystem.
793+
return False
794+
if is_file and item.size != st.st_size:
795+
# the size check catches incomplete previous regular file extraction
791796
return False
792797
if item.get("mtime") != st.st_mtime_ns:
793798
# note: mtime is "extracted" late, after xattrs and ACLs, but before flags.

src/borg/testsuite/archiver/extract_cmd_test.py

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import errno
22
import os
3+
from pathlib import Path
34
import shutil
45
import stat
56
from unittest.mock import patch
@@ -707,38 +708,56 @@ def test_extract_continue(archivers, request):
707708
archiver = request.getfixturevalue(archivers)
708709
CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
709710
cmd(archiver, "repo-create", RK_ENCRYPTION)
710-
create_regular_file(archiver.input_path, "file1", contents=CONTENTS1)
711-
create_regular_file(archiver.input_path, "file2", contents=CONTENTS2)
712-
create_regular_file(archiver.input_path, "file3", contents=CONTENTS3)
711+
create_regular_file(archiver.input_path, "dir1/file1", contents=CONTENTS1)
712+
create_regular_file(archiver.input_path, "dir2/file2", contents=CONTENTS2)
713+
create_regular_file(archiver.input_path, "dir3/file3", contents=CONTENTS3)
713714
cmd(archiver, "create", "arch", "input")
714715

716+
granularity_sleep()
717+
715718
with changedir("output"):
716719
# we simulate an interrupted/partial extraction:
717720
cmd(archiver, "extract", "arch")
718-
# do not modify file1, it stands for a successfully extracted file
719-
file1_st = os.stat("input/file1")
721+
# do not modify dir1 and file1, they stand for a successfully extracted directory and file
722+
dir1_st = os.stat("input/dir1")
723+
file1_st = os.stat("input/dir1/file1")
724+
# simulate a partially extracted dir2 (wrong mtime)
720725
# simulate a partially extracted file2 (smaller size, archived mtime not yet set)
721-
file2_st = os.stat("input/file2")
726+
dir2_st = os.stat("input/dir2")
727+
file2_st = os.stat("input/dir2/file2")
722728
# make a hard link, so it does not free the inode when unlinking input/dir2/file2
723-
os.link("input/file2", "hardlink-to-keep-inode-f2")
724-
os.truncate("input/file2", 123) # -> incorrect size, incorrect mtime
725-
# simulate file3 has not yet been extracted
726-
file3_st = os.stat("input/file3")
729+
os.link("input/dir2/file2", "hardlink-to-keep-inode-f2")
730+
os.truncate("input/dir2/file2", 123) # -> incorrect size, incorrect mtime
731+
Path("input/dir2").touch() # -> mtime "incorrect" (not as archived)
732+
# simulate dir3 and file3 have not yet been extracted
733+
dir3_st = os.stat("input/dir3")
734+
file3_st = os.stat("input/dir3/file3")
727735
# make a hard link, so it does not free the inode when unlinking input/dir3/file3
728-
os.link("input/file3", "hardlink-to-keep-inode-f3")
729-
os.remove("input/file3")
736+
os.link("input/dir3/file3", "hardlink-to-keep-inode-f3")
737+
os.remove("input/dir3/file3")
738+
os.rmdir("input/dir3")
739+
730740
granularity_sleep()
731741

732742
with changedir("output"):
733743
# now try to continue extracting, using the same archive, same output dir:
734744
cmd(archiver, "extract", "arch", "--continue")
735-
now_file1_st = os.stat("input/file1")
745+
now_dir1_st = os.stat("input/dir1")
746+
now_file1_st = os.stat("input/dir1/file1")
747+
assert dir1_st.st_ino == now_dir1_st.st_ino # dir1 was NOT extracted again
748+
assert dir1_st.st_mtime_ns == now_dir1_st.st_mtime_ns # dir1 has correct mtime
736749
assert file1_st.st_ino == now_file1_st.st_ino # file1 was NOT extracted again
737750
assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns # has correct mtime
738-
new_file2_st = os.stat("input/file2")
751+
now_dir2_st = os.stat("input/dir2")
752+
new_file2_st = os.stat("input/dir2/file2")
753+
assert dir2_st.st_ino == now_dir2_st.st_ino # dir2 was not removed/recreated
754+
assert dir2_st.st_mtime_ns == now_dir2_st.st_mtime_ns # dir2 mtime was fixed
739755
assert file2_st.st_ino != new_file2_st.st_ino # file2 was extracted again
740756
assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns # has correct mtime
741-
new_file3_st = os.stat("input/file3")
757+
new_dir3_st = os.stat("input/dir3")
758+
new_file3_st = os.stat("input/dir3/file3")
759+
assert dir3_st.st_ino != new_dir3_st.st_ino # dir3 was created freshly
760+
assert dir3_st.st_mtime_ns == new_dir3_st.st_mtime_ns # dir3 mtime was extracted
742761
assert file3_st.st_ino != new_file3_st.st_ino # file3 was extracted again
743762
assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns # has correct mtime
744763
# windows has a strange ctime behaviour when deleting and recreating a file
@@ -747,11 +766,11 @@ def test_extract_continue(archivers, request):
747766
assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns # file extracted again
748767
assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns # file extracted again
749768
# check if all contents (and thus also file sizes) are correct:
750-
with open("input/file1", "rb") as f:
769+
with open("input/dir1/file1", "rb") as f:
751770
assert f.read() == CONTENTS1
752-
with open("input/file2", "rb") as f:
771+
with open("input/dir2/file2", "rb") as f:
753772
assert f.read() == CONTENTS2
754-
with open("input/file3", "rb") as f:
773+
with open("input/dir3/file3", "rb") as f:
755774
assert f.read() == CONTENTS3
756775

757776

0 commit comments

Comments
 (0)