Skip to content

Commit 8df00d8

Browse files
committed
filter-repo: avoid failures with LFS objects and submodules
Our attempts to filter LFS objects with --sensitive-data-removal cause us to try to look at the sizes of all blobs and read the contents of those that are small enough. That's fine, but we automatically assume all the FileChanges in a commit are blobs, when some might be submodules. Since submodule oids are unlikely to exist in the current repository, that is likely to lead to tracebacks of the form

    ...
    File "/path/to/git-filter-repo", line 1115, in _parse_optional_filechange
      self._lfs_object_tracker.check_file_change_data(value, True)
    File "/path/to/git-filter-repo", line 3026, in check_file_change_data
      size = self.file_info.get_size_by_identifier(git_id)
    File "/path/to/git-filter-repo", line 2956, in get_size_by_identifier
      (oid, oidtype, size) = line.split()
    ValueError: not enough values to unpack (expected 3, got 2)
    fatal: stream ends early
    fast-import: dumping crash report to .git/fast_import_crash_774517

Add some checks to avoid these problems so that we don't query `git cat-file --batch-command` for these oids.

Signed-off-by: Elijah Newren <[email protected]>
1 parent bc8f191 commit 8df00d8

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

git-filter-repo

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ class FastExportParser(object):
11101110
idnum = idnum[1:]
11111111
path = path.rstrip(b'\n')
11121112
# Check for LFS objects from sources before we might toss this filechange
1113-
if self._lfs_object_tracker:
1113+
if mode != b'160000' and self._lfs_object_tracker:
11141114
value = int(idnum) if len(idnum) != 40 else idnum
11151115
self._lfs_object_tracker.check_file_change_data(value, True)
11161116
# We translate the idnum to our id system
@@ -3039,7 +3039,8 @@ class LFSObjectTracker:
30393039
self.check_blob_data(obj.data, obj.id, False)
30403040
elif type(obj) == Commit:
30413041
for change in obj.file_changes:
3042-
if change.type != b'M':
3042+
sys.stdout.flush()
3043+
if change.type != b'M' or change.mode == b'160000':
30433044
continue
30443045
self.check_file_change_data(change.blob_id, False)
30453046

t/t9393-filter-repo-rerun.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,4 +827,40 @@ test_expect_success 'lfs: full rewrite then partial' '
827827
)
828828
'
829829

830+
test_expect_success 'sdr: lfs + submodules' '
831+
test_create_repo lfs_plus_submodules &&
832+
(
833+
cd lfs_plus_submodules &&
834+
git symbolic-ref HEAD refs/heads/main &&
835+
git fast-import --quiet <$DATA/lfs &&
836+
837+
git reset --hard &&
838+
git init subdir &&
839+
>subdir/empty &&
840+
git -C subdir add . &&
841+
git -C subdir commit -m initial &&
842+
843+
git submodule add ./subdir &&
844+
git commit -m "Add submodule" &&
845+
846+
git filter-repo --sensitive-data-removal \
847+
--invert-paths --path LD --force &&
848+
849+
cat <<-EOF >orig_expect &&
850+
sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
851+
sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
852+
sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
853+
sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
854+
EOF
855+
856+
test_cmp orig_expect .git/filter-repo/original_lfs_objects &&
857+
858+
cat <<-EOF >expect &&
859+
sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd
860+
EOF
861+
862+
test_cmp expect .git/filter-repo/orphaned_lfs_objects
863+
)
864+
'
865+
830866
test_done

0 commit comments

Comments
 (0)