Skip to content

Commit 9f389aa

Browse files
committed
Merge branch 'jk/prune-with-corrupt-refs' into maint
"git prune" used to largely ignore broken refs when deciding which objects are still being used, which could spread an existing small damage and make it a larger one. * jk/prune-with-corrupt-refs: refs.c: drop curate_packed_refs repack: turn on "ref paranoia" when doing a destructive repack prune: turn on ref_paranoia flag refs: introduce a "ref paranoia" flag t5312: test object deletion code paths in a corrupted repository
2 parents b37996e + ea56c4e commit 9f389aa

File tree

7 files changed

+147
-68
lines changed

7 files changed

+147
-68
lines changed

Documentation/git.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS::
10271027
variable when it is invoked as the top level command by the
10281028
end user, to be recorded in the body of the reflog.
10291029

1030+
`GIT_REF_PARANOIA`::
1031+
If set to `1`, include broken or badly named refs when iterating
1032+
over lists of refs. In a normal, non-corrupted repository, this
1033+
does nothing. However, enabling it may help git to detect and
1034+
abort some operations in the presence of broken refs. Git sets
1035+
this variable automatically when performing destructive
1036+
operations like linkgit:git-prune[1]. You should not need to set
1037+
it yourself unless you want to be paranoid about making sure
1038+
an operation has touched every ref (e.g., because you are
1039+
cloning a repository to make a backup).
1040+
10301041

10311042
Discussion[[Discussion]]
10321043
------------------------

builtin/prune.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
115115
expire = ULONG_MAX;
116116
save_commit_buffer = 0;
117117
check_replace_refs = 0;
118+
ref_paranoia = 1;
118119
init_revisions(&revs, prefix);
119120

120121
argc = parse_options(argc, argv, prefix, options, prune_usage, 0);

builtin/repack.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
228228
get_non_kept_pack_filenames(&existing_packs);
229229

230230
if (existing_packs.nr && delete_redundant) {
231-
if (unpack_unreachable)
231+
if (unpack_unreachable) {
232232
argv_array_pushf(&cmd.args,
233233
"--unpack-unreachable=%s",
234234
unpack_unreachable);
235-
else if (pack_everything & LOOSEN_UNREACHABLE)
235+
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
236+
} else if (pack_everything & LOOSEN_UNREACHABLE) {
236237
argv_array_push(&cmd.args,
237238
"--unpack-unreachable");
239+
} else {
240+
argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
241+
}
238242
}
239243
} else {
240244
argv_array_push(&cmd.args, "--unpacked");

cache.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,14 @@ extern int precomposed_unicode;
613613
extern int protect_hfs;
614614
extern int protect_ntfs;
615615

616+
/*
617+
* Include broken refs in all ref iterations, which will
618+
* generally choke dangerous operations rather than letting
619+
* them silently proceed without taking the broken ref into
620+
* account.
621+
*/
622+
extern int ref_paranoia;
623+
616624
/*
617625
* The character that begins a commented line in user-editable file
618626
* that is subject to stripspace.

environment.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */
2424
int log_all_ref_updates = -1; /* unspecified */
2525
int warn_ambiguous_refs = 1;
2626
int warn_on_object_refname_ambiguity = 1;
27+
int ref_paranoia = -1;
2728
int repository_format_version;
2829
const char *git_commit_encoding;
2930
const char *git_log_output_encoding;

refs.c

Lines changed: 6 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base,
19071907
data.fn = fn;
19081908
data.cb_data = cb_data;
19091909

1910+
if (ref_paranoia < 0)
1911+
ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
1912+
if (ref_paranoia)
1913+
data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
1914+
19101915
return do_for_each_entry(refs, base, do_one_ref, &data);
19111916
}
19121917

@@ -2591,68 +2596,10 @@ int pack_refs(unsigned int flags)
25912596
return 0;
25922597
}
25932598

2594-
/*
2595-
* If entry is no longer needed in packed-refs, add it to the string
2596-
* list pointed to by cb_data. Reasons for deleting entries:
2597-
*
2598-
* - Entry is broken.
2599-
* - Entry is overridden by a loose ref.
2600-
* - Entry does not point at a valid object.
2601-
*
2602-
* In the first and third cases, also emit an error message because these
2603-
* are indications of repository corruption.
2604-
*/
2605-
static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data)
2606-
{
2607-
struct string_list *refs_to_delete = cb_data;
2608-
2609-
if (entry->flag & REF_ISBROKEN) {
2610-
/* This shouldn't happen to packed refs. */
2611-
error("%s is broken!", entry->name);
2612-
string_list_append(refs_to_delete, entry->name);
2613-
return 0;
2614-
}
2615-
if (!has_sha1_file(entry->u.value.sha1)) {
2616-
unsigned char sha1[20];
2617-
int flags;
2618-
2619-
if (read_ref_full(entry->name, 0, sha1, &flags))
2620-
/* We should at least have found the packed ref. */
2621-
die("Internal error");
2622-
if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) {
2623-
/*
2624-
* This packed reference is overridden by a
2625-
* loose reference, so it is OK that its value
2626-
* is no longer valid; for example, it might
2627-
* refer to an object that has been garbage
2628-
* collected. For this purpose we don't even
2629-
* care whether the loose reference itself is
2630-
* invalid, broken, symbolic, etc. Silently
2631-
* remove the packed reference.
2632-
*/
2633-
string_list_append(refs_to_delete, entry->name);
2634-
return 0;
2635-
}
2636-
/*
2637-
* There is no overriding loose reference, so the fact
2638-
* that this reference doesn't refer to a valid object
2639-
* indicates some kind of repository corruption.
2640-
* Report the problem, then omit the reference from
2641-
* the output.
2642-
*/
2643-
error("%s does not point to a valid object!", entry->name);
2644-
string_list_append(refs_to_delete, entry->name);
2645-
return 0;
2646-
}
2647-
2648-
return 0;
2649-
}
2650-
26512599
int repack_without_refs(struct string_list *refnames, struct strbuf *err)
26522600
{
26532601
struct ref_dir *packed;
2654-
struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
2655-
struct string_list_item *refname, *ref_to_delete;
2602+
struct string_list_item *refname;
26562603
int ret, needs_repacking = 0, removed = 0;
26572604

26582605
assert(err);
@@ -2688,13 +2635,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err)
26882635
return 0;
26892636
}
26902637

2691-
/* Remove any other accumulated cruft */
2692-
do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete);
2693-
for_each_string_list_item(ref_to_delete, &refs_to_delete) {
2694-
if (remove_entry(packed, ref_to_delete->string) == -1)
2695-
die("internal error");
2696-
}
2697-
26982638
/* Write what remains */
26992639
ret = commit_packed_refs();
27002640
if (ret)

t/t5312-prune-corruption.sh

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/bin/sh
2+
3+
test_description='
4+
Test pruning of repositories with minor corruptions. The goal
5+
here is that we should always be erring on the side of safety. So
6+
if we see, for example, a ref with a bogus name, it is OK either to
7+
bail out or to proceed using it as a reachable tip, but it is _not_
8+
OK to proceed as if it did not exist. Otherwise we might silently
9+
delete objects that cannot be recovered.
10+
'
11+
. ./test-lib.sh
12+
13+
test_expect_success 'disable reflogs' '
14+
git config core.logallrefupdates false &&
15+
rm -rf .git/logs
16+
'
17+
18+
test_expect_success 'create history reachable only from a bogus-named ref' '
19+
test_tick && git commit --allow-empty -m master &&
20+
base=$(git rev-parse HEAD) &&
21+
test_tick && git commit --allow-empty -m bogus &&
22+
bogus=$(git rev-parse HEAD) &&
23+
git cat-file commit $bogus >saved &&
24+
echo $bogus >.git/refs/heads/bogus..name &&
25+
git reset --hard HEAD^
26+
'
27+
28+
test_expect_success 'pruning does not drop bogus object' '
29+
test_when_finished "git hash-object -w -t commit saved" &&
30+
test_might_fail git prune --expire=now &&
31+
verbose git cat-file -e $bogus
32+
'
33+
34+
test_expect_success 'put bogus object into pack' '
35+
git tag reachable $bogus &&
36+
git repack -ad &&
37+
git tag -d reachable &&
38+
verbose git cat-file -e $bogus
39+
'
40+
41+
test_expect_success 'destructive repack keeps packed object' '
42+
test_might_fail git repack -Ad --unpack-unreachable=now &&
43+
verbose git cat-file -e $bogus &&
44+
test_might_fail git repack -ad &&
45+
verbose git cat-file -e $bogus
46+
'
47+
48+
# subsequent tests will have different corruptions
49+
test_expect_success 'clean up bogus ref' '
50+
rm .git/refs/heads/bogus..name
51+
'
52+
53+
# We create two new objects here, "one" and "two". Our
54+
# master branch points to "two", which is deleted,
55+
# corrupting the repository. But we'd like to make sure
56+
# that the otherwise unreachable "one" is not pruned
57+
# (since it is the user's best bet for recovering
58+
# from the corruption).
59+
#
60+
# Note that we also point HEAD somewhere besides "two",
61+
# as we want to make sure we test the case where we
62+
# pick up the reference to "two" by iterating the refs,
63+
# not by resolving HEAD.
64+
test_expect_success 'create history with missing tip commit' '
65+
test_tick && git commit --allow-empty -m one &&
66+
recoverable=$(git rev-parse HEAD) &&
67+
git cat-file commit $recoverable >saved &&
68+
test_tick && git commit --allow-empty -m two &&
69+
missing=$(git rev-parse HEAD) &&
70+
git checkout --detach $base &&
71+
rm .git/objects/$(echo $missing | sed "s,..,&/,") &&
72+
test_must_fail git cat-file -e $missing
73+
'
74+
75+
test_expect_success 'pruning with a corrupted tip does not drop history' '
76+
test_when_finished "git hash-object -w -t commit saved" &&
77+
test_might_fail git prune --expire=now &&
78+
verbose git cat-file -e $recoverable
79+
'
80+
81+
test_expect_success 'pack-refs does not silently delete broken loose ref' '
82+
git pack-refs --all --prune &&
83+
echo $missing >expect &&
84+
git rev-parse refs/heads/master >actual &&
85+
test_cmp expect actual
86+
'
87+
88+
# we do not want to count on running pack-refs to
89+
# actually pack it, as it is perfectly reasonable to
90+
# skip processing a broken ref
91+
test_expect_success 'create packed-refs file with broken ref' '
92+
rm -f .git/refs/heads/master &&
93+
cat >.git/packed-refs <<-EOF &&
94+
$missing refs/heads/master
95+
$recoverable refs/heads/other
96+
EOF
97+
echo $missing >expect &&
98+
git rev-parse refs/heads/master >actual &&
99+
test_cmp expect actual
100+
'
101+
102+
test_expect_success 'pack-refs does not silently delete broken packed ref' '
103+
git pack-refs --all --prune &&
104+
git rev-parse refs/heads/master >actual &&
105+
test_cmp expect actual
106+
'
107+
108+
test_expect_success 'pack-refs does not drop broken refs during deletion' '
109+
git update-ref -d refs/heads/other &&
110+
git rev-parse refs/heads/master >actual &&
111+
test_cmp expect actual
112+
'
113+
114+
test_done

0 commit comments

Comments
 (0)