Skip to content
This repository was archived by the owner on Nov 9, 2017. It is now read-only.

Commit 6c34560

Browse files
committed
Merge branch 'jk/duplicate-objects-in-packs'
A packfile that stores the same object more than once is broken and will be rejected by "git index-pack" that is run when receiving data over the wire. * jk/duplicate-objects-in-packs: t5308: check that index-pack --strict detects duplicate objects test index-pack on packs with recoverable delta cycles add tests for indexing packs with delta cycles sha1-lookup: handle duplicate keys with GIT_USE_LOOKUP test-sha1: add a binary output mode
2 parents 01e0fa2 + 16c159d commit 6c34560

File tree

5 files changed

+316
-3
lines changed

5 files changed

+316
-3
lines changed

sha1-lookup.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,54 @@ int sha1_entry_pos(const void *table,
204204
* byte 0 thru (ofs-1) are the same between
205205
* lo and hi; ofs is the first byte that is
206206
* different.
207+
*
208+
* If ofs==20, then no bytes are different,
209+
* meaning we have entries with duplicate
210+
* keys. We know that we are in a solid run
211+
* of this entry (because the entries are
212+
* sorted, and our lo and hi are the same,
213+
* there can be nothing but this single key
214+
* in between). So we can stop the search.
215+
* Either one of these entries is it (and
216+
* we do not care which), or we do not have
217+
* it.
218+
*
219+
* Furthermore, we know that one of our
220+
* endpoints must be the edge of the run of
221+
* duplicates. For example, given this
222+
* sequence:
223+
*
224+
* idx 0 1 2 3 4 5
225+
* key A C C C C D
226+
*
227+
* If we are searching for "B", we might
228+
* hit the duplicate run at lo=1, hi=3
229+
* (e.g., by first mi=3, then mi=0). But we
230+
* can never have lo > 1, because B < C.
231+
* That is, if our key is less than the
232+
* run, we know that "lo" is the edge, but
233+
* we can say nothing of "hi". Similarly,
234+
* if our key is greater than the run, we
235+
* know that "hi" is the edge, but we can
236+
* say nothing of "lo".
237+
*
238+
* Therefore if we do not find it, we also
239+
* know where it would go if it did exist:
240+
* just on the far side of the edge that we
241+
* know about.
207242
*/
243+
if (ofs == 20) {
244+
mi = lo;
245+
mi_key = base + elem_size * mi + key_offset;
246+
cmp = memcmp(mi_key, key, 20);
247+
if (!cmp)
248+
return mi;
249+
if (cmp < 0)
250+
return -1 - hi;
251+
else
252+
return -1 - lo;
253+
}
254+
208255
hiv = hi_key[ofs_0];
209256
if (ofs_0 < 19)
210257
hiv = (hiv << 8) | hi_key[ofs_0+1];

t/lib-pack.sh

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#!/bin/sh
2+
#
3+
# Support routines for hand-crafting weird or malicious packs.
4+
#
5+
# You can make a complete pack like:
6+
#
7+
# pack_header 2 >foo.pack &&
8+
# pack_obj e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 >>foo.pack &&
9+
# pack_obj e68fe8129b546b101aee9510c5328e7f21ca1d18 >>foo.pack &&
10+
# pack_trailer foo.pack
11+
12+
# Print the big-endian 4-byte octal representation of $1
13+
uint32_octal () {
14+
n=$1
15+
printf '\%o' $(($n / 16777216)); n=$((n % 16777216))
16+
printf '\%o' $(($n / 65536)); n=$((n % 65536))
17+
printf '\%o' $(($n / 256)); n=$((n % 256))
18+
printf '\%o' $(($n ));
19+
}
20+
21+
# Print the big-endian 4-byte binary representation of $1
22+
uint32_binary () {
23+
printf "$(uint32_octal "$1")"
24+
}
25+
26+
# Print a pack header, version 2, for a pack with $1 objects
27+
pack_header () {
28+
printf 'PACK' &&
29+
printf '\0\0\0\2' &&
30+
uint32_binary "$1"
31+
}
32+
33+
# Print the pack data for object $1, as a delta against object $2 (or as a full
34+
# object if $2 is missing or empty). The output is suitable for including
35+
# directly in the packfile, and represents the entirety of the object entry.
36+
# Doing this on the fly (especially picking your deltas) is quite tricky, so we
37+
# have hardcoded some well-known objects. See the case statements below for the
38+
# complete list.
39+
pack_obj () {
40+
case "$1" in
41+
# empty blob
42+
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
43+
case "$2" in
44+
'')
45+
printf '\060\170\234\003\0\0\0\0\1'
46+
return
47+
;;
48+
esac
49+
;;
50+
51+
# blob containing "\7\76"
52+
e68fe8129b546b101aee9510c5328e7f21ca1d18)
53+
case "$2" in
54+
'')
55+
printf '\062\170\234\143\267\3\0\0\116\0\106'
56+
return
57+
;;
58+
01d7713666f4de822776c7622c10f1b07de280dc)
59+
printf '\165\1\327\161\66\146\364\336\202\47\166' &&
60+
printf '\307\142\54\20\361\260\175\342\200\334\170' &&
61+
printf '\234\143\142\142\142\267\003\0\0\151\0\114'
62+
return
63+
;;
64+
esac
65+
;;
66+
67+
# blob containing "\7\0"
68+
01d7713666f4de822776c7622c10f1b07de280dc)
69+
case "$2" in
70+
'')
71+
printf '\062\170\234\143\147\0\0\0\20\0\10'
72+
return
73+
;;
74+
e68fe8129b546b101aee9510c5328e7f21ca1d18)
75+
printf '\165\346\217\350\22\233\124\153\20\32\356' &&
76+
printf '\225\20\305\62\216\177\41\312\35\30\170\234' &&
77+
printf '\143\142\142\142\147\0\0\0\53\0\16'
78+
return
79+
;;
80+
esac
81+
;;
82+
esac
83+
84+
echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
85+
return 1
86+
}
87+
88+
# Compute and append pack trailer to "$1"
89+
pack_trailer () {
90+
test-sha1 -b <"$1" >trailer.tmp &&
91+
cat trailer.tmp >>"$1" &&
92+
rm -f trailer.tmp
93+
}
94+
95+
# Remove any existing packs to make sure that
96+
# whatever we index next will be the pack that we
97+
# actually use.
98+
clear_packs () {
99+
rm -f .git/objects/pack/*
100+
}

t/t5308-pack-detect-duplicates.sh

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/bin/sh
2+
3+
test_description='handling of duplicate objects in incoming packfiles'
4+
. ./test-lib.sh
5+
. "$TEST_DIRECTORY"/lib-pack.sh
6+
7+
# The sha1s we have in our pack. It's important that these have the same
8+
# starting byte, so that they end up in the same fanout section of the index.
9+
# That lets us make sure we are exercising the binary search with both sets.
10+
LO_SHA1=e68fe8129b546b101aee9510c5328e7f21ca1d18
11+
HI_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
12+
13+
# And here's a "missing sha1" which will produce failed lookups. It must also
14+
# be in the same fanout section, and should be between the two (so that during
15+
# our binary search, we are sure to end up looking at one or the other of the
16+
# duplicate runs).
17+
MISSING_SHA1='e69d000000000000000000000000000000000000'
18+
19+
# git will never intentionally create packfiles with
20+
# duplicate objects, so we have to construct them by hand.
21+
#
22+
# $1 is the name of the packfile to create
23+
#
24+
# $2 is the number of times to duplicate each object
25+
create_pack () {
26+
pack_header "$((2 * $2))" >"$1" &&
27+
for i in $(test_seq 1 "$2"); do
28+
pack_obj $LO_SHA1 &&
29+
pack_obj $HI_SHA1
30+
done >>"$1" &&
31+
pack_trailer "$1"
32+
}
33+
34+
# double-check that create_pack actually works
35+
test_expect_success 'pack with no duplicates' '
36+
create_pack no-dups.pack 1 &&
37+
git index-pack --stdin <no-dups.pack
38+
'
39+
40+
test_expect_success 'index-pack will allow duplicate objects by default' '
41+
clear_packs &&
42+
create_pack dups.pack 100 &&
43+
git index-pack --stdin <dups.pack
44+
'
45+
46+
test_expect_success 'create batch-check test vectors' '
47+
cat >input <<-EOF &&
48+
$LO_SHA1
49+
$HI_SHA1
50+
$MISSING_SHA1
51+
EOF
52+
cat >expect <<-EOF
53+
$LO_SHA1 blob 2
54+
$HI_SHA1 blob 0
55+
$MISSING_SHA1 missing
56+
EOF
57+
'
58+
59+
test_expect_success 'lookup in duplicated pack (binary search)' '
60+
git cat-file --batch-check <input >actual &&
61+
test_cmp expect actual
62+
'
63+
64+
test_expect_success 'lookup in duplicated pack (GIT_USE_LOOKUP)' '
65+
(
66+
GIT_USE_LOOKUP=1 &&
67+
export GIT_USE_LOOKUP &&
68+
git cat-file --batch-check <input >actual
69+
) &&
70+
test_cmp expect actual
71+
'
72+
73+
test_expect_success 'index-pack can reject packs with duplicates' '
74+
clear_packs &&
75+
create_pack dups.pack 2 &&
76+
test_must_fail git index-pack --strict --stdin <dups.pack &&
77+
test_expect_code 1 git cat-file -e $LO_SHA1
78+
'
79+
80+
test_done

t/t5309-pack-delta-cycles.sh

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/sh
2+
3+
test_description='test index-pack handling of delta cycles in packfiles'
4+
. ./test-lib.sh
5+
. "$TEST_DIRECTORY"/lib-pack.sh
6+
7+
# Two similar-ish objects that we have computed deltas between.
8+
A=01d7713666f4de822776c7622c10f1b07de280dc
9+
B=e68fe8129b546b101aee9510c5328e7f21ca1d18
10+
11+
# double-check our hand-constucted packs
12+
test_expect_success 'index-pack works with a single delta (A->B)' '
13+
clear_packs &&
14+
{
15+
pack_header 2 &&
16+
pack_obj $A $B &&
17+
pack_obj $B
18+
} >ab.pack &&
19+
pack_trailer ab.pack &&
20+
git index-pack --stdin <ab.pack &&
21+
git cat-file -t $A &&
22+
git cat-file -t $B
23+
'
24+
25+
test_expect_success 'index-pack works with a single delta (B->A)' '
26+
clear_packs &&
27+
{
28+
pack_header 2 &&
29+
pack_obj $A &&
30+
pack_obj $B $A
31+
} >ba.pack &&
32+
pack_trailer ba.pack &&
33+
git index-pack --stdin <ba.pack &&
34+
git cat-file -t $A &&
35+
git cat-file -t $B
36+
'
37+
38+
test_expect_success 'index-pack detects missing base objects' '
39+
clear_packs &&
40+
{
41+
pack_header 1 &&
42+
pack_obj $A $B
43+
} >missing.pack &&
44+
pack_trailer missing.pack &&
45+
test_must_fail git index-pack --fix-thin --stdin <missing.pack
46+
'
47+
48+
test_expect_success 'index-pack detects REF_DELTA cycles' '
49+
clear_packs &&
50+
{
51+
pack_header 2 &&
52+
pack_obj $A $B &&
53+
pack_obj $B $A
54+
} >cycle.pack &&
55+
pack_trailer cycle.pack &&
56+
test_must_fail git index-pack --fix-thin --stdin <cycle.pack
57+
'
58+
59+
test_expect_failure 'failover to an object in another pack' '
60+
clear_packs &&
61+
git index-pack --stdin <ab.pack &&
62+
git index-pack --stdin --fix-thin <cycle.pack
63+
'
64+
65+
test_expect_failure 'failover to a duplicate object in the same pack' '
66+
clear_packs &&
67+
{
68+
pack_header 3 &&
69+
pack_obj $A $B &&
70+
pack_obj $B $A &&
71+
pack_obj $A
72+
} >recoverable.pack &&
73+
pack_trailer recoverable.pack &&
74+
git index-pack --fix-thin --stdin <recoverable.pack
75+
'
76+
77+
test_done

test-sha1.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@ int main(int ac, char **av)
55
git_SHA_CTX ctx;
66
unsigned char sha1[20];
77
unsigned bufsz = 8192;
8+
int binary = 0;
89
char *buffer;
910

10-
if (ac == 2)
11-
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
11+
if (ac == 2) {
12+
if (!strcmp(av[1], "-b"))
13+
binary = 1;
14+
else
15+
bufsz = strtoul(av[1], NULL, 10) * 1024 * 1024;
16+
}
1217

1318
if (!bufsz)
1419
bufsz = 8192;
@@ -42,6 +47,10 @@ int main(int ac, char **av)
4247
git_SHA1_Update(&ctx, buffer, this_sz);
4348
}
4449
git_SHA1_Final(sha1, &ctx);
45-
puts(sha1_to_hex(sha1));
50+
51+
if (binary)
52+
fwrite(sha1, 1, 20, stdout);
53+
else
54+
puts(sha1_to_hex(sha1));
4655
exit(0);
4756
}

0 commit comments

Comments
 (0)