Skip to content

Commit af626ac

Browse files
ttaylorrgitster
authored andcommitted
pack-bitmap: enable reuse from all bitmapped packs
Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple bitmapped packs for verbatim reuse, allow marking objects from all bitmapped packs as eligible for reuse. Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack contained in the MIDX as a reuse candidate. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 9410741 commit af626ac

File tree

5 files changed

+245
-17
lines changed

5 files changed

+245
-17
lines changed

Documentation/config/pack.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option
2828
to linkgit:git-repack[1].
2929

3030
pack.allowPackReuse::
31-
When true or "single", and when reachability bitmaps are enabled,
32-
pack-objects will try to send parts of the bitmapped packfile
33-
verbatim. This can reduce memory and CPU usage to serve fetches,
34-
but might result in sending a slightly larger pack. Defaults to
35-
true.
31+
When true or "single", and when reachability bitmaps are
32+
enabled, pack-objects will try to send parts of the bitmapped
33+
packfile verbatim. When "multi", and when a multi-pack
34+
reachability bitmap is available, pack-objects will try to send
35+
parts of all packs in the MIDX.
36+
+
37+
If only a single pack bitmap is available, and
38+
`pack.allowPackReuse` is set to "multi", reuse parts of just the
39+
bitmapped packfile. This can reduce memory and CPU usage to
40+
serve fetches, but might result in sending a slightly larger
41+
pack. Defaults to true.
3642

3743
pack.island::
3844
An extended regular expression configuring a set of delta

builtin/pack-objects.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ static int use_bitmap_index = -1;
232232
static enum {
233233
NO_PACK_REUSE = 0,
234234
SINGLE_PACK_REUSE,
235+
MULTI_PACK_REUSE,
235236
} allow_pack_reuse = SINGLE_PACK_REUSE;
236237
static enum {
237238
WRITE_BITMAP_FALSE = 0,
@@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v,
32513252
if (res < 0) {
32523253
if (!strcasecmp(v, "single"))
32533254
allow_pack_reuse = SINGLE_PACK_REUSE;
3255+
else if (!strcasecmp(v, "multi"))
3256+
allow_pack_reuse = MULTI_PACK_REUSE;
32543257
else
32553258
die(_("invalid pack.allowPackReuse value: '%s'"), v);
32563259
} else if (res) {
@@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
40294032
reuse_partial_packfile_from_bitmap(bitmap_git,
40304033
&reuse_packfiles,
40314034
&reuse_packfiles_nr,
4032-
&reuse_packfile_bitmap);
4035+
&reuse_packfile_bitmap,
4036+
allow_pack_reuse == MULTI_PACK_REUSE);
40334037

40344038
if (reuse_packfiles) {
40354039
reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);

pack-bitmap.c

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb)
20402040
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
20412041
struct bitmapped_pack **packs_out,
20422042
size_t *packs_nr_out,
2043-
struct bitmap **reuse_out)
2043+
struct bitmap **reuse_out,
2044+
int multi_pack_reuse)
20442045
{
20452046
struct repository *r = the_repository;
20462047
struct bitmapped_pack *packs = NULL;
@@ -2064,37 +2065,50 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
20642065
free(packs);
20652066
return;
20662067
}
2068+
20672069
if (!pack.bitmap_nr)
2068-
continue; /* no objects from this pack */
2069-
if (pack.bitmap_pos)
2070-
continue; /* not preferred pack */
2070+
continue;
2071+
2072+
if (!multi_pack_reuse && pack.bitmap_pos) {
2073+
/*
2074+
* If we're only reusing a single pack, skip
2075+
* over any packs which are not positioned at
2076+
* the beginning of the MIDX bitmap.
2077+
*
2078+
* This is consistent with the existing
2079+
* single-pack reuse behavior, which only reuses
2080+
* parts of the MIDX's preferred pack.
2081+
*/
2082+
continue;
2083+
}
20712084

20722085
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
20732086
memcpy(&packs[packs_nr++], &pack, sizeof(pack));
20742087

20752088
objects_nr += pack.p->num_objects;
2089+
2090+
if (!multi_pack_reuse)
2091+
break;
20762092
}
20772093

20782094
QSORT(packs, packs_nr, bitmapped_pack_cmp);
20792095
} else {
20802096
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
20812097

20822098
packs[packs_nr].p = bitmap_git->pack;
2083-
packs[packs_nr].bitmap_pos = 0;
20842099
packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
2100+
packs[packs_nr].bitmap_pos = 0;
20852101

2086-
objects_nr = packs[packs_nr++].p->num_objects;
2102+
objects_nr = packs[packs_nr++].bitmap_nr;
20872103
}
20882104

20892105
word_alloc = objects_nr / BITS_IN_EWORD;
20902106
if (objects_nr % BITS_IN_EWORD)
20912107
word_alloc++;
20922108
reuse = bitmap_word_alloc(word_alloc);
20932109

2094-
if (packs_nr != 1)
2095-
BUG("pack reuse not yet implemented for multiple packs");
2096-
2097-
reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse);
2110+
for (i = 0; i < packs_nr; i++)
2111+
reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);
20982112

20992113
if (bitmap_is_empty(reuse)) {
21002114
free(packs);

pack-bitmap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
8080
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
8181
struct bitmapped_pack **packs_out,
8282
size_t *packs_nr_out,
83-
struct bitmap **reuse_out);
83+
struct bitmap **reuse_out,
84+
int multi_pack_reuse);
8485
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
8586
kh_oid_map_t *reused_bitmaps, int show_progress);
8687
void free_bitmap_index(struct bitmap_index *);

t/t5332-multi-pack-reuse.sh

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/bin/sh
2+
3+
test_description='pack-objects multi-pack reuse'
4+
5+
. ./test-lib.sh
6+
. "$TEST_DIRECTORY"/lib-bitmap.sh
7+
8+
objdir=.git/objects
9+
packdir=$objdir/pack
10+
11+
test_pack_reused () {
12+
test_trace2_data pack-objects pack-reused "$1"
13+
}
14+
15+
test_packs_reused () {
16+
test_trace2_data pack-objects packs-reused "$1"
17+
}
18+
19+
20+
# pack_position <object> </path/to/pack.idx
21+
pack_position () {
22+
git show-index >objects &&
23+
grep "$1" objects | cut -d" " -f1
24+
}
25+
26+
test_expect_success 'preferred pack is reused for single-pack reuse' '
27+
test_config pack.allowPackReuse single &&
28+
29+
for i in A B
30+
do
31+
test_commit "$i" &&
32+
git repack -d || return 1
33+
done &&
34+
35+
git multi-pack-index write --bitmap &&
36+
37+
: >trace2.txt &&
38+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
39+
git pack-objects --stdout --revs --all >/dev/null &&
40+
41+
test_pack_reused 3 <trace2.txt &&
42+
test_packs_reused 1 <trace2.txt
43+
'
44+
45+
test_expect_success 'enable multi-pack reuse' '
46+
git config pack.allowPackReuse multi
47+
'
48+
49+
test_expect_success 'reuse all objects from subset of bitmapped packs' '
50+
test_commit C &&
51+
git repack -d &&
52+
53+
git multi-pack-index write --bitmap &&
54+
55+
cat >in <<-EOF &&
56+
$(git rev-parse C)
57+
^$(git rev-parse A)
58+
EOF
59+
60+
: >trace2.txt &&
61+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
62+
git pack-objects --stdout --revs <in >/dev/null &&
63+
64+
test_pack_reused 6 <trace2.txt &&
65+
test_packs_reused 2 <trace2.txt
66+
'
67+
68+
test_expect_success 'reuse all objects from all packs' '
69+
: >trace2.txt &&
70+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
71+
git pack-objects --stdout --revs --all >/dev/null &&
72+
73+
test_pack_reused 9 <trace2.txt &&
74+
test_packs_reused 3 <trace2.txt
75+
'
76+
77+
test_expect_success 'reuse objects from first pack with middle gap' '
78+
for i in D E F
79+
do
80+
test_commit "$i" || return 1
81+
done &&
82+
83+
# Set "pack.window" to zero to ensure that we do not create any
84+
# deltas, which could alter the amount of pack reuse we perform
85+
# (if, for e.g., we are not sending one or more bases).
86+
D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" &&
87+
88+
d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" &&
89+
e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" &&
90+
f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" &&
91+
92+
# commits F, E, and D, should appear in that order at the
93+
# beginning of the pack
94+
test $f_pos -lt $e_pos &&
95+
test $e_pos -lt $d_pos &&
96+
97+
# Ensure that the pack we are constructing sorts ahead of any
98+
# other packs in lexical/bitmap order by choosing it as the
99+
# preferred pack.
100+
git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" &&
101+
102+
cat >in <<-EOF &&
103+
$(git rev-parse E)
104+
^$(git rev-parse D)
105+
EOF
106+
107+
: >trace2.txt &&
108+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
109+
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
110+
111+
test_pack_reused 3 <trace2.txt &&
112+
test_packs_reused 1 <trace2.txt
113+
'
114+
115+
test_expect_success 'reuse objects from middle pack with middle gap' '
116+
rm -fr $packdir/multi-pack-index* &&
117+
118+
# Ensure that the pack we are constructing sort into any
119+
# position *but* the first one, by choosing a different pack as
120+
# the preferred one.
121+
git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" &&
122+
123+
cat >in <<-EOF &&
124+
$(git rev-parse E)
125+
^$(git rev-parse D)
126+
EOF
127+
128+
: >trace2.txt &&
129+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
130+
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
131+
132+
test_pack_reused 3 <trace2.txt &&
133+
test_packs_reused 1 <trace2.txt
134+
'
135+
136+
test_expect_success 'omit delta with uninteresting base (same pack)' '
137+
git repack -adk &&
138+
139+
test_seq 32 >f &&
140+
git add f &&
141+
test_tick &&
142+
git commit -m "delta" &&
143+
delta="$(git rev-parse HEAD)" &&
144+
145+
test_seq 64 >f &&
146+
test_tick &&
147+
git commit -a -m "base" &&
148+
base="$(git rev-parse HEAD)" &&
149+
150+
test_commit other &&
151+
152+
git repack -d &&
153+
154+
have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" &&
155+
156+
git multi-pack-index write --bitmap &&
157+
158+
cat >in <<-EOF &&
159+
$(git rev-parse other)
160+
^$base
161+
EOF
162+
163+
: >trace2.txt &&
164+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
165+
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
166+
167+
# We can only reuse the 3 objects corresponding to "other" from
168+
# the latest pack.
169+
#
170+
# This is because even though we want "delta", we do not want
171+
# "base", meaning that we have to inflate the delta/base-pair
172+
# corresponding to the blob in commit "delta", which bypasses
173+
# the pack-reuse mechanism.
174+
#
175+
# The remaining objects from the other pack are similarly not
176+
# reused because their objects are on the uninteresting side of
177+
# the query.
178+
test_pack_reused 3 <trace2.txt &&
179+
test_packs_reused 1 <trace2.txt
180+
'
181+
182+
test_expect_success 'omit delta from uninteresting base (cross pack)' '
183+
cat >in <<-EOF &&
184+
$(git rev-parse $base)
185+
^$(git rev-parse $delta)
186+
EOF
187+
188+
P="$(git pack-objects --revs $packdir/pack <in)" &&
189+
190+
git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
191+
192+
: >trace2.txt &&
193+
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
194+
git pack-objects --stdout --delta-base-offset --all >/dev/null &&
195+
196+
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
197+
objects_nr="$(git rev-list --count --all --objects)" &&
198+
199+
test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
200+
test_packs_reused $packs_nr <trace2.txt
201+
'
202+
203+
test_done

0 commit comments

Comments
 (0)