Skip to content

Commit 16a0e92

Browse files
committed
Merge branch 'tb/geom-repack-with-keep-and-max'
Teach "git repack --geometric" work better with "--keep-pack" and avoid corrupting the repository when packsize limit is used. * tb/geom-repack-with-keep-and-max: builtin/repack.c: ensure that `names` is sorted t7703: demonstrate object corruption with pack.packSizeLimit repack: respect --keep-pack with geometric repack
2 parents c276c21 + 66731ff commit 16a0e92

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

builtin/repack.c

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list,
137137
string_list_append_nodup(fname_nonkept_list, fname);
138138
}
139139
closedir(dir);
140+
141+
string_list_sort(fname_kept_list);
140142
}
141143

142144
static void remove_redundant_pack(const char *dir_name, const char *base_name)
@@ -332,17 +334,38 @@ static int geometry_cmp(const void *va, const void *vb)
332334
return 0;
333335
}
334336

335-
static void init_pack_geometry(struct pack_geometry **geometry_p)
337+
static void init_pack_geometry(struct pack_geometry **geometry_p,
338+
struct string_list *existing_kept_packs)
336339
{
337340
struct packed_git *p;
338341
struct pack_geometry *geometry;
342+
struct strbuf buf = STRBUF_INIT;
339343

340344
*geometry_p = xcalloc(1, sizeof(struct pack_geometry));
341345
geometry = *geometry_p;
342346

343347
for (p = get_all_packs(the_repository); p; p = p->next) {
344-
if (!pack_kept_objects && p->pack_keep)
345-
continue;
348+
if (!pack_kept_objects) {
349+
/*
350+
* Any pack that has its pack_keep bit set will appear
351+
* in existing_kept_packs below, but this saves us from
352+
* doing a more expensive check.
353+
*/
354+
if (p->pack_keep)
355+
continue;
356+
357+
/*
358+
* The pack may be kept via the --keep-pack option;
359+
* check 'existing_kept_packs' to determine whether to
360+
* ignore it.
361+
*/
362+
strbuf_reset(&buf);
363+
strbuf_addstr(&buf, pack_basename(p));
364+
strbuf_strip_suffix(&buf, ".pack");
365+
366+
if (string_list_has_string(existing_kept_packs, buf.buf))
367+
continue;
368+
}
346369

347370
ALLOC_GROW(geometry->pack,
348371
geometry->pack_nr + 1,
@@ -353,6 +376,7 @@ static void init_pack_geometry(struct pack_geometry **geometry_p)
353376
}
354377

355378
QSORT(geometry->pack, geometry->pack_nr, geometry_cmp);
379+
strbuf_release(&buf);
356380
}
357381

358382
static void split_pack_geometry(struct pack_geometry *geometry, int factor)
@@ -714,17 +738,20 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
714738
strbuf_release(&path);
715739
}
716740

741+
packdir = mkpathdup("%s/pack", get_object_directory());
742+
packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
743+
packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
744+
745+
collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
746+
&keep_pack_list);
747+
717748
if (geometric_factor) {
718749
if (pack_everything)
719750
die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a");
720-
init_pack_geometry(&geometry);
751+
init_pack_geometry(&geometry, &existing_kept_packs);
721752
split_pack_geometry(geometry, geometric_factor);
722753
}
723754

724-
packdir = mkpathdup("%s/pack", get_object_directory());
725-
packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
726-
packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
727-
728755
sigchain_push_common(remove_pack_on_signal);
729756

730757
prepare_pack_objects(&cmd, &po_args);
@@ -764,9 +791,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
764791
if (use_delta_islands)
765792
strvec_push(&cmd.args, "--delta-islands");
766793

767-
collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
768-
&keep_pack_list);
769-
770794
if (pack_everything & ALL_INTO_ONE) {
771795
repack_promisor_objects(&po_args, &names);
772796

@@ -832,6 +856,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
832856
if (!names.nr && !po_args.quiet)
833857
printf_ln(_("Nothing new to pack."));
834858

859+
string_list_sort(&names);
860+
835861
for_each_string_list_item(item, &names) {
836862
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
837863
}
@@ -872,7 +898,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
872898

873899
if (delete_redundant && pack_everything & ALL_INTO_ONE) {
874900
const int hexsz = the_hash_algo->hexsz;
875-
string_list_sort(&names);
876901
for_each_string_list_item(item, &existing_nonkept_packs) {
877902
char *sha1;
878903
size_t len = strlen(item->string);

t/t7703-repack-geometric.sh

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ test_description='git repack --geometric works correctly'
77
GIT_TEST_MULTI_PACK_INDEX=0
88

99
objdir=.git/objects
10+
packdir=$objdir/pack
1011
midx=$objdir/pack/multi-pack-index
1112

1213
test_expect_success '--geometric with no packs' '
@@ -180,6 +181,34 @@ test_expect_success '--geometric ignores kept packs' '
180181
)
181182
'
182183

184+
test_expect_success '--geometric ignores --keep-pack packs' '
185+
git init geometric &&
186+
test_when_finished "rm -fr geometric" &&
187+
(
188+
cd geometric &&
189+
190+
# Create two equal-sized packs
191+
test_commit kept && # 3 objects
192+
git repack -d &&
193+
test_commit pack && # 3 objects
194+
git repack -d &&
195+
196+
find $objdir/pack -type f -name "*.pack" | sort >packs.before &&
197+
git repack --geometric 2 -dm \
198+
--keep-pack="$(basename "$(head -n 1 packs.before)")" >out &&
199+
find $objdir/pack -type f -name "*.pack" | sort >packs.after &&
200+
201+
# Packs should not have changed (only one non-kept pack, no
202+
# loose objects), but $midx should now exist.
203+
grep "Nothing new to pack" out &&
204+
test_path_is_file $midx &&
205+
206+
test_cmp packs.before packs.after &&
207+
208+
git fsck
209+
)
210+
'
211+
183212
test_expect_success '--geometric chooses largest MIDX preferred pack' '
184213
git init geometric &&
185214
test_when_finished "rm -fr geometric" &&
@@ -202,4 +231,50 @@ test_expect_success '--geometric chooses largest MIDX preferred pack' '
202231
)
203232
'
204233

234+
test_expect_success '--geometric with pack.packSizeLimit' '
235+
git init pack-rewrite &&
236+
test_when_finished "rm -fr pack-rewrite" &&
237+
(
238+
cd pack-rewrite &&
239+
240+
test-tool genrandom foo 1048576 >foo &&
241+
test-tool genrandom bar 1048576 >bar &&
242+
243+
git add foo bar &&
244+
test_tick &&
245+
git commit -m base &&
246+
247+
git rev-parse HEAD:foo HEAD:bar >p1.objects &&
248+
git rev-parse HEAD HEAD^{tree} >p2.objects &&
249+
250+
# These two packs each contain two objects, so the following
251+
# `--geometric` repack will try to combine them.
252+
p1="$(git pack-objects $packdir/pack <p1.objects)" &&
253+
p2="$(git pack-objects $packdir/pack <p2.objects)" &&
254+
255+
# Remove any loose objects in packs, since we do not want extra
256+
# copies around (which would mask over potential object
257+
# corruption issues).
258+
git prune-packed &&
259+
260+
# Both p1 and p2 will be rolled up, but pack-objects will write
261+
# three packs:
262+
#
263+
# - one containing object "foo",
264+
# - another containing object "bar",
265+
# - a final pack containing the commit and tree objects
266+
# (identical to p2 above)
267+
git repack --geometric 2 -d --max-pack-size=1048576 &&
268+
269+
# Ensure `repack` can detect that the third pack it wrote
270+
# (containing just the tree and commit objects) was identical to
271+
# one that was below the geometric split, so that we can save it
272+
# from deletion.
273+
#
274+
# If `repack` fails to do that, we will incorrectly delete p2,
275+
# causing object corruption.
276+
git fsck
277+
)
278+
'
279+
205280
test_done

0 commit comments

Comments
 (0)