Skip to content

Commit 5d19e81

Browse files
jonathantanmygitster
authored andcommitted
repack: repack promisor objects if -a or -A is set
Currently, repack does not touch promisor packfiles at all, potentially causing the performance of repositories that have many such packfiles to drop. Therefore, repack all promisor objects if invoked with -a or -A. This is done by an additional invocation of pack-objects on all promisor objects individually given, which takes care of deduplication and allows the resulting packfiles to respect flags such as --max-pack-size. Signed-off-by: Jonathan Tan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 2b958e7 commit 5d19e81

File tree

3 files changed

+158
-15
lines changed

3 files changed

+158
-15
lines changed

Documentation/git-repack.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ OPTIONS
4040
Note that users fetching over dumb protocols will have to fetch the
4141
whole new pack in order to get any contained object, no matter how many
4242
other objects in that pack they already have locally.
43+
+
44+
Promisor packfiles are repacked separately: if there are packfiles that
45+
have an associated ".promisor" file, these packfiles will be repacked
46+
into another separate pack, and an empty ".promisor" file corresponding
47+
to the new separate pack will be written.
4348

4449
-A::
4550
Same as `-a`, unless `-d` is used. Then any unreachable

builtin/repack.c

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "strbuf.h"
99
#include "string-list.h"
1010
#include "argv-array.h"
11+
#include "packfile.h"
1112

1213
static int delta_base_offset = 1;
1314
static int pack_kept_objects = -1;
@@ -83,7 +84,7 @@ static void remove_pack_on_signal(int signo)
8384

8485
/*
8586
* Adds all packs hex strings to the fname list, which do not
86-
* have a corresponding .keep or .promisor file. These packs are not to
87+
* have a corresponding .keep file. These packs are not to
8788
* be kept if we are going to pack everything into one file.
8889
*/
8990
static void get_non_kept_pack_filenames(struct string_list *fname_list,
@@ -111,8 +112,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
111112

112113
fname = xmemdupz(e->d_name, len);
113114

114-
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
115-
!file_exists(mkpath("%s/%s.promisor", packdir, fname)))
115+
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
116116
string_list_append_nodup(fname_list, fname);
117117
else
118118
free(fname);
@@ -122,7 +122,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
122122

123123
static void remove_redundant_pack(const char *dir_name, const char *base_name)
124124
{
125-
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"};
125+
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
126126
int i;
127127
struct strbuf buf = STRBUF_INIT;
128128
size_t plen;
@@ -179,6 +179,76 @@ static void prepare_pack_objects(struct child_process *cmd,
179179
cmd->out = -1;
180180
}
181181

182+
/*
183+
* Write oid to the given struct child_process's stdin, starting it first if
184+
* necessary.
185+
*/
186+
static int write_oid(const struct object_id *oid, struct packed_git *pack,
187+
uint32_t pos, void *data)
188+
{
189+
struct child_process *cmd = data;
190+
191+
if (cmd->in == -1) {
192+
if (start_command(cmd))
193+
die("Could not start pack-objects to repack promisor objects");
194+
}
195+
196+
xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
197+
xwrite(cmd->in, "\n", 1);
198+
return 0;
199+
}
200+
201+
static void repack_promisor_objects(const struct pack_objects_args *args,
202+
struct string_list *names)
203+
{
204+
struct child_process cmd = CHILD_PROCESS_INIT;
205+
FILE *out;
206+
struct strbuf line = STRBUF_INIT;
207+
208+
prepare_pack_objects(&cmd, args);
209+
cmd.in = -1;
210+
211+
/*
212+
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
213+
* hints may result in suboptimal deltas in the resulting pack. See if
214+
* the OIDs can be sent with fake paths such that pack-objects can use a
215+
* {type -> existing pack order} ordering when computing deltas instead
216+
* of a {type -> size} ordering, which may produce better deltas.
217+
*/
218+
for_each_packed_object(write_oid, &cmd,
219+
FOR_EACH_OBJECT_PROMISOR_ONLY);
220+
221+
if (cmd.in == -1)
222+
/* No packed objects; cmd was never started */
223+
return;
224+
225+
close(cmd.in);
226+
227+
out = xfdopen(cmd.out, "r");
228+
while (strbuf_getline_lf(&line, out) != EOF) {
229+
char *promisor_name;
230+
int fd;
231+
if (line.len != 40)
232+
die("repack: Expecting 40 character sha1 lines only from pack-objects.");
233+
string_list_append(names, line.buf);
234+
235+
/*
236+
* pack-objects creates the .pack and .idx files, but not the
237+
* .promisor file. Create the .promisor file, which is empty.
238+
*/
239+
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
240+
line.buf);
241+
fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
242+
if (fd < 0)
243+
die_errno("unable to create '%s'", promisor_name);
244+
close(fd);
245+
free(promisor_name);
246+
}
247+
fclose(out);
248+
if (finish_command(&cmd))
249+
die("Could not finish pack-objects to repack promisor objects");
250+
}
251+
182252
#define ALL_INTO_ONE 1
183253
#define LOOSEN_UNREACHABLE 2
184254

@@ -191,6 +261,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
191261
{".pack"},
192262
{".idx"},
193263
{".bitmap", 1},
264+
{".promisor", 1},
194265
};
195266
struct child_process cmd = CHILD_PROCESS_INIT;
196267
struct string_list_item *item;
@@ -293,6 +364,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
293364
if (pack_everything & ALL_INTO_ONE) {
294365
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
295366

367+
repack_promisor_objects(&po_args, &names);
368+
296369
if (existing_packs.nr && delete_redundant) {
297370
if (unpack_unreachable) {
298371
argv_array_pushf(&cmd.args,
@@ -440,6 +513,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
440513

441514
/* End of pack replacement. */
442515

516+
reprepare_packed_git(the_repository);
517+
443518
if (delete_redundant) {
444519
int opts = 0;
445520
string_list_sort(&names);

t/t0410-partial-clone.sh

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -271,28 +271,91 @@ test_expect_success 'rev-list accepts missing and promised objects on command li
271271
git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB"
272272
'
273273

274-
test_expect_success 'gc does not repack promisor objects' '
274+
test_expect_success 'gc repacks promisor objects separately from non-promisor objects' '
275275
rm -rf repo &&
276276
test_create_repo repo &&
277-
test_commit -C repo my_commit &&
277+
test_commit -C repo one &&
278+
test_commit -C repo two &&
278279
279-
TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) &&
280-
HASH=$(printf "$TREE_HASH\n" | pack_as_from_promisor) &&
280+
TREE_ONE=$(git -C repo rev-parse one^{tree}) &&
281+
printf "$TREE_ONE\n" | pack_as_from_promisor &&
282+
TREE_TWO=$(git -C repo rev-parse two^{tree}) &&
283+
printf "$TREE_TWO\n" | pack_as_from_promisor &&
281284
282285
git -C repo config core.repositoryformatversion 1 &&
283286
git -C repo config extensions.partialclone "arbitrary string" &&
284287
git -C repo gc &&
285288
286-
# Ensure that the promisor packfile still exists, and remove it
287-
test -e repo/.git/objects/pack/pack-$HASH.pack &&
288-
rm repo/.git/objects/pack/pack-$HASH.* &&
289-
290-
# Ensure that the single other pack contains the commit, but not the tree
289+
# Ensure that exactly one promisor packfile exists, and that it
290+
# contains the trees but not the commits
291+
ls repo/.git/objects/pack/pack-*.promisor >promisorlist &&
292+
test_line_count = 1 promisorlist &&
293+
PROMISOR_PACKFILE=$(sed "s/.promisor/.pack/" <promisorlist) &&
294+
git verify-pack $PROMISOR_PACKFILE -v >out &&
295+
grep "$TREE_ONE" out &&
296+
grep "$TREE_TWO" out &&
297+
! grep "$(git -C repo rev-parse one)" out &&
298+
! grep "$(git -C repo rev-parse two)" out &&
299+
300+
# Remove the promisor packfile and associated files
301+
rm $(sed "s/.promisor//" <promisorlist).* &&
302+
303+
# Ensure that the single other pack contains the commits, but not the
304+
# trees
291305
ls repo/.git/objects/pack/pack-*.pack >packlist &&
292306
test_line_count = 1 packlist &&
293307
git verify-pack repo/.git/objects/pack/pack-*.pack -v >out &&
294-
grep "$(git -C repo rev-parse HEAD)" out &&
295-
! grep "$TREE_HASH" out
308+
grep "$(git -C repo rev-parse one)" out &&
309+
grep "$(git -C repo rev-parse two)" out &&
310+
! grep "$TREE_ONE" out &&
311+
! grep "$TREE_TWO" out
312+
'
313+
314+
test_expect_success 'gc does not repack promisor objects if there are none' '
315+
rm -rf repo &&
316+
test_create_repo repo &&
317+
test_commit -C repo one &&
318+
319+
git -C repo config core.repositoryformatversion 1 &&
320+
git -C repo config extensions.partialclone "arbitrary string" &&
321+
git -C repo gc &&
322+
323+
# Ensure that only one pack exists
324+
ls repo/.git/objects/pack/pack-*.pack >packlist &&
325+
test_line_count = 1 packlist
326+
'
327+
328+
repack_and_check () {
329+
rm -rf repo2 &&
330+
cp -r repo repo2 &&
331+
git -C repo2 repack $1 -d &&
332+
git -C repo2 fsck &&
333+
334+
git -C repo2 cat-file -e $2 &&
335+
git -C repo2 cat-file -e $3
336+
}
337+
338+
test_expect_success 'repack -d does not irreversibly delete promisor objects' '
339+
rm -rf repo &&
340+
test_create_repo repo &&
341+
git -C repo config core.repositoryformatversion 1 &&
342+
git -C repo config extensions.partialclone "arbitrary string" &&
343+
344+
git -C repo commit --allow-empty -m one &&
345+
git -C repo commit --allow-empty -m two &&
346+
git -C repo commit --allow-empty -m three &&
347+
git -C repo commit --allow-empty -m four &&
348+
ONE=$(git -C repo rev-parse HEAD^^^) &&
349+
TWO=$(git -C repo rev-parse HEAD^^) &&
350+
THREE=$(git -C repo rev-parse HEAD^) &&
351+
352+
printf "$TWO\n" | pack_as_from_promisor &&
353+
printf "$THREE\n" | pack_as_from_promisor &&
354+
delete_object repo "$ONE" &&
355+
356+
repack_and_check -a "$TWO" "$THREE" &&
357+
repack_and_check -A "$TWO" "$THREE" &&
358+
repack_and_check -l "$TWO" "$THREE"
296359
'
297360

298361
test_expect_success 'gc stops traversal when a missing but promised object is reached' '

0 commit comments

Comments
 (0)