Skip to content

Commit 36f0f34

Browse files
committed
Merge branch 'jt/repack-promisor-packs'
After a partial clone, repeated fetches from a promisor remote would accumulate many packfiles marked with the .promisor bit without ever coalescing them into fewer packfiles, hurting performance. "git repack" has now learned to repack them.

* jt/repack-promisor-packs:
  repack: repack promisor objects if -a or -A is set
  repack: refactor setup of pack-objects cmd
2 parents e72db08 + 5d19e81 commit 36f0f34

File tree

3 files changed

+214
-59
lines changed

3 files changed

+214
-59
lines changed

Documentation/git-repack.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ OPTIONS
4040
Note that users fetching over dumb protocols will have to fetch the
4141
whole new pack in order to get any contained object, no matter how many
4242
other objects in that pack they already have locally.
43+
+
44+
Promisor packfiles are repacked separately: if there are packfiles that
45+
have an associated ".promisor" file, these packfiles will be repacked
46+
into another separate pack, and an empty ".promisor" file corresponding
47+
to the new separate pack will be written.
4348

4449
-A::
4550
Same as `-a`, unless `-d` is used. Then any unreachable

builtin/repack.c

Lines changed: 135 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include "strbuf.h"
99
#include "string-list.h"
1010
#include "argv-array.h"
11+
#include "packfile.h"
12+
#include "object-store.h"
1113

1214
static int delta_base_offset = 1;
1315
static int pack_kept_objects = -1;
@@ -83,7 +85,7 @@ static void remove_pack_on_signal(int signo)
8385

8486
/*
8587
* Adds all packs hex strings to the fname list, which do not
86-
* have a corresponding .keep or .promisor file. These packs are not to
88+
* have a corresponding .keep file. These packs are not to
8789
* be kept if we are going to pack everything into one file.
8890
*/
8991
static void get_non_kept_pack_filenames(struct string_list *fname_list,
@@ -111,8 +113,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
111113

112114
fname = xmemdupz(e->d_name, len);
113115

114-
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
115-
!file_exists(mkpath("%s/%s.promisor", packdir, fname)))
116+
if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
116117
string_list_append_nodup(fname_list, fname);
117118
else
118119
free(fname);
@@ -122,7 +123,7 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
122123

123124
static void remove_redundant_pack(const char *dir_name, const char *base_name)
124125
{
125-
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"};
126+
const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
126127
int i;
127128
struct strbuf buf = STRBUF_INIT;
128129
size_t plen;
@@ -138,6 +139,117 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name)
138139
strbuf_release(&buf);
139140
}
140141

142+
/*
 * Options that "git repack" forwards to "git pack-objects".
 *
 * String members are passed through verbatim as the value of the
 * corresponding command-line option and are skipped when NULL; integer
 * members are boolean switches that add the option when non-zero.
 */
struct pack_objects_args {
	const char *window;		/* --window=<value> */
	const char *window_memory;	/* --window-memory=<value> */
	const char *depth;		/* --depth=<value> */
	const char *threads;		/* --threads=<value> */
	const char *max_pack_size;	/* --max-pack-size=<value> */
	int no_reuse_delta;		/* --no-reuse-delta */
	int no_reuse_object;		/* --no-reuse-object */
	int quiet;			/* --quiet */
	int local;			/* --local */
};
153+
154+
static void prepare_pack_objects(struct child_process *cmd,
155+
const struct pack_objects_args *args)
156+
{
157+
argv_array_push(&cmd->args, "pack-objects");
158+
if (args->window)
159+
argv_array_pushf(&cmd->args, "--window=%s", args->window);
160+
if (args->window_memory)
161+
argv_array_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
162+
if (args->depth)
163+
argv_array_pushf(&cmd->args, "--depth=%s", args->depth);
164+
if (args->threads)
165+
argv_array_pushf(&cmd->args, "--threads=%s", args->threads);
166+
if (args->max_pack_size)
167+
argv_array_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
168+
if (args->no_reuse_delta)
169+
argv_array_pushf(&cmd->args, "--no-reuse-delta");
170+
if (args->no_reuse_object)
171+
argv_array_pushf(&cmd->args, "--no-reuse-object");
172+
if (args->local)
173+
argv_array_push(&cmd->args, "--local");
174+
if (args->quiet)
175+
argv_array_push(&cmd->args, "--quiet");
176+
if (delta_base_offset)
177+
argv_array_push(&cmd->args, "--delta-base-offset");
178+
argv_array_push(&cmd->args, packtmp);
179+
cmd->git_cmd = 1;
180+
cmd->out = -1;
181+
}
182+
183+
/*
184+
* Write oid to the given struct child_process's stdin, starting it first if
185+
* necessary.
186+
*/
187+
static int write_oid(const struct object_id *oid, struct packed_git *pack,
188+
uint32_t pos, void *data)
189+
{
190+
struct child_process *cmd = data;
191+
192+
if (cmd->in == -1) {
193+
if (start_command(cmd))
194+
die("Could not start pack-objects to repack promisor objects");
195+
}
196+
197+
xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
198+
xwrite(cmd->in, "\n", 1);
199+
return 0;
200+
}
201+
202+
static void repack_promisor_objects(const struct pack_objects_args *args,
203+
struct string_list *names)
204+
{
205+
struct child_process cmd = CHILD_PROCESS_INIT;
206+
FILE *out;
207+
struct strbuf line = STRBUF_INIT;
208+
209+
prepare_pack_objects(&cmd, args);
210+
cmd.in = -1;
211+
212+
/*
213+
* NEEDSWORK: Giving pack-objects only the OIDs without any ordering
214+
* hints may result in suboptimal deltas in the resulting pack. See if
215+
* the OIDs can be sent with fake paths such that pack-objects can use a
216+
* {type -> existing pack order} ordering when computing deltas instead
217+
* of a {type -> size} ordering, which may produce better deltas.
218+
*/
219+
for_each_packed_object(write_oid, &cmd,
220+
FOR_EACH_OBJECT_PROMISOR_ONLY);
221+
222+
if (cmd.in == -1)
223+
/* No packed objects; cmd was never started */
224+
return;
225+
226+
close(cmd.in);
227+
228+
out = xfdopen(cmd.out, "r");
229+
while (strbuf_getline_lf(&line, out) != EOF) {
230+
char *promisor_name;
231+
int fd;
232+
if (line.len != 40)
233+
die("repack: Expecting 40 character sha1 lines only from pack-objects.");
234+
string_list_append(names, line.buf);
235+
236+
/*
237+
* pack-objects creates the .pack and .idx files, but not the
238+
* .promisor file. Create the .promisor file, which is empty.
239+
*/
240+
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
241+
line.buf);
242+
fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
243+
if (fd < 0)
244+
die_errno("unable to create '%s'", promisor_name);
245+
close(fd);
246+
free(promisor_name);
247+
}
248+
fclose(out);
249+
if (finish_command(&cmd))
250+
die("Could not finish pack-objects to repack promisor objects");
251+
}
252+
141253
#define ALL_INTO_ONE 1
142254
#define LOOSEN_UNREACHABLE 2
143255

@@ -150,6 +262,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
150262
{".pack"},
151263
{".idx"},
152264
{".bitmap", 1},
265+
{".promisor", 1},
153266
};
154267
struct child_process cmd = CHILD_PROCESS_INIT;
155268
struct string_list_item *item;
@@ -165,15 +278,9 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
165278
int delete_redundant = 0;
166279
const char *unpack_unreachable = NULL;
167280
int keep_unreachable = 0;
168-
const char *window = NULL, *window_memory = NULL;
169-
const char *depth = NULL;
170-
const char *threads = NULL;
171-
const char *max_pack_size = NULL;
172281
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
173-
int no_reuse_delta = 0, no_reuse_object = 0;
174282
int no_update_server_info = 0;
175-
int quiet = 0;
176-
int local = 0;
283+
struct pack_objects_args po_args = {NULL};
177284

178285
struct option builtin_repack_options[] = {
179286
OPT_BIT('a', NULL, &pack_everything,
@@ -183,30 +290,30 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
183290
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
184291
OPT_BOOL('d', NULL, &delete_redundant,
185292
N_("remove redundant packs, and run git-prune-packed")),
186-
OPT_BOOL('f', NULL, &no_reuse_delta,
293+
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
187294
N_("pass --no-reuse-delta to git-pack-objects")),
188-
OPT_BOOL('F', NULL, &no_reuse_object,
295+
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
189296
N_("pass --no-reuse-object to git-pack-objects")),
190297
OPT_BOOL('n', NULL, &no_update_server_info,
191298
N_("do not run git-update-server-info")),
192-
OPT__QUIET(&quiet, N_("be quiet")),
193-
OPT_BOOL('l', "local", &local,
299+
OPT__QUIET(&po_args.quiet, N_("be quiet")),
300+
OPT_BOOL('l', "local", &po_args.local,
194301
N_("pass --local to git-pack-objects")),
195302
OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
196303
N_("write bitmap index")),
197304
OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
198305
N_("with -A, do not loosen objects older than this")),
199306
OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
200307
N_("with -a, repack unreachable objects")),
201-
OPT_STRING(0, "window", &window, N_("n"),
308+
OPT_STRING(0, "window", &po_args.window, N_("n"),
202309
N_("size of the window used for delta compression")),
203-
OPT_STRING(0, "window-memory", &window_memory, N_("bytes"),
310+
OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
204311
N_("same as the above, but limit memory size instead of entries count")),
205-
OPT_STRING(0, "depth", &depth, N_("n"),
312+
OPT_STRING(0, "depth", &po_args.depth, N_("n"),
206313
N_("limits the maximum delta depth")),
207-
OPT_STRING(0, "threads", &threads, N_("n"),
314+
OPT_STRING(0, "threads", &po_args.threads, N_("n"),
208315
N_("limits the maximum number of threads")),
209-
OPT_STRING(0, "max-pack-size", &max_pack_size, N_("bytes"),
316+
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
210317
N_("maximum size of each packfile")),
211318
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
212319
N_("repack objects in packs marked with .keep")),
@@ -238,7 +345,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
238345

239346
sigchain_push_common(remove_pack_on_signal);
240347

241-
argv_array_push(&cmd.args, "pack-objects");
348+
prepare_pack_objects(&cmd, &po_args);
349+
242350
argv_array_push(&cmd.args, "--keep-true-parents");
243351
if (!pack_kept_objects)
244352
argv_array_push(&cmd.args, "--honor-pack-keep");
@@ -251,26 +359,14 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
251359
argv_array_push(&cmd.args, "--indexed-objects");
252360
if (repository_format_partial_clone)
253361
argv_array_push(&cmd.args, "--exclude-promisor-objects");
254-
if (window)
255-
argv_array_pushf(&cmd.args, "--window=%s", window);
256-
if (window_memory)
257-
argv_array_pushf(&cmd.args, "--window-memory=%s", window_memory);
258-
if (depth)
259-
argv_array_pushf(&cmd.args, "--depth=%s", depth);
260-
if (threads)
261-
argv_array_pushf(&cmd.args, "--threads=%s", threads);
262-
if (max_pack_size)
263-
argv_array_pushf(&cmd.args, "--max-pack-size=%s", max_pack_size);
264-
if (no_reuse_delta)
265-
argv_array_pushf(&cmd.args, "--no-reuse-delta");
266-
if (no_reuse_object)
267-
argv_array_pushf(&cmd.args, "--no-reuse-object");
268362
if (write_bitmaps)
269363
argv_array_push(&cmd.args, "--write-bitmap-index");
270364

271365
if (pack_everything & ALL_INTO_ONE) {
272366
get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
273367

368+
repack_promisor_objects(&po_args, &names);
369+
274370
if (existing_packs.nr && delete_redundant) {
275371
if (unpack_unreachable) {
276372
argv_array_pushf(&cmd.args,
@@ -292,17 +388,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
292388
argv_array_push(&cmd.args, "--incremental");
293389
}
294390

295-
if (local)
296-
argv_array_push(&cmd.args, "--local");
297-
if (quiet)
298-
argv_array_push(&cmd.args, "--quiet");
299-
if (delta_base_offset)
300-
argv_array_push(&cmd.args, "--delta-base-offset");
301-
302-
argv_array_push(&cmd.args, packtmp);
303-
304-
cmd.git_cmd = 1;
305-
cmd.out = -1;
306391
cmd.no_stdin = 1;
307392

308393
ret = start_command(&cmd);
@@ -320,7 +405,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
320405
if (ret)
321406
return ret;
322407

323-
if (!names.nr && !quiet)
408+
if (!names.nr && !po_args.quiet)
324409
printf("Nothing new to pack.\n");
325410

326411
/*
@@ -429,6 +514,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
429514

430515
/* End of pack replacement. */
431516

517+
reprepare_packed_git(the_repository);
518+
432519
if (delete_redundant) {
433520
int opts = 0;
434521
string_list_sort(&names);
@@ -441,7 +528,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
441528
if (!string_list_has_string(&names, sha1))
442529
remove_redundant_pack(packdir, item->string);
443530
}
444-
if (!quiet && isatty(2))
531+
if (!po_args.quiet && isatty(2))
445532
opts |= PRUNE_PACKED_VERBOSE;
446533
prune_packed_objects(opts);
447534
}

0 commit comments

Comments
 (0)