Skip to content

Commit cf0879f

Browse files
committed
Merge branch 'sc/pack-redundant'
Update the implementation of pack-redundant for performance in a repository with many packfiles.

* sc/pack-redundant:
  pack-redundant: consistent sort method
  pack-redundant: rename pack_list.all_objects
  pack-redundant: new algorithm to find min packs
  pack-redundant: delete redundant code
  pack-redundant: delay creation of unique_objects
  t5323: test cases for git-pack-redundant
2 parents 3710f60 + 0e37abd commit cf0879f

File tree

2 files changed

+559
-140
lines changed

2 files changed

+559
-140
lines changed

builtin/pack-redundant.c

Lines changed: 92 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,10 @@ static struct pack_list {
3232
struct pack_list *next;
3333
struct packed_git *pack;
3434
struct llist *unique_objects;
35-
struct llist *all_objects;
35+
struct llist *remaining_objects;
36+
size_t all_objects_size;
3637
} *local_packs = NULL, *altodb_packs = NULL;
3738

38-
struct pll {
39-
struct pll *next;
40-
struct pack_list *pl;
41-
};
42-
4339
static struct llist_item *free_nodes;
4440

4541
static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +59,6 @@ static inline struct llist_item *llist_item_get(void)
6359
return new_item;
6460
}
6561

66-
static void llist_free(struct llist *list)
67-
{
68-
while ((list->back = list->front)) {
69-
list->front = list->front->next;
70-
llist_item_put(list->back);
71-
}
72-
free(list);
73-
}
74-
7562
static inline void llist_init(struct llist **list)
7663
{
7764
*list = xmalloc(sizeof(struct llist));
@@ -254,6 +241,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
254241
struct llist_item *p1_hint = NULL, *p2_hint = NULL;
255242
const unsigned int hashsz = the_hash_algo->rawsz;
256243

244+
if (!p1->unique_objects)
245+
p1->unique_objects = llist_copy(p1->remaining_objects);
246+
if (!p2->unique_objects)
247+
p2->unique_objects = llist_copy(p2->remaining_objects);
248+
257249
p1_base = p1->pack->index_data;
258250
p2_base = p2->pack->index_data;
259251
p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
@@ -285,78 +277,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
285277
}
286278
}
287279

288-
static void pll_free(struct pll *l)
289-
{
290-
struct pll *old;
291-
struct pack_list *opl;
292-
293-
while (l) {
294-
old = l;
295-
while (l->pl) {
296-
opl = l->pl;
297-
l->pl = opl->next;
298-
free(opl);
299-
}
300-
l = l->next;
301-
free(old);
302-
}
303-
}
304-
305-
/* all the permutations have to be free()d at the same time,
306-
* since they refer to each other
307-
*/
308-
static struct pll * get_permutations(struct pack_list *list, int n)
309-
{
310-
struct pll *subset, *ret = NULL, *new_pll = NULL;
311-
312-
if (list == NULL || pack_list_size(list) < n || n == 0)
313-
return NULL;
314-
315-
if (n == 1) {
316-
while (list) {
317-
new_pll = xmalloc(sizeof(*new_pll));
318-
new_pll->pl = NULL;
319-
pack_list_insert(&new_pll->pl, list);
320-
new_pll->next = ret;
321-
ret = new_pll;
322-
list = list->next;
323-
}
324-
return ret;
325-
}
326-
327-
while (list->next) {
328-
subset = get_permutations(list->next, n - 1);
329-
while (subset) {
330-
new_pll = xmalloc(sizeof(*new_pll));
331-
new_pll->pl = subset->pl;
332-
pack_list_insert(&new_pll->pl, list);
333-
new_pll->next = ret;
334-
ret = new_pll;
335-
subset = subset->next;
336-
}
337-
list = list->next;
338-
}
339-
return ret;
340-
}
341-
342-
static int is_superset(struct pack_list *pl, struct llist *list)
343-
{
344-
struct llist *diff;
345-
346-
diff = llist_copy(list);
347-
348-
while (pl) {
349-
llist_sorted_difference_inplace(diff, pl->all_objects);
350-
if (diff->size == 0) { /* we're done */
351-
llist_free(diff);
352-
return 1;
353-
}
354-
pl = pl->next;
355-
}
356-
llist_free(diff);
357-
return 0;
358-
}
359-
360280
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
361281
{
362282
size_t ret = 0;
@@ -421,14 +341,58 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
421341
return ret;
422342
}
423343

344+
static int cmp_remaining_objects(const void *a, const void *b)
345+
{
346+
struct pack_list *pl_a = *((struct pack_list **)a);
347+
struct pack_list *pl_b = *((struct pack_list **)b);
348+
349+
if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
350+
/* have the same remaining_objects, big pack first */
351+
if (pl_a->all_objects_size == pl_b->all_objects_size)
352+
return 0;
353+
else if (pl_a->all_objects_size < pl_b->all_objects_size)
354+
return 1;
355+
else
356+
return -1;
357+
} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
358+
/* sort by remaining objects, more objects first */
359+
return 1;
360+
} else {
361+
return -1;
362+
}
363+
}
364+
365+
/* Sort pack_list, greater size of remaining_objects first */
366+
static void sort_pack_list(struct pack_list **pl)
367+
{
368+
struct pack_list **ary, *p;
369+
int i;
370+
size_t n = pack_list_size(*pl);
371+
372+
if (n < 2)
373+
return;
374+
375+
/* prepare an array of packed_list for easier sorting */
376+
ary = xcalloc(n, sizeof(struct pack_list *));
377+
for (n = 0, p = *pl; p; p = p->next)
378+
ary[n++] = p;
379+
380+
QSORT(ary, n, cmp_remaining_objects);
381+
382+
/* link them back again */
383+
for (i = 0; i < n - 1; i++)
384+
ary[i]->next = ary[i + 1];
385+
ary[n - 1]->next = NULL;
386+
*pl = ary[0];
387+
388+
free(ary);
389+
}
390+
391+
424392
static void minimize(struct pack_list **min)
425393
{
426-
struct pack_list *pl, *unique = NULL,
427-
*non_unique = NULL, *min_perm = NULL;
428-
struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
429-
struct llist *missing;
430-
off_t min_perm_size = 0, perm_size;
431-
int n;
394+
struct pack_list *pl, *unique = NULL, *non_unique = NULL;
395+
struct llist *missing, *unique_pack_objects;
432396

433397
pl = local_packs;
434398
while (pl) {
@@ -442,53 +406,41 @@ static void minimize(struct pack_list **min)
442406
missing = llist_copy(all_objects);
443407
pl = unique;
444408
while (pl) {
445-
llist_sorted_difference_inplace(missing, pl->all_objects);
409+
llist_sorted_difference_inplace(missing, pl->remaining_objects);
446410
pl = pl->next;
447411
}
448412

413+
*min = unique;
414+
449415
/* return if there are no objects missing from the unique set */
450416
if (missing->size == 0) {
451-
*min = unique;
452417
free(missing);
453418
return;
454419
}
455420

456-
/* find the permutations which contain all missing objects */
457-
for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
458-
perm_all = perm = get_permutations(non_unique, n);
459-
while (perm) {
460-
if (is_superset(perm->pl, missing)) {
461-
new_perm = xmalloc(sizeof(struct pll));
462-
memcpy(new_perm, perm, sizeof(struct pll));
463-
new_perm->next = perm_ok;
464-
perm_ok = new_perm;
465-
}
466-
perm = perm->next;
467-
}
468-
if (perm_ok)
469-
break;
470-
pll_free(perm_all);
471-
}
472-
if (perm_ok == NULL)
473-
die("Internal error: No complete sets found!");
474-
475-
/* find the permutation with the smallest size */
476-
perm = perm_ok;
477-
while (perm) {
478-
perm_size = pack_set_bytecount(perm->pl);
479-
if (!min_perm_size || min_perm_size > perm_size) {
480-
min_perm_size = perm_size;
481-
min_perm = perm->pl;
482-
}
483-
perm = perm->next;
484-
}
485-
*min = min_perm;
486-
/* add the unique packs to the list */
487-
pl = unique;
421+
unique_pack_objects = llist_copy(all_objects);
422+
llist_sorted_difference_inplace(unique_pack_objects, missing);
423+
424+
/* remove unique pack objects from the non_unique packs */
425+
pl = non_unique;
488426
while (pl) {
489-
pack_list_insert(min, pl);
427+
llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
490428
pl = pl->next;
491429
}
430+
431+
while (non_unique) {
432+
/* sort the non_unique packs, greater size of remaining_objects first */
433+
sort_pack_list(&non_unique);
434+
if (non_unique->remaining_objects->size == 0)
435+
break;
436+
437+
pack_list_insert(min, non_unique);
438+
439+
for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
440+
llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
441+
442+
non_unique = non_unique->next;
443+
}
492444
}
493445

494446
static void load_all_objects(void)
@@ -500,7 +452,7 @@ static void load_all_objects(void)
500452

501453
while (pl) {
502454
hint = NULL;
503-
l = pl->all_objects->front;
455+
l = pl->remaining_objects->front;
504456
while (l) {
505457
hint = llist_insert_sorted_unique(all_objects,
506458
l->oid, hint);
@@ -511,7 +463,7 @@ static void load_all_objects(void)
511463
/* remove objects present in remote packs */
512464
pl = altodb_packs;
513465
while (pl) {
514-
llist_sorted_difference_inplace(all_objects, pl->all_objects);
466+
llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
515467
pl = pl->next;
516468
}
517469
}
@@ -536,11 +488,10 @@ static void scan_alt_odb_packs(void)
536488
while (alt) {
537489
local = local_packs;
538490
while (local) {
539-
llist_sorted_difference_inplace(local->unique_objects,
540-
alt->all_objects);
491+
llist_sorted_difference_inplace(local->remaining_objects,
492+
alt->remaining_objects);
541493
local = local->next;
542494
}
543-
llist_sorted_difference_inplace(all_objects, alt->all_objects);
544495
alt = alt->next;
545496
}
546497
}
@@ -555,7 +506,7 @@ static struct pack_list * add_pack(struct packed_git *p)
555506
return NULL;
556507

557508
l.pack = p;
558-
llist_init(&l.all_objects);
509+
llist_init(&l.remaining_objects);
559510

560511
if (open_pack_index(p))
561512
return NULL;
@@ -564,11 +515,11 @@ static struct pack_list * add_pack(struct packed_git *p)
564515
base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
565516
step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
566517
while (off < p->num_objects * step) {
567-
llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
518+
llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
568519
off += step;
569520
}
570-
/* this list will be pruned in cmp_two_packs later */
571-
l.unique_objects = llist_copy(l.all_objects);
521+
l.all_objects_size = l.remaining_objects->size;
522+
l.unique_objects = NULL;
572523
if (p->pack_local)
573524
return pack_list_insert(&local_packs, &l);
574525
else
@@ -603,7 +554,7 @@ static void load_all(void)
603554
int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
604555
{
605556
int i;
606-
struct pack_list *min, *red, *pl;
557+
struct pack_list *min = NULL, *red, *pl;
607558
struct llist *ignore;
608559
struct object_id *oid;
609560
char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -646,7 +597,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
646597

647598
load_all_objects();
648599

649-
cmp_local_packs();
650600
if (alt_odb)
651601
scan_alt_odb_packs();
652602

@@ -663,10 +613,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
663613
llist_sorted_difference_inplace(all_objects, ignore);
664614
pl = local_packs;
665615
while (pl) {
666-
llist_sorted_difference_inplace(pl->unique_objects, ignore);
616+
llist_sorted_difference_inplace(pl->remaining_objects, ignore);
667617
pl = pl->next;
668618
}
669619

620+
cmp_local_packs();
621+
670622
minimize(&min);
671623

672624
if (verbose) {

0 commit comments

Comments
 (0)