Skip to content

Commit d01bf2e

Browse files
derrickstolee authored and gitster committed
midx: refactor permutation logic and pack sorting
In anticipation of the expire subcommand, refactor the way we sort the packfiles by name. This will greatly simplify our approach to dropping expired packs from the list.

First, create 'struct pack_info' to replace 'struct pack_pair'. This struct contains the necessary information about a pack, including its name, a pointer to its packfile struct (if not already in the multi-pack-index), and the original pack-int-id.

Second, track the pack information using an array of pack_info structs in the pack_list struct. This simplifies the logic around the multiple arrays we were tracking in that struct.

Finally, update get_sorted_entries() to not permute the pack-int-id and instead supply the permutation to write_midx_object_offsets(). This requires sorting the packs after get_sorted_entries().

Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent dba6175 commit d01bf2e

File tree

1 file changed

+69
-87
lines changed

1 file changed

+69
-87
lines changed

midx.c

Lines changed: 69 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -427,12 +427,23 @@ static size_t write_midx_header(struct hashfile *f,
427427
return MIDX_HEADER_SIZE;
428428
}
429429

430+
struct pack_info {
431+
uint32_t orig_pack_int_id;
432+
char *pack_name;
433+
struct packed_git *p;
434+
};
435+
436+
static int pack_info_compare(const void *_a, const void *_b)
437+
{
438+
struct pack_info *a = (struct pack_info *)_a;
439+
struct pack_info *b = (struct pack_info *)_b;
440+
return strcmp(a->pack_name, b->pack_name);
441+
}
442+
430443
struct pack_list {
431-
struct packed_git **list;
432-
char **names;
444+
struct pack_info *info;
433445
uint32_t nr;
434-
uint32_t alloc_list;
435-
uint32_t alloc_names;
446+
uint32_t alloc;
436447
struct multi_pack_index *m;
437448
};
438449

@@ -445,66 +456,32 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
445456
if (packs->m && midx_contains_pack(packs->m, file_name))
446457
return;
447458

448-
ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
449-
ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
459+
ALLOC_GROW(packs->info, packs->nr + 1, packs->alloc);
450460

451-
packs->list[packs->nr] = add_packed_git(full_path,
452-
full_path_len,
453-
0);
461+
packs->info[packs->nr].p = add_packed_git(full_path,
462+
full_path_len,
463+
0);
454464

455-
if (!packs->list[packs->nr]) {
465+
if (!packs->info[packs->nr].p) {
456466
warning(_("failed to add packfile '%s'"),
457467
full_path);
458468
return;
459469
}
460470

461-
if (open_pack_index(packs->list[packs->nr])) {
471+
if (open_pack_index(packs->info[packs->nr].p)) {
462472
warning(_("failed to open pack-index '%s'"),
463473
full_path);
464-
close_pack(packs->list[packs->nr]);
465-
FREE_AND_NULL(packs->list[packs->nr]);
474+
close_pack(packs->info[packs->nr].p);
475+
FREE_AND_NULL(packs->info[packs->nr].p);
466476
return;
467477
}
468478

469-
packs->names[packs->nr] = xstrdup(file_name);
479+
packs->info[packs->nr].pack_name = xstrdup(file_name);
480+
packs->info[packs->nr].orig_pack_int_id = packs->nr;
470481
packs->nr++;
471482
}
472483
}
473484

474-
struct pack_pair {
475-
uint32_t pack_int_id;
476-
char *pack_name;
477-
};
478-
479-
static int pack_pair_compare(const void *_a, const void *_b)
480-
{
481-
struct pack_pair *a = (struct pack_pair *)_a;
482-
struct pack_pair *b = (struct pack_pair *)_b;
483-
return strcmp(a->pack_name, b->pack_name);
484-
}
485-
486-
static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
487-
{
488-
uint32_t i;
489-
struct pack_pair *pairs;
490-
491-
ALLOC_ARRAY(pairs, nr_packs);
492-
493-
for (i = 0; i < nr_packs; i++) {
494-
pairs[i].pack_int_id = i;
495-
pairs[i].pack_name = pack_names[i];
496-
}
497-
498-
QSORT(pairs, nr_packs, pack_pair_compare);
499-
500-
for (i = 0; i < nr_packs; i++) {
501-
pack_names[i] = pairs[i].pack_name;
502-
perm[pairs[i].pack_int_id] = i;
503-
}
504-
505-
free(pairs);
506-
}
507-
508485
struct pack_midx_entry {
509486
struct object_id oid;
510487
uint32_t pack_int_id;
@@ -530,15 +507,14 @@ static int midx_oid_compare(const void *_a, const void *_b)
530507
}
531508

532509
static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
533-
uint32_t *pack_perm,
534510
struct pack_midx_entry *e,
535511
uint32_t pos)
536512
{
537513
if (pos >= m->num_objects)
538514
return 1;
539515

540516
nth_midxed_object_oid(&e->oid, m, pos);
541-
e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)];
517+
e->pack_int_id = nth_midxed_pack_int_id(m, pos);
542518
e->offset = nth_midxed_offset(m, pos);
543519

544520
/* consider objects in midx to be from "old" packs */
@@ -572,8 +548,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
572548
* of a packfile containing the object).
573549
*/
574550
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
575-
struct packed_git **p,
576-
uint32_t *perm,
551+
struct pack_info *info,
577552
uint32_t nr_packs,
578553
uint32_t *nr_objects)
579554
{
@@ -584,7 +559,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
584559
uint32_t start_pack = m ? m->num_packs : 0;
585560

586561
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
587-
total_objects += p[cur_pack]->num_objects;
562+
total_objects += info[cur_pack].p->num_objects;
588563

589564
/*
590565
* As we de-duplicate by fanout value, we expect the fanout
@@ -609,7 +584,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
609584

610585
for (cur_object = start; cur_object < end; cur_object++) {
611586
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
612-
nth_midxed_pack_midx_entry(m, perm,
587+
nth_midxed_pack_midx_entry(m,
613588
&entries_by_fanout[nr_fanout],
614589
cur_object);
615590
nr_fanout++;
@@ -620,12 +595,12 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
620595
uint32_t start = 0, end;
621596

622597
if (cur_fanout)
623-
start = get_pack_fanout(p[cur_pack], cur_fanout - 1);
624-
end = get_pack_fanout(p[cur_pack], cur_fanout);
598+
start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
599+
end = get_pack_fanout(info[cur_pack].p, cur_fanout);
625600

626601
for (cur_object = start; cur_object < end; cur_object++) {
627602
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
628-
fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]);
603+
fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
629604
nr_fanout++;
630605
}
631606
}
@@ -654,22 +629,22 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
654629
}
655630

656631
static size_t write_midx_pack_names(struct hashfile *f,
657-
char **pack_names,
632+
struct pack_info *info,
658633
uint32_t num_packs)
659634
{
660635
uint32_t i;
661636
unsigned char padding[MIDX_CHUNK_ALIGNMENT];
662637
size_t written = 0;
663638

664639
for (i = 0; i < num_packs; i++) {
665-
size_t writelen = strlen(pack_names[i]) + 1;
640+
size_t writelen = strlen(info[i].pack_name) + 1;
666641

667-
if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
642+
if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0)
668643
BUG("incorrect pack-file order: %s before %s",
669-
pack_names[i - 1],
670-
pack_names[i]);
644+
info[i - 1].pack_name,
645+
info[i].pack_name);
671646

672-
hashwrite(f, pack_names[i], writelen);
647+
hashwrite(f, info[i].pack_name, writelen);
673648
written += writelen;
674649
}
675650

@@ -740,6 +715,7 @@ static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
740715
}
741716

742717
static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed,
718+
uint32_t *perm,
743719
struct pack_midx_entry *objects, uint32_t nr_objects)
744720
{
745721
struct pack_midx_entry *list = objects;
@@ -749,7 +725,7 @@ static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_nee
749725
for (i = 0; i < nr_objects; i++) {
750726
struct pack_midx_entry *obj = list++;
751727

752-
hashwrite_be32(f, obj->pack_int_id);
728+
hashwrite_be32(f, perm[obj->pack_int_id]);
753729

754730
if (large_offset_needed && obj->offset >> 31)
755731
hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@@ -822,20 +798,17 @@ int write_midx_file(const char *object_dir)
822798
packs.m = load_multi_pack_index(object_dir, 1);
823799

824800
packs.nr = 0;
825-
packs.alloc_list = packs.m ? packs.m->num_packs : 16;
826-
packs.alloc_names = packs.alloc_list;
827-
packs.list = NULL;
828-
packs.names = NULL;
829-
ALLOC_ARRAY(packs.list, packs.alloc_list);
830-
ALLOC_ARRAY(packs.names, packs.alloc_names);
801+
packs.alloc = packs.m ? packs.m->num_packs : 16;
802+
packs.info = NULL;
803+
ALLOC_ARRAY(packs.info, packs.alloc);
831804

832805
if (packs.m) {
833806
for (i = 0; i < packs.m->num_packs; i++) {
834-
ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list);
835-
ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names);
807+
ALLOC_GROW(packs.info, packs.nr + 1, packs.alloc);
836808

837-
packs.list[packs.nr] = NULL;
838-
packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]);
809+
packs.info[packs.nr].orig_pack_int_id = i;
810+
packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]);
811+
packs.info[packs.nr].p = NULL;
839812
packs.nr++;
840813
}
841814
}
@@ -845,10 +818,7 @@ int write_midx_file(const char *object_dir)
845818
if (packs.m && packs.nr == packs.m->num_packs)
846819
goto cleanup;
847820

848-
ALLOC_ARRAY(pack_perm, packs.nr);
849-
sort_packs_by_name(packs.names, packs.nr, pack_perm);
850-
851-
entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries);
821+
entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries);
852822

853823
for (i = 0; i < nr_entries; i++) {
854824
if (entries[i].offset > 0x7fffffff)
@@ -857,8 +827,21 @@ int write_midx_file(const char *object_dir)
857827
large_offsets_needed = 1;
858828
}
859829

830+
QSORT(packs.info, packs.nr, pack_info_compare);
831+
832+
/*
833+
* pack_perm stores a permutation between pack-int-ids from the
834+
* previous multi-pack-index to the new one we are writing:
835+
*
836+
* pack_perm[old_id] = new_id
837+
*/
838+
ALLOC_ARRAY(pack_perm, packs.nr);
839+
for (i = 0; i < packs.nr; i++) {
840+
pack_perm[packs.info[i].orig_pack_int_id] = i;
841+
}
842+
860843
for (i = 0; i < packs.nr; i++)
861-
pack_name_concat_len += strlen(packs.names[i]) + 1;
844+
pack_name_concat_len += strlen(packs.info[i].pack_name) + 1;
862845

863846
if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
864847
pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
@@ -929,7 +912,7 @@ int write_midx_file(const char *object_dir)
929912

930913
switch (chunk_ids[i]) {
931914
case MIDX_CHUNKID_PACKNAMES:
932-
written += write_midx_pack_names(f, packs.names, packs.nr);
915+
written += write_midx_pack_names(f, packs.info, packs.nr);
933916
break;
934917

935918
case MIDX_CHUNKID_OIDFANOUT:
@@ -941,7 +924,7 @@ int write_midx_file(const char *object_dir)
941924
break;
942925

943926
case MIDX_CHUNKID_OBJECTOFFSETS:
944-
written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries);
927+
written += write_midx_object_offsets(f, large_offsets_needed, pack_perm, entries, nr_entries);
945928
break;
946929

947930
case MIDX_CHUNKID_LARGEOFFSETS:
@@ -964,15 +947,14 @@ int write_midx_file(const char *object_dir)
964947

965948
cleanup:
966949
for (i = 0; i < packs.nr; i++) {
967-
if (packs.list[i]) {
968-
close_pack(packs.list[i]);
969-
free(packs.list[i]);
950+
if (packs.info[i].p) {
951+
close_pack(packs.info[i].p);
952+
free(packs.info[i].p);
970953
}
971-
free(packs.names[i]);
954+
free(packs.info[i].pack_name);
972955
}
973956

974-
free(packs.list);
975-
free(packs.names);
957+
free(packs.info);
976958
free(entries);
977959
free(pack_perm);
978960
free(midx_name);

0 commit comments

Comments
 (0)