Skip to content

Commit 989d9cb

Browse files
ttaylorr authored and gitster committed
midx.c: extract struct midx_fanout
To build up a list of objects (along with their packs, and the offsets within those packs that each object appears at), the MIDX code implements `get_sorted_entries()` which builds up a list of candidates, sorts them, and then removes duplicate entries. To do this, it keeps an array of `pack_midx_entry` structures that it builds up once for each fanout level (ie., for all possible values of the first byte of each object's ID). This array is a function-local variable of `get_sorted_entries()`. Since it uses the ALLOC_GROW() macro, having the `alloc_fanout` variable also be local to that function, and only modified within that function is convenient. However, subsequent changes will extract the two ways this array is filled (from a pack at some fanout value, and from an existing MIDX at some fanout value) into separate functions. Instead of passing around pointers to the entries array, along with `nr_fanout` and `alloc_fanout`, encapsulate these three into a structure instead. Then pass around a pointer to this structure instead. This patch does not yet extract the above two functions, but sets us up to begin doing so in the following commit. For now, the implementation of get_sorted_entries() is only modified to replace `entries_by_fanout` with `fanout.entries`, `nr_fanout` with `fanout.nr`, and so on. Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0b6203c commit 989d9cb

File tree

1 file changed

+35
-19
lines changed

1 file changed

+35
-19
lines changed

midx.c

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,22 @@ static void fill_pack_entry(uint32_t pack_int_id,
577577
entry->preferred = !!preferred;
578578
}
579579

580+
/*
 * A growable array of candidate pack_midx_entry structures, holding
 * all objects that fall into one fanout bucket (i.e., all objects
 * whose OIDs share the same first byte).  Bundles the array pointer
 * with its length and allocated capacity so the trio can be passed
 * around as a single unit (and grown via ALLOC_GROW).
 */
struct midx_fanout {
	struct pack_midx_entry *entries; /* candidate objects for this bucket */
	uint32_t nr;                     /* number of entries in use */
	uint32_t alloc;                  /* allocated capacity of 'entries' */
};
585+
586+
/*
 * Ensure 'fanout' can hold at least 'nr' entries, reallocating the
 * backing array as needed; 'fanout->alloc' is updated by ALLOC_GROW.
 * Does not change 'fanout->nr'.
 */
static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr)
{
	ALLOC_GROW(fanout->entries, nr, fanout->alloc);
}
590+
591+
/*
 * Sort the accumulated entries with midx_oid_compare (by OID, then by
 * mtime descending) so that duplicate objects become adjacent and the
 * caller can keep only the first copy of each.
 */
static void midx_fanout_sort(struct midx_fanout *fanout)
{
	QSORT(fanout->entries, fanout->nr, midx_oid_compare);
}
595+
580596
/*
581597
* It is possible to artificially get into a state where there are many
582598
* duplicate copies of objects. That can create high memory pressure if
@@ -595,8 +611,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
595611
int preferred_pack)
596612
{
597613
uint32_t cur_fanout, cur_pack, cur_object;
598-
uint32_t alloc_fanout, alloc_objects, total_objects = 0;
599-
struct pack_midx_entry *entries_by_fanout = NULL;
614+
uint32_t alloc_objects, total_objects = 0;
615+
struct midx_fanout fanout = { 0 };
600616
struct pack_midx_entry *deduplicated_entries = NULL;
601617
uint32_t start_pack = m ? m->num_packs : 0;
602618

@@ -608,14 +624,14 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
608624
* slices to be evenly distributed, with some noise. Hence,
609625
* allocate slightly more than one 256th.
610626
*/
611-
alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
627+
alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
612628

613-
ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
629+
ALLOC_ARRAY(fanout.entries, fanout.alloc);
614630
ALLOC_ARRAY(deduplicated_entries, alloc_objects);
615631
*nr_objects = 0;
616632

617633
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
618-
uint32_t nr_fanout = 0;
634+
fanout.nr = 0;
619635

620636
if (m) {
621637
uint32_t start = 0, end;
@@ -625,15 +641,15 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
625641
end = ntohl(m->chunk_oid_fanout[cur_fanout]);
626642

627643
for (cur_object = start; cur_object < end; cur_object++) {
628-
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
644+
midx_fanout_grow(&fanout, fanout.nr + 1);
629645
nth_midxed_pack_midx_entry(m,
630-
&entries_by_fanout[nr_fanout],
646+
&fanout.entries[fanout.nr],
631647
cur_object);
632648
if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
633-
entries_by_fanout[nr_fanout].preferred = 1;
649+
fanout.entries[fanout.nr].preferred = 1;
634650
else
635-
entries_by_fanout[nr_fanout].preferred = 0;
636-
nr_fanout++;
651+
fanout.entries[fanout.nr].preferred = 0;
652+
fanout.nr++;
637653
}
638654
}
639655

@@ -646,36 +662,36 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
646662
end = get_pack_fanout(info[cur_pack].p, cur_fanout);
647663

648664
for (cur_object = start; cur_object < end; cur_object++) {
649-
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
665+
midx_fanout_grow(&fanout, fanout.nr + 1);
650666
fill_pack_entry(cur_pack,
651667
info[cur_pack].p,
652668
cur_object,
653-
&entries_by_fanout[nr_fanout],
669+
&fanout.entries[fanout.nr],
654670
preferred);
655-
nr_fanout++;
671+
fanout.nr++;
656672
}
657673
}
658674

659-
QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
675+
midx_fanout_sort(&fanout);
660676

661677
/*
662678
* The batch is now sorted by OID and then mtime (descending).
663679
* Take only the first duplicate.
664680
*/
665-
for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
666-
if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid,
667-
&entries_by_fanout[cur_object].oid))
681+
for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
682+
if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
683+
&fanout.entries[cur_object].oid))
668684
continue;
669685

670686
ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
671687
memcpy(&deduplicated_entries[*nr_objects],
672-
&entries_by_fanout[cur_object],
688+
&fanout.entries[cur_object],
673689
sizeof(struct pack_midx_entry));
674690
(*nr_objects)++;
675691
}
676692
}
677693

678-
free(entries_by_fanout);
694+
free(fanout.entries);
679695
return deduplicated_entries;
680696
}
681697

0 commit comments

Comments
 (0)