Skip to content

Commit d7cacf2

Browse files
derrickstolee authored and gitster committed
midx: write object id fanout chunk
Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0d5b3a5 commit d7cacf2

File tree

5 files changed

+68
-11
lines changed

5 files changed

+68
-11
lines changed

Documentation/technical/pack-format.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,11 @@ CHUNK DATA:
302302
name. This is the only chunk not guaranteed to be a multiple of four
303303
bytes in length, so should be the last chunk for alignment reasons.
304304

305+
OID Fanout (ID: {'O', 'I', 'D', 'F'})
306+
The ith entry, F[i], stores the number of OIDs with first
307+
byte at most i. Thus F[255] stores the total
308+
number of objects.
309+
305310
OID Lookup (ID: {'O', 'I', 'D', 'L'})
306311
The OIDs for all objects in the MIDX are stored in lexicographic
307312
order in this chunk.

midx.c

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@
1818
#define MIDX_HASH_LEN 20
1919
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
2020

21-
#define MIDX_MAX_CHUNKS 2
21+
#define MIDX_MAX_CHUNKS 3
2222
#define MIDX_CHUNK_ALIGNMENT 4
2323
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
24+
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
2425
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
2526
#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
27+
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
2628

2729
static char *get_midx_filename(const char *object_dir)
2830
{
@@ -102,6 +104,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
102104
m->chunk_pack_names = m->data + chunk_offset;
103105
break;
104106

107+
case MIDX_CHUNKID_OIDFANOUT:
108+
m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
109+
break;
110+
105111
case MIDX_CHUNKID_OIDLOOKUP:
106112
m->chunk_oid_lookup = m->data + chunk_offset;
107113
break;
@@ -121,9 +127,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
121127

122128
if (!m->chunk_pack_names)
123129
die(_("multi-pack-index missing required pack-name chunk"));
130+
if (!m->chunk_oid_fanout)
131+
die(_("multi-pack-index missing required OID fanout chunk"));
124132
if (!m->chunk_oid_lookup)
125133
die(_("multi-pack-index missing required OID lookup chunk"));
126134

135+
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
136+
127137
m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));
128138

129139
cur_pack_name = (const char *)m->chunk_pack_names;
@@ -389,6 +399,35 @@ static size_t write_midx_pack_names(struct hashfile *f,
389399
return written;
390400
}
391401

402+
/*
 * Write the OID fanout chunk of a multi-pack-index to hashfile `f`.
 *
 * Exactly 256 entries are emitted, one per possible first hash byte.
 * `count` is never reset between iterations, so entry i holds the
 * cumulative number of objects whose first hash byte is at most i.
 * Each entry is written with hashwrite_be32(); the loader decodes
 * entry 255 with ntohl() to obtain the total object count.
 *
 * `objects` is expected to be sorted by object ID: the inner scan only
 * advances over entries whose first byte equals the current i, so an
 * out-of-order entry would stop the scan and leave the entries behind
 * it uncounted.
 *
 * Returns MIDX_CHUNK_FANOUT_SIZE (256 * sizeof(uint32_t)), which the
 * caller adds to its running count of bytes written.
 */
static size_t write_midx_oid_fanout(struct hashfile *f,
403+
struct pack_midx_entry *objects,
404+
uint32_t nr_objects)
405+
{
406+
struct pack_midx_entry *list = objects;
407+
struct pack_midx_entry *last = objects + nr_objects;
408+
uint32_t count = 0;
409+
uint32_t i;
410+
411+
/*
412+
* Write the first-level table (the list is sorted,
413+
* but we use a 256-entry lookup to be able to avoid
414+
* having to do eight extra binary search iterations).
415+
*/
416+
for (i = 0; i < 256; i++) {
417+
struct pack_midx_entry *next = list;
418+
419+
/* Consume the run of entries whose first hash byte equals i. */
while (next < last && next->oid.hash[0] == i) {
420+
count++;
421+
next++;
422+
}
423+
424+
/* count is cumulative: it carries over from all earlier buckets. */
hashwrite_be32(f, count);
425+
/* The next bucket resumes scanning where this one stopped. */
list = next;
426+
}
427+
428+
/* The chunk size is fixed regardless of nr_objects. */
return MIDX_CHUNK_FANOUT_SIZE;
429+
}
430+
392431
static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
393432
struct pack_midx_entry *objects,
394433
uint32_t nr_objects)
@@ -461,17 +500,21 @@ int write_midx_file(const char *object_dir)
461500
FREE_AND_NULL(midx_name);
462501

463502
cur_chunk = 0;
464-
num_chunks = 2;
503+
num_chunks = 3;
465504

466505
written = write_midx_header(f, num_chunks, packs.nr);
467506

468507
chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
469508
chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
470509

471510
cur_chunk++;
472-
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
511+
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
473512
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
474513

514+
cur_chunk++;
515+
chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
516+
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;
517+
475518
cur_chunk++;
476519
chunk_ids[cur_chunk] = 0;
477520
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN;
@@ -505,6 +548,10 @@ int write_midx_file(const char *object_dir)
505548
written += write_midx_pack_names(f, packs.names, packs.nr);
506549
break;
507550

551+
case MIDX_CHUNKID_OIDFANOUT:
552+
written += write_midx_oid_fanout(f, entries, nr_entries);
553+
break;
554+
508555
case MIDX_CHUNKID_OIDLOOKUP:
509556
written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries);
510557
break;

midx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct multi_pack_index {
1515
uint32_t num_objects;
1616

1717
const unsigned char *chunk_pack_names;
18+
const uint32_t *chunk_oid_fanout;
1819
const unsigned char *chunk_oid_lookup;
1920

2021
const char **pack_names;

t/helper/test-read-midx.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ static int read_midx_file(const char *object_dir)
2222

2323
if (m->chunk_pack_names)
2424
printf(" pack-names");
25+
if (m->chunk_oid_fanout)
26+
printf(" oid-fanout");
2527
if (m->chunk_oid_lookup)
2628
printf(" oid-lookup");
2729

28-
printf("\n");
30+
printf("\nnum_objects: %d\n", m->num_objects);
2931

3032
printf("packs:\n");
3133
for (i = 0; i < m->num_packs; i++)

t/t5319-multi-pack-index.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ test_description='multi-pack-indexes'
55

66
midx_read_expect () {
77
NUM_PACKS=$1
8+
NUM_OBJECTS=$2
89
{
910
cat <<-EOF &&
10-
header: 4d494458 1 2 $NUM_PACKS
11-
chunks: pack-names oid-lookup
11+
header: 4d494458 1 3 $NUM_PACKS
12+
chunks: pack-names oid-fanout oid-lookup
13+
num_objects: $NUM_OBJECTS
1214
packs:
1315
EOF
1416
if test $NUM_PACKS -ge 1
@@ -24,7 +26,7 @@ midx_read_expect () {
2426
test_expect_success 'write midx with no packs' '
2527
test_when_finished rm -f pack/multi-pack-index &&
2628
git multi-pack-index --object-dir=. write &&
27-
midx_read_expect 0
29+
midx_read_expect 0 0
2830
'
2931

3032
generate_objects () {
@@ -74,13 +76,13 @@ test_expect_success 'write midx with one v1 pack' '
7476
pack=$(git pack-objects --index-version=1 pack/test <obj-list) &&
7577
test_when_finished rm pack/test-$pack.pack pack/test-$pack.idx pack/multi-pack-index &&
7678
git multi-pack-index --object-dir=. write &&
77-
midx_read_expect 1
79+
midx_read_expect 1 18
7880
'
7981

8082
test_expect_success 'write midx with one v2 pack' '
8183
git pack-objects --index-version=2,0x40 pack/test <obj-list &&
8284
git multi-pack-index --object-dir=. write &&
83-
midx_read_expect 1
85+
midx_read_expect 1 18
8486
'
8587

8688
test_expect_success 'add more objects' '
@@ -94,7 +96,7 @@ test_expect_success 'add more objects' '
9496
test_expect_success 'write midx with two packs' '
9597
git pack-objects --index-version=1 pack/test-2 <obj-list &&
9698
git multi-pack-index --object-dir=. write &&
97-
midx_read_expect 2
99+
midx_read_expect 2 34
98100
'
99101

100102
test_expect_success 'add more packs' '
@@ -108,7 +110,7 @@ test_expect_success 'add more packs' '
108110

109111
test_expect_success 'write midx with twelve packs' '
110112
git multi-pack-index --object-dir=. write &&
111-
midx_read_expect 12
113+
midx_read_expect 12 74
112114
'
113115

114116
test_done

0 commit comments

Comments
 (0)