Skip to content

Commit 3fe0121

Browse files
committed
Merge branch 'ac/bitmap-lookup-table'
The pack bitmap file gained a bitmap-lookup table to speed up locating the necessary bitmap for a given commit. * ac/bitmap-lookup-table: pack-bitmap-write: drop unused pack_idx_entry parameters bitmap-lookup-table: add performance tests for lookup table pack-bitmap: prepare to read lookup table extension pack-bitmap-write: learn pack.writeBitmapLookupTable and add tests pack-bitmap-write.c: write lookup table extension bitmap: move `get commit positions` code to `bitmap_writer_finish` Documentation/technical: describe bitmap lookup table extension
2 parents cf98b69 + 969a564 commit 3fe0121

18 files changed

+1375
-711
lines changed

Documentation/config/pack.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,13 @@ When writing a multi-pack reachability bitmap, no new namehashes are
164164
computed; instead, any namehashes stored in an existing bitmap are
165165
permuted into their appropriate location when writing a new bitmap.
166166

167+
pack.writeBitmapLookupTable::
168+
When true, Git will include a "lookup table" section in the
169+
bitmap index (if one is written). This table is used to defer
170+
loading individual bitmaps as late as possible. This can be
171+
beneficial in repositories that have relatively large bitmap
172+
indexes. Defaults to false.
173+
167174
pack.writeReverseIndex::
168175
When true, git will write a corresponding .rev file (see:
169176
linkgit:gitformat-pack[5])

Documentation/technical/bitmap-format.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,17 @@ MIDXs, both the bit-cache and rev-cache extensions are required.
7272
pack/MIDX. The format and meaning of the name-hash is
7373
described below.
7474

75+
** {empty}
76+
BITMAP_OPT_LOOKUP_TABLE (0x10): :::
77+
If present, the end of the bitmap file contains a table
78+
containing a list of `N` <commit_pos, offset, xor_row>
79+
triplets. The format and meaning of the table is described
80+
below.
81+
+
82+
NOTE: Unlike the xor_offset used to compress an individual bitmap,
83+
`xor_row` stores an *absolute* index into the lookup table, not a location
84+
relative to the current entry.
85+
7586
4-byte entry count (network byte order): ::
7687
The total count of entries (bitmapped commits) in this bitmap index.
7788

@@ -216,3 +227,31 @@ Note that this hashing scheme is tied to the BITMAP_OPT_HASH_CACHE flag.
216227
If implementations want to choose a different hashing scheme, they are
217228
free to do so, but MUST allocate a new header flag (because comparing
218229
hashes made under two different schemes would be pointless).
230+
231+
Commit lookup table
232+
-------------------
233+
234+
If the BITMAP_OPT_LOOKUP_TABLE flag is set, the last `N * (4 + 8 + 4)`
235+
bytes (preceding the name-hash cache and trailing hash) of the `.bitmap`
236+
file contain a lookup table specifying the information needed to get
237+
the desired bitmap from the entries without parsing previous unnecessary
238+
bitmaps.
239+
240+
For a `.bitmap` containing `nr_entries` reachability bitmaps, the table
241+
contains a list of `nr_entries` <commit_pos, offset, xor_row> triplets
242+
(sorted in ascending order of `commit_pos`). The content of the i'th
243+
triplet is:
244+
245+
* {empty}
246+
commit_pos (4 byte integer, network byte order): ::
247+
It stores the object position of a commit (in the midx or pack
248+
index).
249+
250+
* {empty}
251+
offset (8 byte integer, network byte order): ::
252+
The offset from which that commit's bitmap can be read.
253+
254+
* {empty}
255+
xor_row (4 byte integer, network byte order): ::
256+
The position of the triplet whose bitmap is used to compress
257+
this one, or `0xffffffff` if no such bitmap exists.

builtin/multi-pack-index.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,13 @@ static int git_multi_pack_index_write_config(const char *var, const char *value,
8787
opts.flags &= ~MIDX_WRITE_BITMAP_HASH_CACHE;
8888
}
8989

90+
if (!strcmp(var, "pack.writebitmaplookuptable")) {
91+
if (git_config_bool(var, value))
92+
opts.flags |= MIDX_WRITE_BITMAP_LOOKUP_TABLE;
93+
else
94+
opts.flags &= ~MIDX_WRITE_BITMAP_LOOKUP_TABLE;
95+
}
96+
9097
/*
9198
* We should never make a fall-back call to 'git_default_config', since
9299
* this was already called in 'cmd_multi_pack_index()'.

builtin/pack-objects.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3148,6 +3148,14 @@ static int git_pack_config(const char *k, const char *v, void *cb)
31483148
else
31493149
write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
31503150
}
3151+
3152+
if (!strcmp(k, "pack.writebitmaplookuptable")) {
3153+
if (git_config_bool(k, v))
3154+
write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3155+
else
3156+
write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3157+
}
3158+
31513159
if (!strcmp(k, "pack.usebitmaps")) {
31523160
use_bitmap_index_default = git_config_bool(k, v);
31533161
return 0;

midx.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,9 @@ static int write_midx_bitmap(const char *midx_name,
11191119
if (flags & MIDX_WRITE_BITMAP_HASH_CACHE)
11201120
options |= BITMAP_OPT_HASH_CACHE;
11211121

1122+
if (flags & MIDX_WRITE_BITMAP_LOOKUP_TABLE)
1123+
options |= BITMAP_OPT_LOOKUP_TABLE;
1124+
11221125
/*
11231126
* Build the MIDX-order index based on pdata.objects (which is already
11241127
* in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of

midx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct multi_pack_index {
4747
#define MIDX_WRITE_REV_INDEX (1 << 1)
4848
#define MIDX_WRITE_BITMAP (1 << 2)
4949
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
50+
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
5051

5152
const unsigned char *get_midx_checksum(struct multi_pack_index *m);
5253
void get_midx_filename(struct strbuf *out, const char *object_dir);

pack-bitmap-write.c

Lines changed: 103 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -649,28 +649,98 @@ static const struct object_id *oid_access(size_t pos, const void *table)
649649
}
650650

651651
static void write_selected_commits_v1(struct hashfile *f,
652-
struct pack_idx_entry **index,
653-
uint32_t index_nr)
652+
uint32_t *commit_positions,
653+
off_t *offsets)
654654
{
655655
int i;
656656

657657
for (i = 0; i < writer.selected_nr; ++i) {
658658
struct bitmapped_commit *stored = &writer.selected[i];
659659

660-
int commit_pos =
661-
oid_pos(&stored->commit->object.oid, index, index_nr, oid_access);
660+
if (offsets)
661+
offsets[i] = hashfile_total(f);
662662

663-
if (commit_pos < 0)
664-
BUG("trying to write commit not in index");
665-
666-
hashwrite_be32(f, commit_pos);
663+
hashwrite_be32(f, commit_positions[i]);
667664
hashwrite_u8(f, stored->xor_offset);
668665
hashwrite_u8(f, stored->flags);
669666

670667
dump_bitmap(f, stored->write_as);
671668
}
672669
}
673670

671+
/*
 * Three-way comparator used to order lookup-table rows.
 *
 * `_va` and `_vb` each point at a uint32_t index into the array passed
 * as `_data`; the two indexed values are compared as unsigned 32-bit
 * integers.  Returns 1, -1 or 0 when the value selected by `_va` is
 * greater than, less than, or equal to the value selected by `_vb`.
 *
 * Nothing is modified through any of the pointers, so the elements and
 * the position array are accessed via const-qualified pointers rather
 * than casting the qualifier away.
 */
static int table_cmp(const void *_va, const void *_vb, void *_data)
{
	const uint32_t *commit_positions = _data;
	uint32_t a = commit_positions[*(const uint32_t *)_va];
	uint32_t b = commit_positions[*(const uint32_t *)_vb];

	if (a > b)
		return 1;
	else if (a < b)
		return -1;

	return 0;
}
684+
685+
static void write_lookup_table(struct hashfile *f,
686+
uint32_t *commit_positions,
687+
off_t *offsets)
688+
{
689+
uint32_t i;
690+
uint32_t *table, *table_inv;
691+
692+
ALLOC_ARRAY(table, writer.selected_nr);
693+
ALLOC_ARRAY(table_inv, writer.selected_nr);
694+
695+
for (i = 0; i < writer.selected_nr; i++)
696+
table[i] = i;
697+
698+
/*
699+
* At the end of this sort table[j] = i means that the i'th
700+
* bitmap corresponds to j'th bitmapped commit (among the selected
701+
* commits) in lex order of OIDs.
702+
*/
703+
QSORT_S(table, writer.selected_nr, table_cmp, commit_positions);
704+
705+
/* table_inv helps us discover that relationship (i'th bitmap
706+
* to j'th commit by j = table_inv[i])
707+
*/
708+
for (i = 0; i < writer.selected_nr; i++)
709+
table_inv[table[i]] = i;
710+
711+
trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository);
712+
for (i = 0; i < writer.selected_nr; i++) {
713+
struct bitmapped_commit *selected = &writer.selected[table[i]];
714+
uint32_t xor_offset = selected->xor_offset;
715+
uint32_t xor_row;
716+
717+
if (xor_offset) {
718+
/*
719+
* xor_index stores the index (in the bitmap entries)
720+
* of the corresponding xor bitmap. But we need to convert
721+
* this index into lookup table's index. So, table_inv[xor_index]
722+
* gives us the index position w.r.t. the lookup table.
723+
*
724+
* If "k = table[i] - xor_offset" then the xor base is the k'th
725+
* bitmap. `table_inv[k]` gives us the position of that bitmap
726+
* in the lookup table.
727+
*/
728+
uint32_t xor_index = table[i] - xor_offset;
729+
xor_row = table_inv[xor_index];
730+
} else {
731+
xor_row = 0xffffffff;
732+
}
733+
734+
hashwrite_be32(f, commit_positions[table[i]]);
735+
hashwrite_be64(f, (uint64_t)offsets[table[i]]);
736+
hashwrite_be32(f, xor_row);
737+
}
738+
trace2_region_leave("pack-bitmap-write", "writing_lookup_table", the_repository);
739+
740+
free(table);
741+
free(table_inv);
742+
}
743+
674744
static void write_hash_cache(struct hashfile *f,
675745
struct pack_idx_entry **index,
676746
uint32_t index_nr)
@@ -697,6 +767,9 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
697767
static uint16_t flags = BITMAP_OPT_FULL_DAG;
698768
struct strbuf tmp_file = STRBUF_INIT;
699769
struct hashfile *f;
770+
uint32_t *commit_positions = NULL;
771+
off_t *offsets = NULL;
772+
uint32_t i;
700773

701774
struct bitmap_disk_header header;
702775

@@ -715,7 +788,26 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
715788
dump_bitmap(f, writer.trees);
716789
dump_bitmap(f, writer.blobs);
717790
dump_bitmap(f, writer.tags);
718-
write_selected_commits_v1(f, index, index_nr);
791+
792+
if (options & BITMAP_OPT_LOOKUP_TABLE)
793+
CALLOC_ARRAY(offsets, index_nr);
794+
795+
ALLOC_ARRAY(commit_positions, writer.selected_nr);
796+
797+
for (i = 0; i < writer.selected_nr; i++) {
798+
struct bitmapped_commit *stored = &writer.selected[i];
799+
int commit_pos = oid_pos(&stored->commit->object.oid, index, index_nr, oid_access);
800+
801+
if (commit_pos < 0)
802+
BUG(_("trying to write commit not in index"));
803+
804+
commit_positions[i] = commit_pos;
805+
}
806+
807+
write_selected_commits_v1(f, commit_positions, offsets);
808+
809+
if (options & BITMAP_OPT_LOOKUP_TABLE)
810+
write_lookup_table(f, commit_positions, offsets);
719811

720812
if (options & BITMAP_OPT_HASH_CACHE)
721813
write_hash_cache(f, index, index_nr);
@@ -730,4 +822,6 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
730822
die_errno("unable to rename temporary bitmap file to '%s'", filename);
731823

732824
strbuf_release(&tmp_file);
825+
free(commit_positions);
826+
free(offsets);
733827
}

0 commit comments

Comments
 (0)