Skip to content

Commit ca0fd69

Browse files
ttaylorr authored and gitster committed
pack-objects: prepare write_reused_pack_verbatim() for multi-pack reuse
The function `write_reused_pack_verbatim()` within `builtin/pack-objects.c` is responsible for writing out a continuous set of objects beginning at the start of the reuse packfile.

In the existing implementation, we did something like:

    while (pos < reuse_packfile_bitmap->word_alloc &&
           reuse_packfile_bitmap->words[pos] == (eword_t)~0)
        pos++;

    if (pos)
        /* write first `pos * BITS_IN_EWORD` objects from pack */

as an optimization to record a single chunk for the longest continuous prefix of objects wanted out of the reuse pack, instead of having a chunk for each individual object. For more details, see bb514de (pack-objects: improve partial packfile reuse, 2019-12-18).

In order to retain this optimization in a multi-pack reuse world, we can no longer assume that the first object in a pack is on a word boundary in the bitmap storing the set of reusable objects.

Assuming that all objects from the beginning of the reuse packfile up to the object corresponding to the first bit on a word boundary are part of the result, consume whole words at a time until the last whole word belonging to the reuse packfile. Copy those objects to the resulting packfile, and track that we reused them by recording a single chunk.

Signed-off-by: Taylor Blau <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4805125 commit ca0fd69

File tree

1 file changed

+60
-13
lines changed

1 file changed

+60
-13
lines changed

builtin/pack-objects.c

Lines changed: 60 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1097,31 +1097,78 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
10971097

10981098
static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
10991099
struct hashfile *out,
1100-
off_t pack_start UNUSED,
1100+
off_t pack_start,
11011101
struct pack_window **w_curs)
11021102
{
1103-
size_t pos = 0;
1103+
size_t pos = reuse_packfile->bitmap_pos;
1104+
size_t end;
11041105

1105-
while (pos < reuse_packfile_bitmap->word_alloc &&
1106-
reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1107-
pos++;
1106+
if (pos % BITS_IN_EWORD) {
1107+
size_t word_pos = (pos / BITS_IN_EWORD);
1108+
size_t offset = pos % BITS_IN_EWORD;
1109+
size_t last;
1110+
eword_t word = reuse_packfile_bitmap->words[word_pos];
11081111

1109-
if (pos) {
1110-
off_t to_write;
1112+
if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD)
1113+
last = offset + reuse_packfile->bitmap_nr;
1114+
else
1115+
last = BITS_IN_EWORD;
1116+
1117+
for (; offset < last; offset++) {
1118+
if (word >> offset == 0)
1119+
return word_pos;
1120+
if (!bitmap_get(reuse_packfile_bitmap,
1121+
word_pos * BITS_IN_EWORD + offset))
1122+
return word_pos;
1123+
}
1124+
1125+
pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD);
1126+
}
1127+
1128+
/*
1129+
* Now we're going to copy as many whole eword_t's as possible.
1130+
* "end" is the index of the last whole eword_t we copy, but
1131+
* there may be additional bits to process. Those are handled
1132+
* individually by write_reused_pack().
1133+
*
1134+
* Begin by advancing to the first word boundary in range of the
1135+
* bit positions occupied by objects in "reuse_packfile". Then
1136+
* pick the last word boundary in the same range. If we have at
1137+
* least one word's worth of bits to process, continue on.
1138+
*/
1139+
end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr;
1140+
if (end % BITS_IN_EWORD)
1141+
end -= end % BITS_IN_EWORD;
1142+
if (pos >= end)
1143+
return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
1144+
1145+
while (pos < end &&
1146+
reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0)
1147+
pos += BITS_IN_EWORD;
1148+
1149+
if (pos > end)
1150+
pos = end;
1151+
1152+
if (reuse_packfile->bitmap_pos < pos) {
1153+
off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0);
1154+
off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p,
1155+
pos - reuse_packfile->bitmap_pos);
11111156

1112-
written = (pos * BITS_IN_EWORD);
1113-
to_write = pack_pos_to_offset(reuse_packfile->p, written)
1114-
- sizeof(struct pack_header);
1157+
written += pos - reuse_packfile->bitmap_pos;
11151158

11161159
/* We're recording one chunk, not one object. */
1117-
record_reused_object(sizeof(struct pack_header), 0);
1160+
record_reused_object(pack_start_off,
1161+
pack_start_off - (hashfile_total(out) - pack_start));
11181162
hashflush(out);
11191163
copy_pack_data(out, reuse_packfile->p, w_curs,
1120-
sizeof(struct pack_header), to_write);
1164+
pack_start_off, pack_end_off - pack_start_off);
11211165

11221166
display_progress(progress_state, written);
11231167
}
1124-
return pos;
1168+
if (pos % BITS_IN_EWORD)
1169+
BUG("attempted to jump past a word boundary to %"PRIuMAX,
1170+
(uintmax_t)pos);
1171+
return pos / BITS_IN_EWORD;
11251172
}
11261173

11271174
static void write_reused_pack(struct bitmapped_pack *reuse_packfile,

0 commit comments

Comments
 (0)