Skip to content

Commit e39c849

Browse files
committed
Merge branch 'tb/multi-pack-reuse-dupfix' into seen
* tb/multi-pack-reuse-dupfix: pack-objects: only perform verbatim reuse on the preferred pack t5332-multi-pack-reuse.sh: demonstrate duplicate packing failure
2 parents a35aba8 + 2f02f4d commit e39c849

File tree

2 files changed

+63
-55
lines changed

2 files changed

+63
-55
lines changed

builtin/pack-objects.c

Lines changed: 40 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,78 +1109,64 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
11091109

11101110
static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
11111111
struct hashfile *out,
1112-
off_t pack_start,
11131112
struct pack_window **w_curs)
11141113
{
1115-
size_t pos = reuse_packfile->bitmap_pos;
1114+
size_t pos = 0;
11161115
size_t end;
11171116

1118-
if (pos % BITS_IN_EWORD) {
1119-
size_t word_pos = (pos / BITS_IN_EWORD);
1120-
size_t offset = pos % BITS_IN_EWORD;
1121-
size_t last;
1122-
eword_t word = reuse_packfile_bitmap->words[word_pos];
1123-
1124-
if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD)
1125-
last = offset + reuse_packfile->bitmap_nr;
1126-
else
1127-
last = BITS_IN_EWORD;
1128-
1129-
for (; offset < last; offset++) {
1130-
if (word >> offset == 0)
1131-
return word_pos;
1132-
if (!bitmap_get(reuse_packfile_bitmap,
1133-
word_pos * BITS_IN_EWORD + offset))
1134-
return word_pos;
1135-
}
1136-
1137-
pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD);
1117+
if (reuse_packfile->bitmap_pos) {
1118+
/*
1119+
* We can't reuse whole chunks verbatim out of
1120+
* non-preferred packs since we can't guarantee that
1121+
* all duplicate objects were resolved in favor of
1122+
* that pack.
1123+
*
1124+
* Even if we have a whole eword_t worth of bits that
1125+
* could be reused, there may be objects between the
1126+
* objects corresponding to the first and last bit of
1127+
* that word which were selected from a different
1128+
* pack, causing us to send duplicate or unwanted
1129+
* objects.
1130+
*
1131+
* Handle non-preferred packs from within
1132+
* write_reused_pack(), which inspects and reuses
1133+
* individual bits.
1134+
*/
1135+
return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
11381136
}
11391137

11401138
/*
1141-
* Now we're going to copy as many whole eword_t's as possible.
1142-
* "end" is the index of the last whole eword_t we copy, but
1143-
* there may be additional bits to process. Those are handled
1144-
* individually by write_reused_pack().
1139+
* Only read through the last word whose bits all correspond
1140+
* to objects in the given packfile, since we must stop at a
1141+
* word boundary.
11451142
*
1146-
* Begin by advancing to the first word boundary in range of the
1147-
* bit positions occupied by objects in "reuse_packfile". Then
1148-
* pick the last word boundary in the same range. If we have at
1149-
* least one word's worth of bits to process, continue on.
1143+
* If there is no whole word to read (i.e. the packfile
1144+
* contains fewer than BITS_IN_EWORD objects), then we'll
1145+
* inspect bits one-by-one in write_reused_pack().
11501146
*/
1151-
end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr;
1152-
if (end % BITS_IN_EWORD)
1153-
end -= end % BITS_IN_EWORD;
1154-
if (pos >= end)
1155-
return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
1156-
1157-
while (pos < end &&
1158-
reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0)
1159-
pos += BITS_IN_EWORD;
1147+
end = reuse_packfile->bitmap_nr / BITS_IN_EWORD;
1148+
if (reuse_packfile_bitmap->word_alloc < end)
1149+
BUG("fewer words than expected in reuse_packfile_bitmap");
11601150

1161-
if (pos > end)
1162-
pos = end;
1151+
while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1152+
pos++;
11631153

1164-
if (reuse_packfile->bitmap_pos < pos) {
1165-
off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0);
1166-
off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p,
1167-
pos - reuse_packfile->bitmap_pos);
1154+
if (pos) {
1155+
off_t to_write;
11681156

1169-
written += pos - reuse_packfile->bitmap_pos;
1157+
written = (pos * BITS_IN_EWORD);
1158+
to_write = pack_pos_to_offset(reuse_packfile->p, written)
1159+
- sizeof(struct pack_header);
11701160

11711161
/* We're recording one chunk, not one object. */
1172-
record_reused_object(pack_start_off,
1173-
pack_start_off - (hashfile_total(out) - pack_start));
1162+
record_reused_object(sizeof(struct pack_header), 0);
11741163
hashflush(out);
11751164
copy_pack_data(out, reuse_packfile->p, w_curs,
1176-
pack_start_off, pack_end_off - pack_start_off);
1165+
sizeof(struct pack_header), to_write);
11771166

11781167
display_progress(progress_state, written);
11791168
}
1180-
if (pos % BITS_IN_EWORD)
1181-
BUG("attempted to jump past a word boundary to %"PRIuMAX,
1182-
(uintmax_t)pos);
1183-
return pos / BITS_IN_EWORD;
1169+
return pos;
11841170
}
11851171

11861172
static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
@@ -1192,8 +1178,7 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
11921178
struct pack_window *w_curs = NULL;
11931179

11941180
if (allow_ofs_delta)
1195-
i = write_reused_pack_verbatim(reuse_packfile, f, pack_start,
1196-
&w_curs);
1181+
i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs);
11971182

11981183
for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
11991184
eword_t word = reuse_packfile_bitmap->words[i];

t/t5332-multi-pack-reuse.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,27 @@ test_expect_success 'duplicate objects' '
258258
)
259259
'
260260

261+
test_expect_success 'duplicate objects with verbatim reuse' '
262+
git init duplicate-objects-verbatim &&
263+
(
264+
cd duplicate-objects-verbatim &&
265+
266+
git config pack.allowPackReuse multi &&
267+
268+
test_commit_bulk 64 &&
269+
270+
# take the first object from the main pack...
271+
git show-index <$(ls $packdir/pack-*.idx) >obj.raw &&
272+
sort -nk1 <obj.raw | head -n1 | cut -d" " -f2 >in &&
273+
274+
# ...and create a separate pack containing just that object
275+
p="$(git pack-objects $packdir/pack <in)" &&
276+
git show-index <$packdir/pack-$p.idx &&
277+
278+
git multi-pack-index write --bitmap --preferred-pack=pack-$p.idx &&
279+
280+
test_pack_objects_reused_all 192 2
281+
)
282+
'
283+
261284
test_done

0 commit comments

Comments
 (0)