Skip to content

Commit a14aebe

Browse files
committed
Merge branch 'jk/packfile-reuse-cleanup'
The way "git pack-objects" reuses objects stored in an existing pack to generate its result has been improved.

* jk/packfile-reuse-cleanup:
  pack-bitmap: don't rely on bitmap_git->reuse_objects
  pack-objects: add checks for duplicate objects
  pack-objects: improve partial packfile reuse
  builtin/pack-objects: introduce obj_is_packed()
  pack-objects: introduce pack.allowPackReuse
  csum-file: introduce hashfile_total()
  pack-bitmap: simplify bitmap_has_oid_in_uninteresting()
  pack-bitmap: uninteresting oid can be outside bitmapped packfile
  pack-bitmap: introduce bitmap_walk_contains()
  ewah/bitmap: introduce bitmap_word_alloc()
  packfile: expose get_delta_base()
  builtin/pack-objects: report reused packfile objects
2 parents daef1b3 + d2ea031 commit a14aebe

File tree

9 files changed

+362
-122
lines changed

9 files changed

+362
-122
lines changed

Documentation/config/pack.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ Note that changing the compression level will not automatically recompress
2727
all existing objects. You can force recompression by passing the -F option
2828
to linkgit:git-repack[1].
2929

30+
pack.allowPackReuse::
31+
When true, and when reachability bitmaps are enabled,
32+
pack-objects will try to send parts of the bitmapped packfile
33+
verbatim. This can reduce memory and CPU usage to serve fetches,
34+
but might result in sending a slightly larger pack. Defaults to
35+
true.
36+
3037
pack.island::
3138
An extended regular expression configuring a set of delta
3239
islands. See "DELTA ISLANDS" in linkgit:git-pack-objects[1]

builtin/pack-objects.c

Lines changed: 194 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,11 @@ static struct progress *progress_state;
9292

9393
static struct packed_git *reuse_packfile;
9494
static uint32_t reuse_packfile_objects;
95-
static off_t reuse_packfile_offset;
95+
static struct bitmap *reuse_packfile_bitmap;
9696

9797
static int use_bitmap_index_default = 1;
9898
static int use_bitmap_index = -1;
99+
static int allow_pack_reuse = 1;
99100
static enum {
100101
WRITE_BITMAP_FALSE = 0,
101102
WRITE_BITMAP_QUIET,
@@ -784,57 +785,185 @@ static struct object_entry **compute_write_order(void)
784785
return wo;
785786
}
786787

787-
static off_t write_reused_pack(struct hashfile *f)
788+
789+
/*
790+
* A reused set of objects. All objects in a chunk have the same
791+
* relative position in the original packfile and the generated
792+
* packfile.
793+
*/
794+
795+
static struct reused_chunk {
796+
/* The offset of the first object of this chunk in the original
797+
* packfile. */
798+
off_t original;
799+
/* The offset of the first object of this chunk in the generated
800+
* packfile minus "original". */
801+
off_t difference;
802+
} *reused_chunks;
803+
static int reused_chunks_nr;
804+
static int reused_chunks_alloc;
805+
806+
static void record_reused_object(off_t where, off_t offset)
807+
{
808+
if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
809+
return;
810+
811+
ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
812+
reused_chunks_alloc);
813+
reused_chunks[reused_chunks_nr].original = where;
814+
reused_chunks[reused_chunks_nr].difference = offset;
815+
reused_chunks_nr++;
816+
}
817+
818+
/*
819+
* Binary search to find the chunk that "where" is in. Note
820+
* that we're not looking for an exact match, just the first
821+
* chunk that contains it (which implicitly ends at the start
822+
* of the next chunk).
823+
*/
824+
static off_t find_reused_offset(off_t where)
825+
{
826+
int lo = 0, hi = reused_chunks_nr;
827+
while (lo < hi) {
828+
int mi = lo + ((hi - lo) / 2);
829+
if (where == reused_chunks[mi].original)
830+
return reused_chunks[mi].difference;
831+
if (where < reused_chunks[mi].original)
832+
hi = mi;
833+
else
834+
lo = mi + 1;
835+
}
836+
837+
/*
838+
* The first chunk starts at zero, so we can't have gone below
839+
* there.
840+
*/
841+
assert(lo);
842+
return reused_chunks[lo-1].difference;
843+
}
844+
845+
static void write_reused_pack_one(size_t pos, struct hashfile *out,
846+
struct pack_window **w_curs)
788847
{
789-
unsigned char buffer[8192];
790-
off_t to_write, total;
791-
int fd;
848+
off_t offset, next, cur;
849+
enum object_type type;
850+
unsigned long size;
792851

793-
if (!is_pack_valid(reuse_packfile))
794-
die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
852+
offset = reuse_packfile->revindex[pos].offset;
853+
next = reuse_packfile->revindex[pos + 1].offset;
795854

796-
fd = git_open(reuse_packfile->pack_name);
797-
if (fd < 0)
798-
die_errno(_("unable to open packfile for reuse: %s"),
799-
reuse_packfile->pack_name);
855+
record_reused_object(offset, offset - hashfile_total(out));
800856

801-
if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
802-
die_errno(_("unable to seek in reused packfile"));
857+
cur = offset;
858+
type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
859+
assert(type >= 0);
803860

804-
if (reuse_packfile_offset < 0)
805-
reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
861+
if (type == OBJ_OFS_DELTA) {
862+
off_t base_offset;
863+
off_t fixup;
864+
865+
unsigned char header[MAX_PACK_OBJECT_HEADER];
866+
unsigned len;
867+
868+
base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
869+
assert(base_offset != 0);
870+
871+
/* Convert to REF_DELTA if we must... */
872+
if (!allow_ofs_delta) {
873+
int base_pos = find_revindex_position(reuse_packfile, base_offset);
874+
const unsigned char *base_sha1 =
875+
nth_packed_object_sha1(reuse_packfile,
876+
reuse_packfile->revindex[base_pos].nr);
877+
878+
len = encode_in_pack_object_header(header, sizeof(header),
879+
OBJ_REF_DELTA, size);
880+
hashwrite(out, header, len);
881+
hashwrite(out, base_sha1, 20);
882+
copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
883+
return;
884+
}
806885

807-
total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
886+
/* Otherwise see if we need to rewrite the offset... */
887+
fixup = find_reused_offset(offset) -
888+
find_reused_offset(base_offset);
889+
if (fixup) {
890+
unsigned char ofs_header[10];
891+
unsigned i, ofs_len;
892+
off_t ofs = offset - base_offset - fixup;
808893

809-
while (to_write) {
810-
int read_pack = xread(fd, buffer, sizeof(buffer));
894+
len = encode_in_pack_object_header(header, sizeof(header),
895+
OBJ_OFS_DELTA, size);
811896

812-
if (read_pack <= 0)
813-
die_errno(_("unable to read from reused packfile"));
897+
i = sizeof(ofs_header) - 1;
898+
ofs_header[i] = ofs & 127;
899+
while (ofs >>= 7)
900+
ofs_header[--i] = 128 | (--ofs & 127);
814901

815-
if (read_pack > to_write)
816-
read_pack = to_write;
902+
ofs_len = sizeof(ofs_header) - i;
817903

818-
hashwrite(f, buffer, read_pack);
819-
to_write -= read_pack;
904+
hashwrite(out, header, len);
905+
hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
906+
copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
907+
return;
908+
}
909+
910+
/* ...otherwise we have no fixup, and can write it verbatim */
911+
}
912+
913+
copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
914+
}
915+
916+
static size_t write_reused_pack_verbatim(struct hashfile *out,
917+
struct pack_window **w_curs)
918+
{
919+
size_t pos = 0;
920+
921+
while (pos < reuse_packfile_bitmap->word_alloc &&
922+
reuse_packfile_bitmap->words[pos] == (eword_t)~0)
923+
pos++;
924+
925+
if (pos) {
926+
off_t to_write;
927+
928+
written = (pos * BITS_IN_EWORD);
929+
to_write = reuse_packfile->revindex[written].offset
930+
- sizeof(struct pack_header);
931+
932+
/* We're recording one chunk, not one object. */
933+
record_reused_object(sizeof(struct pack_header), 0);
934+
hashflush(out);
935+
copy_pack_data(out, reuse_packfile, w_curs,
936+
sizeof(struct pack_header), to_write);
820937

821-
/*
822-
* We don't know the actual number of objects written,
823-
* only how many bytes written, how many bytes total, and
824-
* how many objects total. So we can fake it by pretending all
825-
* objects we are writing are the same size. This gives us a
826-
* smooth progress meter, and at the end it matches the true
827-
* answer.
828-
*/
829-
written = reuse_packfile_objects *
830-
(((double)(total - to_write)) / total);
831938
display_progress(progress_state, written);
832939
}
940+
return pos;
941+
}
942+
943+
static void write_reused_pack(struct hashfile *f)
944+
{
945+
size_t i = 0;
946+
uint32_t offset;
947+
struct pack_window *w_curs = NULL;
948+
949+
if (allow_ofs_delta)
950+
i = write_reused_pack_verbatim(f, &w_curs);
951+
952+
for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
953+
eword_t word = reuse_packfile_bitmap->words[i];
954+
size_t pos = (i * BITS_IN_EWORD);
833955

834-
close(fd);
835-
written = reuse_packfile_objects;
836-
display_progress(progress_state, written);
837-
return reuse_packfile_offset - sizeof(struct pack_header);
956+
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
957+
if ((word >> offset) == 0)
958+
break;
959+
960+
offset += ewah_bit_ctz64(word >> offset);
961+
write_reused_pack_one(pos + offset, f, &w_curs);
962+
display_progress(progress_state, ++written);
963+
}
964+
}
965+
966+
unuse_pack(&w_curs);
838967
}
839968

840969
static const char no_split_warning[] = N_(
@@ -867,11 +996,9 @@ static void write_pack_file(void)
867996
offset = write_pack_header(f, nr_remaining);
868997

869998
if (reuse_packfile) {
870-
off_t packfile_size;
871999
assert(pack_to_stdout);
872-
873-
packfile_size = write_reused_pack(f);
874-
offset += packfile_size;
1000+
write_reused_pack(f);
1001+
offset = hashfile_total(f);
8751002
}
8761003

8771004
nr_written = 0;
@@ -1000,6 +1127,10 @@ static int have_duplicate_entry(const struct object_id *oid,
10001127
{
10011128
struct object_entry *entry;
10021129

1130+
if (reuse_packfile_bitmap &&
1131+
bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1132+
return 1;
1133+
10031134
entry = packlist_find(&to_pack, oid);
10041135
if (!entry)
10051136
return 0;
@@ -2552,6 +2683,13 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
25522683
free(p);
25532684
}
25542685

2686+
static int obj_is_packed(const struct object_id *oid)
2687+
{
2688+
return packlist_find(&to_pack, oid) ||
2689+
(reuse_packfile_bitmap &&
2690+
bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
2691+
}
2692+
25552693
static void add_tag_chain(const struct object_id *oid)
25562694
{
25572695
struct tag *tag;
@@ -2563,7 +2701,7 @@ static void add_tag_chain(const struct object_id *oid)
25632701
* it was included via bitmaps, we would not have parsed it
25642702
* previously).
25652703
*/
2566-
if (packlist_find(&to_pack, oid))
2704+
if (obj_is_packed(oid))
25672705
return;
25682706

25692707
tag = lookup_tag(the_repository, oid);
@@ -2587,7 +2725,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag,
25872725

25882726
if (starts_with(path, "refs/tags/") && /* is a tag? */
25892727
!peel_ref(path, &peeled) && /* peelable? */
2590-
packlist_find(&to_pack, &peeled)) /* object packed? */
2728+
obj_is_packed(&peeled)) /* object packed? */
25912729
add_tag_chain(oid);
25922730
return 0;
25932731
}
@@ -2655,6 +2793,7 @@ static void prepare_pack(int window, int depth)
26552793

26562794
if (nr_deltas && n > 1) {
26572795
unsigned nr_done = 0;
2796+
26582797
if (progress)
26592798
progress_state = start_progress(_("Compressing objects"),
26602799
nr_deltas);
@@ -2699,6 +2838,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
26992838
use_bitmap_index_default = git_config_bool(k, v);
27002839
return 0;
27012840
}
2841+
if (!strcmp(k, "pack.allowpackreuse")) {
2842+
allow_pack_reuse = git_config_bool(k, v);
2843+
return 0;
2844+
}
27022845
if (!strcmp(k, "pack.threads")) {
27032846
delta_search_threads = git_config_int(k, v);
27042847
if (delta_search_threads < 0)
@@ -3030,8 +3173,8 @@ static void loosen_unused_packed_objects(void)
30303173
*/
30313174
static int pack_options_allow_reuse(void)
30323175
{
3033-
return pack_to_stdout &&
3034-
allow_ofs_delta &&
3176+
return allow_pack_reuse &&
3177+
pack_to_stdout &&
30353178
!ignore_packed_keep_on_disk &&
30363179
!ignore_packed_keep_in_core &&
30373180
(!local || !have_non_local_packs) &&
@@ -3048,7 +3191,7 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
30483191
bitmap_git,
30493192
&reuse_packfile,
30503193
&reuse_packfile_objects,
3051-
&reuse_packfile_offset)) {
3194+
&reuse_packfile_bitmap)) {
30523195
assert(reuse_packfile_objects);
30533196
nr_result += reuse_packfile_objects;
30543197
display_progress(progress_state, nr_result);
@@ -3509,7 +3652,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
35093652
if (progress)
35103653
fprintf_ln(stderr,
35113654
_("Total %"PRIu32" (delta %"PRIu32"),"
3512-
" reused %"PRIu32" (delta %"PRIu32")"),
3513-
written, written_delta, reused, reused_delta);
3655+
" reused %"PRIu32" (delta %"PRIu32"),"
3656+
" pack-reused %"PRIu32),
3657+
written, written_delta, reused, reused_delta,
3658+
reuse_packfile_objects);
35143659
return 0;
35153660
}

csum-file.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ void hashflush(struct hashfile *f);
4242
void crc32_begin(struct hashfile *);
4343
uint32_t crc32_end(struct hashfile *);
4444

45+
/*
46+
* Returns the total number of bytes fed to the hashfile so far (including ones
47+
* that have not been written out to the descriptor yet).
48+
*/
49+
static inline off_t hashfile_total(struct hashfile *f)
50+
{
51+
return f->total + f->offset;
52+
}
53+
4554
static inline void hashwrite_u8(struct hashfile *f, uint8_t data)
4655
{
4756
hashwrite(f, &data, sizeof(data));

0 commit comments

Comments
 (0)