Skip to content

Commit 074b2ee

Browse files
Martin Koegler, Junio C Hamano
authored and committed
git-pack-objects: cache small deltas between big objects
Creating deltas between big blobs is a CPU and memory intensive task. In the writing phase, all (not reused) deltas are redone. This patch adds support for caching deltas from the deltifying phase, so that the writing phase is faster. The caching is limited to small deltas to avoid increasing memory usage very much. The implemented limit is (memory needed to create the delta)/1024. Signed-off-by: Martin Koegler <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent a588d88 commit 074b2ee

File tree

2 files changed

+59
-15
lines changed

2 files changed

+59
-15
lines changed

Documentation/config.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,11 @@ pack.compression::
567567
slowest. If not set, defaults to core.compression. If that is
568568
not set, defaults to -1.
569569

570+
pack.deltaCacheSize::
571+
The maximum memory in bytes used for caching deltas in
572+
gitlink:git-pack-objects[1].
573+
A value of 0 means no limit. Defaults to 0.
574+
570575
pull.octopus::
571576
The default merge strategy to use when pulling multiple branches
572577
at once.

builtin-pack-objects.c

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ struct object_entry {
3636
struct object_entry *delta_sibling; /* other deltified objects who
3737
* uses the same base as me
3838
*/
39+
void *delta_data; /* cached delta (uncompressed) */
3940
unsigned long delta_size; /* delta data size (uncompressed) */
4041
enum object_type type;
4142
enum object_type in_pack_type; /* could be delta */
@@ -76,6 +77,9 @@ static struct progress progress_state;
7677
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
7778
static int pack_compression_seen;
7879

80+
static unsigned long delta_cache_size = 0;
81+
static unsigned long max_delta_cache_size = 0;
82+
7983
/*
8084
* The object names in objects array are hashed with this hashtable,
8185
* to help looking up the entry by object name.
@@ -405,24 +409,31 @@ static unsigned long write_object(struct sha1file *f,
405409
z_stream stream;
406410
unsigned long maxsize;
407411
void *out;
408-
buf = read_sha1_file(entry->sha1, &type, &size);
409-
if (!buf)
410-
die("unable to read %s", sha1_to_hex(entry->sha1));
411-
if (size != entry->size)
412-
die("object %s size inconsistency (%lu vs %lu)",
413-
sha1_to_hex(entry->sha1), size, entry->size);
414-
if (usable_delta) {
415-
buf = delta_against(buf, size, entry);
412+
if (entry->delta_data && usable_delta) {
413+
buf = entry->delta_data;
416414
size = entry->delta_size;
417415
obj_type = (allow_ofs_delta && entry->delta->offset) ?
418416
OBJ_OFS_DELTA : OBJ_REF_DELTA;
419417
} else {
420-
/*
421-
* recover real object type in case
422-
* check_object() wanted to re-use a delta,
423-
* but we couldn't since base was in previous split pack
424-
*/
425-
obj_type = type;
418+
buf = read_sha1_file(entry->sha1, &type, &size);
419+
if (!buf)
420+
die("unable to read %s", sha1_to_hex(entry->sha1));
421+
if (size != entry->size)
422+
die("object %s size inconsistency (%lu vs %lu)",
423+
sha1_to_hex(entry->sha1), size, entry->size);
424+
if (usable_delta) {
425+
buf = delta_against(buf, size, entry);
426+
size = entry->delta_size;
427+
obj_type = (allow_ofs_delta && entry->delta->offset) ?
428+
OBJ_OFS_DELTA : OBJ_REF_DELTA;
429+
} else {
430+
/*
431+
* recover real object type in case
432+
* check_object() wanted to re-use a delta,
433+
* but we couldn't since base was in previous split pack
434+
*/
435+
obj_type = type;
436+
}
426437
}
427438
/* compress the data to store and put compressed length in datalen */
428439
memset(&stream, 0, sizeof(stream));
@@ -1385,6 +1396,20 @@ struct unpacked {
13851396
struct delta_index *index;
13861397
};
13871398

1399+
/*
 * Decide whether a freshly computed delta of delta_size bytes should be
 * kept in the in-memory delta cache.
 *
 * Returns 0 when max_delta_cache_size is non-zero and adding delta_size
 * to the running total delta_cache_size would exceed it.  Otherwise
 * returns 1 only if (src_size >> 20) + (trg_size >> 21) is greater than
 * (delta_size >> 10), and 0 in every other case.
 *
 * The trg and src pointers are not read in this body.
 * NOTE(review): the call in try_delta() passes (src, trg) while this
 * signature is declared as (trg, src); that has no effect as long as
 * neither pointer is used here, but the order should be reconciled.
 */
static int delta_cacheable(struct unpacked *trg, struct unpacked *src,
1400+
unsigned long src_size, unsigned long trg_size,
1401+
unsigned long delta_size)
1402+
{
1403+
if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
1404+
return 0;
1405+
1406+
/* cache delta, if objects are large enough compared to delta size */
1407+
if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
1408+
return 1;
1409+
1410+
return 0;
1411+
}
1412+
13881413
/*
13891414
* We search for deltas _backwards_ in a list sorted by type and
13901415
* by size, so that we see progressively smaller and smaller files.
@@ -1466,10 +1491,20 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
14661491
if (!delta_buf)
14671492
return 0;
14681493

1494+
if (trg_entry->delta_data) {
1495+
delta_cache_size -= trg_entry->delta_size;
1496+
free(trg_entry->delta_data);
1497+
}
1498+
trg_entry->delta_data = 0;
14691499
trg_entry->delta = src_entry;
14701500
trg_entry->delta_size = delta_size;
14711501
trg_entry->depth = src_entry->depth + 1;
1472-
free(delta_buf);
1502+
1503+
if (delta_cacheable(src, trg, src_size, trg_size, delta_size)) {
1504+
trg_entry->delta_data = xrealloc(delta_buf, delta_size);
1505+
delta_cache_size += trg_entry->delta_size;
1506+
} else
1507+
free(delta_buf);
14731508
return 1;
14741509
}
14751510

@@ -1615,6 +1650,10 @@ static int git_pack_config(const char *k, const char *v)
16151650
pack_compression_seen = 1;
16161651
return 0;
16171652
}
1653+
if (!strcmp(k, "pack.deltacachesize")) {
1654+
max_delta_cache_size = git_config_int(k, v);
1655+
return 0;
1656+
}
16181657
return git_default_config(k, v);
16191658
}
16201659

0 commit comments

Comments
 (0)