Skip to content

Commit 6c9cd16

Browse files
committed
read-cache.c: read prefix-compressed names in index on-disk version v4
Because index entries are sorted by path, adjacent entries tend to share their leading path components, so it makes sense to store only the differences in later entries. In the v4 on-disk format of the index, each on-disk cache entry stores the number of bytes to strip from the end of the previous entry's name, followed by the bytes to append to the result; together these reconstruct its name. Signed-off-by: Junio C Hamano <[email protected]>
1 parent f136f7b commit 6c9cd16

File tree

1 file changed

+51
-7
lines changed

1 file changed

+51
-7
lines changed

read-cache.c

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include "commit.h"
1313
#include "blob.h"
1414
#include "resolve-undo.h"
15+
#include "strbuf.h"
16+
#include "varint.h"
1517

1618
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
1719

@@ -1236,6 +1238,7 @@ struct ondisk_cache_entry_extended {
12361238
char name[FLEX_ARRAY]; /* more */
12371239
};
12381240

1241+
/* These are only used for v3 or lower */
12391242
#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
12401243
#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
12411244
#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
@@ -1252,7 +1255,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
12521255
if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
12531256
return error("bad signature");
12541257
hdr_version = ntohl(hdr->hdr_version);
1255-
if (hdr_version < 2 || 3 < hdr_version)
1258+
if (hdr_version < 2 || 4 < hdr_version)
12561259
return error("bad index version %d", hdr_version);
12571260
git_SHA1_Init(&c);
12581261
git_SHA1_Update(&c, hdr, size - 20);
@@ -1331,8 +1334,30 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
13311334
return ce;
13321335
}
13331336

1337+
/*
1338+
* Adjacent cache entries tend to share the leading paths, so it makes
1339+
* sense to only store the differences in later entries. In the v4
1340+
* on-disk format of the index, each on-disk cache entry stores the
1341+
* number of bytes to be stripped from the end of the previous name,
1342+
* and the bytes to append to the result, to come up with its name.
1343+
*/
1344+
static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
1345+
{
1346+
const unsigned char *ep, *cp = (const unsigned char *)cp_;
1347+
size_t len = decode_varint(&cp);
1348+
1349+
if (name->len < len)
1350+
die("malformed name field in the index");
1351+
strbuf_remove(name, name->len - len, len);
1352+
for (ep = cp; *ep; ep++)
1353+
; /* find the end */
1354+
strbuf_add(name, cp, ep - cp);
1355+
return (const char *)ep + 1 - cp_;
1356+
}
1357+
13341358
static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
1335-
unsigned long *ent_size)
1359+
unsigned long *ent_size,
1360+
struct strbuf *previous_name)
13361361
{
13371362
struct cache_entry *ce;
13381363
size_t len;
@@ -1357,10 +1382,22 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
13571382
else
13581383
name = ondisk->name;
13591384

1360-
if (len == CE_NAMEMASK)
1361-
len = strlen(name);
1362-
ce = cache_entry_from_ondisk(ondisk, flags, name, len);
1363-
*ent_size = ondisk_ce_size(ce);
1385+
if (!previous_name) {
1386+
/* v3 and earlier */
1387+
if (len == CE_NAMEMASK)
1388+
len = strlen(name);
1389+
ce = cache_entry_from_ondisk(ondisk, flags, name, len);
1390+
1391+
*ent_size = ondisk_ce_size(ce);
1392+
} else {
1393+
unsigned long consumed;
1394+
consumed = expand_name_field(previous_name, name);
1395+
ce = cache_entry_from_ondisk(ondisk, flags,
1396+
previous_name->buf,
1397+
previous_name->len);
1398+
1399+
*ent_size = (name - ((char *)ondisk)) + consumed;
1400+
}
13641401
return ce;
13651402
}
13661403

@@ -1373,6 +1410,7 @@ int read_index_from(struct index_state *istate, const char *path)
13731410
struct cache_header *hdr;
13741411
void *mmap;
13751412
size_t mmap_size;
1413+
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
13761414

13771415
errno = EBUSY;
13781416
if (istate->initialized)
@@ -1410,18 +1448,24 @@ int read_index_from(struct index_state *istate, const char *path)
14101448
istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
14111449
istate->initialized = 1;
14121450

1451+
if (hdr->hdr_version == htonl(4))
1452+
previous_name = &previous_name_buf;
1453+
else
1454+
previous_name = NULL;
1455+
14131456
src_offset = sizeof(*hdr);
14141457
for (i = 0; i < istate->cache_nr; i++) {
14151458
struct ondisk_cache_entry *disk_ce;
14161459
struct cache_entry *ce;
14171460
unsigned long consumed;
14181461

14191462
disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
1420-
ce = create_from_disk(disk_ce, &consumed);
1463+
ce = create_from_disk(disk_ce, &consumed, previous_name);
14211464
set_index_entry(istate, i, ce);
14221465

14231466
src_offset += consumed;
14241467
}
1468+
strbuf_release(&previous_name_buf);
14251469
istate->timestamp.sec = st.st_mtime;
14261470
istate->timestamp.nsec = ST_MTIME_NSEC(st);
14271471

0 commit comments

Comments
 (0)