Skip to content

Commit 91a2288

Browse files
pclouds authored and gitster committed
untracked cache: record/validate dir mtime and reuse cached output
The main readdir loop in read_directory_recursive() is replaced with a new one that checks if the cached results of a directory are still valid. If a file is added to or removed from the index, the containing directory is invalidated (but not its subdirs). If a directory's mtime is changed, the same happens. If a .gitignore is updated, the containing directory and all subdirs are invalidated recursively. If dir_struct#flags or other conditions change, the cache is ignored. If a directory is invalidated, we opendir/readdir/closedir and run the exclude machinery on that directory listing as usual. If untracked cache is also enabled, we'll update the cache along the way. If a directory is validated, we simply pull the untracked listing out from the cache. The cache also records the list of direct subdirs that we have to recurse in. Fully excluded directories are seen as "untracked files". In the best case when no dirs are invalidated, read_directory() becomes a series of stat(dir), open(.gitignore), fstat(), read(), close() and optionally hash_sha1_file(). For comparison, the standard read_directory() is a sequence of opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the expensive last_exclude_matching() and closedir(). We already try not to open(.gitignore) if we know it does not exist, so the open/fstat/read/close sequence does not apply to every directory. The sequence could be reduced further, as noted in prep_exclude() in another patch. So in theory, the entire best-case read_directory sequence could be reduced to a series of stat() and nothing else. This is not a silver bullet approach. When you compile a C file, for example, the old .o file is removed and a new one with the same name created, effectively invalidating the containing directory's cache (but not its subdirectories). If your build process touches every directory, this cache adds extra overhead for nothing, so it's a good idea to separate generated files from tracked files.
Editors may use the same strategy for saving files. And of course you're out of luck running your repo on an unsupported filesystem and/or operating system. Helped-by: Eric Sunshine <[email protected]> Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent cf7c614 commit 91a2288

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

dir.c

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@ enum path_treatment {
3737
/*
 * Iteration state over one directory. Entries come either from a live
 * opendir() handle or from the untracked cache node for that directory.
 */
struct cached_dir {
	/* Handle returned by opendir(); stays NULL when the cache is used. */
	DIR *fdir;
	/* Untracked cache node for this directory, or NULL if there is none. */
	struct untracked_cache_dir *untracked;
	/* Number of cached untracked names already handed out. */
	int nr_files;
	/* Number of cached sub-directories already handed out. */
	int nr_dirs;

	/* Current on-disk entry; NULL selects the cache-backed fast path. */
	struct dirent *de;
	/* Current cached untracked name (used when ucd is NULL). */
	const char *file;
	/* Current cached sub-directory, or NULL when "file" is current. */
	struct untracked_cache_dir *ucd;
};
4247

4348
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
@@ -607,6 +612,14 @@ static void invalidate_gitignore(struct untracked_cache *uc,
607612
do_invalidate_gitignore(dir);
608613
}
609614

615+
static void invalidate_directory(struct untracked_cache *uc,
616+
struct untracked_cache_dir *dir)
617+
{
618+
uc->dir_invalidated++;
619+
dir->valid = 0;
620+
dir->untracked_nr = 0;
621+
}
622+
610623
/*
611624
* Given a file with name "fname", read it (either from disk, or from
612625
* the index if "check_index" is non-zero), parse it and store the
@@ -1425,6 +1438,39 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
14251438
}
14261439
}
14271440

1441+
static enum path_treatment treat_path_fast(struct dir_struct *dir,
1442+
struct untracked_cache_dir *untracked,
1443+
struct cached_dir *cdir,
1444+
struct strbuf *path,
1445+
int baselen,
1446+
const struct path_simplify *simplify)
1447+
{
1448+
strbuf_setlen(path, baselen);
1449+
if (!cdir->ucd) {
1450+
strbuf_addstr(path, cdir->file);
1451+
return path_untracked;
1452+
}
1453+
strbuf_addstr(path, cdir->ucd->name);
1454+
/* treat_one_path() does this before it calls treat_directory() */
1455+
if (path->buf[path->len - 1] != '/')
1456+
strbuf_addch(path, '/');
1457+
if (cdir->ucd->check_only)
1458+
/*
1459+
* check_only is set as a result of treat_directory() getting
1460+
* to its bottom. Verify again the same set of directories
1461+
* with check_only set.
1462+
*/
1463+
return read_directory_recursive(dir, path->buf, path->len,
1464+
cdir->ucd, 1, simplify);
1465+
/*
1466+
* We get path_recurse in the first run when
1467+
* directory_exists_in_index() returns index_nonexistent. We
1468+
* are sure that new changes in the index does not impact the
1469+
* outcome. Return now.
1470+
*/
1471+
return path_recurse;
1472+
}
1473+
14281474
static enum path_treatment treat_path(struct dir_struct *dir,
14291475
struct untracked_cache_dir *untracked,
14301476
struct cached_dir *cdir,
@@ -1435,6 +1481,9 @@ static enum path_treatment treat_path(struct dir_struct *dir,
14351481
int dtype;
14361482
struct dirent *de = cdir->de;
14371483

1484+
if (!de)
1485+
return treat_path_fast(dir, untracked, cdir, path,
1486+
baselen, simplify);
14381487
if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
14391488
return path_none;
14401489
strbuf_setlen(path, baselen);
@@ -1455,6 +1504,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
14551504
dir->untracked[dir->untracked_nr++] = xstrdup(name);
14561505
}
14571506

1507+
static int valid_cached_dir(struct dir_struct *dir,
1508+
struct untracked_cache_dir *untracked,
1509+
struct strbuf *path,
1510+
int check_only)
1511+
{
1512+
struct stat st;
1513+
1514+
if (!untracked)
1515+
return 0;
1516+
1517+
if (stat(path->len ? path->buf : ".", &st)) {
1518+
invalidate_directory(dir->untracked, untracked);
1519+
memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
1520+
return 0;
1521+
}
1522+
if (!untracked->valid ||
1523+
match_stat_data(&untracked->stat_data, &st)) {
1524+
if (untracked->valid)
1525+
invalidate_directory(dir->untracked, untracked);
1526+
fill_stat_data(&untracked->stat_data, &st);
1527+
return 0;
1528+
}
1529+
1530+
if (untracked->check_only != !!check_only) {
1531+
invalidate_directory(dir->untracked, untracked);
1532+
return 0;
1533+
}
1534+
1535+
/*
1536+
* prep_exclude will be called eventually on this directory,
1537+
* but it's called much later in last_exclude_matching(). We
1538+
* need it now to determine the validity of the cache for this
1539+
* path. The next calls will be nearly no-op, the way
1540+
* prep_exclude() is designed.
1541+
*/
1542+
if (path->len && path->buf[path->len - 1] != '/') {
1543+
strbuf_addch(path, '/');
1544+
prep_exclude(dir, path->buf, path->len);
1545+
strbuf_setlen(path, path->len - 1);
1546+
} else
1547+
prep_exclude(dir, path->buf, path->len);
1548+
1549+
/* hopefully prep_exclude() haven't invalidated this entry... */
1550+
return untracked->valid;
1551+
}
1552+
14581553
static int open_cached_dir(struct cached_dir *cdir,
14591554
struct dir_struct *dir,
14601555
struct untracked_cache_dir *untracked,
@@ -1463,7 +1558,11 @@ static int open_cached_dir(struct cached_dir *cdir,
14631558
{
14641559
memset(cdir, 0, sizeof(*cdir));
14651560
cdir->untracked = untracked;
1561+
if (valid_cached_dir(dir, untracked, path, check_only))
1562+
return 0;
14661563
cdir->fdir = opendir(path->len ? path->buf : ".");
1564+
if (dir->untracked)
1565+
dir->untracked->dir_opened++;
14671566
if (!cdir->fdir)
14681567
return -1;
14691568
return 0;
@@ -1477,13 +1576,31 @@ static int read_cached_dir(struct cached_dir *cdir)
14771576
return -1;
14781577
return 0;
14791578
}
1579+
while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
1580+
struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
1581+
cdir->ucd = d;
1582+
cdir->nr_dirs++;
1583+
return 0;
1584+
}
1585+
cdir->ucd = NULL;
1586+
if (cdir->nr_files < cdir->untracked->untracked_nr) {
1587+
struct untracked_cache_dir *d = cdir->untracked;
1588+
cdir->file = d->untracked[cdir->nr_files++];
1589+
return 0;
1590+
}
14801591
return -1;
14811592
}
14821593

14831594
static void close_cached_dir(struct cached_dir *cdir)
14841595
{
14851596
if (cdir->fdir)
14861597
closedir(cdir->fdir);
1598+
/*
1599+
* We have gone through this directory and found no untracked
1600+
* entries. Mark it valid.
1601+
*/
1602+
if (cdir->untracked)
1603+
cdir->untracked->valid = 1;
14871604
}
14881605

14891606
/*
@@ -1537,7 +1654,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
15371654
if (check_only) {
15381655
/* abort early if maximum state has been reached */
15391656
if (dir_state == path_untracked) {
1540-
if (untracked)
1657+
if (cdir.fdir)
15411658
add_untracked(untracked, path.buf + baselen);
15421659
break;
15431660
}
@@ -1561,7 +1678,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
15611678
if (dir->flags & DIR_SHOW_IGNORED)
15621679
break;
15631680
dir_add_name(dir, path.buf, path.len);
1564-
if (untracked)
1681+
if (cdir.fdir)
15651682
add_untracked(untracked, path.buf + baselen);
15661683
break;
15671684

dir.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ struct untracked_cache {
135135
/* Statistics */
136136
int dir_created;
137137
int gitignore_invalidated;
138+
int dir_invalidated;
139+
int dir_opened;
138140
};
139141

140142
struct dir_struct {

0 commit comments

Comments
 (0)