Skip to content

Commit 0dcb8d7

Browse files
pclouds authored and gitster committed
untracked cache: record .gitignore information and dir hierarchy
The idea is that if we can capture all input and (non-recursive) output of read_directory_recursive(), and can verify later that all the input is the same, then the second r_d_r() should produce the same output as in the first run.

The requirement for this to work is that the stat info of a directory MUST change if an entry is added to or removed from that directory (and should not change often otherwise). If your OS and filesystem do not meet this requirement, untracked cache is not for you. Most file systems on *nix should be fine. On Windows, NTFS is fine while FAT may not be [1], even though FAT on Linux seems to be fine.

The list of inputs of r_d_r() is in the big comment block in dir.h. In short, the output of a directory (not counting subdirs) mainly depends on the stat info of the directory in question, all .gitignore files leading to it, and the check_only flag when r_d_r() is called recursively. This patch records all this info (and the output) as r_d_r() runs.

Two hash_sha1_file() calls are required for $GIT_DIR/info/exclude and core.excludesfile unless their stat data matches. hash_sha1_file() is only needed when .gitignore files in the worktree are modified; otherwise their SHA-1 in the index is used (see the previous patch).

We could store stat data for .gitignore files so we don't have to rehash them if their content is different from the index, but I think .gitignore files are rarely modified, so it is not worth the extra cache data (and the hashing penalty in read-cache.c:verify_hdr(), as we will be storing this as an index extension).

The implication is, if you change .gitignore, you had better add it to the index soon or you lose all the benefit of untracked cache, because a modified .gitignore invalidates all subdirs recursively. This is especially bad for .gitignore at root.

This cached output is about untracked files only, not ignored files, because the number of untracked files is usually small, so the cache overhead is small, while the number of ignored files could go really high (e.g. *.o files mixing with source code).

[1] "Description of NTFS date and time stamps for files and folders" http://support.microsoft.com/kb/299648

Helped-by: Torsten Bögershausen <[email protected]>
Helped-by: David Turner <[email protected]>
Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 55fe6f5 commit 0dcb8d7

File tree

2 files changed

+183
-19
lines changed

2 files changed

+183
-19
lines changed

dir.c

Lines changed: 123 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ enum path_treatment {
3232
};
3333

3434
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
35-
const char *path, int len,
35+
const char *path, int len, struct untracked_cache_dir *untracked,
3636
int check_only, const struct path_simplify *simplify);
3737
static int get_dtype(struct dirent *de, const char *path, int len);
3838

@@ -534,6 +534,54 @@ static void trim_trailing_spaces(char *buf)
534534
*last_space = '\0';
535535
}
536536

537+
/*
538+
* Given a subdirectory name and "dir" of the current directory,
539+
* search the subdir in "dir" and return it, or create a new one if it
540+
* does not exist in "dir".
541+
*
542+
* If "name" has the trailing slash, it'll be excluded in the search.
543+
*/
544+
static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
545+
struct untracked_cache_dir *dir,
546+
const char *name, int len)
547+
{
548+
int first, last;
549+
struct untracked_cache_dir *d;
550+
if (!dir)
551+
return NULL;
552+
if (len && name[len - 1] == '/')
553+
len--;
554+
first = 0;
555+
last = dir->dirs_nr;
556+
while (last > first) {
557+
int cmp, next = (last + first) >> 1;
558+
d = dir->dirs[next];
559+
cmp = strncmp(name, d->name, len);
560+
if (!cmp && strlen(d->name) > len)
561+
cmp = -1;
562+
if (!cmp)
563+
return d;
564+
if (cmp < 0) {
565+
last = next;
566+
continue;
567+
}
568+
first = next+1;
569+
}
570+
571+
uc->dir_created++;
572+
d = xmalloc(sizeof(*d) + len + 1);
573+
memset(d, 0, sizeof(*d));
574+
memcpy(d->name, name, len);
575+
d->name[len] = '\0';
576+
577+
ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
578+
memmove(dir->dirs + first + 1, dir->dirs + first,
579+
(dir->dirs_nr - first) * sizeof(*dir->dirs));
580+
dir->dirs_nr++;
581+
dir->dirs[first] = d;
582+
return d;
583+
}
584+
537585
/*
538586
* Given a file with name "fname", read it (either from disk, or from
539587
* the index if "check_index" is non-zero), parse it and store the
@@ -646,14 +694,20 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
646694
/*
647695
* Used to set up core.excludesfile and .git/info/exclude lists.
648696
*/
649-
void add_excludes_from_file(struct dir_struct *dir, const char *fname)
697+
static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
698+
struct sha1_stat *sha1_stat)
650699
{
651700
struct exclude_list *el;
652701
el = add_exclude_list(dir, EXC_FILE, fname);
653-
if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0)
702+
if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
654703
die("cannot use %s as an exclude file", fname);
655704
}
656705

706+
void add_excludes_from_file(struct dir_struct *dir, const char *fname)
707+
{
708+
add_excludes_from_file_1(dir, fname, NULL);
709+
}
710+
657711
int match_basename(const char *basename, int basenamelen,
658712
const char *pattern, int prefix, int patternlen,
659713
int flags)
@@ -828,6 +882,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
828882
struct exclude_list_group *group;
829883
struct exclude_list *el;
830884
struct exclude_stack *stk = NULL;
885+
struct untracked_cache_dir *untracked;
831886
int current;
832887

833888
group = &dir->exclude_list_group[EXC_DIRS];
@@ -865,8 +920,14 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
865920
/* Read from the parent directories and push them down. */
866921
current = stk ? stk->baselen : -1;
867922
strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
923+
if (dir->untracked)
924+
untracked = stk ? stk->ucd : dir->untracked->root;
925+
else
926+
untracked = NULL;
927+
868928
while (current < baselen) {
869929
const char *cp;
930+
struct sha1_stat sha1_stat;
870931

871932
stk = xcalloc(1, sizeof(*stk));
872933
if (current < 0) {
@@ -877,10 +938,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
877938
if (!cp)
878939
die("oops in prep_exclude");
879940
cp++;
941+
untracked =
942+
lookup_untracked(dir->untracked, untracked,
943+
base + current,
944+
cp - base - current);
880945
}
881946
stk->prev = dir->exclude_stack;
882947
stk->baselen = cp - base;
883948
stk->exclude_ix = group->nr;
949+
stk->ucd = untracked;
884950
el = add_exclude_list(dir, EXC_DIRS, NULL);
885951
strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
886952
assert(stk->baselen == dir->basebuf.len);
@@ -903,6 +969,8 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
903969
}
904970

905971
/* Try to read per-directory file */
972+
hashclr(sha1_stat.sha1);
973+
sha1_stat.valid = 0;
906974
if (dir->exclude_per_dir) {
907975
/*
908976
* dir->basebuf gets reused by the traversal, but we
@@ -916,8 +984,11 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
916984
strbuf_addbuf(&sb, &dir->basebuf);
917985
strbuf_addstr(&sb, dir->exclude_per_dir);
918986
el->src = strbuf_detach(&sb, NULL);
919-
add_excludes_from_file_to_list(el->src, el->src,
920-
stk->baselen, el, 1);
987+
add_excludes(el->src, el->src, stk->baselen, el, 1,
988+
untracked ? &sha1_stat : NULL);
989+
}
990+
if (untracked) {
991+
hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
921992
}
922993
dir->exclude_stack = stk;
923994
current = stk->baselen;
@@ -1098,6 +1169,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
10981169
* (c) otherwise, we recurse into it.
10991170
*/
11001171
static enum path_treatment treat_directory(struct dir_struct *dir,
1172+
struct untracked_cache_dir *untracked,
11011173
const char *dirname, int len, int exclude,
11021174
const struct path_simplify *simplify)
11031175
{
@@ -1125,7 +1197,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
11251197
if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
11261198
return exclude ? path_excluded : path_untracked;
11271199

1128-
return read_directory_recursive(dir, dirname, len, 1, simplify);
1200+
untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
1201+
return read_directory_recursive(dir, dirname, len,
1202+
untracked, 1, simplify);
11291203
}
11301204

11311205
/*
@@ -1241,6 +1315,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
12411315
}
12421316

12431317
static enum path_treatment treat_one_path(struct dir_struct *dir,
1318+
struct untracked_cache_dir *untracked,
12441319
struct strbuf *path,
12451320
const struct path_simplify *simplify,
12461321
int dtype, struct dirent *de)
@@ -1293,7 +1368,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
12931368
return path_none;
12941369
case DT_DIR:
12951370
strbuf_addch(path, '/');
1296-
return treat_directory(dir, path->buf, path->len, exclude,
1371+
return treat_directory(dir, untracked, path->buf, path->len, exclude,
12971372
simplify);
12981373
case DT_REG:
12991374
case DT_LNK:
@@ -1302,6 +1377,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
13021377
}
13031378

13041379
static enum path_treatment treat_path(struct dir_struct *dir,
1380+
struct untracked_cache_dir *untracked,
13051381
struct dirent *de,
13061382
struct strbuf *path,
13071383
int baselen,
@@ -1317,7 +1393,16 @@ static enum path_treatment treat_path(struct dir_struct *dir,
13171393
return path_none;
13181394

13191395
dtype = DTYPE(de);
1320-
return treat_one_path(dir, path, simplify, dtype, de);
1396+
return treat_one_path(dir, untracked, path, simplify, dtype, de);
1397+
}
1398+
1399+
static void add_untracked(struct untracked_cache_dir *dir, const char *name)
1400+
{
1401+
if (!dir)
1402+
return;
1403+
ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
1404+
dir->untracked_alloc);
1405+
dir->untracked[dir->untracked_nr++] = xstrdup(name);
13211406
}
13221407

13231408
/*
@@ -1333,7 +1418,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
13331418
*/
13341419
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
13351420
const char *base, int baselen,
1336-
int check_only,
1421+
struct untracked_cache_dir *untracked, int check_only,
13371422
const struct path_simplify *simplify)
13381423
{
13391424
DIR *fdir;
@@ -1347,24 +1432,36 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
13471432
if (!fdir)
13481433
goto out;
13491434

1435+
if (untracked)
1436+
untracked->check_only = !!check_only;
1437+
13501438
while ((de = readdir(fdir)) != NULL) {
13511439
/* check how the file or directory should be treated */
1352-
state = treat_path(dir, de, &path, baselen, simplify);
1440+
state = treat_path(dir, untracked, de, &path, baselen, simplify);
1441+
13531442
if (state > dir_state)
13541443
dir_state = state;
13551444

13561445
/* recurse into subdir if instructed by treat_path */
13571446
if (state == path_recurse) {
1358-
subdir_state = read_directory_recursive(dir, path.buf,
1359-
path.len, check_only, simplify);
1447+
struct untracked_cache_dir *ud;
1448+
ud = lookup_untracked(dir->untracked, untracked,
1449+
path.buf + baselen,
1450+
path.len - baselen);
1451+
subdir_state =
1452+
read_directory_recursive(dir, path.buf, path.len,
1453+
ud, check_only, simplify);
13601454
if (subdir_state > dir_state)
13611455
dir_state = subdir_state;
13621456
}
13631457

13641458
if (check_only) {
13651459
/* abort early if maximum state has been reached */
1366-
if (dir_state == path_untracked)
1460+
if (dir_state == path_untracked) {
1461+
if (untracked)
1462+
add_untracked(untracked, path.buf + baselen);
13671463
break;
1464+
}
13681465
/* skip the dir_add_* part */
13691466
continue;
13701467
}
@@ -1382,8 +1479,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
13821479
break;
13831480

13841481
case path_untracked:
1385-
if (!(dir->flags & DIR_SHOW_IGNORED))
1386-
dir_add_name(dir, path.buf, path.len);
1482+
if (dir->flags & DIR_SHOW_IGNORED)
1483+
break;
1484+
dir_add_name(dir, path.buf, path.len);
1485+
if (untracked)
1486+
add_untracked(untracked, path.buf + baselen);
13871487
break;
13881488

13891489
default:
@@ -1460,7 +1560,7 @@ static int treat_leading_path(struct dir_struct *dir,
14601560
break;
14611561
if (simplify_away(sb.buf, sb.len, simplify))
14621562
break;
1463-
if (treat_one_path(dir, &sb, simplify,
1563+
if (treat_one_path(dir, NULL, &sb, simplify,
14641564
DT_DIR, NULL) == path_none)
14651565
break; /* do not recurse into it */
14661566
if (len <= baselen) {
@@ -1500,7 +1600,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
15001600
*/
15011601
simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
15021602
if (!len || treat_leading_path(dir, path, len, simplify))
1503-
read_directory_recursive(dir, path, len, 0, simplify);
1603+
read_directory_recursive(dir, path, len,
1604+
dir->untracked ? dir->untracked->root : NULL,
1605+
0, simplify);
15041606
free_simplify(simplify);
15051607
qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
15061608
qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
@@ -1671,9 +1773,11 @@ void setup_standard_excludes(struct dir_struct *dir)
16711773
excludes_file = xdg_path;
16721774
}
16731775
if (!access_or_warn(path, R_OK, 0))
1674-
add_excludes_from_file(dir, path);
1776+
add_excludes_from_file_1(dir, path,
1777+
dir->untracked ? &dir->ss_info_exclude : NULL);
16751778
if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
1676-
add_excludes_from_file(dir, excludes_file);
1779+
add_excludes_from_file_1(dir, excludes_file,
1780+
dir->untracked ? &dir->ss_excludes_file : NULL);
16771781
}
16781782

16791783
int remove_path(const char *name)

dir.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ struct exclude_stack {
6666
struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
6767
int baselen;
6868
int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
69+
struct untracked_cache_dir *ucd;
6970
};
7071

7172
struct exclude_list_group {
@@ -79,6 +80,60 @@ struct sha1_stat {
7980
int valid;
8081
};
8182

83+
/*
84+
* Untracked cache
85+
*
86+
* The following inputs are sufficient to determine what files in a
87+
* directory are excluded:
88+
*
89+
* - The list of files and directories of the directory in question
90+
* - The $GIT_DIR/index
91+
* - dir_struct flags
92+
* - The content of $GIT_DIR/info/exclude
93+
* - The content of core.excludesfile
94+
* - The content (or the lack) of .gitignore of all parent directories
95+
* from $GIT_WORK_TREE
96+
* - The check_only flag in read_directory_recursive (for
97+
* DIR_HIDE_EMPTY_DIRECTORIES)
98+
*
99+
* The first input can be checked using directory mtime. In many
100+
* filesystems, directory mtime (stat_data field) is updated when its
101+
* files or direct subdirs are added or removed.
102+
*
103+
* The second one can be hooked from cache_tree_invalidate_path().
104+
* Whenever a file (or a submodule) is added or removed from a
105+
* directory, we invalidate that directory.
106+
*
107+
* The remaining inputs are easy, their SHA-1 could be used to verify
108+
* their contents (exclude_sha1[], info_exclude_sha1[] and
109+
* excludes_file_sha1[])
110+
*/
111+
struct untracked_cache_dir {
112+
struct untracked_cache_dir **dirs;
113+
char **untracked;
114+
struct stat_data stat_data;
115+
unsigned int untracked_alloc, dirs_nr, dirs_alloc;
116+
unsigned int untracked_nr;
117+
unsigned int check_only : 1;
118+
/* null SHA-1 means this directory does not have .gitignore */
119+
unsigned char exclude_sha1[20];
120+
char name[FLEX_ARRAY];
121+
};
122+
123+
struct untracked_cache {
124+
struct sha1_stat ss_info_exclude;
125+
struct sha1_stat ss_excludes_file;
126+
const char *exclude_per_dir;
127+
/*
128+
* dir_struct#flags must match dir_flags or the untracked
129+
* cache is ignored.
130+
*/
131+
unsigned dir_flags;
132+
struct untracked_cache_dir *root;
133+
/* Statistics */
134+
int dir_created;
135+
};
136+
82137
struct dir_struct {
83138
int nr, alloc;
84139
int ignored_nr, ignored_alloc;
@@ -126,6 +181,11 @@ struct dir_struct {
126181
struct exclude_stack *exclude_stack;
127182
struct exclude *exclude;
128183
struct strbuf basebuf;
184+
185+
/* Enable untracked file cache if set */
186+
struct untracked_cache *untracked;
187+
struct sha1_stat ss_info_exclude;
188+
struct sha1_stat ss_excludes_file;
129189
};
130190

131191
/*

0 commit comments

Comments
 (0)