Skip to content

Commit 5aaa7fd

Browse files
jamillgitster
authored and committed
Improve performance of git status --ignored
Improve the performance of the directory listing logic when it wants to list non-empty ignored directories. In order to show non-empty ignored directories, the existing logic will recursively iterate through all contents of an ignored directory. This change introduces the optimization to stop iterating through the contents once it finds the first file. This can have a significant improvement in 'git status --ignored' performance in repositories with a large number of files in ignored directories.

For an example of the performance difference on an example repository with 196,000 files in 400 ignored directories:

| Command                    | Time (s)  |
| -------------------------- | --------- |
| git status                 | 1.2       |
| git status --ignored (old) | 3.9       |
| git status --ignored (new) | 1.4       |

Signed-off-by: Jameson Miller <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 94c9fd2 commit 5aaa7fd

File tree

1 file changed

+41
-6
lines changed

1 file changed

+41
-6
lines changed

dir.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct cached_dir {
4949
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
5050
struct index_state *istate, const char *path, int len,
5151
struct untracked_cache_dir *untracked,
52-
int check_only, const struct pathspec *pathspec);
52+
int check_only, int stop_at_first_file, const struct pathspec *pathspec);
5353
static int get_dtype(struct dirent *de, struct index_state *istate,
5454
const char *path, int len);
5555

@@ -1404,8 +1404,13 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
14041404

14051405
untracked = lookup_untracked(dir->untracked, untracked,
14061406
dirname + baselen, len - baselen);
1407+
1408+
/*
1409+
* If this is an excluded directory, then we only need to check if
1410+
* the directory contains any files.
1411+
*/
14071412
return read_directory_recursive(dir, istate, dirname, len,
1408-
untracked, 1, pathspec);
1413+
untracked, 1, exclude, pathspec);
14091414
}
14101415

14111416
/*
@@ -1633,7 +1638,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
16331638
* with check_only set.
16341639
*/
16351640
return read_directory_recursive(dir, istate, path->buf, path->len,
1636-
cdir->ucd, 1, pathspec);
1641+
cdir->ucd, 1, 0, pathspec);
16371642
/*
16381643
* We get path_recurse in the first run when
16391644
* directory_exists_in_index() returns index_nonexistent. We
@@ -1793,12 +1798,20 @@ static void close_cached_dir(struct cached_dir *cdir)
17931798
* Also, we ignore the name ".git" (even if it is not a directory).
17941799
* That likely will not change.
17951800
*
1801+
* If 'stop_at_first_file' is specified, 'path_excluded' is returned
1802+
* to signal that a file was found. This is the least significant value that
1803+
* indicates that a file was encountered that does not depend on the order of
1804+
* whether an untracked or excluded path was encountered first.
1805+
*
17961806
* Returns the most significant path_treatment value encountered in the scan.
1807+
* If 'stop_at_first_file' is specified, `path_excluded` is the most
1808+
* significant path_treatment value that will be returned.
17971809
*/
1810+
17981811
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
17991812
struct index_state *istate, const char *base, int baselen,
18001813
struct untracked_cache_dir *untracked, int check_only,
1801-
const struct pathspec *pathspec)
1814+
int stop_at_first_file, const struct pathspec *pathspec)
18021815
{
18031816
struct cached_dir cdir;
18041817
enum path_treatment state, subdir_state, dir_state = path_none;
@@ -1832,12 +1845,34 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
18321845
subdir_state =
18331846
read_directory_recursive(dir, istate, path.buf,
18341847
path.len, ud,
1835-
check_only, pathspec);
1848+
check_only, stop_at_first_file, pathspec);
18361849
if (subdir_state > dir_state)
18371850
dir_state = subdir_state;
18381851
}
18391852

18401853
if (check_only) {
1854+
if (stop_at_first_file) {
1855+
/*
1856+
* If stopping at first file, then
1857+
* signal that a file was found by
1858+
* returning `path_excluded`. This is
1859+
* to return a consistent value
1860+
* regardless of whether an untracked or
1861+
* excluded file happened to be
1862+
* encountered first.
1863+
*
1864+
* In current usage, the
1865+
* `stop_at_first_file` is passed when
1866+
* an ancestor directory has matched
1867+
* an exclude pattern, so any found
1868+
* files will be excluded.
1869+
*/
1870+
if (dir_state >= path_excluded) {
1871+
dir_state = path_excluded;
1872+
break;
1873+
}
1874+
}
1875+
18411876
/* abort early if maximum state has been reached */
18421877
if (dir_state == path_untracked) {
18431878
if (cdir.fdir)
@@ -2108,7 +2143,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
21082143
*/
21092144
dir->untracked = NULL;
21102145
if (!len || treat_leading_path(dir, istate, path, len, pathspec))
2111-
read_directory_recursive(dir, istate, path, len, untracked, 0, pathspec);
2146+
read_directory_recursive(dir, istate, path, len, untracked, 0, 0, pathspec);
21122147
QSORT(dir->entries, dir->nr, cmp_dir_entry);
21132148
QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
21142149

0 commit comments

Comments
 (0)