Skip to content

Commit 09c9306

Browse files
Kjetil Barvik authored and gitster committed
lstat_cache(): introduce has_symlink_or_noent_leading_path() function
In some cases, especially inside the unpack-trees.c file, and inside the verify_absent() function, we can avoid some unnecessary calls to lstat(), if the lstat_cache() function can also be told to keep track of non-existing directories.

So we update the lstat_cache() function to handle this new fact, introduce a new wrapper function, and the result is that we save lots of lstat() calls for a removed directory which previously contained lots of files, when we call this new wrapper of lstat_cache() instead of the old one.

We do similar changes inside the unlink_entry() function, since if we can already say that the leading directory component of a pathname does not exist, it is not necessary to try to remove a pathname below it!

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments to this patch!

Signed-off-by: Kjetil Barvik <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 92604b4 commit 09c9306

File tree

3 files changed

+63
-36
lines changed

3 files changed

+63
-36
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -720,6 +720,7 @@ struct checkout {
720720

721721
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
722722
extern int has_symlink_leading_path(int len, const char *name);
723+
extern int has_symlink_or_noent_leading_path(int len, const char *name);
723724

724725
extern struct alternate_object_database {
725726
struct alternate_object_database *next;

symlinks.c

Lines changed: 60 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ static struct cache_def {
44
char path[PATH_MAX];
55
int len;
66
int flags;
7+
int track_flags;
78
} cache;
89

910
/*
@@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name)
3031
return match_len;
3132
}
3233

33-
static inline void reset_lstat_cache(void)
34+
static inline void reset_lstat_cache(int track_flags)
3435
{
3536
cache.path[0] = '\0';
3637
cache.len = 0;
3738
cache.flags = 0;
39+
cache.track_flags = track_flags;
3840
}
3941

4042
#define FL_DIR (1 << 0)
41-
#define FL_SYMLINK (1 << 1)
42-
#define FL_LSTATERR (1 << 2)
43-
#define FL_ERR (1 << 3)
43+
#define FL_NOENT (1 << 1)
44+
#define FL_SYMLINK (1 << 2)
45+
#define FL_LSTATERR (1 << 3)
46+
#define FL_ERR (1 << 4)
4447

4548
/*
4649
* Check if name 'name' of length 'len' has a symlink leading
47-
* component, or if the directory exists and is real.
50+
* component, or if the directory exists and is real, or not.
4851
*
4952
* To speed up the check, some information is allowed to be cached.
5053
* This can be indicated by the 'track_flags' argument.
@@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name,
5659
int match_flags, ret_flags, save_flags, max_len;
5760
struct stat st;
5861

59-
/*
60-
* Check to see if we have a match from the cache for the
61-
* symlink path type.
62-
*/
63-
match_len = last_slash = longest_match_lstat_cache(len, name);
64-
match_flags = cache.flags & track_flags & FL_SYMLINK;
65-
if (match_flags && match_len == cache.len)
66-
return match_flags;
67-
/*
68-
* If we now have match_len > 0, we would know that the
69-
* matched part will always be a directory.
70-
*
71-
* Also, if we are tracking directories and 'name' is a
72-
* substring of the cache on a path component basis, we can
73-
* return immediately.
74-
*/
75-
match_flags = track_flags & FL_DIR;
76-
if (match_flags && len == match_len)
77-
return match_flags;
62+
if (cache.track_flags != track_flags) {
63+
/*
64+
* As a safeguard we clear the cache if the value of
65+
* track_flags does not match with the last supplied
66+
* value.
67+
*/
68+
reset_lstat_cache(track_flags);
69+
match_len = last_slash = 0;
70+
} else {
71+
/*
72+
* Check to see if we have a match from the cache for
73+
* the 2 "excluding" path types.
74+
*/
75+
match_len = last_slash = longest_match_lstat_cache(len, name);
76+
match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
77+
if (match_flags && match_len == cache.len)
78+
return match_flags;
79+
/*
80+
* If we now have match_len > 0, we would know that
81+
* the matched part will always be a directory.
82+
*
83+
* Also, if we are tracking directories and 'name' is
84+
* a substring of the cache on a path component basis,
85+
* we can return immediately.
86+
*/
87+
match_flags = track_flags & FL_DIR;
88+
if (match_flags && len == match_len)
89+
return match_flags;
90+
}
7891

7992
/*
8093
* Okay, no match from the cache so far, so now we have to
@@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name,
95108

96109
if (lstat(cache.path, &st)) {
97110
ret_flags = FL_LSTATERR;
111+
if (errno == ENOENT)
112+
ret_flags |= FL_NOENT;
98113
} else if (S_ISDIR(st.st_mode)) {
99114
last_slash_dir = last_slash;
100115
continue;
@@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name,
107122
}
108123

109124
/*
110-
* At the end update the cache. Note that max 2 different
111-
* path types, FL_SYMLINK and FL_DIR, can be cached for the
112-
* moment!
125+
* At the end update the cache. Note that max 3 different
126+
* path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached
127+
* for the moment!
113128
*/
114-
save_flags = ret_flags & track_flags & FL_SYMLINK;
129+
save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
115130
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
116131
cache.path[last_slash] = '\0';
117132
cache.len = last_slash;
@@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name,
120135
last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
121136
/*
122137
* We have a separate test for the directory case,
123-
* since it could be that we have found a symlink and
124-
* the track_flags says that we cannot cache this
125-
* fact, so the cache would then have been left empty
126-
* in this case.
138+
* since it could be that we have found a symlink or a
139+
* non-existing directory and the track_flags says
140+
* that we cannot cache this fact, so the cache would
141+
* then have been left empty in this case.
127142
*
128143
* But if we are allowed to track real directories, we
129144
* can still cache the path components before the last
130-
* one (the found symlink component).
145+
* one (the found symlink or non-existing component).
131146
*/
132147
cache.path[last_slash_dir] = '\0';
133148
cache.len = last_slash_dir;
134149
cache.flags = FL_DIR;
135150
} else {
136-
reset_lstat_cache();
151+
reset_lstat_cache(track_flags);
137152
}
138153
return ret_flags;
139154
}
@@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name)
147162
FL_SYMLINK|FL_DIR) &
148163
FL_SYMLINK;
149164
}
165+
166+
/*
167+
* Return non-zero if path 'name' has a leading symlink component or
168+
* if some leading path component does not exist.
169+
*/
170+
int has_symlink_or_noent_leading_path(int len, const char *name)
171+
{
172+
return lstat_cache(len, name,
173+
FL_SYMLINK|FL_NOENT|FL_DIR) &
174+
(FL_SYMLINK|FL_NOENT);
175+
}

unpack-trees.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
6161
char *cp, *prev;
6262
char *name = ce->name;
6363

64-
if (has_symlink_leading_path(ce_namelen(ce), ce->name))
64+
if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
6565
return;
6666
if (unlink(name))
6767
return;
@@ -584,7 +584,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
584584
if (o->index_only || o->reset || !o->update)
585585
return 0;
586586

587-
if (has_symlink_leading_path(ce_namelen(ce), ce->name))
587+
if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
588588
return 0;
589589

590590
if (!lstat(ce->name, &st)) {

0 commit comments

Comments
 (0)