Skip to content

Commit bad4a54

Browse files
Kjetil Barvik authored and gitster committed
lstat_cache(): introduce has_dirs_only_path() function
The create_directories() function in entry.c currently calls stat() or lstat() for each path component of the pathname 'path' each and every time. For the 'git checkout' command, this function is called on each file for which we must do an update (ce->ce_flags & CE_UPDATE), so we get lots and lots of calls.

To fix this, we make a new wrapper around the lstat_cache() function, and call the wrapper function instead of calling the stat() or lstat() functions directly. Since the paths given to the create_directories() function are sorted alphabetically, the new wrapper is very cache-effective in this situation.

To support it, we must update the lstat_cache() function so that the caller can say "please test the complete length of 'name'", and can also give it the length of a prefix for which the cache should use the stat() function instead of the lstat() function to test each path component.

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments on this patch!

Signed-off-by: Kjetil Barvik <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 09c9306 commit bad4a54

File tree

3 files changed

+60
-39
lines changed

3 files changed

+60
-39
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,7 @@ struct checkout {
721721
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
722722
extern int has_symlink_leading_path(int len, const char *name);
723723
extern int has_symlink_or_noent_leading_path(int len, const char *name);
724+
extern int has_dirs_only_path(int len, const char *name, int prefix_len);
724725

725726
extern struct alternate_object_database {
726727
struct alternate_object_database *next;

entry.c

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,25 @@ static void create_directories(const char *path, const struct checkout *state)
88
const char *slash = path;
99

1010
while ((slash = strchr(slash+1, '/')) != NULL) {
11-
struct stat st;
12-
int stat_status;
13-
1411
len = slash - path;
1512
memcpy(buf, path, len);
1613
buf[len] = 0;
1714

18-
if (len <= state->base_dir_len)
19-
/*
20-
* checkout-index --prefix=<dir>; <dir> is
21-
* allowed to be a symlink to an existing
22-
* directory.
23-
*/
24-
stat_status = stat(buf, &st);
25-
else
26-
/*
27-
* if there currently is a symlink, we would
28-
* want to replace it with a real directory.
29-
*/
30-
stat_status = lstat(buf, &st);
31-
32-
if (!stat_status && S_ISDIR(st.st_mode))
15+
/*
16+
* For 'checkout-index --prefix=<dir>', <dir> is
17+
* allowed to be a symlink to an existing directory,
18+
* and we set 'state->base_dir_len' below, such that
19+
* we test the path components of the prefix with the
20+
* stat() function instead of the lstat() function.
21+
*/
22+
if (has_dirs_only_path(len, buf, state->base_dir_len))
3323
continue; /* ok, it is already a directory. */
3424

3525
/*
36-
* We know stat_status == 0 means something exists
37-
* there and this mkdir would fail, but that is an
38-
* error codepath; we do not care, as we unlink and
39-
* mkdir again in such a case.
26+
* If this mkdir() would fail, it could be that there
27+
* is already a symlink or something else exists
28+
* there, therefore we then try to unlink it and try
29+
* one more time to create the directory.
4030
*/
4131
if (mkdir(buf, 0777)) {
4232
if (errno == EEXIST && state->force &&

symlinks.c

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#include "cache.h"
22

33
static struct cache_def {
4-
char path[PATH_MAX];
4+
char path[PATH_MAX + 1];
55
int len;
66
int flags;
77
int track_flags;
8+
int prefix_len_stat_func;
89
} cache;
910

1011
/*
@@ -31,41 +32,49 @@ static inline int longest_match_lstat_cache(int len, const char *name)
3132
return match_len;
3233
}
3334

34-
static inline void reset_lstat_cache(int track_flags)
35+
static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
3536
{
3637
cache.path[0] = '\0';
3738
cache.len = 0;
3839
cache.flags = 0;
3940
cache.track_flags = track_flags;
41+
cache.prefix_len_stat_func = prefix_len_stat_func;
4042
}
4143

4244
#define FL_DIR (1 << 0)
4345
#define FL_NOENT (1 << 1)
4446
#define FL_SYMLINK (1 << 2)
4547
#define FL_LSTATERR (1 << 3)
4648
#define FL_ERR (1 << 4)
49+
#define FL_FULLPATH (1 << 5)
4750

4851
/*
4952
* Check if name 'name' of length 'len' has a symlink leading
5053
* component, or if the directory exists and is real, or not.
5154
*
5255
* To speed up the check, some information is allowed to be cached.
53-
* This can be indicated by the 'track_flags' argument.
56+
* This can be indicated by the 'track_flags' argument, which also can
57+
* be used to indicate that we should check the full path.
58+
*
59+
* The 'prefix_len_stat_func' parameter can be used to set the length
60+
* of the prefix, where the cache should use the stat() function
61+
* instead of the lstat() function to test each path component.
5462
*/
5563
static int lstat_cache(int len, const char *name,
56-
int track_flags)
64+
int track_flags, int prefix_len_stat_func)
5765
{
5866
int match_len, last_slash, last_slash_dir;
59-
int match_flags, ret_flags, save_flags, max_len;
67+
int match_flags, ret_flags, save_flags, max_len, ret;
6068
struct stat st;
6169

62-
if (cache.track_flags != track_flags) {
70+
if (cache.track_flags != track_flags ||
71+
cache.prefix_len_stat_func != prefix_len_stat_func) {
6372
/*
64-
* As a safeguard we clear the cache if the value of
65-
* track_flags does not match with the last supplied
66-
* value.
73+
* As a safeguard we clear the cache if the values of
74+
* track_flags and/or prefix_len_stat_func does not
75+
* match with the last supplied values.
6776
*/
68-
reset_lstat_cache(track_flags);
77+
reset_lstat_cache(track_flags, prefix_len_stat_func);
6978
match_len = last_slash = 0;
7079
} else {
7180
/*
@@ -101,12 +110,17 @@ static int lstat_cache(int len, const char *name,
101110
cache.path[match_len] = name[match_len];
102111
match_len++;
103112
} while (match_len < max_len && name[match_len] != '/');
104-
if (match_len >= max_len)
113+
if (match_len >= max_len && !(track_flags & FL_FULLPATH))
105114
break;
106115
last_slash = match_len;
107116
cache.path[last_slash] = '\0';
108117

109-
if (lstat(cache.path, &st)) {
118+
if (last_slash <= prefix_len_stat_func)
119+
ret = stat(cache.path, &st);
120+
else
121+
ret = lstat(cache.path, &st);
122+
123+
if (ret) {
110124
ret_flags = FL_LSTATERR;
111125
if (errno == ENOENT)
112126
ret_flags |= FL_NOENT;
@@ -127,12 +141,12 @@ static int lstat_cache(int len, const char *name,
127141
* for the moment!
128142
*/
129143
save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
130-
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
144+
if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) {
131145
cache.path[last_slash] = '\0';
132146
cache.len = last_slash;
133147
cache.flags = save_flags;
134148
} else if (track_flags & FL_DIR &&
135-
last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
149+
last_slash_dir > 0 && last_slash_dir <= PATH_MAX) {
136150
/*
137151
* We have a separate test for the directory case,
138152
* since it could be that we have found a symlink or a
@@ -148,18 +162,20 @@ static int lstat_cache(int len, const char *name,
148162
cache.len = last_slash_dir;
149163
cache.flags = FL_DIR;
150164
} else {
151-
reset_lstat_cache(track_flags);
165+
reset_lstat_cache(track_flags, prefix_len_stat_func);
152166
}
153167
return ret_flags;
154168
}
155169

170+
#define USE_ONLY_LSTAT 0
171+
156172
/*
157173
* Return non-zero if path 'name' has a leading symlink component
158174
*/
159175
int has_symlink_leading_path(int len, const char *name)
160176
{
161177
return lstat_cache(len, name,
162-
FL_SYMLINK|FL_DIR) &
178+
FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
163179
FL_SYMLINK;
164180
}
165181

@@ -170,6 +186,20 @@ int has_symlink_leading_path(int len, const char *name)
170186
int has_symlink_or_noent_leading_path(int len, const char *name)
171187
{
172188
return lstat_cache(len, name,
173-
FL_SYMLINK|FL_NOENT|FL_DIR) &
189+
FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
174190
(FL_SYMLINK|FL_NOENT);
175191
}
192+
193+
/*
194+
* Return non-zero if all path components of 'name' exist as a
195+
* directory. If prefix_len > 0, we will test with the stat()
196+
* function instead of the lstat() function for a prefix length of
197+
* 'prefix_len', thus we then allow for symlinks in the prefix part as
198+
* long as those point to real existing directories.
199+
*/
200+
int has_dirs_only_path(int len, const char *name, int prefix_len)
201+
{
202+
return lstat_cache(len, name,
203+
FL_DIR|FL_FULLPATH, prefix_len) &
204+
FL_DIR;
205+
}

0 commit comments

Comments
 (0)