Skip to content

Commit 4e09cf2

Browse files
dturner-twgitster
authored andcommitted
path: optimize common dir checking
Instead of a linear search over common_list to check whether a path is common, use a trie. The trie search operates on path prefixes, and handles excludes. Signed-off-by: David Turner <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0701530 commit 4e09cf2

File tree

2 files changed

+214
-14
lines changed

2 files changed

+214
-14
lines changed

path.c

Lines changed: 213 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,25 +121,224 @@ static struct common_dir common_list[] = {
121121
{ 0, 0, 0, NULL }
122122
};
123123

124-
static void update_common_dir(struct strbuf *buf, int git_dir_len)
124+
/*
125+
* A compressed trie. A trie node consists of zero or more characters that
126+
* are common to all elements with this prefix, optionally followed by some
127+
* children. If value is not NULL, the trie node is a terminal node.
128+
*
129+
* For example, consider the following set of strings:
130+
* abc
131+
* def
132+
* definite
133+
* definition
134+
*
135+
* The trie would look look like:
136+
* root: len = 0, children a and d non-NULL, value = NULL.
137+
* a: len = 2, contents = bc, value = (data for "abc")
138+
* d: len = 2, contents = ef, children i non-NULL, value = (data for "def")
139+
* i: len = 3, contents = nit, children e and i non-NULL, value = NULL
140+
* e: len = 0, children all NULL, value = (data for "definite")
141+
* i: len = 2, contents = on, children all NULL,
142+
* value = (data for "definition")
143+
*/
144+
struct trie {
145+
struct trie *children[256];
146+
int len;
147+
char *contents;
148+
void *value;
149+
};
150+
151+
static struct trie *make_trie_node(const char *key, void *value)
125152
{
126-
char *base = buf->buf + git_dir_len;
127-
const struct common_dir *p;
153+
struct trie *new_node = xcalloc(1, sizeof(*new_node));
154+
new_node->len = strlen(key);
155+
if (new_node->len) {
156+
new_node->contents = xmalloc(new_node->len);
157+
memcpy(new_node->contents, key, new_node->len);
158+
}
159+
new_node->value = value;
160+
return new_node;
161+
}
128162

129-
if (is_dir_file(base, "logs", "HEAD") ||
130-
is_dir_file(base, "info", "sparse-checkout"))
131-
return; /* keep this in $GIT_DIR */
132-
for (p = common_list; p->dirname; p++) {
133-
const char *path = p->dirname;
134-
if (p->is_dir && dir_prefix(base, path)) {
135-
replace_dir(buf, git_dir_len, get_git_common_dir());
136-
return;
163+
/*
164+
* Add a key/value pair to a trie. The key is assumed to be \0-terminated.
165+
* If there was an existing value for this key, return it.
166+
*/
167+
static void *add_to_trie(struct trie *root, const char *key, void *value)
168+
{
169+
struct trie *child;
170+
void *old;
171+
int i;
172+
173+
if (!*key) {
174+
/* we have reached the end of the key */
175+
old = root->value;
176+
root->value = value;
177+
return old;
178+
}
179+
180+
for (i = 0; i < root->len; i++) {
181+
if (root->contents[i] == key[i])
182+
continue;
183+
184+
/*
185+
* Split this node: child will contain this node's
186+
* existing children.
187+
*/
188+
child = malloc(sizeof(*child));
189+
memcpy(child->children, root->children, sizeof(root->children));
190+
191+
child->len = root->len - i - 1;
192+
if (child->len) {
193+
child->contents = xstrndup(root->contents + i + 1,
194+
child->len);
137195
}
138-
if (!p->is_dir && !strcmp(base, path)) {
139-
replace_dir(buf, git_dir_len, get_git_common_dir());
140-
return;
196+
child->value = root->value;
197+
root->value = NULL;
198+
root->len = i;
199+
200+
memset(root->children, 0, sizeof(root->children));
201+
root->children[(unsigned char)root->contents[i]] = child;
202+
203+
/* This is the newly-added child. */
204+
root->children[(unsigned char)key[i]] =
205+
make_trie_node(key + i + 1, value);
206+
return NULL;
207+
}
208+
209+
/* We have matched the entire compressed section */
210+
if (key[i]) {
211+
child = root->children[(unsigned char)key[root->len]];
212+
if (child) {
213+
return add_to_trie(child, key + root->len + 1, value);
214+
} else {
215+
child = make_trie_node(key + root->len + 1, value);
216+
root->children[(unsigned char)key[root->len]] = child;
217+
return NULL;
141218
}
142219
}
220+
221+
old = root->value;
222+
root->value = value;
223+
return old;
224+
}
225+
226+
typedef int (*match_fn)(const char *unmatched, void *data, void *baton);
227+
228+
/*
229+
* Search a trie for some key. Find the longest /-or-\0-terminated
230+
* prefix of the key for which the trie contains a value. Call fn
231+
* with the unmatched portion of the key and the found value, and
232+
* return its return value. If there is no such prefix, return -1.
233+
*
234+
* The key is partially normalized: consecutive slashes are skipped.
235+
*
236+
* For example, consider the trie containing only [refs,
237+
* refs/worktree] (both with values).
238+
*
239+
* | key | unmatched | val from node | return value |
240+
* |-----------------|------------|---------------|--------------|
241+
* | a | not called | n/a | -1 |
242+
* | refs | \0 | refs | as per fn |
243+
* | refs/ | / | refs | as per fn |
244+
* | refs/w | /w | refs | as per fn |
245+
* | refs/worktree | \0 | refs/worktree | as per fn |
246+
* | refs/worktree/ | / | refs/worktree | as per fn |
247+
* | refs/worktree/a | /a | refs/worktree | as per fn |
248+
* |-----------------|------------|---------------|--------------|
249+
*
250+
*/
251+
static int trie_find(struct trie *root, const char *key, match_fn fn,
252+
void *baton)
253+
{
254+
int i;
255+
int result;
256+
struct trie *child;
257+
258+
if (!*key) {
259+
/* we have reached the end of the key */
260+
if (root->value && !root->len)
261+
return fn(key, root->value, baton);
262+
else
263+
return -1;
264+
}
265+
266+
for (i = 0; i < root->len; i++) {
267+
/* Partial path normalization: skip consecutive slashes. */
268+
if (key[i] == '/' && key[i+1] == '/') {
269+
key++;
270+
continue;
271+
}
272+
if (root->contents[i] != key[i])
273+
return -1;
274+
}
275+
276+
/* Matched the entire compressed section */
277+
key += i;
278+
if (!*key)
279+
/* End of key */
280+
return fn(key, root->value, baton);
281+
282+
/* Partial path normalization: skip consecutive slashes */
283+
while (key[0] == '/' && key[1] == '/')
284+
key++;
285+
286+
child = root->children[(unsigned char)*key];
287+
if (child)
288+
result = trie_find(child, key + 1, fn, baton);
289+
else
290+
result = -1;
291+
292+
if (result >= 0 || (*key != '/' && *key != 0))
293+
return result;
294+
if (root->value)
295+
return fn(key, root->value, baton);
296+
else
297+
return -1;
298+
}
299+
300+
static struct trie common_trie;
301+
static int common_trie_done_setup;
302+
303+
static void init_common_trie(void)
304+
{
305+
struct common_dir *p;
306+
307+
if (common_trie_done_setup)
308+
return;
309+
310+
for (p = common_list; p->dirname; p++)
311+
add_to_trie(&common_trie, p->dirname, p);
312+
313+
common_trie_done_setup = 1;
314+
}
315+
316+
/*
317+
* Helper function for update_common_dir: returns 1 if the dir
318+
* prefix is common.
319+
*/
320+
static int check_common(const char *unmatched, void *value, void *baton)
321+
{
322+
struct common_dir *dir = value;
323+
324+
if (!dir)
325+
return 0;
326+
327+
if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
328+
return !dir->exclude;
329+
330+
if (!dir->is_dir && unmatched[0] == 0)
331+
return !dir->exclude;
332+
333+
return 0;
334+
}
335+
336+
static void update_common_dir(struct strbuf *buf, int git_dir_len)
337+
{
338+
char *base = buf->buf + git_dir_len;
339+
init_common_trie();
340+
if (trie_find(&common_trie, base, check_common, NULL) > 0)
341+
replace_dir(buf, git_dir_len, get_git_common_dir());
143342
}
144343

145344
void report_linked_checkout_garbage(void)

t/t0060-path-utils.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ test_git_path GIT_COMMON_DIR=bar objects/bar bar/objects/bar
271271
test_git_path GIT_COMMON_DIR=bar info/exclude bar/info/exclude
272272
test_git_path GIT_COMMON_DIR=bar info/grafts bar/info/grafts
273273
test_git_path GIT_COMMON_DIR=bar info/sparse-checkout .git/info/sparse-checkout
274+
test_git_path GIT_COMMON_DIR=bar info//sparse-checkout .git/info//sparse-checkout
274275
test_git_path GIT_COMMON_DIR=bar remotes/bar bar/remotes/bar
275276
test_git_path GIT_COMMON_DIR=bar branches/bar bar/branches/bar
276277
test_git_path GIT_COMMON_DIR=bar logs/refs/heads/master bar/logs/refs/heads/master

0 commit comments

Comments
 (0)