Skip to content

Commit f23092f

Browse files
mhaggergitster
authored andcommitted
cache_ref_iterator_begin(): avoid priming unneeded directories
When iterating over references, reference priming is used to make sure that loose references are read into the ref-cache before packed references, to avoid races. It used to be that the prefix passed to reference iterators almost always ended in `/`, for example `refs/heads/`. In that case, the priming code would read all loose references under `find_containing_dir("refs/heads/")`, which is "refs/heads/". That's just what we want. But now that `ref-filter` knows how to pass refname prefixes to `for_each_fullref_in()`, the prefix might come from user input; for example, git for-each-ref refs/heads Since the argument doesn't include a trailing slash, the reference iteration code would prime all of the loose references under `find_containing_dir("refs/heads")`, which is "refs/". Thus we would unnecessarily read tags, remote-tracking references, etc., when the user is only interested in branches. It is a bit awkward to get around this problem. We can't just append a slash to the argument, because we don't know ab initio whether an argument like `refs/tags/release` corresponds to a single tag or to a directory containing tags. Moreover, until now a `prefix_ref_iterator` was used to make the final decision about which references fall within the prefix (the `cache_ref_iterator` only did a rough cut). This is also inefficient, because the `prefix_ref_iterator` can't know, for example, that while you are in a subdirectory that is completely within the prefix, you don't have to do the prefix check. So: * Move the responsibility for doing the prefix check directly to `cache_ref_iterator`. This means that `cache_ref_iterator_begin()` never has to wrap its return value in a `prefix_ref_iterator`. * Teach `cache_ref_iterator_begin()` (and `prime_ref_dir()`) to be stricter about what they iterate over and what directories they prime. * Teach `cache_ref_iterator` to keep track of whether the current `cache_ref_iterator_level` is fully within the prefix. If so, skip the prefix checks entirely. The main benefit of these optimizations is for loose references, since packed references are always read all at once. Note that after this change, `prefix_ref_iterator` is only ever used for its trimming feature and not for its "prefix" feature. But I'm not ripping out the latter yet, because it might be useful for another patch series that I'm working on. Signed-off-by: Michael Haggerty <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent cfe004a commit f23092f

File tree

1 file changed

+85
-10
lines changed

1 file changed

+85
-10
lines changed

refs/ref-cache.c

Lines changed: 85 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -312,11 +312,42 @@ static void sort_ref_dir(struct ref_dir *dir)
312312
dir->sorted = dir->nr = i;
313313
}
314314

315+
enum prefix_state {
316+
/* All refs within the directory would match prefix: */
317+
PREFIX_CONTAINS_DIR,
318+
319+
/* Some, but not all, refs within the directory might match prefix: */
320+
PREFIX_WITHIN_DIR,
321+
322+
/* No refs within the directory could possibly match prefix: */
323+
PREFIX_EXCLUDES_DIR
324+
};
325+
315326
/*
316-
* Load all of the refs from `dir` (recursively) into our in-memory
317-
* cache.
327+
* Return a `prefix_state` constant describing the relationship
328+
* between the directory with the specified `dirname` and `prefix`.
318329
*/
319-
static void prime_ref_dir(struct ref_dir *dir)
330+
static enum prefix_state overlaps_prefix(const char *dirname,
331+
const char *prefix)
332+
{
333+
while (*prefix && *dirname == *prefix) {
334+
dirname++;
335+
prefix++;
336+
}
337+
if (!*prefix)
338+
return PREFIX_CONTAINS_DIR;
339+
else if (!*dirname)
340+
return PREFIX_WITHIN_DIR;
341+
else
342+
return PREFIX_EXCLUDES_DIR;
343+
}
344+
345+
/*
346+
* Load all of the refs from `dir` (recursively) that could possibly
347+
* contain references matching `prefix` into our in-memory cache. If
348+
* `prefix` is NULL, prime unconditionally.
349+
*/
350+
static void prime_ref_dir(struct ref_dir *dir, const char *prefix)
320351
{
321352
/*
322353
* The hard work of loading loose refs is done by get_ref_dir(), so we
@@ -327,8 +358,29 @@ static void prime_ref_dir(struct ref_dir *dir)
327358
int i;
328359
for (i = 0; i < dir->nr; i++) {
329360
struct ref_entry *entry = dir->entries[i];
330-
if (entry->flag & REF_DIR)
331-
prime_ref_dir(get_ref_dir(entry));
361+
if (!(entry->flag & REF_DIR)) {
362+
/* Not a directory; no need to recurse. */
363+
} else if (!prefix) {
364+
/* Recurse in any case: */
365+
prime_ref_dir(get_ref_dir(entry), NULL);
366+
} else {
367+
switch (overlaps_prefix(entry->name, prefix)) {
368+
case PREFIX_CONTAINS_DIR:
369+
/*
370+
* Recurse, and from here down we
371+
* don't have to check the prefix
372+
* anymore:
373+
*/
374+
prime_ref_dir(get_ref_dir(entry), NULL);
375+
break;
376+
case PREFIX_WITHIN_DIR:
377+
prime_ref_dir(get_ref_dir(entry), prefix);
378+
break;
379+
case PREFIX_EXCLUDES_DIR:
380+
/* No need to prime this directory. */
381+
break;
382+
}
383+
}
332384
}
333385
}
334386

@@ -343,6 +395,8 @@ struct cache_ref_iterator_level {
343395
*/
344396
struct ref_dir *dir;
345397

398+
enum prefix_state prefix_state;
399+
346400
/*
347401
* The index of the current entry within dir (which might
348402
* itself be a directory). If index == -1, then the iteration
@@ -369,6 +423,13 @@ struct cache_ref_iterator {
369423
/* The number of levels that have been allocated on the stack */
370424
size_t levels_alloc;
371425

426+
/*
427+
* Only include references with this prefix in the iteration.
428+
* The prefix is matched textually, without regard for path
429+
* component boundaries.
430+
*/
431+
const char *prefix;
432+
372433
/*
373434
* A stack of levels. levels[0] is the uppermost level that is
374435
* being iterated over in this iteration. (This is not
@@ -390,6 +451,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator)
390451
&iter->levels[iter->levels_nr - 1];
391452
struct ref_dir *dir = level->dir;
392453
struct ref_entry *entry;
454+
enum prefix_state entry_prefix_state;
393455

394456
if (level->index == -1)
395457
sort_ref_dir(dir);
@@ -404,13 +466,22 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator)
404466

405467
entry = dir->entries[level->index];
406468

469+
if (level->prefix_state == PREFIX_WITHIN_DIR) {
470+
entry_prefix_state = overlaps_prefix(entry->name, iter->prefix);
471+
if (entry_prefix_state == PREFIX_EXCLUDES_DIR)
472+
continue;
473+
} else {
474+
entry_prefix_state = level->prefix_state;
475+
}
476+
407477
if (entry->flag & REF_DIR) {
408478
/* push down a level */
409479
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
410480
iter->levels_alloc);
411481

412482
level = &iter->levels[iter->levels_nr++];
413483
level->dir = get_ref_dir(entry);
484+
level->prefix_state = entry_prefix_state;
414485
level->index = -1;
415486
} else {
416487
iter->base.refname = entry->name;
@@ -471,6 +542,7 @@ static int cache_ref_iterator_abort(struct ref_iterator *ref_iterator)
471542
struct cache_ref_iterator *iter =
472543
(struct cache_ref_iterator *)ref_iterator;
473544

545+
free((char *)iter->prefix);
474546
free(iter->levels);
475547
base_ref_iterator_free(ref_iterator);
476548
return ITER_DONE;
@@ -496,10 +568,10 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache,
496568
dir = find_containing_dir(dir, prefix, 0);
497569
if (!dir)
498570
/* There's nothing to iterate over. */
499-
return empty_ref_iterator_begin();
571+
return empty_ref_iterator_begin();
500572

501573
if (prime_dir)
502-
prime_ref_dir(dir);
574+
prime_ref_dir(dir, prefix);
503575

504576
iter = xcalloc(1, sizeof(*iter));
505577
ref_iterator = &iter->base;
@@ -511,9 +583,12 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache,
511583
level->index = -1;
512584
level->dir = dir;
513585

514-
if (prefix && *prefix)
515-
ref_iterator = prefix_ref_iterator_begin(ref_iterator,
516-
prefix, 0);
586+
if (prefix && *prefix) {
587+
iter->prefix = xstrdup(prefix);
588+
level->prefix_state = PREFIX_WITHIN_DIR;
589+
} else {
590+
level->prefix_state = PREFIX_CONTAINS_DIR;
591+
}
517592

518593
return ref_iterator;
519594
}

0 commit comments

Comments
 (0)