Skip to content

Commit 06a604e

Browse files
pclouds authored and gitster committed
attr: avoid heavy work when we know the specified attr is not defined
If we have never seen attr 'X' in any .gitattributes file we have examined so far, we can be sure that 'X' is not defined. So there is no need to go over the whole attr stack to look for attr 'X'. This is the purpose behind the new field maybe_real.

This optimization breaks down if macros are involved, because we can't know for sure which macro would expand to 'X' at attr parsing time. But if we go the pessimistic way and assume all macros are expanded, we hit the builtin "binary" macro. At least the "diff" attr defined in this macro would disable this optimization for git-grep. So we wait until any attr line _may_ reference a macro before we turn this off.

In git.git, this reduces the number of fill_one() calls for "git grep abcdefghi" from ~5348 to 2955. The optimization stops when it reads t/.gitattributes, which uses the 'binary' macro. We could probably reduce it further by limiting the 'binary' reference to t/ and subdirs only in this case.

"git grep" is actually a good example to justify this patch. The command checks the "diff" attribute on every file. People usually don't define this attribute, but without this patch they pay the attr lookup penalty anyway, proportional to the number of attr lines they have in the repo.

Helped-by: Junio C Hamano <[email protected]>
Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent fad32bc commit 06a604e

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

attr.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ struct git_attr {
3333
unsigned h;
3434
int attr_nr;
3535
int maybe_macro;
36+
int maybe_real;
3637
char name[FLEX_ARRAY];
3738
};
3839
static int attr_nr;
40+
static int cannot_trust_maybe_real;
3941

4042
static struct git_attr_check *check_all_attr;
4143
static struct git_attr *(git_attr_hash[HASHSIZE]);
@@ -97,6 +99,7 @@ static struct git_attr *git_attr_internal(const char *name, int len)
9799
a->next = git_attr_hash[pos];
98100
a->attr_nr = attr_nr++;
99101
a->maybe_macro = 0;
102+
a->maybe_real = 0;
100103
git_attr_hash[pos] = a;
101104

102105
REALLOC_ARRAY(check_all_attr, attr_nr);
@@ -269,6 +272,10 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
269272
/* Second pass to fill the attr_states */
270273
for (cp = states, i = 0; *cp; i++) {
271274
cp = parse_attr(src, lineno, cp, &(res->state[i]));
275+
if (!is_macro)
276+
res->state[i].attr->maybe_real = 1;
277+
if (res->state[i].attr->maybe_macro)
278+
cannot_trust_maybe_real = 1;
272279
}
273280

274281
return res;
@@ -710,10 +717,13 @@ static int macroexpand_one(int nr, int rem)
710717
}
711718

712719
/*
713-
* Collect all attributes for path into the array pointed to by
714-
* check_all_attr.
720+
* Collect attributes for path into the array pointed to by
721+
* check_all_attr. If num is non-zero, only attributes in check[] are
722+
* collected. Otherwise all attributes are collected.
715723
*/
716-
static void collect_all_attrs(const char *path)
724+
static void collect_some_attrs(const char *path, int num,
725+
struct git_attr_check *check)
726+
717727
{
718728
struct attr_stack *stk;
719729
int i, pathlen, rem, dirlen;
@@ -736,6 +746,19 @@ static void collect_all_attrs(const char *path)
736746
prepare_attr_stack(path, dirlen);
737747
for (i = 0; i < attr_nr; i++)
738748
check_all_attr[i].value = ATTR__UNKNOWN;
749+
if (num && !cannot_trust_maybe_real) {
750+
rem = 0;
751+
for (i = 0; i < num; i++) {
752+
if (!check[i].attr->maybe_real) {
753+
struct git_attr_check *c;
754+
c = check_all_attr + check[i].attr->attr_nr;
755+
c->value = ATTR__UNSET;
756+
rem++;
757+
}
758+
}
759+
if (rem == num)
760+
return;
761+
}
739762

740763
rem = attr_nr;
741764
for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
@@ -746,7 +769,7 @@ int git_check_attr(const char *path, int num, struct git_attr_check *check)
746769
{
747770
int i;
748771

749-
collect_all_attrs(path);
772+
collect_some_attrs(path, num, check);
750773

751774
for (i = 0; i < num; i++) {
752775
const char *value = check_all_attr[check[i].attr->attr_nr].value;
@@ -762,7 +785,7 @@ int git_all_attrs(const char *path, int *num, struct git_attr_check **check)
762785
{
763786
int i, count, j;
764787

765-
collect_all_attrs(path);
788+
collect_some_attrs(path, 0, NULL);
766789

767790
/* Count the number of attributes that are set. */
768791
count = 0;

0 commit comments

Comments
 (0)