Skip to content

Commit 5f83642

Browse files
committed
Merge branch 'nd/attr-match-optim-more'
Start laying the foundation to build the "wildmatch" after we can agree on its desired semantics.

* nd/attr-match-optim-more:
  - attr: more matching optimizations from .gitignore
  - gitignore: make pattern parsing code a separate function
  - exclude: split pathname matching code into a separate function
  - exclude: fix a bug in prefix compare optimization
  - exclude: split basename matching code into a separate function
  - exclude: stricten a length check in EXC_FLAG_ENDSWITH case
2 parents 8736c90 + 82dce99 commit 5f83642

File tree

6 files changed

+186
-88
lines changed

6 files changed

+186
-88
lines changed

Documentation/gitattributes.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
5656
overrides an earlier line. This overriding is done per
5757
attribute. The rules how the pattern matches paths are the
5858
same as in `.gitignore` files; see linkgit:gitignore[5].
59+
Unlike `.gitignore`, negative patterns are forbidden.
5960

6061
When deciding what attributes are assigned to a path, git
6162
consults `$GIT_DIR/info/attributes` file (which has the highest

attr.c

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@ struct attr_state {
115115
const char *setto;
116116
};
117117

118+
struct pattern {
119+
const char *pattern;
120+
int patternlen;
121+
int nowildcardlen;
122+
int flags; /* EXC_FLAG_* */
123+
};
124+
118125
/*
119126
* One rule, as from a .gitattributes file.
120127
*
@@ -131,7 +138,7 @@ struct attr_state {
131138
*/
132139
struct match_attr {
133140
union {
134-
char *pattern;
141+
struct pattern pat;
135142
struct git_attr *attr;
136143
} u;
137144
char is_macro;
@@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
241248
if (is_macro)
242249
res->u.attr = git_attr_internal(name, namelen);
243250
else {
244-
res->u.pattern = (char *)&(res->state[num_attr]);
245-
memcpy(res->u.pattern, name, namelen);
246-
res->u.pattern[namelen] = 0;
251+
char *p = (char *)&(res->state[num_attr]);
252+
memcpy(p, name, namelen);
253+
res->u.pat.pattern = p;
254+
parse_exclude_pattern(&res->u.pat.pattern,
255+
&res->u.pat.patternlen,
256+
&res->u.pat.flags,
257+
&res->u.pat.nowildcardlen);
258+
if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
259+
die(_("Negative patterns are forbidden in git attributes\n"
260+
"Use '\\!' for literal leading exclamation."));
247261
}
248262
res->is_macro = is_macro;
249263
res->num_attr = num_attr;
@@ -648,25 +662,21 @@ static void prepare_attr_stack(const char *path)
648662

649663
static int path_matches(const char *pathname, int pathlen,
650664
const char *basename,
651-
const char *pattern,
665+
const struct pattern *pat,
652666
const char *base, int baselen)
653667
{
654-
if (!strchr(pattern, '/')) {
655-
return (fnmatch_icase(pattern, basename, 0) == 0);
668+
const char *pattern = pat->pattern;
669+
int prefix = pat->nowildcardlen;
670+
671+
if (pat->flags & EXC_FLAG_NODIR) {
672+
return match_basename(basename,
673+
pathlen - (basename - pathname),
674+
pattern, prefix,
675+
pat->patternlen, pat->flags);
656676
}
657-
/*
658-
* match with FNM_PATHNAME; the pattern has base implicitly
659-
* in front of it.
660-
*/
661-
if (*pattern == '/')
662-
pattern++;
663-
if (pathlen < baselen ||
664-
(baselen && pathname[baselen] != '/') ||
665-
strncmp(pathname, base, baselen))
666-
return 0;
667-
if (baselen != 0)
668-
baselen++;
669-
return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
677+
return match_pathname(pathname, pathlen,
678+
base, baselen,
679+
pattern, prefix, pat->patternlen, pat->flags);
670680
}
671681

672682
static int macroexpand_one(int attr_nr, int rem);
@@ -704,7 +714,7 @@ static int fill(const char *path, int pathlen, const char *basename,
704714
if (a->is_macro)
705715
continue;
706716
if (path_matches(path, pathlen, basename,
707-
a->u.pattern, base, stk->originlen))
717+
&a->u.pat, base, stk->originlen))
708718
rem = fill_one("fill", a, rem);
709719
}
710720
return rem;

dir.c

Lines changed: 126 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
308308
return string[simple_length(string)] == '\0';
309309
}
310310

311+
void parse_exclude_pattern(const char **pattern,
312+
int *patternlen,
313+
int *flags,
314+
int *nowildcardlen)
315+
{
316+
const char *p = *pattern;
317+
size_t i, len;
318+
319+
*flags = 0;
320+
if (*p == '!') {
321+
*flags |= EXC_FLAG_NEGATIVE;
322+
p++;
323+
}
324+
len = strlen(p);
325+
if (len && p[len - 1] == '/') {
326+
len--;
327+
*flags |= EXC_FLAG_MUSTBEDIR;
328+
}
329+
for (i = 0; i < len; i++) {
330+
if (p[i] == '/')
331+
break;
332+
}
333+
if (i == len)
334+
*flags |= EXC_FLAG_NODIR;
335+
*nowildcardlen = simple_length(p);
336+
/*
337+
* we should have excluded the trailing slash from 'p' too,
338+
* but that's one more allocation. Instead just make sure
339+
* nowildcardlen does not exceed real patternlen
340+
*/
341+
if (*nowildcardlen > len)
342+
*nowildcardlen = len;
343+
if (*p == '*' && no_wildcard(p + 1))
344+
*flags |= EXC_FLAG_ENDSWITH;
345+
*pattern = p;
346+
*patternlen = len;
347+
}
348+
311349
void add_exclude(const char *string, const char *base,
312350
int baselen, struct exclude_list *which)
313351
{
314352
struct exclude *x;
315-
size_t len;
316-
int to_exclude = 1;
317-
int flags = 0;
353+
int patternlen;
354+
int flags;
355+
int nowildcardlen;
318356

319-
if (*string == '!') {
320-
to_exclude = 0;
321-
string++;
322-
}
323-
len = strlen(string);
324-
if (len && string[len - 1] == '/') {
357+
parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
358+
if (flags & EXC_FLAG_MUSTBEDIR) {
325359
char *s;
326-
x = xmalloc(sizeof(*x) + len);
360+
x = xmalloc(sizeof(*x) + patternlen + 1);
327361
s = (char *)(x+1);
328-
memcpy(s, string, len - 1);
329-
s[len - 1] = '\0';
330-
string = s;
362+
memcpy(s, string, patternlen);
363+
s[patternlen] = '\0';
331364
x->pattern = s;
332-
flags = EXC_FLAG_MUSTBEDIR;
333365
} else {
334366
x = xmalloc(sizeof(*x));
335367
x->pattern = string;
336368
}
337-
x->to_exclude = to_exclude;
338-
x->patternlen = strlen(string);
369+
x->patternlen = patternlen;
370+
x->nowildcardlen = nowildcardlen;
339371
x->base = base;
340372
x->baselen = baselen;
341373
x->flags = flags;
342-
if (!strchr(string, '/'))
343-
x->flags |= EXC_FLAG_NODIR;
344-
x->nowildcardlen = simple_length(string);
345-
if (*string == '*' && no_wildcard(string+1))
346-
x->flags |= EXC_FLAG_ENDSWITH;
347374
ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
348375
which->excludes[which->nr++] = x;
349376
}
@@ -505,6 +532,72 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
505532
dir->basebuf[baselen] = '\0';
506533
}
507534

535+
int match_basename(const char *basename, int basenamelen,
536+
const char *pattern, int prefix, int patternlen,
537+
int flags)
538+
{
539+
if (prefix == patternlen) {
540+
if (!strcmp_icase(pattern, basename))
541+
return 1;
542+
} else if (flags & EXC_FLAG_ENDSWITH) {
543+
if (patternlen - 1 <= basenamelen &&
544+
!strcmp_icase(pattern + 1,
545+
basename + basenamelen - patternlen + 1))
546+
return 1;
547+
} else {
548+
if (fnmatch_icase(pattern, basename, 0) == 0)
549+
return 1;
550+
}
551+
return 0;
552+
}
553+
554+
int match_pathname(const char *pathname, int pathlen,
555+
const char *base, int baselen,
556+
const char *pattern, int prefix, int patternlen,
557+
int flags)
558+
{
559+
const char *name;
560+
int namelen;
561+
562+
/*
563+
* match with FNM_PATHNAME; the pattern has base implicitly
564+
* in front of it.
565+
*/
566+
if (*pattern == '/') {
567+
pattern++;
568+
prefix--;
569+
}
570+
571+
/*
572+
* baselen does not count the trailing slash. base[] may or
573+
* may not end with a trailing slash though.
574+
*/
575+
if (pathlen < baselen + 1 ||
576+
(baselen && pathname[baselen] != '/') ||
577+
strncmp_icase(pathname, base, baselen))
578+
return 0;
579+
580+
namelen = baselen ? pathlen - baselen - 1 : pathlen;
581+
name = pathname + pathlen - namelen;
582+
583+
if (prefix) {
584+
/*
585+
* if the non-wildcard part is longer than the
586+
* remaining pathname, surely it cannot match.
587+
*/
588+
if (prefix > namelen)
589+
return 0;
590+
591+
if (strncmp_icase(pattern, name, prefix))
592+
return 0;
593+
pattern += prefix;
594+
name += prefix;
595+
namelen -= prefix;
596+
}
597+
598+
return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
599+
}
600+
508601
/* Scan the list and let the last match determine the fate.
509602
* Return 1 for exclude, 0 for include and -1 for undecided.
510603
*/
@@ -519,9 +612,9 @@ int excluded_from_list(const char *pathname,
519612

520613
for (i = el->nr - 1; 0 <= i; i--) {
521614
struct exclude *x = el->excludes[i];
522-
const char *name, *exclude = x->pattern;
523-
int to_exclude = x->to_exclude;
524-
int namelen, prefix = x->nowildcardlen;
615+
const char *exclude = x->pattern;
616+
int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
617+
int prefix = x->nowildcardlen;
525618

526619
if (x->flags & EXC_FLAG_MUSTBEDIR) {
527620
if (*dtype == DT_UNKNOWN)
@@ -531,51 +624,18 @@ int excluded_from_list(const char *pathname,
531624
}
532625

533626
if (x->flags & EXC_FLAG_NODIR) {
534-
/* match basename */
535-
if (prefix == x->patternlen) {
536-
if (!strcmp_icase(exclude, basename))
537-
return to_exclude;
538-
} else if (x->flags & EXC_FLAG_ENDSWITH) {
539-
if (x->patternlen - 1 <= pathlen &&
540-
!strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
541-
return to_exclude;
542-
} else {
543-
if (fnmatch_icase(exclude, basename, 0) == 0)
544-
return to_exclude;
545-
}
546-
continue;
547-
}
548-
549-
/* match with FNM_PATHNAME:
550-
* exclude has base (baselen long) implicitly in front of it.
551-
*/
552-
if (*exclude == '/') {
553-
exclude++;
554-
prefix--;
555-
}
556-
557-
if (pathlen < x->baselen ||
558-
(x->baselen && pathname[x->baselen-1] != '/') ||
559-
strncmp_icase(pathname, x->base, x->baselen))
627+
if (match_basename(basename,
628+
pathlen - (basename - pathname),
629+
exclude, prefix, x->patternlen,
630+
x->flags))
631+
return to_exclude;
560632
continue;
561-
562-
namelen = x->baselen ? pathlen - x->baselen : pathlen;
563-
name = pathname + pathlen - namelen;
564-
565-
/* if the non-wildcard part is longer than the
566-
remaining pathname, surely it cannot match */
567-
if (prefix > namelen)
568-
continue;
569-
570-
if (prefix) {
571-
if (strncmp_icase(exclude, name, prefix))
572-
continue;
573-
exclude += prefix;
574-
name += prefix;
575-
namelen -= prefix;
576633
}
577634

578-
if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
635+
assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
636+
if (match_pathname(pathname, pathlen,
637+
x->base, x->baselen ? x->baselen - 1 : 0,
638+
exclude, prefix, x->patternlen, x->flags))
579639
return to_exclude;
580640
}
581641
return -1; /* undecided */

dir.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ struct dir_entry {
1111
#define EXC_FLAG_NODIR 1
1212
#define EXC_FLAG_ENDSWITH 4
1313
#define EXC_FLAG_MUSTBEDIR 8
14+
#define EXC_FLAG_NEGATIVE 16
1415

1516
struct exclude_list {
1617
int nr;
@@ -21,7 +22,6 @@ struct exclude_list {
2122
int nowildcardlen;
2223
const char *base;
2324
int baselen;
24-
int to_exclude;
2525
int flags;
2626
} **excludes;
2727
};
@@ -80,6 +80,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
8080
int *dtype, struct exclude_list *el);
8181
struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
8282

83+
/*
84+
* these implement the matching logic for dir.c:excluded_from_list and
85+
* attr.c:path_matches()
86+
*/
87+
extern int match_basename(const char *, int,
88+
const char *, int, int, int);
89+
extern int match_pathname(const char *, int,
90+
const char *, int,
91+
const char *, int, int, int);
92+
8393
/*
8494
* The excluded() API is meant for callers that check each level of leading
8595
* directory hierarchies with excluded() to avoid recursing into excluded
@@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
97107
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
98108
char **buf_p, struct exclude_list *which, int check_index);
99109
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
110+
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
100111
extern void add_exclude(const char *string, const char *base,
101112
int baselen, struct exclude_list *which);
102113
extern void free_excludes(struct exclude_list *el);

t/t0003-attributes.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,16 @@ test_expect_success 'root subdir attribute test' '
196196
attr_check subdir/a/i unspecified
197197
'
198198

199+
test_expect_success 'negative patterns' '
200+
echo "!f test=bar" >.gitattributes &&
201+
test_must_fail git check-attr test -- f
202+
'
203+
204+
test_expect_success 'patterns starting with exclamation' '
205+
echo "\!f test=foo" >.gitattributes &&
206+
attr_check "!f" foo
207+
'
208+
199209
test_expect_success 'setup bare' '
200210
git clone --bare . bare.git &&
201211
cd bare.git

0 commit comments

Comments
 (0)