Skip to content

Commit be653d6

Browse files
committed
Merge branch 'mk/grep-pcre'
* mk/grep-pcre:
  git-grep: Fix problems with recently added tests
  git-grep: Update tests (mainly for -P)
  Makefile: Pass USE_LIBPCRE down in GIT-BUILD-OPTIONS
  git-grep: update tests now regexp type is "last one wins"
  git-grep: do not die upon -F/-P when grep.extendedRegexp is set.
  git-grep: Bail out when -P is used with -F or -E
  grep: Add basic tests
  configure: Check for libpcre
  git-grep: Learn PCRE
  grep: Extract compile_regexp_failed() from compile_regexp()
  grep: Fix a typo in a comment
  grep: Put calls to fixmatch() and regmatch() into patmatch()
  contrib/completion: --line-number to git grep
  Documentation: Add --line-number to git-grep synopsis
2 parents 3d109dd + d0042ab commit be653d6

File tree

11 files changed

+347
-30
lines changed

11 files changed

+347
-30
lines changed

Documentation/git-grep.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ SYNOPSIS
1212
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
1313
[-v | --invert-match] [-h|-H] [--full-name]
1414
[-E | --extended-regexp] [-G | --basic-regexp]
15-
[-F | --fixed-strings] [-n]
15+
[-P | --perl-regexp]
16+
[-F | --fixed-strings] [-n | --line-number]
1617
[-l | --files-with-matches] [-L | --files-without-match]
1718
[(-O | --open-files-in-pager) [<pager>]]
1819
[-z | --null]
@@ -97,6 +98,11 @@ OPTIONS
9798
Use POSIX extended/basic regexp for patterns. Default
9899
is to use basic regexp.
99100

101+
-P::
102+
--perl-regexp::
103+
Use Perl-compatible regexp for patterns. Requires libpcre to be
104+
compiled in.
105+
100106
-F::
101107
--fixed-strings::
102108
Use fixed strings for patterns (don't interpret pattern

Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ all::
2424
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
2525
# This also implies BLK_SHA1.
2626
#
27+
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
28+
# able to use Perl-compatible regular expressions.
29+
#
30+
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
31+
# /foo/bar/include and /foo/bar/lib directories.
32+
#
2733
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
2834
# git-http-push are not built, and you cannot use http:// and https://
2935
# transports.
@@ -1258,6 +1264,15 @@ ifdef NO_LIBGEN_H
12581264
COMPAT_OBJS += compat/basename.o
12591265
endif
12601266

1267+
ifdef USE_LIBPCRE
1268+
BASIC_CFLAGS += -DUSE_LIBPCRE
1269+
ifdef LIBPCREDIR
1270+
BASIC_CFLAGS += -I$(LIBPCREDIR)/include
1271+
EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
1272+
endif
1273+
EXTLIBS += -lpcre
1274+
endif
1275+
12611276
ifdef NO_CURL
12621277
BASIC_CFLAGS += -DNO_CURL
12631278
REMOTE_CURL_PRIMARY =
@@ -2089,6 +2104,7 @@ GIT-BUILD-OPTIONS: FORCE
20892104
@echo PYTHON_PATH=\''$(subst ','\'',$(PYTHON_PATH_SQ))'\' >>$@
20902105
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
20912106
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
2107+
@echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@
20922108
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
20932109
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
20942110
ifdef GIT_TEST_CMP

builtin/grep.c

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
753753
int i;
754754
int dummy;
755755
int use_index = 1;
756+
enum {
757+
pattern_type_unspecified = 0,
758+
pattern_type_bre,
759+
pattern_type_ere,
760+
pattern_type_fixed,
761+
pattern_type_pcre,
762+
};
763+
int pattern_type = pattern_type_unspecified;
764+
756765
struct option options[] = {
757766
OPT_BOOLEAN(0, "cached", &cached,
758767
"search in index instead of in the work tree"),
@@ -774,13 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
774783
"descend at most <depth> levels", PARSE_OPT_NONEG,
775784
NULL, 1 },
776785
OPT_GROUP(""),
777-
OPT_BIT('E', "extended-regexp", &opt.regflags,
778-
"use extended POSIX regular expressions", REG_EXTENDED),
779-
OPT_NEGBIT('G', "basic-regexp", &opt.regflags,
780-
"use basic POSIX regular expressions (default)",
781-
REG_EXTENDED),
782-
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
783-
"interpret patterns as fixed strings"),
786+
OPT_SET_INT('E', "extended-regexp", &pattern_type,
787+
"use extended POSIX regular expressions",
788+
pattern_type_ere),
789+
OPT_SET_INT('G', "basic-regexp", &pattern_type,
790+
"use basic POSIX regular expressions (default)",
791+
pattern_type_bre),
792+
OPT_SET_INT('F', "fixed-strings", &pattern_type,
793+
"interpret patterns as fixed strings",
794+
pattern_type_fixed),
795+
OPT_SET_INT('P', "perl-regexp", &pattern_type,
796+
"use Perl-compatible regular expressions",
797+
pattern_type_pcre),
784798
OPT_GROUP(""),
785799
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
786800
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
@@ -886,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
886900
PARSE_OPT_KEEP_DASHDASH |
887901
PARSE_OPT_STOP_AT_NON_OPTION |
888902
PARSE_OPT_NO_INTERNAL_HELP);
903+
switch (pattern_type) {
904+
case pattern_type_fixed:
905+
opt.fixed = 1;
906+
opt.pcre = 0;
907+
break;
908+
case pattern_type_bre:
909+
opt.fixed = 0;
910+
opt.pcre = 0;
911+
opt.regflags &= ~REG_EXTENDED;
912+
break;
913+
case pattern_type_ere:
914+
opt.fixed = 0;
915+
opt.pcre = 0;
916+
opt.regflags |= REG_EXTENDED;
917+
break;
918+
case pattern_type_pcre:
919+
opt.fixed = 0;
920+
opt.pcre = 1;
921+
break;
922+
default:
923+
break; /* nothing */
924+
}
889925

890926
if (use_index && !startup_info->have_repository)
891927
/* die the same way as if we did it at the beginning */
@@ -925,8 +961,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
925961
die(_("no pattern given."));
926962
if (!opt.fixed && opt.ignore_case)
927963
opt.regflags |= REG_ICASE;
928-
if ((opt.regflags != REG_NEWLINE) && opt.fixed)
929-
die(_("cannot mix --fixed-strings and regexp"));
930964

931965
#ifndef NO_PTHREADS
932966
if (online_cpus() == 1 || !grep_threads_ok(&opt))

config.mak.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@
6161
NO_ICONV=@NO_ICONV@
6262
OLD_ICONV=@OLD_ICONV@
6363
NO_REGEX=@NO_REGEX@
64+
USE_LIBPCRE=@USE_LIBPCRE@
6465
NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@
6566
INLINE=@INLINE@
6667
SOCKLEN_T=@SOCKLEN_T@

configure.ac

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)])
220220
AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\
221221
GIT_PARSE_WITH(openssl))
222222
#
223+
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
224+
# able to use Perl-compatible regular expressions.
225+
#
226+
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
227+
# /foo/bar/include and /foo/bar/lib directories.
228+
#
229+
AC_ARG_WITH(libpcre,
230+
AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)])
231+
AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]),
232+
if test "$withval" = "no"; then \
233+
USE_LIBPCRE=; \
234+
elif test "$withval" = "yes"; then \
235+
USE_LIBPCRE=YesPlease; \
236+
else
237+
USE_LIBPCRE=YesPlease; \
238+
LIBPCREDIR=$withval; \
239+
AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \
240+
GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \
241+
fi \
242+
)
243+
#
223244
# Define NO_CURL if you do not have curl installed. git-http-pull and
224245
# git-http-push are not built, and you cannot use http:// and https://
225246
# transports.
@@ -434,6 +455,25 @@ GIT_UNSTASH_FLAGS($OPENSSLDIR)
434455
AC_SUBST(NEEDS_SSL_WITH_CRYPTO)
435456
AC_SUBST(NO_OPENSSL)
436457

458+
#
459+
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
460+
# able to use Perl-compatible regular expressions.
461+
#
462+
463+
if test -n "$USE_LIBPCRE"; then
464+
465+
GIT_STASH_FLAGS($LIBPCREDIR)
466+
467+
AC_CHECK_LIB([pcre], [pcre_version],
468+
[USE_LIBPCRE=YesPlease],
469+
[USE_LIBPCRE=])
470+
471+
GIT_UNSTASH_FLAGS($LIBPCREDIR)
472+
473+
AC_SUBST(USE_LIBPCRE)
474+
475+
fi
476+
437477
#
438478
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
439479
# git-http-push are not built, and you cannot use http:// and https://

contrib/completion/git-completion.bash

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1441,8 +1441,9 @@ _git_grep ()
14411441
__gitcomp "
14421442
--cached
14431443
--text --ignore-case --word-regexp --invert-match
1444-
--full-name
1444+
--full-name --line-number
14451445
--extended-regexp --basic-regexp --fixed-strings
1446+
--perl-regexp
14461447
--files-with-matches --name-only
14471448
--files-without-match
14481449
--max-depth

grep.c

Lines changed: 106 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,84 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
5959
return ret;
6060
}
6161

62+
static NORETURN void compile_regexp_failed(const struct grep_pat *p,
63+
const char *error)
64+
{
65+
char where[1024];
66+
67+
if (p->no)
68+
sprintf(where, "In '%s' at %d, ", p->origin, p->no);
69+
else if (p->origin)
70+
sprintf(where, "%s, ", p->origin);
71+
else
72+
where[0] = 0;
73+
74+
die("%s'%s': %s", where, p->pattern, error);
75+
}
76+
77+
#ifdef USE_LIBPCRE
78+
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
79+
{
80+
const char *error;
81+
int erroffset;
82+
int options = 0;
83+
84+
if (opt->ignore_case)
85+
options |= PCRE_CASELESS;
86+
87+
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
88+
NULL);
89+
if (!p->pcre_regexp)
90+
compile_regexp_failed(p, error);
91+
92+
p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
93+
if (!p->pcre_extra_info && error)
94+
die("%s", error);
95+
}
96+
97+
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
98+
regmatch_t *match, int eflags)
99+
{
100+
int ovector[30], ret, flags = 0;
101+
102+
if (eflags & REG_NOTBOL)
103+
flags |= PCRE_NOTBOL;
104+
105+
ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
106+
0, flags, ovector, ARRAY_SIZE(ovector));
107+
if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
108+
die("pcre_exec failed with error code %d", ret);
109+
if (ret > 0) {
110+
ret = 0;
111+
match->rm_so = ovector[0];
112+
match->rm_eo = ovector[1];
113+
}
114+
115+
return ret;
116+
}
117+
118+
static void free_pcre_regexp(struct grep_pat *p)
119+
{
120+
pcre_free(p->pcre_regexp);
121+
pcre_free(p->pcre_extra_info);
122+
}
123+
#else /* !USE_LIBPCRE */
124+
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
125+
{
126+
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
127+
}
128+
129+
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
130+
regmatch_t *match, int eflags)
131+
{
132+
return 1;
133+
}
134+
135+
static void free_pcre_regexp(struct grep_pat *p)
136+
{
137+
}
138+
#endif /* !USE_LIBPCRE */
139+
62140
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
63141
{
64142
int err;
@@ -70,20 +148,17 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
70148
if (p->fixed)
71149
return;
72150

151+
if (opt->pcre) {
152+
compile_pcre_regexp(p, opt);
153+
return;
154+
}
155+
73156
err = regcomp(&p->regexp, p->pattern, opt->regflags);
74157
if (err) {
75158
char errbuf[1024];
76-
char where[1024];
77-
if (p->no)
78-
sprintf(where, "In '%s' at %d, ",
79-
p->origin, p->no);
80-
else if (p->origin)
81-
sprintf(where, "%s, ", p->origin);
82-
else
83-
where[0] = 0;
84159
regerror(err, &p->regexp, errbuf, 1024);
85160
regfree(&p->regexp);
86-
die("%s'%s': %s", where, p->pattern, errbuf);
161+
compile_regexp_failed(p, errbuf);
87162
}
88163
}
89164

@@ -320,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
320395
case GREP_PATTERN: /* atom */
321396
case GREP_PATTERN_HEAD:
322397
case GREP_PATTERN_BODY:
323-
regfree(&p->regexp);
398+
if (p->pcre_regexp)
399+
free_pcre_regexp(p);
400+
else
401+
regfree(&p->regexp);
324402
break;
325403
default:
326404
break;
@@ -412,6 +490,21 @@ static int regmatch(const regex_t *preg, char *line, char *eol,
412490
return regexec(preg, line, 1, match, eflags);
413491
}
414492

493+
static int patmatch(struct grep_pat *p, char *line, char *eol,
494+
regmatch_t *match, int eflags)
495+
{
496+
int hit;
497+
498+
if (p->fixed)
499+
hit = !fixmatch(p, line, eol, match);
500+
else if (p->pcre_regexp)
501+
hit = !pcrematch(p, line, eol, match, eflags);
502+
else
503+
hit = !regmatch(&p->regexp, line, eol, match, eflags);
504+
505+
return hit;
506+
}
507+
415508
static int strip_timestamp(char *bol, char **eol_p)
416509
{
417510
char *eol = *eol_p;
@@ -461,10 +554,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
461554
}
462555

463556
again:
464-
if (p->fixed)
465-
hit = !fixmatch(p, bol, eol, pmatch);
466-
else
467-
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
557+
hit = patmatch(p, bol, eol, pmatch, eflags);
468558

469559
if (hit && p->word_regexp) {
470560
if ((pmatch[0].rm_so < 0) ||
@@ -791,10 +881,7 @@ static int look_ahead(struct grep_opt *opt,
791881
int hit;
792882
regmatch_t m;
793883

794-
if (p->fixed)
795-
hit = !fixmatch(p, bol, bol + *left_p, &m);
796-
else
797-
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
884+
hit = patmatch(p, bol, bol + *left_p, &m, 0);
798885
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
799886
continue;
800887
if (earliest < 0 || m.rm_so < earliest)
@@ -891,7 +978,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
891978
int hit;
892979

893980
/*
894-
* look_ahead() skips quicly to the line that possibly
981+
* look_ahead() skips quickly to the line that possibly
895982
* has the next hit; don't call it if we need to do
896983
* something more than just skipping the current line
897984
* in response to an unmatch for the current line. E.g.

0 commit comments

Comments
 (0)