Skip to content

Commit 793dc67

Browse files
pclouds authored and gitster committed
grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws for a case-insensitive search outside the ASCII range. But we can't simply pass the pattern to regcomp/pcre as the previous commit did, because it may contain regex special characters, so we need to quote the pattern first.

To avoid misquoting traps that could lead to undefined behavior, we always stick to the basic regex engine in this case. We don't need fancy features for grepping a literal string anyway.

basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ASCII characters are unambiguously encoded as single bytes. This is true at least for UTF-8. For other encodings, let's wait until people complain. Chances are nobody uses multibyte, non-UTF-8 charsets anymore.

Noticed-by: Plamen Totev <[email protected]>
Helped-by: René Scharfe <[email protected]>
Helped-by: Eric Sunshine <[email protected]>
Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 5c1ebcc commit 793dc67

File tree

4 files changed

+108
-1
lines changed

4 files changed

+108
-1
lines changed

grep.c

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "diff.h"
66
#include "diffcore.h"
77
#include "commit.h"
8+
#include "quote.h"
89

910
static int grep_source_load(struct grep_source *gs);
1011
static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,28 @@ static int is_fixed(const char *s, size_t len)
397398
return 1;
398399
}
399400

401+
static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
402+
{
403+
struct strbuf sb = STRBUF_INIT;
404+
int err;
405+
int regflags;
406+
407+
basic_regex_quote_buf(&sb, p->pattern);
408+
regflags = opt->regflags & ~REG_EXTENDED;
409+
if (opt->ignore_case)
410+
regflags |= REG_ICASE;
411+
err = regcomp(&p->regexp, sb.buf, regflags);
412+
if (opt->debug)
413+
fprintf(stderr, "fixed %s\n", sb.buf);
414+
strbuf_release(&sb);
415+
if (err) {
416+
char errbuf[1024];
417+
regerror(err, &p->regexp, errbuf, sizeof(errbuf));
418+
regfree(&p->regexp);
419+
compile_regexp_failed(p, errbuf);
420+
}
421+
}
422+
400423
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
401424
{
402425
int icase, ascii_only;
@@ -407,8 +430,20 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
407430
icase = opt->regflags & REG_ICASE || p->ignore_case;
408431
ascii_only = !has_non_ascii(p->pattern);
409432

433+
/*
434+
* Even when -F (fixed) asks us to do a non-regexp search, we
435+
* may not be able to correctly case-fold when -i
436+
* (ignore-case) is asked (in which case, we'll synthesize a
437+
* regexp to match the pattern that matches regexp special
438+
* characters literally, while ignoring case differences). On
439+
* the other hand, even without -F, if the pattern does not
440+
* have any regexp special characters and there is no need for
441+
* case-folding search, we can internally turn it into a
442+
* simple string match using kws. p->fixed tells us if we
443+
* want to use kws.
444+
*/
410445
if (opt->fixed)
411-
p->fixed = 1;
446+
p->fixed = !icase || ascii_only;
412447
else if ((!icase || ascii_only) &&
413448
is_fixed(p->pattern, p->patternlen))
414449
p->fixed = 1;
@@ -423,6 +458,14 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
423458
kwsincr(p->kws, p->pattern, p->patternlen);
424459
kwsprep(p->kws);
425460
return;
461+
} else if (opt->fixed) {
462+
/*
463+
* We come here when the pattern has the non-ascii
464+
* characters we cannot case-fold, and asked to
465+
* ignore-case.
466+
*/
467+
compile_fixed_regexp(p, opt);
468+
return;
426469
}
427470

428471
if (opt->pcre) {

quote.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,3 +440,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
440440
}
441441
strbuf_addch(sb, '"');
442442
}
443+
444+
/*
 * Append to sb a copy of the NUL-terminated string src, with a
 * backslash inserted in front of the characters this function treats
 * as special in a basic regular expression:
 *
 *   - '^' only when it is the very first character of src;
 *   - '[', '.', '\\' and '*' anywhere, except that a '*' in the
 *     leading position (the first character, or the one right after
 *     a leading '^') is copied unquoted;
 *   - '$' only when it is the very last character of src.
 *
 * Every other byte is copied through unchanged.
 *
 * NOTE(review): for input starting with "^*" the output starts with a
 * quoted caret followed by a bare '*'.  Under POSIX BRE rules that
 * presumably reads as "zero or more literal '^'" rather than a literal
 * "^*" -- confirm whether the '*' should be quoted in that case.
 */
void basic_regex_quote_buf(struct strbuf *sb, const char *src)
{
	const char *p = src;

	if (*p == '^') {
		/* a caret is only an anchor in the leading position */
		strbuf_addch(sb, '\\');
		strbuf_addch(sb, *p);
		p++;
	}
	if (*p == '*') {
		/* a leading asterisk is copied as-is, without quoting */
		strbuf_addch(sb, *p);
		p++;
	}

	for (; *p; p++) {
		int needs_quote;

		if (*p == '$')
			/* a dollar sign is only an anchor at the very end */
			needs_quote = (p[1] == '\0');
		else
			needs_quote = (*p == '[' || *p == '.' ||
				       *p == '\\' || *p == '*');

		if (needs_quote)
			strbuf_addch(sb, '\\');
		strbuf_addch(sb, *p);
	}
}

quote.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix,
6767
extern void perl_quote_buf(struct strbuf *sb, const char *src);
6868
extern void python_quote_buf(struct strbuf *sb, const char *src);
6969
extern void tcl_quote_buf(struct strbuf *sb, const char *src);
70+
extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
7071

7172
#endif

t/t7812-grep-icase-non-ascii.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,30 @@ test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
2020
git grep -i "TILRAUN: HALLÓ HEIMUR!"
2121
'
2222

23+
test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
24+
git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null |
25+
grep fixed >debug1 &&
26+
test_write_lines "fixed TILRAUN: Halló Heimur!" >expect1 &&
27+
test_cmp expect1 debug1 &&
28+
29+
git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!" 2>&1 >/dev/null |
30+
grep fixed >debug2 &&
31+
test_write_lines "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
32+
test_cmp expect2 debug2
33+
'
34+
35+
test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
36+
test_write_lines "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
37+
38+
git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null |
39+
grep fixed >debug1 &&
40+
test_write_lines "fixed \\^*TILR^AUN:\\.\\* \\\\Halló \$He\\[]imur!\\\$" >expect1 &&
41+
test_cmp expect1 debug1 &&
42+
43+
git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$" 2>&1 >/dev/null |
44+
grep fixed >debug2 &&
45+
test_write_lines "fixed \\^*TILR^AUN:\\.\\* \\\\HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
46+
test_cmp expect2 debug2
47+
'
48+
2349
test_done

0 commit comments

Comments
 (0)