|
4 | 4 | #include "xdiff-interface.h"
|
5 | 5 | #include "diff.h"
|
6 | 6 | #include "diffcore.h"
|
| 7 | +#include "commit.h" |
| 8 | +#include "quote.h" |
7 | 9 |
|
8 | 10 | static int grep_source_load(struct grep_source *gs);
|
9 | 11 | static int grep_source_is_binary(struct grep_source *gs);
|
@@ -322,11 +324,16 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
|
322 | 324 | int erroffset;
|
323 | 325 | int options = PCRE_MULTILINE;
|
324 | 326 |
|
325 |
| - if (opt->ignore_case) |
| 327 | + if (opt->ignore_case) { |
| 328 | + if (has_non_ascii(p->pattern)) |
| 329 | + p->pcre_tables = pcre_maketables(); |
326 | 330 | options |= PCRE_CASELESS;
|
| 331 | + } |
| 332 | + if (is_utf8_locale() && has_non_ascii(p->pattern)) |
| 333 | + options |= PCRE_UTF8; |
327 | 334 |
|
328 | 335 | p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
|
329 |
| - NULL); |
| 336 | + p->pcre_tables); |
330 | 337 | if (!p->pcre_regexp)
|
331 | 338 | compile_regexp_failed(p, error);
|
332 | 339 |
|
@@ -360,6 +367,7 @@ static void free_pcre_regexp(struct grep_pat *p)
|
360 | 367 | {
|
361 | 368 | pcre_free(p->pcre_regexp);
|
362 | 369 | pcre_free(p->pcre_extra_info);
|
| 370 | + pcre_free((void *)p->pcre_tables); |
363 | 371 | }
|
364 | 372 | #else /* !USE_LIBPCRE */
|
365 | 373 | static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
|
@@ -396,26 +404,68 @@ static int is_fixed(const char *s, size_t len)
|
396 | 404 | return 1;
|
397 | 405 | }
|
398 | 406 |
|
| 407 | +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) |
| 408 | +{ |
| 409 | + struct strbuf sb = STRBUF_INIT; |
| 410 | + int err; |
| 411 | + int regflags; |
| 412 | + |
| 413 | + basic_regex_quote_buf(&sb, p->pattern); |
| 414 | + regflags = opt->regflags & ~REG_EXTENDED; |
| 415 | + if (opt->ignore_case) |
| 416 | + regflags |= REG_ICASE; |
| 417 | + err = regcomp(&p->regexp, sb.buf, regflags); |
| 418 | + if (opt->debug) |
| 419 | + fprintf(stderr, "fixed %s\n", sb.buf); |
| 420 | + strbuf_release(&sb); |
| 421 | + if (err) { |
| 422 | + char errbuf[1024]; |
| 423 | + regerror(err, &p->regexp, errbuf, sizeof(errbuf)); |
| 424 | + regfree(&p->regexp); |
| 425 | + compile_regexp_failed(p, errbuf); |
| 426 | + } |
| 427 | +} |
| 428 | + |
399 | 429 | static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
|
400 | 430 | {
|
| 431 | + int icase, ascii_only; |
401 | 432 | int err;
|
402 | 433 |
|
403 | 434 | p->word_regexp = opt->word_regexp;
|
404 | 435 | p->ignore_case = opt->ignore_case;
|
| 436 | + icase = opt->regflags & REG_ICASE || p->ignore_case; |
| 437 | + ascii_only = !has_non_ascii(p->pattern); |
405 | 438 |
|
| 439 | + /* |
| 440 | + * Even when -F (fixed) asks us to do a non-regexp search, we |
| 441 | + * may not be able to correctly case-fold when -i |
| 442 | + * (ignore-case) is asked (in which case, we'll synthesize a |
| 443 | + * regexp to match the pattern that matches regexp special |
| 444 | + * characters literally, while ignoring case differences). On |
| 445 | + * the other hand, even without -F, if the pattern does not |
| 446 | + * have any regexp special characters and there is no need for |
| 447 | + * case-folding search, we can internally turn it into a |
| 448 | + * simple string match using kws. p->fixed tells us if we |
| 449 | + * want to use kws. |
| 450 | + */ |
406 | 451 | if (opt->fixed || is_fixed(p->pattern, p->patternlen))
|
407 |
| - p->fixed = 1; |
| 452 | + p->fixed = !icase || ascii_only; |
408 | 453 | else
|
409 | 454 | p->fixed = 0;
|
410 | 455 |
|
411 | 456 | if (p->fixed) {
|
412 |
| - if (opt->regflags & REG_ICASE || p->ignore_case) |
413 |
| - p->kws = kwsalloc(tolower_trans_tbl); |
414 |
| - else |
415 |
| - p->kws = kwsalloc(NULL); |
| 457 | + p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); |
416 | 458 | kwsincr(p->kws, p->pattern, p->patternlen);
|
417 | 459 | kwsprep(p->kws);
|
418 | 460 | return;
|
| 461 | + } else if (opt->fixed) { |
| 462 | + /* |
| 463 | + * We come here when the pattern has the non-ascii |
| 464 | + * characters we cannot case-fold, and asked to |
| 465 | + * ignore-case. |
| 466 | + */ |
| 467 | + compile_fixed_regexp(p, opt); |
| 468 | + return; |
419 | 469 | }
|
420 | 470 |
|
421 | 471 | if (opt->pcre) {
|
|
0 commit comments