Skip to content

Commit f4cec40

Browse files
committed
Merge branch 'cb/t4210-illseq-auto-detect'
As FreeBSD is not the only platform whose regexp library reports a REG_ILLSEQ error when fed invalid UTF-8, add logic to detect that automatically and skip the affected tests.

* cb/t4210-illseq-auto-detect:
  - t4210: detect REG_ILLSEQ dynamically and skip affected tests
  - t/helper: teach test-regex to report pattern errors (like REG_ILLSEQ)
2 parents c3a0282 + c4c2a96 commit f4cec40

File tree

3 files changed

+125
-52
lines changed

3 files changed

+125
-52
lines changed

t/helper/test-regex.c

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include "test-tool.h"
2-
#include "git-compat-util.h"
32
#include "gettext.h"
43

54
struct reg_flag {
@@ -8,12 +7,13 @@ struct reg_flag {
87
};
98

109
static struct reg_flag reg_flags[] = {
11-
{ "EXTENDED", REG_EXTENDED },
12-
{ "NEWLINE", REG_NEWLINE },
13-
{ "ICASE", REG_ICASE },
14-
{ "NOTBOL", REG_NOTBOL },
10+
{ "EXTENDED", REG_EXTENDED },
11+
{ "NEWLINE", REG_NEWLINE },
12+
{ "ICASE", REG_ICASE },
13+
{ "NOTBOL", REG_NOTBOL },
14+
{ "NOTEOL", REG_NOTEOL },
1515
#ifdef REG_STARTEND
16-
{ "STARTEND", REG_STARTEND },
16+
{ "STARTEND", REG_STARTEND },
1717
#endif
1818
{ NULL, 0 }
1919
};
@@ -41,36 +41,74 @@ int cmd__regex(int argc, const char **argv)
4141
{
4242
const char *pat;
4343
const char *str;
44-
int flags = 0;
44+
int ret, silent = 0, flags = 0;
4545
regex_t r;
4646
regmatch_t m[1];
47-
48-
if (argc == 2 && !strcmp(argv[1], "--bug"))
49-
return test_regex_bug();
50-
else if (argc < 3)
51-
usage("test-tool regex --bug\n"
52-
"test-tool regex <pattern> <string> [<options>]");
47+
char errbuf[64];
5348

5449
argv++;
55-
pat = *argv++;
56-
str = *argv++;
57-
while (*argv) {
58-
struct reg_flag *rf;
59-
for (rf = reg_flags; rf->name; rf++)
60-
if (!strcmp(*argv, rf->name)) {
61-
flags |= rf->flag;
62-
break;
63-
}
64-
if (!rf->name)
65-
die("do not recognize %s", *argv);
50+
argc--;
51+
52+
if (!argc)
53+
goto usage;
54+
55+
if (!strcmp(*argv, "--bug")) {
56+
if (argc == 1)
57+
return test_regex_bug();
58+
else
59+
goto usage;
60+
}
61+
if (!strcmp(*argv, "--silent")) {
62+
silent = 1;
6663
argv++;
64+
argc--;
65+
}
66+
if (!argc)
67+
goto usage;
68+
69+
pat = *argv++;
70+
if (argc == 1)
71+
str = NULL;
72+
else {
73+
str = *argv++;
74+
while (*argv) {
75+
struct reg_flag *rf;
76+
for (rf = reg_flags; rf->name; rf++)
77+
if (!strcmp(*argv, rf->name)) {
78+
flags |= rf->flag;
79+
break;
80+
}
81+
if (!rf->name)
82+
die("do not recognize flag %s", *argv);
83+
argv++;
84+
}
6785
}
6886
git_setup_gettext();
6987

70-
if (regcomp(&r, pat, flags))
71-
die("failed regcomp() for pattern '%s'", pat);
72-
if (regexec(&r, str, 1, m, 0))
73-
return 1;
88+
ret = regcomp(&r, pat, flags);
89+
if (ret) {
90+
if (silent)
91+
return ret;
92+
93+
regerror(ret, &r, errbuf, sizeof(errbuf));
94+
die("failed regcomp() for pattern '%s' (%s)", pat, errbuf);
95+
}
96+
if (!str)
97+
return 0;
98+
99+
ret = regexec(&r, str, 1, m, 0);
100+
if (ret) {
101+
if (silent || ret == REG_NOMATCH)
102+
return ret;
103+
104+
regerror(ret, &r, errbuf, sizeof(errbuf));
105+
die("failed regexec() for subject '%s' (%s)", str, errbuf);
106+
}
74107

75108
return 0;
109+
usage:
110+
usage("\ttest-tool regex --bug\n"
111+
"\ttest-tool regex [--silent] <pattern>\n"
112+
"\ttest-tool regex [--silent] <pattern> <string> [<options>]");
113+
return -1;
76114
}

t/t4210-log-i18n.sh

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@ latin1_e=$(printf '\351')
1010
# invalid UTF-8
1111
invalid_e=$(printf '\303\50)') # ")" at end to close opening "("
1212

13+
have_reg_illseq=
14+
if test_have_prereq GETTEXT_LOCALE &&
15+
! LC_ALL=$is_IS_locale test-tool regex --silent $latin1_e
16+
then
17+
have_reg_illseq=1
18+
fi
19+
1320
test_expect_success 'create commits in different encodings' '
1421
test_tick &&
1522
cat >msg <<-EOF &&
@@ -51,43 +58,77 @@ test_expect_success !MINGW 'log --grep does not find non-reencoded values (utf8)
5158
test_must_be_empty actual
5259
'
5360

54-
test_expect_success !MINGW 'log --grep does not find non-reencoded values (latin1)' '
61+
test_expect_success 'log --grep does not find non-reencoded values (latin1)' '
5562
git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual &&
5663
test_must_be_empty actual
5764
'
5865

66+
triggers_undefined_behaviour () {
67+
local engine=$1
68+
69+
case $engine in
70+
fixed)
71+
if test -n "$have_reg_illseq" &&
72+
! test_have_prereq LIBPCRE2
73+
then
74+
return 0
75+
fi
76+
;;
77+
basic|extended)
78+
if test -n "$have_reg_illseq"
79+
then
80+
return 0
81+
fi
82+
;;
83+
esac
84+
return 1
85+
}
86+
87+
mismatched_git_log () {
88+
local pattern=$1
89+
90+
LC_ALL=$is_IS_locale git log --encoding=ISO-8859-1 --format=%s \
91+
--grep=$pattern
92+
}
93+
5994
for engine in fixed basic extended perl
6095
do
6196
prereq=
6297
if test $engine = "perl"
6398
then
64-
prereq="PCRE"
65-
else
66-
prereq=""
99+
prereq=PCRE
67100
fi
68101
force_regex=
69102
if test $engine != "fixed"
70103
then
71-
force_regex=.*
104+
force_regex='.*'
72105
fi
73-
test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" "
74-
cat >expect <<-\EOF &&
75-
latin1
76-
utf8
77-
EOF
78-
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$latin1_e\" >actual &&
79-
test_cmp expect actual
80-
"
81106

82-
test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" "
83-
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual &&
84-
test_must_be_empty actual
107+
test_expect_success $prereq "config grep.patternType=$engine" "
108+
git config grep.patternType $engine
85109
"
86110

87-
test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" "
88-
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual &&
111+
test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not find non-reencoded values (latin1 + locale)" "
112+
mismatched_git_log '$force_regex$utf8_e' >actual &&
89113
test_must_be_empty actual
90114
"
115+
116+
if ! triggers_undefined_behaviour $engine
117+
then
118+
test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "log --grep searches in log output encoding (latin1 + locale)" "
119+
cat >expect <<-\EOF &&
120+
latin1
121+
utf8
122+
EOF
123+
mismatched_git_log '$force_regex$latin1_e' >actual &&
124+
test_cmp expect actual
125+
"
126+
127+
test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" "
128+
mismatched_git_log '$force_regex$invalid_e' >actual &&
129+
test_must_be_empty actual
130+
"
131+
fi
91132
done
92133

93134
test_done

t/test-lib.sh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,12 +1489,6 @@ case $uname_s in
14891489
test_set_prereq SED_STRIPS_CR
14901490
test_set_prereq GREP_STRIPS_CR
14911491
;;
1492-
FreeBSD)
1493-
test_set_prereq REGEX_ILLSEQ
1494-
test_set_prereq POSIXPERM
1495-
test_set_prereq BSLASHPSPEC
1496-
test_set_prereq EXECKEEPSPID
1497-
;;
14981492
*)
14991493
test_set_prereq POSIXPERM
15001494
test_set_prereq BSLASHPSPEC

0 commit comments

Comments
 (0)