Skip to content

Commit 966be95

Browse files
avargitster
authored and committed
grep: add tests to fix blind spots with \0 patterns
Address a big blind spot in the tests for patterns containing \0. The is_fixed() function considers any string that contains \0 fixed, even if it contains regular expression metacharacters; those patterns are currently matched with kwset. Before this change, removing that memchr(s, 0, len) check from is_fixed() wouldn't change the result of any of the tests, since regcomp() will happily match the part before the \0. The kwset path is dependent on whether the -i flag is on, and whether the pattern has any non-ASCII characters, but none of this was tested for. Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 12fc32f commit 966be95

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

t/t7008-grep-binary.sh

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@ nul_match () {
2222
printf '$pattern' | q_to_nul >f &&
2323
test_must_fail git grep -f f $flags a
2424
"
25+
elif test "$matches" = T1
26+
then
27+
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
28+
printf '$pattern' | q_to_nul >f &&
29+
git grep -f f $flags a
30+
"
31+
elif test "$matches" = T0
32+
then
33+
test_expect_failure "git grep -f f $flags '$pattern_human' a" "
34+
printf '$pattern' | q_to_nul >f &&
35+
test_must_fail git grep -f f $flags a
36+
"
2537
else
2638
test_expect_success "PANIC: Test framework error. Unknown matches value $matches" 'false'
2739
fi
@@ -98,6 +110,65 @@ nul_match 1 '-Fi' 'YQf'
98110
nul_match 0 '-Fi' 'YQx'
99111
nul_match 1 '' 'yQf'
100112
nul_match 0 '' 'yQx'
113+
nul_match 1 '' 'æQð'
114+
nul_match 1 '-F' 'eQm[*]c'
115+
nul_match 1 '-Fi' 'EQM[*]C'
116+
117+
# Regex patterns that would match but shouldn't with -F
118+
nul_match 0 '-F' 'yQ[f]'
119+
nul_match 0 '-F' '[y]Qf'
120+
nul_match 0 '-Fi' 'YQ[F]'
121+
nul_match 0 '-Fi' '[Y]QF'
122+
nul_match 0 '-F' 'æQ[ð]'
123+
nul_match 0 '-F' '[æ]Qð'
124+
nul_match 0 '-Fi' 'ÆQ[Ð]'
125+
nul_match 0 '-Fi' '[Æ]QÐ'
126+
127+
# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0
128+
# patterns case-insensitively.
129+
nul_match T1 '-i' 'ÆQÐ'
130+
131+
# \0 implicitly disables regexes. This is an undocumented internal
132+
# limitation.
133+
nul_match T1 '' 'yQ[f]'
134+
nul_match T1 '' '[y]Qf'
135+
nul_match T1 '-i' 'YQ[F]'
136+
nul_match T1 '-i' '[Y]Qf'
137+
nul_match T1 '' 'æQ[ð]'
138+
nul_match T1 '' '[æ]Qð'
139+
nul_match T1 '-i' 'ÆQ[Ð]'
140+
141+
# ... because of \0 implicitly disabling regexes, regexes that
142+
# should/shouldn't match don't do the right thing.
143+
nul_match T1 '' 'eQm.*cQ'
144+
nul_match T1 '-i' 'EQM.*cQ'
145+
nul_match T0 '' 'eQm[*]c'
146+
nul_match T0 '-i' 'EQM[*]C'
147+
148+
# Due to the REG_STARTEND extension when kwset() is disabled on -i &
149+
# non-ASCII the string will be matched in its entirety, but the
150+
# pattern will be cut off at the first \0.
151+
nul_match 0 '-i' 'NOMATCHQð'
152+
nul_match T0 '-i' '[Æ]QNOMATCH'
153+
nul_match T0 '-i' '[æ]QNOMATCH'
154+
# Matches, but for the wrong reasons, just stops at [æ]
155+
nul_match 1 '-i' '[Æ]Qð'
156+
nul_match 1 '-i' '[æ]Qð'
157+
158+
# Ensure that the matcher doesn't regress to something that stops at
159+
# \0
160+
nul_match 0 '-F' 'yQ[f]'
161+
nul_match 0 '-Fi' 'YQ[F]'
162+
nul_match 0 '' 'yQNOMATCH'
163+
nul_match 0 '' 'QNOMATCH'
164+
nul_match 0 '-i' 'YQNOMATCH'
165+
nul_match 0 '-i' 'QNOMATCH'
166+
nul_match 0 '-F' 'æQ[ð]'
167+
nul_match 0 '-Fi' 'ÆQ[Ð]'
168+
nul_match 0 '' 'yQNÓMATCH'
169+
nul_match 0 '' 'QNÓMATCH'
170+
nul_match 0 '-i' 'YQNÓMATCH'
171+
nul_match 0 '-i' 'QNÓMATCH'
101172

102173
test_expect_success 'grep respects binary diff attribute' '
103174
echo text >t &&

0 commit comments

Comments
 (0)