Skip to content

Commit 198379c

Browse files
committed
Add some additional tests that I found lying around.
1 parent 630b1cd commit 198379c

File tree

4 files changed

+231
-0
lines changed

4 files changed

+231
-0
lines changed

testdata/testinput1

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6557,4 +6557,95 @@ ef) x/x,mark
65576557
/A{ 3, }/
65586558
BBAAAAAACC
65596559

6560+
# This pattern validates regular expression patterns. The original that I was
6561+
# sent was this:
6562+
# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/
6563+
# This is not very readable, and also does not handle all features. I have done
6564+
# some work on it.
6565+
6566+
/^
6567+
(?<re>
6568+
# A regular expression is zero or more of these items.
6569+
(?:
6570+
# An item is one of these:
6571+
(?:
6572+
[^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted .
6573+
\\.| # Quoted .
6574+
6575+
\[ # Class, which is [
6576+
(?: # Followed by
6577+
\^?\\.| # Optional ^ and any escaped character
6578+
\^[^\\]| # OR ^ and not escaped character
6579+
[^\\^] # OR neither ^ nor \
6580+
) # Followed by
6581+
(?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot
6582+
\]| # Class ends with ]
6583+
6584+
\( # Parenthesized group
6585+
(?: # Start with optional
6586+
\?[:=!]| # ? followed by : = !
6587+
\?<[=!]| # OR ?< followed by = or !
6588+
\?> # OR ?>
6589+
)?
6590+
(?&re)?? # Then a nested <re>
6591+
\)| # End parenthesized group
6592+
6593+
\(\? # Other parenthesized items
6594+
(?: # (? followed by
6595+
R| # R
6596+
[+-]?\d++ # Or optional +- and digits
6597+
)
6598+
\)| # End parens
6599+
6600+
\(\* # Verbs
6601+
(?:
6602+
COMMIT|
6603+
FAIL|
6604+
MARK:[^)]*|
6605+
(?:PRUNE|SKIP|THEN)(?::[^\)]*+)?
6606+
)
6607+
\)
6608+
) # End list of items
6609+
6610+
# Followed by an optional quantifier
6611+
6612+
(?:
6613+
(?:
6614+
[?+*] # ?+*
6615+
| # OR
6616+
\{\d+ # { digits
6617+
(?:,\d*)? # optionally followed by ,digits
6618+
\} # then closing }
6619+
| # OR
6620+
\{,\d+} # {,digits}
6621+
)
6622+
[?+]? # optional ungreedy or possessive
6623+
)?
6624+
6625+
| # OR an "item" is a branch ending
6626+
6627+
\|
6628+
6629+
)* # Zero or more top-level items.
6630+
) # End regex group.
6631+
$/x
6632+
[abcdef]
6633+
[abc\\]def]
6634+
a.b|abcd
6635+
ab()d
6636+
ab{1,3}d
6637+
ab{,3}d
6638+
ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
6639+
ab(*MARK:xyz)
6640+
(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s])
6641+
abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz
6642+
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
6643+
\\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2
6644+
\= Expect no match
6645+
ab)d
6646+
ab(d
6647+
{4,5}
6648+
a[]b
6649+
(a)(?(1)a|b|c)
6650+
65606651
# End of testinput1

testdata/testinput2

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6059,4 +6059,11 @@ a)"xI
60596059
a
60606060
a\=noteol
60616061

6062+
# This matches a character that only exists once in the subject, sort of like a
6063+
# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length
6064+
# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0.
6065+
6066+
/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g
6067+
aaabcccdeee
6068+
60626069
# End of testinput2

testdata/testoutput1

Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10375,4 +10375,124 @@ No match
1037510375
BBAAAAAACC
1037610376
0: AAAAAA
1037710377

10378+
# This pattern validates regular expression patterns. The original that I was
10379+
# sent was this:
10380+
# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/
10381+
# This is not very readable, and also does not handle all features. I have done
10382+
# some work on it.
10383+
10384+
/^
10385+
(?<re>
10386+
# A regular expression is zero or more of these items.
10387+
(?:
10388+
# An item is one of these:
10389+
(?:
10390+
[^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted .
10391+
\\.| # Quoted .
10392+
10393+
\[ # Class, which is [
10394+
(?: # Followed by
10395+
\^?\\.| # Optional ^ and any escaped character
10396+
\^[^\\]| # OR ^ and not escaped character
10397+
[^\\^] # OR neither ^ nor \
10398+
) # Followed by
10399+
(?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot
10400+
\]| # Class ends with ]
10401+
10402+
\( # Parenthesized group
10403+
(?: # Start with optional
10404+
\?[:=!]| # ? followed by : = !
10405+
\?<[=!]| # OR ?< followed by = or !
10406+
\?> # OR ?>
10407+
)?
10408+
(?&re)?? # Then a nested <re>
10409+
\)| # End parenthesized group
10410+
10411+
\(\? # Other parenthesized items
10412+
(?: # (? followed by
10413+
R| # R
10414+
[+-]?\d++ # Or optional +- and digits
10415+
)
10416+
\)| # End parens
10417+
10418+
\(\* # Verbs
10419+
(?:
10420+
COMMIT|
10421+
FAIL|
10422+
MARK:[^)]*|
10423+
(?:PRUNE|SKIP|THEN)(?::[^\)]*+)?
10424+
)
10425+
\)
10426+
) # End list of items
10427+
10428+
# Followed by an optional quantifier
10429+
10430+
(?:
10431+
(?:
10432+
[?+*] # ?+*
10433+
| # OR
10434+
\{\d+ # { digits
10435+
(?:,\d*)? # optionally followed by ,digits
10436+
\} # then closing }
10437+
| # OR
10438+
\{,\d+} # {,digits}
10439+
)
10440+
[?+]? # optional ungreedy or possessive
10441+
)?
10442+
10443+
| # OR an "item" is a branch ending
10444+
10445+
\|
10446+
10447+
)* # Zero or more top-level items.
10448+
) # End regex group.
10449+
$/x
10450+
[abcdef]
10451+
0: [abcdef]
10452+
1: [abcdef]
10453+
[abc\\]def]
10454+
0: [abc\]def]
10455+
1: [abc\]def]
10456+
a.b|abcd
10457+
0: a.b|abcd
10458+
1: a.b|abcd
10459+
ab()d
10460+
0: ab()d
10461+
1: ab()d
10462+
ab{1,3}d
10463+
0: ab{1,3}d
10464+
1: ab{1,3}d
10465+
ab{,3}d
10466+
0: ab{,3}d
10467+
1: ab{,3}d
10468+
ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
10469+
0: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
10470+
1: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
10471+
ab(*MARK:xyz)
10472+
0: ab(*MARK:xyz)
10473+
1: ab(*MARK:xyz)
10474+
(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s])
10475+
0: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])
10476+
1: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])
10477+
abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz
10478+
0: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz
10479+
1: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz
10480+
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
10481+
0: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
10482+
1: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
10483+
\\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2
10484+
0: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2
10485+
1: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2
10486+
\= Expect no match
10487+
ab)d
10488+
No match
10489+
ab(d
10490+
No match
10491+
{4,5}
10492+
No match
10493+
a[]b
10494+
No match
10495+
(a)(?(1)a|b|c)
10496+
No match
10497+
1037810498
# End of testinput1

testdata/testoutput2

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17952,6 +17952,19 @@ No match
1795217952
a\=noteol
1795317953
0: a
1795417954

17955+
# This matches a character that only exists once in the subject, sort of like a
17956+
# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length
17957+
# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0.
17958+
17959+
/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g
17960+
aaabcccdeee
17961+
0: b
17962+
1: aaa
17963+
2: b
17964+
0: d
17965+
1: ccc
17966+
2: d
17967+
1795517968
# End of testinput2
1795617969
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
1795717970
Error -62: bad serialized data

0 commit comments

Comments
 (0)