Skip to content

Commit b932cf8

Browse files
<regex>: Repair character class escapes outside character class definitions (#5160)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent e3e65be commit b932cf8

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

stl/inc/regex

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1710,7 +1710,7 @@ private:
17101710
bool _OctalDigits();
17111711
void _Do_ex_class(_Meta_type);
17121712
bool _CharacterClassEscape(bool);
1713-
_Prs_ret _ClassEscape(bool);
1713+
_Prs_ret _ClassEscape2();
17141714
_Prs_ret _ClassAtom();
17151715
void _ClassRanges();
17161716
void _CharacterClass();
@@ -4017,22 +4017,29 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { //
40174017
return false;
40184018
}
40194019

4020+
const bool _Negated = _Traits.isctype(_Char, _RxTraits::_Ch_upper);
40204021
if (_Addit) {
40214022
_Nfa._Add_class();
4023+
// GH-992: Outside character class definitions, _Cls completely defines the character class
4024+
// so negating _Cls and negating the entire character class are equivalent.
4025+
// Since the former negation is defective, do the latter instead.
4026+
if (_Negated) {
4027+
_Nfa._Negate();
4028+
}
40224029
}
40234030

4024-
_Nfa._Add_named_class(_Cls, _Traits.isctype(_Char, _RxTraits::_Ch_upper));
4031+
_Nfa._Add_named_class(_Cls, _Negated && !_Addit);
40254032
_Next();
40264033
return true;
40274034
}
40284035

40294036
template <class _FwdIt, class _Elem, class _RxTraits>
4030-
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape(bool _Addit) { // check for class escape
4037+
_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class escape
40314038
if ((_L_flags & _L_esc_bsl) && _Char == _Esc_bsl) { // handle escape backslash if allowed
40324039
_Val = _Esc_bsl;
40334040
_Next();
40344041
return _Prs_chr;
4035-
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(_Addit)) {
4042+
} else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) {
40364043
return _Prs_set;
40374044
} else if (_DecimalDigits(regex_constants::error_escape)) { // check for invalid value
40384045
if (_Val != 0) {
@@ -4049,7 +4056,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class at
40494056
if (_Mchar == _Meta_esc) { // check for valid escape sequence
40504057
_Next();
40514058
if (_L_flags & _L_grp_esc) {
4052-
return _ClassEscape(false);
4059+
return _ClassEscape2();
40534060
} else if ((_L_flags & _L_esc_ffn && _Do_ffn(_Char))
40544061
|| (_L_flags & _L_esc_ffnx && _Do_ffnx(_Char))) { // advance to next character
40554062
_Next();

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -659,6 +659,16 @@ void test_gh_5058() {
659659
}
660660
}
661661

662+
void test_gh_5160() {
663+
// GH-5160 fixed mishandled negated character class escapes
664+
// outside character class definitions. The wide-character pattern below uses the negated escape \S between the literals Y and Z: searches are expected to match when only non-space characters (ASCII or non-ASCII) lie between them, and to fail when a space character (ASCII or non-ASCII) lies between them.
665+
const test_wregex neg_regex(&g_regexTester, LR"(Y\S*Z)");
666+
neg_regex.should_search_match(L"xxxYxx\x0078xxxZxxx", L"Yxx\x0078xxxZ"); // U+0078 LATIN SMALL LETTER X
667+
neg_regex.should_search_match(L"xxxYxx\x03C7xxxZxxx", L"Yxx\x03C7xxxZ"); // U+03C7 GREEK SMALL LETTER CHI
668+
neg_regex.should_search_fail(L"xxxYxx xxxZxxx"); // U+0020 SPACE between Y and Z
669+
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
670+
}
671+
662672
int main() {
663673
test_dev10_449367_case_insensitivity_should_work();
664674
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -688,6 +698,7 @@ int main() {
688698
test_gh_993();
689699
test_gh_4995();
690700
test_gh_5058();
701+
test_gh_5160();
691702

692703
return g_regexTester.result();
693704
}

0 commit comments

Comments
 (0)