Skip to content

Commit e3e65be

Browse files
<regex>: Always reject character ranges with set limits (#5158)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 79137b6 commit e3e65be

File tree

4 files changed: 65 additions and 32 deletions.

stl/inc/regex

Lines changed: 26 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -4086,37 +4086,38 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas
40864086
return;
40874087
}
40884088

4089-
if (_Ret != _Prs_set) {
4090-
if (_Val == 0 && !(_L_flags & _L_bzr_chr)) {
4091-
_Error(regex_constants::error_escape);
4092-
}
4093-
4094-
if (_Mchar == _Meta_dash) { // check for valid range
4095-
_Next();
4096-
_Elem _Chr1 = static_cast<_Elem>(_Val);
4097-
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
4098-
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
4099-
_Nfa._Add_char_to_class(_Meta_dash);
4100-
return;
4101-
}
4089+
if (_Ret == _Prs_chr && _Val == 0 && !(_L_flags & _L_bzr_chr)) {
4090+
_Error(regex_constants::error_escape);
4091+
}
41024092

4103-
if (_Ret == _Prs_set) {
4104-
_Error(regex_constants::error_range); // set follows dash
4093+
if (_Mchar == _Meta_dash) { // check for valid range
4094+
_Next();
4095+
_Elem _Chr1 = static_cast<_Elem>(_Val);
4096+
const bool _Set_preceding = _Ret == _Prs_set;
4097+
if ((_Ret = _ClassAtom()) == _Prs_none) { // treat - as ordinary character
4098+
if (!_Set_preceding) {
4099+
_Nfa._Add_char_to_class(_Chr1);
41054100
}
4101+
_Nfa._Add_char_to_class(_Meta_dash);
4102+
return;
4103+
}
41064104

4107-
if (_Flags & regex_constants::collate) { // translate ends of range
4108-
_Val = _Traits.translate(static_cast<_Elem>(_Val));
4109-
_Chr1 = _Traits.translate(_Chr1);
4110-
}
4105+
if (_Set_preceding || _Ret == _Prs_set) {
4106+
_Error(regex_constants::error_range); // set precedes or follows dash
4107+
}
41114108

4112-
if (static_cast<typename _RxTraits::_Uelem>(_Val) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
4113-
_Error(regex_constants::error_range);
4114-
}
4109+
if (_Flags & regex_constants::collate) { // translate ends of range
4110+
_Val = _Traits.translate(static_cast<_Elem>(_Val));
4111+
_Chr1 = _Traits.translate(_Chr1);
4112+
}
41154113

4116-
_Nfa._Add_range(_Chr1, static_cast<_Elem>(_Val));
4117-
} else {
4118-
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
4114+
if (static_cast<typename _RxTraits::_Uelem>(_Val) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
4115+
_Error(regex_constants::error_range);
41194116
}
4117+
4118+
_Nfa._Add_range(_Chr1, static_cast<_Elem>(_Val));
4119+
} else if (_Ret == _Prs_chr) {
4120+
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
41204121
}
41214122
}
41224123
}

tests/libcxx/expected_results.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -561,7 +561,6 @@ std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL
561561
std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL
562562
std/re/re.regex/re.regex.construct/bad_backref.pass.cpp FAIL
563563
std/re/re.regex/re.regex.construct/bad_escape.pass.cpp FAIL
564-
std/re/re.regex/re.regex.construct/bad_range.pass.cpp FAIL
565564
std/re/re.regex/re.regex.construct/default.pass.cpp FAIL
566565
std/re/re.regex/re.regex.nonmemb/re.regex.nmswap/swap.pass.cpp FAIL
567566
std/re/re.regex/re.regex.swap/swap.pass.cpp FAIL

tests/std/include/test_regex_support.hpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -160,18 +160,20 @@ class regex_fixture {
160160
}
161161
}
162162

163-
void should_throw(const std::string& pattern, const std::regex_constants::error_type expectedCode) {
163+
void should_throw(const std::string& pattern, const std::regex_constants::error_type expectedCode,
164+
const std::regex_constants::syntax_option_type syntax = std::regex_constants::ECMAScript) {
164165
try {
165-
const std::regex r(pattern);
166-
printf(R"(regex r("%s") succeeded (which is bad).)"
166+
const std::regex r(pattern, syntax);
167+
printf(R"(regex r("%s", 0x%X) succeeded (which is bad).)"
167168
"\n",
168-
pattern.c_str());
169+
pattern.c_str(), static_cast<unsigned int>(syntax));
169170
fail_regex();
170171
} catch (const std::regex_error& e) {
171172
if (e.code() != expectedCode) {
172-
printf(R"(regex r("%s") threw 0x%X; expected 0x%X)"
173+
printf(R"(regex r("%s", 0x%X) threw 0x%X; expected 0x%X)"
173174
"\n",
174-
pattern.c_str(), static_cast<unsigned int>(e.code()), static_cast<unsigned int>(expectedCode));
175+
pattern.c_str(), static_cast<unsigned int>(syntax), static_cast<unsigned int>(e.code()),
176+
static_cast<unsigned int>(expectedCode));
175177
fail_regex();
176178
}
177179
}

tests/std/tests/VSO_0000000_regex_use/test.cpp

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -582,6 +582,36 @@ void test_gh_993() {
582582
}
583583
}
584584

585+
void test_gh_4995() {
586+
// GH-4995: R"([\d-e])" should be rejected
587+
g_regexTester.should_throw(R"([\d-e])", error_range);
588+
g_regexTester.should_throw(R"([e-\d])", error_range);
589+
g_regexTester.should_throw(R"([\w-\d])", error_range);
590+
g_regexTester.should_throw("[[:digit:]-e]", error_range);
591+
g_regexTester.should_throw("[e-[:digit:]]", error_range);
592+
g_regexTester.should_throw("[[:alpha:]-[:digit:]]", error_range);
593+
g_regexTester.should_throw("[[=a=]-e]", error_range, ECMAScript | regex::collate);
594+
g_regexTester.should_throw("[e-[=a=]]", error_range, ECMAScript | regex::collate);
595+
g_regexTester.should_throw("[[=a=]-[=b=]]", error_range, ECMAScript | regex::collate);
596+
597+
// Test valid cases:
598+
g_regexTester.should_not_match("b", R"([\d-])");
599+
g_regexTester.should_match("5", R"([\d-])");
600+
g_regexTester.should_match("-", R"([\d-])");
601+
602+
g_regexTester.should_not_match("b", R"([-\d])");
603+
g_regexTester.should_match("5", R"([-\d])");
604+
g_regexTester.should_match("-", R"([-\d])");
605+
606+
g_regexTester.should_match("b", R"([a-c\d])");
607+
g_regexTester.should_match("5", R"([a-c\d])");
608+
g_regexTester.should_not_match("-", R"([a-c\d])");
609+
610+
g_regexTester.should_match("b", R"([\da-c])");
611+
g_regexTester.should_match("5", R"([\da-c])");
612+
g_regexTester.should_not_match("-", R"([\da-c])");
613+
}
614+
585615
void test_gh_5058() {
586616
// GH-5058 "<regex>: Small cleanups" changed some default constructors to be defaulted.
587617
// Verify that <regex> types are still const-default-constructible (N4993 [dcl.init.general]/8).
@@ -656,6 +686,7 @@ int main() {
656686
test_VSO_225160_match_eol_flag();
657687
test_VSO_226914_word_boundaries();
658688
test_gh_993();
689+
test_gh_4995();
659690
test_gh_5058();
660691

661692
return g_regexTester.result();

0 commit comments

Comments
 (0)