-
Notifications
You must be signed in to change notification settings - Fork 15k
[libc++] <regex>: Make unmatched backrefs always succeed in ECMAScript mode #154442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…MAScript mode. Fix llvm#154408
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-libcxx Author: None (SainoNamkho) Changes: Fix #154408. Full diff: https://github.com/llvm/llvm-project/pull/154442.diff 2 Files Affected:
diff --git a/libcxx/include/regex b/libcxx/include/regex
index 9bbc3a69021b9..ba69f5681a5d4 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1668,7 +1668,7 @@ void __end_marked_subexpression<_CharT>::__exec(__state& __s) const {
// __back_ref
-template <class _CharT>
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
class __back_ref : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1682,8 +1682,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT>
-void __back_ref<_CharT>::__exec(__state& __s) const {
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
+void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
if (__mexp_ > __s.__sub_matches_.size())
std::__throw_regex_error<regex_constants::error_backref>();
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
@@ -1697,6 +1697,9 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
}
+ } else if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();;
} else {
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -1705,7 +1708,7 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
// __back_ref_icase
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
class __back_ref_icase : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1721,8 +1724,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT, class _Traits>
-void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
if (__sm.matched) {
ptrdiff_t __len = __sm.second - __sm.first;
@@ -1739,6 +1742,11 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
__s.__node_ = nullptr;
}
} else {
+ if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ return;
+ }
__not_equal:
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -1747,7 +1755,7 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
// __back_ref_collate
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
class __back_ref_collate : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1763,8 +1771,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT, class _Traits>
-void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
if (__sm.matched) {
ptrdiff_t __len = __sm.second - __sm.first;
@@ -1781,6 +1789,11 @@ void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
__s.__node_ = nullptr;
}
} else {
+ if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ return;
+ }
__not_equal:
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -2565,6 +2578,7 @@ private:
bool __greedy = true);
__bracket_expression<_CharT, _Traits>* __start_matching_list(bool __negate);
void __push_char(value_type __c);
+ template <bool _UnmatchedAlwaysSucceed = false>
void __push_back_ref(int __i);
void __push_alternation(__owns_one_state<_CharT>* __sa, __owns_one_state<_CharT>* __sb);
void __push_begin_marked_subexpression();
@@ -3807,7 +3821,7 @@ basic_regex<_CharT, _Traits>::__parse_decimal_escape(_ForwardIterator __first, _
}
if (__v == 0 || __v > mark_count())
std::__throw_regex_error<regex_constants::error_backref>();
- __push_back_ref(__v);
+ __push_back_ref<true>(__v);
}
}
return __first;
@@ -4149,13 +4163,14 @@ void basic_regex<_CharT, _Traits>::__push_word_boundary(bool __invert) {
}
template <class _CharT, class _Traits>
+template <bool _UnmatchedAlwaysSucceed>
void basic_regex<_CharT, _Traits>::__push_back_ref(int __i) {
if (flags() & icase)
- __end_->first() = new __back_ref_icase<_CharT, _Traits>(__traits_, __i, __end_->first());
+ __end_->first() = new __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
else if (flags() & collate)
- __end_->first() = new __back_ref_collate<_CharT, _Traits>(__traits_, __i, __end_->first());
+ __end_->first() = new __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
else
- __end_->first() = new __back_ref<_CharT>(__i, __end_->first());
+ __end_->first() = new __back_ref<_CharT, _UnmatchedAlwaysSucceed>(__i, __end_->first());
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index 518c27e424484..a6a1a9cf87c24 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -762,6 +762,21 @@ int main(int, char**)
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
+ {
+ std::cmatch m;
+ const char s[] = "a";
+ assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<char>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
@@ -1503,6 +1518,21 @@ int main(int, char**)
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
+ {
+ std::wcmatch m;
+ const wchar_t s[] = L"a";
+ assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
#endif // TEST_HAS_NO_WIDE_CHARACTERS
return 0;
|
You can test this locally with the following command: git-clang-format --diff HEAD~1 HEAD --extensions ,cpp -- libcxx/include/regex libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
View the diff from clang-format here.
diff --git a/libcxx/include/regex b/libcxx/include/regex
index ba69f5681..205ccd625 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1699,7 +1699,8 @@ void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
}
} else if constexpr (_UnmatchedAlwaysSucceed) {
__s.__do_ = __state::__accept_but_not_consume;
- __s.__node_ = this->first();;
+ __s.__node_ = this->first();
+ ;
} else {
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index a6a1a9cf8..89273acc1 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -763,19 +763,19 @@ int main(int, char**)
assert(m.str(0) == s);
}
{
- std::cmatch m;
- const char s[] = "a";
- assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
- assert(m.size() == 3);
- assert(!m.prefix().matched);
- assert(m.prefix().first == s);
- assert(m.prefix().second == m[0].first);
- assert(!m.suffix().matched);
- assert(m.suffix().first == m[0].second);
- assert(m.suffix().second == s + std::char_traits<char>::length(s));
- assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
- assert(m.position(0) == 0);
- assert(m.str(0) == s);
+ std::cmatch m;
+ const char s[] = "a";
+ assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<char>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
@@ -1519,19 +1519,19 @@ int main(int, char**)
assert(m.str(0) == s);
}
{
- std::wcmatch m;
- const wchar_t s[] = L"a";
- assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
- assert(m.size() == 3);
- assert(!m.prefix().matched);
- assert(m.prefix().first == s);
- assert(m.prefix().second == m[0].first);
- assert(!m.suffix().matched);
- assert(m.suffix().first == m[0].second);
- assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
- assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
- assert(m.position(0) == 0);
- assert(m.str(0) == s);
+ std::wcmatch m;
+ const wchar_t s[] = L"a";
+ assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS
|
} | ||
} else if constexpr (_UnmatchedAlwaysSucceed) { | ||
__s.__do_ = __state::__accept_but_not_consume; | ||
__s.__node_ = this->first();; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Repeated semicolon. (clang-format didn't suggest removing it, but it's better to do so.)
__s.__node_ = this->first();; | |
__s.__node_ = this->first(); |
{ | ||
std::cmatch m; | ||
const char s[] = "a"; | ||
assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"})); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test also runs in C++03 mode. Ditto below.
assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"})); | |
assert(std::regex_search(s, m, std::regex("(a()|)\\2a"))); |
Fix #154408