Skip to content

Conversation

@SainoNamkho
Copy link

Fix #154408

@SainoNamkho requested a review from a team as a code owner on August 19, 2025, 23:44
@github-actions
Copy link

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

@llvmbot added the "libc++" label (libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.) on Aug 19, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 19, 2025

@llvm/pr-subscribers-libcxx

Author: None (SainoNamkho)

Changes

Fix #154408


Full diff: https://github.com/llvm/llvm-project/pull/154442.diff

2 Files Affected:

  • (modified) libcxx/include/regex (+28-13)
  • (modified) libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp (+30)
diff --git a/libcxx/include/regex b/libcxx/include/regex
index 9bbc3a69021b9..ba69f5681a5d4 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1668,7 +1668,7 @@ void __end_marked_subexpression<_CharT>::__exec(__state& __s) const {
 
 // __back_ref
 
-template <class _CharT>
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
 class __back_ref : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1682,8 +1682,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT>
-void __back_ref<_CharT>::__exec(__state& __s) const {
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
+void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   if (__mexp_ > __s.__sub_matches_.size())
     std::__throw_regex_error<regex_constants::error_backref>();
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
@@ -1697,6 +1697,9 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
       __s.__do_   = __state::__reject;
       __s.__node_ = nullptr;
     }
+  } else if constexpr (_UnmatchedAlwaysSucceed) {
+    __s.__do_   = __state::__accept_but_not_consume;
+    __s.__node_ = this->first();;
   } else {
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -1705,7 +1708,7 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
 
 // __back_ref_icase
 
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
 class __back_ref_icase : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1721,8 +1724,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT, class _Traits>
-void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
   if (__sm.matched) {
     ptrdiff_t __len = __sm.second - __sm.first;
@@ -1739,6 +1742,11 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
       __s.__node_ = nullptr;
     }
   } else {
+    if constexpr (_UnmatchedAlwaysSucceed) {
+      __s.__do_   = __state::__accept_but_not_consume;
+      __s.__node_ = this->first();
+      return;
+    }
   __not_equal:
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -1747,7 +1755,7 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
 
 // __back_ref_collate
 
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
 class __back_ref_collate : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1763,8 +1771,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT, class _Traits>
-void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
   if (__sm.matched) {
     ptrdiff_t __len = __sm.second - __sm.first;
@@ -1781,6 +1789,11 @@ void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
       __s.__node_ = nullptr;
     }
   } else {
+    if constexpr (_UnmatchedAlwaysSucceed) {
+      __s.__do_   = __state::__accept_but_not_consume;
+      __s.__node_ = this->first();
+      return;
+    }
   __not_equal:
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -2565,6 +2578,7 @@ private:
                    bool __greedy       = true);
   __bracket_expression<_CharT, _Traits>* __start_matching_list(bool __negate);
   void __push_char(value_type __c);
+  template <bool _UnmatchedAlwaysSucceed = false>
   void __push_back_ref(int __i);
   void __push_alternation(__owns_one_state<_CharT>* __sa, __owns_one_state<_CharT>* __sb);
   void __push_begin_marked_subexpression();
@@ -3807,7 +3821,7 @@ basic_regex<_CharT, _Traits>::__parse_decimal_escape(_ForwardIterator __first, _
       }
       if (__v == 0 || __v > mark_count())
         std::__throw_regex_error<regex_constants::error_backref>();
-      __push_back_ref(__v);
+      __push_back_ref<true>(__v);
     }
   }
   return __first;
@@ -4149,13 +4163,14 @@ void basic_regex<_CharT, _Traits>::__push_word_boundary(bool __invert) {
 }
 
 template <class _CharT, class _Traits>
+template <bool _UnmatchedAlwaysSucceed>
 void basic_regex<_CharT, _Traits>::__push_back_ref(int __i) {
   if (flags() & icase)
-    __end_->first() = new __back_ref_icase<_CharT, _Traits>(__traits_, __i, __end_->first());
+    __end_->first() = new __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
   else if (flags() & collate)
-    __end_->first() = new __back_ref_collate<_CharT, _Traits>(__traits_, __i, __end_->first());
+    __end_->first() = new __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
   else
-    __end_->first() = new __back_ref<_CharT>(__i, __end_->first());
+    __end_->first() = new __back_ref<_CharT, _UnmatchedAlwaysSucceed>(__i, __end_->first());
   __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index 518c27e424484..a6a1a9cf87c24 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -762,6 +762,21 @@ int main(int, char**)
         assert(m.position(0) == 0);
         assert(m.str(0) == s);
     }
+    {
+        std::cmatch m;
+        const char s[] = "a";
+        assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+        assert(m.size() == 3);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == s + std::char_traits<char>::length(s));
+        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+    }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
     {
@@ -1503,6 +1518,21 @@ int main(int, char**)
         assert(m.position(0) == 0);
         assert(m.str(0) == s);
     }
+    {
+        std::wcmatch m;
+        const wchar_t s[] = L"a";
+        assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+        assert(m.size() == 3);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+    }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
   return 0;

@SainoNamkho changed the title from "[libc++] <regex>: Make unmatched backrefs should always succeed in ECMAScript mode" to "[libc++] <regex>: Make unmatched backrefs always succeed in ECMAScript mode" on Aug 19, 2025
@github-actions
Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions ,cpp -- libcxx/include/regex libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
View the diff from clang-format here.
diff --git a/libcxx/include/regex b/libcxx/include/regex
index ba69f5681..205ccd625 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1699,7 +1699,8 @@ void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
     }
   } else if constexpr (_UnmatchedAlwaysSucceed) {
     __s.__do_   = __state::__accept_but_not_consume;
-    __s.__node_ = this->first();;
+    __s.__node_ = this->first();
+    ;
   } else {
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index a6a1a9cf8..89273acc1 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -763,19 +763,19 @@ int main(int, char**)
         assert(m.str(0) == s);
     }
     {
-        std::cmatch m;
-        const char s[] = "a";
-        assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
-        assert(m.size() == 3);
-        assert(!m.prefix().matched);
-        assert(m.prefix().first == s);
-        assert(m.prefix().second == m[0].first);
-        assert(!m.suffix().matched);
-        assert(m.suffix().first == m[0].second);
-        assert(m.suffix().second == s + std::char_traits<char>::length(s));
-        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
-        assert(m.position(0) == 0);
-        assert(m.str(0) == s);
+      std::cmatch m;
+      const char s[] = "a";
+      assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+      assert(m.size() == 3);
+      assert(!m.prefix().matched);
+      assert(m.prefix().first == s);
+      assert(m.prefix().second == m[0].first);
+      assert(!m.suffix().matched);
+      assert(m.suffix().first == m[0].second);
+      assert(m.suffix().second == s + std::char_traits<char>::length(s));
+      assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+      assert(m.position(0) == 0);
+      assert(m.str(0) == s);
     }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
@@ -1519,19 +1519,19 @@ int main(int, char**)
         assert(m.str(0) == s);
     }
     {
-        std::wcmatch m;
-        const wchar_t s[] = L"a";
-        assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
-        assert(m.size() == 3);
-        assert(!m.prefix().matched);
-        assert(m.prefix().first == s);
-        assert(m.prefix().second == m[0].first);
-        assert(!m.suffix().matched);
-        assert(m.suffix().first == m[0].second);
-        assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
-        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
-        assert(m.position(0) == 0);
-        assert(m.str(0) == s);
+      std::wcmatch m;
+      const wchar_t s[] = L"a";
+      assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+      assert(m.size() == 3);
+      assert(!m.prefix().matched);
+      assert(m.prefix().first == s);
+      assert(m.prefix().second == m[0].first);
+      assert(!m.suffix().matched);
+      assert(m.suffix().first == m[0].second);
+      assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+      assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+      assert(m.position(0) == 0);
+      assert(m.str(0) == s);
     }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 

}
} else if constexpr (_UnmatchedAlwaysSucceed) {
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Repeated semicolon. (clang-format didn't suggest removing it, but it's better to do so.)

Suggested change
- __s.__node_ = this->first();;
+ __s.__node_ = this->first();

{
std::cmatch m;
const char s[] = "a";
assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test also runs in C++03 mode, where braced initialization is not available, so use parentheses instead. Ditto below.

Suggested change
- assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+ assert(std::regex_search(s, m, std::regex("(a()|)\\2a")));

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[libc++] <regex>: Unmatched backrefs should always succeed in ECMAScript mode.

3 participants