|
28 | 28 | #include <stdlib.h>
|
29 | 29 | #include <stdio.h>
|
30 | 30 | #include <string.h>
|
| 31 | +#include <iterator> |
31 | 32 |
|
32 | 33 | #include "unicode/localpointer.h"
|
33 | 34 | #include "unicode/regex.h"
|
@@ -107,6 +108,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
107 | 108 | TESTCASE_AUTO(TestBug13632);
|
108 | 109 | TESTCASE_AUTO(TestBug20359);
|
109 | 110 | TESTCASE_AUTO(TestBug20863);
|
| 111 | + TESTCASE_AUTO(TestBug23143); |
110 | 112 | TESTCASE_AUTO_END;
|
111 | 113 | }
|
112 | 114 |
|
@@ -5815,6 +5817,32 @@ void RegexTest::TestBug20359() {
|
5815 | 5817 | }
|
5816 | 5818 |
|
5817 | 5819 |
|
| 5820 | +void RegexTest::TestBug23143() { |
| 5821 | + // Regression test: a pattern containing an unpaired lead surrogate (0xD805) is |
| 5822 | + // matched against text in which 0xD805 begins a valid surrogate pair (0xD805 0xDF20). |
| 5823 | + // This originally caused an assertion failure in the implementation; find() must now report no match and leave status successful. |
| 5824 | + |
| 5825 | + // Note: the UTF-16 code units are spelled out as arrays because normal C++ string literals cannot contain unpaired surrogates. |
| 5826 | + const char16_t regex_array[] = {u'a', 0xD805, u'.', u'*', u'b'}; |
| 5827 | + UnicodeString regex(regex_array, std::size(regex_array)); |
| 5828 | + |
| 5829 | + const char16_t haystack_array[] = {u'a', 0xD805, 0xDF20}; |
| 5830 | + UnicodeString haystack(haystack_array, std::size(haystack_array)); |
| 5831 | + |
| 5832 | + UErrorCode status = U_ZERO_ERROR; |
| 5833 | + std::unique_ptr<icu::RegexPattern> re(icu::RegexPattern::compile(regex, 0, status)); |
| 5834 | + if (!assertSuccess(WHERE, status)) { |
| 5835 | + return; |
| 5836 | + } |
| 5837 | + // re->dumpPattern(); // debugging aid, intentionally left commented out |
| 5838 | + std::unique_ptr<icu::RegexMatcher> regex_matcher(re->matcher(haystack, status)); |
| 5839 | + if (!assertSuccess(WHERE, status)) { |
| 5840 | + return; |
| 5841 | + } |
| 5842 | + assertFalse(WHERE, regex_matcher->find(0, status)); |
| 5843 | + assertSuccess(WHERE, status); |
| 5844 | +} |
| 5845 | + |
5818 | 5846 | void RegexTest::TestBug20863() {
|
5819 | 5847 | // Test that patterns with a large number of named capture groups work correctly.
|
5820 | 5848 | //
|
|
0 commit comments