Skip to content

Commit ef5873e

Browse files
committed
[GR-63828] TRegex: reorganize flavor implementations.
PullRequest: graal/21401
2 parents 9fcc190 + 305f333 commit ef5873e

File tree

83 files changed

+940
-671
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

83 files changed

+940
-671
lines changed

regex/mx.regex/suite.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
# SOFTWARE.
4040
#
4141
suite = {
42-
"mxversion": "7.55.2",
42+
"mxversion": "7.58.0",
4343

4444
"name" : "regex",
4545

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlagsTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444

4545
import org.junit.Test;
4646

47+
import com.oracle.truffle.regex.flavor.python.PythonFlags;
48+
4749
public class PythonFlagsTest {
4850

4951
private static PythonFlags parse(String flags) {

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JavaUtilPatternTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
import com.oracle.truffle.regex.RegexSyntaxException.ErrorCode;
6262
import com.oracle.truffle.regex.charset.Range;
6363
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
64-
import com.oracle.truffle.regex.tregex.parser.flavors.java.JavaFlags;
64+
import com.oracle.truffle.regex.flavor.java.JavaFlags;
6565
import com.oracle.truffle.regex.tregex.string.Encodings;
6666
import com.oracle.truffle.regex.util.EmptyArrays;
6767

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
*/
4141
package com.oracle.truffle.regex.tregex.test;
4242

43+
import java.util.Collections;
4344
import java.util.Map;
4445

4546
import org.junit.Test;
@@ -1665,6 +1666,7 @@ public void generatedTests() {
16651666
"axaxeageagageaxeaxeaxageaxagageaxeaxagageagaxaxeagaxeaxagagaxeagageaxeaxeagageaxeaxagaxaxaxageageagageagaxaxaxageaxageaxeageaxaxaxaxaxagaxagageaxeageageageaxeaxeaxageaxaxeaxeagaxagageaxeageaxeaxaxeaxageaxaxeagaxageageaz",
16661667
0, false);
16671668
test("(a{1100,1100})\\1", "i", "a".repeat(2400), 0, true, 0, 2200, 0, 1100);
1669+
test("[a]\\S{213,213}bcdz", "", "a".repeat(215) + ("bcxd" + "a".repeat(213)).repeat(3), 0, false);
16681670

16691671
/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
16701672
}
@@ -1686,4 +1688,16 @@ public void testForceLinearExecution() {
16861688
test(".*a{1,65534}.*", "", "_aabaaa_", 0, true, 0, 8);
16871689
expectUnsupported(".*a{1,65534}.*", "", OPT_FORCE_LINEAR_EXECUTION);
16881690
}
1691+
1692+
@Test
1693+
public void orcl38190286() {
1694+
test("[[:alpha:]]", "", "\ufffd", 0, true, 0, 3);
1695+
test("[[:alpha:]]", "", "\uD839", 0, false);
1696+
test("[[:alpha:]]", "", "\uDDF2", 0, false);
1697+
test("[[:alpha:]]", "", "\uD839\uDDF2", 0, false);
1698+
test("[[:alpha:]]", "", Collections.emptyMap(), Encodings.UTF_16, "\ufffd", 0, true, 0, 1);
1699+
test("[[:alpha:]]", "", Collections.emptyMap(), Encodings.UTF_16, "\uD839", 0, false);
1700+
test("[[:alpha:]]", "", Collections.emptyMap(), Encodings.UTF_16, "\uDDF2", 0, false);
1701+
test("[[:alpha:]]", "", Collections.emptyMap(), Encodings.UTF_16, "\uD839\uDDF2", 0, false);
1702+
}
16891703
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
import org.junit.Assert;
4949
import org.junit.Test;
5050

51-
import com.oracle.truffle.regex.errors.PyErrorMessages;
51+
import com.oracle.truffle.regex.flavor.python.PyErrorMessages;
5252
import com.oracle.truffle.regex.tregex.TRegexOptions;
5353
import com.oracle.truffle.regex.tregex.string.Encodings;
5454

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/RegexOptionsTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@
5656
import com.oracle.truffle.regex.RegexLanguage;
5757
import com.oracle.truffle.regex.RegexOptions;
5858
import com.oracle.truffle.regex.RegexSyntaxException;
59+
import com.oracle.truffle.regex.flavor.js.ECMAScriptFlavor;
60+
import com.oracle.truffle.regex.flavor.python.PythonFlavor;
61+
import com.oracle.truffle.regex.flavor.ruby.RubyFlavor;
5962
import com.oracle.truffle.regex.test.dummylang.TRegexTestDummyLanguage;
60-
import com.oracle.truffle.regex.tregex.parser.flavors.ECMAScriptFlavor;
61-
import com.oracle.truffle.regex.tregex.parser.flavors.MatchingMode;
62-
import com.oracle.truffle.regex.tregex.parser.flavors.PythonFlavor;
63-
import com.oracle.truffle.regex.tregex.parser.flavors.RubyFlavor;
63+
import com.oracle.truffle.regex.tregex.parser.MatchingMode;
6464
import com.oracle.truffle.regex.tregex.string.Encodings;
6565

6666
public class RegexOptionsTest extends RegexTestBase {

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/RegexTestBase.java

Lines changed: 42 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.junit.BeforeClass;
5757

5858
import com.oracle.truffle.api.CompilerDirectives;
59+
import com.oracle.truffle.api.strings.TranscodingErrorHandler;
5960
import com.oracle.truffle.api.strings.TruffleString;
6061
import com.oracle.truffle.api.strings.TruffleStringBuilder;
6162
import com.oracle.truffle.regex.RegexSyntaxException.ErrorCode;
@@ -131,49 +132,42 @@ Value execRegex(Value compiledRegex, String input, int fromIndex) {
131132
}
132133

133134
Value execRegex(Value compiledRegex, Encodings.Encoding encoding, String input, int fromIndex) {
134-
return execRegex(compiledRegex, encoding, TruffleString.fromJavaStringUncached(input, encoding.getTStringEncoding()), fromIndex);
135-
}
136-
137-
Value execRegex(Value compiledRegex, Encodings.Encoding encoding, TruffleString input, int fromIndex) {
138-
TruffleString converted = input.switchEncodingUncached(encoding.getTStringEncoding());
135+
TruffleString converted = toTruffleString(input, encoding);
139136
int length = converted.byteLength(encoding.getTStringEncoding()) >> encoding.getStride();
140-
return execRegex(compiledRegex, encoding, converted, fromIndex, length, 0, length);
141-
}
142-
143-
Value execRegex(Value compiledRegex, Encodings.Encoding encoding, TruffleString input, int fromIndex, int toIndex, int regionFrom, int regionTo) {
144-
return compiledRegex.invokeMember("exec", input.switchEncodingUncached(encoding.getTStringEncoding()), fromIndex, toIndex, regionFrom, regionTo);
137+
return compiledRegex.invokeMember("exec", converted, fromIndex, length, 0, length);
145138
}
146139

147140
Value execRegexBoolean(Value compiledRegex, Encodings.Encoding encoding, String input, int fromIndex) {
148-
return execRegexBoolean(compiledRegex, encoding, TruffleString.fromJavaStringUncached(input, encoding.getTStringEncoding()), fromIndex);
149-
}
150-
151-
Value execRegexBoolean(Value compiledRegex, Encodings.Encoding encoding, TruffleString input, int fromIndex) {
152-
TruffleString converted = input.switchEncodingUncached(encoding.getTStringEncoding());
141+
TruffleString converted = toTruffleString(input, encoding);
153142
int length = converted.byteLength(encoding.getTStringEncoding()) >> encoding.getStride();
154-
return execRegexBoolean(compiledRegex, encoding, converted, fromIndex, length, 0, length);
143+
return compiledRegex.invokeMember("execBoolean", converted, fromIndex, length, 0, length);
155144
}
156145

157-
Value execRegexBoolean(Value compiledRegex, Encodings.Encoding encoding, TruffleString input, int fromIndex, int toIndex, int regionFrom, int regionTo) {
158-
return compiledRegex.invokeMember("execBoolean", input.switchEncodingUncached(encoding.getTStringEncoding()), fromIndex, toIndex, regionFrom, regionTo);
146+
private static TruffleString toTruffleString(String input, Encodings.Encoding encoding) {
147+
TruffleString tStringUTF16 = TruffleString.fromJavaStringUncached(input, TruffleString.Encoding.UTF_16);
148+
return tStringUTF16.switchEncodingUncached(encoding.getTStringEncoding(), TranscodingErrorHandler.DEFAULT_KEEP_SURROGATES_IN_UTF8);
159149
}
160150

161151
void testBoolean(String pattern, String flags, Map<String, String> options, String input, int fromIndex, boolean isMatch) {
152+
testBoolean(pattern, flags, options, getTRegexEncoding(), input, fromIndex, isMatch);
153+
}
154+
155+
void testBoolean(String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding, String input, int fromIndex, boolean isMatch) {
162156
String expectedResult = isMatch ? "Match" : "NoMatch";
163-
Source.Builder source = sourceBuilder(pattern, flags, options, getTRegexEncoding()).option("regexDummyLang.BooleanMatch", "true");
157+
Source.Builder source = sourceBuilder(pattern, flags, options, encoding).option("regexDummyLang.BooleanMatch", "true");
164158
try {
165159
Value compiledRegex = compileRegex(context, source);
166-
Value result = execRegexBoolean(compiledRegex, getTRegexEncoding(), input, fromIndex);
160+
Value result = execRegexBoolean(compiledRegex, encoding, input, fromIndex);
167161
if (result.asBoolean() != isMatch) {
168162
String actualResult = result.asBoolean() ? "Match" : "NoMatch";
169-
printTable(pattern, flags, input, fromIndex, expectedResult, actualResult);
163+
printTable(pattern, flags, encoding, input, fromIndex, expectedResult, actualResult);
170164
if (ASSERTS) {
171165
Assert.fail(options + regexSlashes(pattern, flags) + ' ' + quote(input) + " expected: " + expectedResult + ", actual: " + actualResult);
172166
}
173167
}
174168
} catch (PolyglotException e) {
175169
if (!ASSERTS && e.isSyntaxError()) {
176-
printTable(pattern, flags, input, fromIndex, expectedResult, syntaxErrorToString(e.getMessage()));
170+
printTable(pattern, flags, encoding, input, fromIndex, expectedResult, syntaxErrorToString(e.getMessage()));
177171
} else {
178172
throw e;
179173
}
@@ -198,24 +192,24 @@ void test(String pattern, String flags, Map<String, String> options, Encodings.E
198192
test(compiledRegex, pattern, flags, options, encoding, input, fromIndex, isMatch, captureGroupBoundsAndLastGroup);
199193
} catch (PolyglotException e) {
200194
if (!ASSERTS && e.isSyntaxError()) {
201-
printTable(pattern, flags, input, fromIndex, expectedResultToString(captureGroupBoundsAndLastGroup), syntaxErrorToString(e.getMessage()));
195+
printTable(pattern, flags, encoding, input, fromIndex, expectedResultToString(captureGroupBoundsAndLastGroup), syntaxErrorToString(e.getMessage()));
202196
} else {
203197
throw e;
204198
}
205199
}
206-
testBoolean(pattern, flags, options, input, fromIndex, isMatch);
200+
testBoolean(pattern, flags, options, encoding, input, fromIndex, isMatch);
207201
}
208202

209203
void test(Value compiledRegex, String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding, String input, int fromIndex, boolean isMatch,
210204
int... captureGroupBoundsAndLastGroup) {
211205
Value result = execRegex(compiledRegex, encoding, input, fromIndex);
212206
int groupCount = compiledRegex.getMember("groupCount").asInt();
213-
validateResult(pattern, flags, options, input, fromIndex, result, groupCount, isMatch, captureGroupBoundsAndLastGroup);
207+
validateResult(pattern, flags, options, encoding, input, fromIndex, result, groupCount, isMatch, captureGroupBoundsAndLastGroup);
214208

215209
if (TEST_REGION_FROM_TO) {
216210
TruffleStringBuilder sb = TruffleStringBuilder.create(encoding.getTStringEncoding());
217211
sb.appendCodePointUncached('_');
218-
sb.appendStringUncached(TruffleString.fromJavaStringUncached(input, encoding.getTStringEncoding()));
212+
sb.appendStringUncached(toTruffleString(input, encoding));
219213
sb.appendCodePointUncached('_');
220214
TruffleString padded = sb.toStringUncached();
221215
int length = padded.byteLength(encoding.getTStringEncoding()) >> encoding.getStride();
@@ -227,39 +221,40 @@ void test(Value compiledRegex, String pattern, String flags, Map<String, String>
227221
if ((boundsAdjusted.length & 1) == 1) {
228222
boundsAdjusted[boundsAdjusted.length - 1] = captureGroupBoundsAndLastGroup[boundsAdjusted.length - 1];
229223
}
230-
Value resultSubstring = execRegex(compiledRegex, encoding, padded, fromIndex + 1, length - 1, 1, length - 1);
231-
validateResult(pattern, flags, options, input, fromIndex + 1, resultSubstring, groupCount, isMatch, boundsAdjusted);
224+
Value resultSubstring = compiledRegex.invokeMember("exec", padded, fromIndex + 1, length - 1, 1, length - 1);
225+
validateResult(pattern, flags, options, encoding, input, fromIndex + 1, resultSubstring, groupCount, isMatch, boundsAdjusted);
232226
}
233227
}
234228

235-
private static void validateResult(String pattern, String flags, Map<String, String> options, String input, int fromIndex, Value result, int groupCount, boolean isMatch,
229+
private static void validateResult(String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding, String input, int fromIndex, Value result, int groupCount,
230+
boolean isMatch,
236231
int... captureGroupBoundsAndLastGroup) {
237232
if (isMatch != result.getMember("isMatch").asBoolean()) {
238-
fail(pattern, flags, options, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
233+
fail(pattern, flags, options, encoding, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
239234
return;
240235
}
241236
if (isMatch) {
242237
if (ASSERTS) {
243238
assertEquals(captureGroupBoundsAndLastGroup.length / 2, groupCount);
244239
}
245240
if (captureGroupBoundsAndLastGroup.length / 2 != groupCount) {
246-
fail(pattern, flags, options, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
241+
fail(pattern, flags, options, encoding, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
247242
return;
248243
}
249244
for (int i = 0; i < groupCount; i++) {
250245
if (captureGroupBoundsAndLastGroup[Group.groupNumberToBoundaryIndexStart(i)] != result.invokeMember("getStart", i).asInt() ||
251246
captureGroupBoundsAndLastGroup[Group.groupNumberToBoundaryIndexEnd(i)] != result.invokeMember("getEnd", i).asInt()) {
252-
fail(pattern, flags, options, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
247+
fail(pattern, flags, options, encoding, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
253248
return;
254249
}
255250
}
256251
} else if (result.getMember("isMatch").asBoolean()) {
257-
fail(pattern, flags, options, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
252+
fail(pattern, flags, options, encoding, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
258253
return;
259254
}
260255
int lastGroup = captureGroupBoundsAndLastGroup.length % 2 == 1 ? captureGroupBoundsAndLastGroup[captureGroupBoundsAndLastGroup.length - 1] : -1;
261256
if (lastGroup != result.getMember("lastGroup").asInt()) {
262-
fail(pattern, flags, options, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
257+
fail(pattern, flags, options, encoding, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
263258
return;
264259
}
265260
// print(pattern, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
@@ -305,7 +300,7 @@ void expectSyntaxError(String pattern, String flags, Map<String, String> options
305300
String msg = e.getMessage();
306301
int pos = e.getSourceLocation().getCharIndex();
307302
if (!msg.contains(expectedMessage)) {
308-
printTable(pattern, flags, input, fromIndex, syntaxErrorToString(expectedMessage), syntaxErrorToString(msg));
303+
printTable(pattern, flags, encoding, input, fromIndex, syntaxErrorToString(expectedMessage), syntaxErrorToString(msg));
309304
if (ASSERTS) {
310305
Assert.fail(String.format("/%s/%s : expected syntax error message containing \"%s\", but was \"%s\"", pattern, flags, expectedMessage, msg));
311306
}
@@ -317,7 +312,7 @@ void expectSyntaxError(String pattern, String flags, Map<String, String> options
317312
return;
318313
}
319314
Value result = execRegex(compiledRegex, encoding, input, fromIndex);
320-
printTable(pattern, flags, input, fromIndex, syntaxErrorToString(expectedMessage), actualResultToString(result, compiledRegex.getMember("groupCount").asInt(), false));
315+
printTable(pattern, flags, encoding, input, fromIndex, syntaxErrorToString(expectedMessage), actualResultToString(result, compiledRegex.getMember("groupCount").asInt(), false));
321316
if (ASSERTS) {
322317
Assert.fail(String.format("/%s/%s : expected \"%s\", but no exception was thrown", pattern, flags, expectedMessage));
323318
}
@@ -338,29 +333,30 @@ private static String generateErrorPosArrow(int pos) {
338333
return sb.append('^').toString();
339334
}
340335

341-
private static void fail(String pattern, String flags, Map<String, String> options, String input, int fromIndex, Value result, int groupCount, int... captureGroupBoundsAndLastGroup) {
336+
private static void fail(String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding, String input, int fromIndex, Value result, int groupCount,
337+
int... captureGroupBoundsAndLastGroup) {
342338
String expectedResult = expectedResultToString(captureGroupBoundsAndLastGroup);
343339
String actualResult = actualResultToString(result, groupCount, captureGroupBoundsAndLastGroup.length % 2 == 1);
344-
printTable(pattern, flags, input, fromIndex, expectedResult, actualResult);
340+
printTable(pattern, flags, encoding, input, fromIndex, expectedResult, actualResult);
345341
if (ASSERTS) {
346342
Assert.fail(options + regexSlashes(pattern, flags) + ' ' + quote(input) + " expected: " + expectedResult + ", actual: " + actualResult);
347343
}
348344
}
349345

350-
private static void print(String pattern, String flags, String input, int fromIndex, Value result, int groupCount, int... captureGroupBoundsAndLastGroup) {
346+
private static void print(String pattern, String flags, Encodings.Encoding encoding, String input, int fromIndex, Value result, int groupCount, int... captureGroupBoundsAndLastGroup) {
351347
String actualResult = actualResultToString(result, groupCount, captureGroupBoundsAndLastGroup.length % 2 == 1);
352-
printTable(pattern, flags, input, fromIndex, actualResult, "");
348+
printTable(pattern, flags, encoding, input, fromIndex, actualResult, "");
353349
}
354350

355-
private static void printTable(String pattern, String flags, String input, int fromIndex, String expectedResult, String actualResult) {
351+
private static void printTable(String pattern, String flags, Encodings.Encoding encoding, String input, int fromIndex, String expectedResult, String actualResult) {
356352
if (TABLE_OMIT_FROM_INDEX) {
357-
String format = "%-20s%-20s%-30s%s%n";
358-
printTableHeader(format, "Pattern", "Input", "Expected result", "TRegex result");
359-
System.out.printf(format, regexSlashes(pattern, flags), quote(input), expectedResult, actualResult);
353+
String format = "%-20s%-12s%-20s%-30s%s%n";
354+
printTableHeader(format, "Pattern", "Encoding", "Input", "Expected result", "TRegex result");
355+
System.out.printf(format, regexSlashes(pattern, flags), encoding, quote(input), expectedResult, actualResult);
360356
} else {
361357
String format = "%-16s%-16s%-10s%-20s%s%n";
362-
printTableHeader(format, "Pattern", "Input", "Offset", "Expected result", "TRegex result");
363-
System.out.printf(format, regexSlashes(pattern, flags), quote(input), fromIndex, expectedResult, actualResult);
358+
printTableHeader(format, "Pattern", "Encoding", "Input", "Offset", "Expected result", "TRegex result");
359+
System.out.printf(format, regexSlashes(pattern, flags), encoding, quote(input), fromIndex, expectedResult, actualResult);
364360
}
365361
}
366362

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/RubyTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
import org.junit.Assert;
4646
import org.junit.Test;
4747

48-
import com.oracle.truffle.regex.errors.RbErrorMessages;
48+
import com.oracle.truffle.regex.flavor.ruby.RbErrorMessages;
4949
import com.oracle.truffle.regex.tregex.string.Encodings;
5050

5151
public class RubyTests extends RegexTestBase {

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/AbstractRegexObject.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@
4040
*/
4141
package com.oracle.truffle.regex;
4242

43+
import java.util.LinkedHashMap;
44+
import java.util.List;
45+
import java.util.Map;
46+
4347
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
4448
import com.oracle.truffle.api.TruffleLanguage;
4549
import com.oracle.truffle.api.interop.InteropLibrary;
@@ -51,10 +55,6 @@
5155
import com.oracle.truffle.regex.util.TruffleReadOnlyMap;
5256
import com.oracle.truffle.regex.util.TruffleSmallReadOnlyStringToIntMap;
5357

54-
import java.util.LinkedHashMap;
55-
import java.util.List;
56-
import java.util.Map;
57-
5858
@ExportLibrary(InteropLibrary.class)
5959
public abstract class AbstractRegexObject implements TruffleObject {
6060

0 commit comments

Comments
 (0)