Skip to content

Commit 2017b26

Browse files
committed
TRegex: update CaseFoldData for JavaFlavor SDK version 25
1 parent 4695863 commit 2017b26

File tree

4 files changed

+26
-13
lines changed

4 files changed

+26
-13
lines changed

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/flavor/java/JavaFlavor.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242

4343
import com.oracle.truffle.api.CompilerDirectives;
4444
import com.oracle.truffle.regex.RegexLanguage;
45+
import com.oracle.truffle.regex.RegexOptions;
4546
import com.oracle.truffle.regex.RegexSource;
4647
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
4748
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
@@ -85,12 +86,16 @@ public RegexValidator createValidator(RegexLanguage language, RegexSource source
8586

8687
@Override
8788
public EqualsIgnoreCasePredicate getEqualsIgnoreCasePredicate(RegexAST ast) {
88-
return (a, b, altMode) -> getCaseFoldingAlgorithm(altMode).getEqualsPredicate().test(a, b);
89+
return (a, b, altMode) -> getCaseFoldingAlgorithm(ast.getOptions(), altMode).getEqualsPredicate().test(a, b);
8990
}
9091

91-
public static CaseFoldData.CaseFoldUnfoldAlgorithm getCaseFoldingAlgorithm(boolean isUnicodeCase) {
92+
public static CaseFoldData.CaseFoldUnfoldAlgorithm getCaseFoldingAlgorithm(RegexOptions options, boolean isUnicodeCase) {
9293
if (isUnicodeCase) {
93-
return CaseFoldData.CaseFoldUnfoldAlgorithm.JavaUnicode;
94+
if (options.getJavaJDKVersion() <= 24) {
95+
return CaseFoldData.CaseFoldUnfoldAlgorithm.JavaUnicode15;
96+
} else {
97+
return CaseFoldData.CaseFoldUnfoldAlgorithm.JavaUnicode16;
98+
}
9499
} else {
95100
return CaseFoldData.CaseFoldUnfoldAlgorithm.Ascii;
96101
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/flavor/java/JavaRegexLexer.java

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -288,9 +288,7 @@ protected boolean featureEnabledClassSetExpressions() {
288288

289289
@Override
290290
protected void caseFoldUnfold(CodePointSetAccumulator charClass) {
291-
CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = (getLocalFlags().isUnicodeCase() || getLocalFlags().isUnicodeCharacterClass())
292-
? CaseFoldData.CaseFoldUnfoldAlgorithm.JavaUnicode
293-
: CaseFoldData.CaseFoldUnfoldAlgorithm.Ascii;
291+
CaseFoldData.CaseFoldUnfoldAlgorithm caseFolding = JavaFlavor.getCaseFoldingAlgorithm(source.getOptions(), getLocalFlags().isUnicodeCase() || getLocalFlags().isUnicodeCharacterClass());
294292
CaseFoldData.applyCaseFoldUnfold(charClass, compilationBuffer.getCodePointSetAccumulator1(), caseFolding);
295293
}
296294

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/flavor/java/JavaRegexParser.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,8 @@ public RegexAST parse() {
207207
break;
208208
case classSet:
209209
astBuilder.addClassSet((Token.ClassSet) token,
210-
getFlags().isCaseInsensitive() ? JavaFlavor.getCaseFoldingAlgorithm(getFlags().isUnicodeCase() || getFlags().isUnicodeCharacterClass()) : null);
210+
getFlags().isCaseInsensitive() ? JavaFlavor.getCaseFoldingAlgorithm(lexer.source.getOptions(), getFlags().isUnicodeCase() || getFlags().isUnicodeCharacterClass())
211+
: null);
211212
break;
212213
case literalString:
213214
literalString((Token.LiteralString) token);

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/CaseFoldData.java

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ public enum CaseFoldUnfoldAlgorithm {
6565
Ascii,
6666
ECMAScriptNonUnicode,
6767
ECMAScriptUnicode,
68-
JavaUnicode,
68+
JavaUnicode15,
69+
JavaUnicode16,
6970
OracleDBSimple,
7071
PythonUnicode;
7172

@@ -93,12 +94,13 @@ private static CaseFoldEquivalenceTable getTable(CaseFoldUnfoldAlgorithm algorit
9394
return UNICODE_16_0_0_EQ_SIMPLE;
9495
case Ascii:
9596
return ASCII;
96-
case JavaUnicode:
97-
// Currently supported JDK versions for the Java flavor are 21, 22 and 23, where 21
98-
// uses Unicode version 15.0.0 and the other versions use Unicode 15.1.0. There are
99-
// no differences in the case folding table between those two Unicode versions, so
100-
// we can use the same table on all supported JDK versions for now.
97+
case JavaUnicode15:
98+
// JDK version 21 uses Unicode version 15.0.0, and JDK versions 22-24 use Unicode
99+
// 15.1.0. There are no differences in the case folding table between those two
100+
// Unicode versions, so we can use the same table for both.
101101
return UNICODE_15_0_0_JAVA;
102+
case JavaUnicode16:
103+
return UNICODE_16_0_0_JAVA;
102104
case PythonUnicode:
103105
return UNICODE_15_1_0_PY;
104106
default:
@@ -1162,6 +1164,13 @@ public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) {
11621164
private static final CaseFoldEquivalenceTable UNICODE_15_0_0_JAVA = new CaseFoldEquivalenceTable(UNICODE_15_1_0_PY, new CodePointSet[]{
11631165
}, new int[]{
11641166
});
1167+
private static final CaseFoldEquivalenceTable UNICODE_16_0_0_JAVA = new CaseFoldEquivalenceTable(UNICODE_16_0_0_EQ_SIMPLE, new CodePointSet[]{
1168+
rangeSet(0x000049, 0x000049, 0x000069, 0x000069, 0x000130, 0x000131),
1169+
}, new int[]{
1170+
0x000049, 0x000049, DIRECT_MAPPING, 0,
1171+
0x000069, 0x000069, DIRECT_MAPPING, 0,
1172+
0x000130, 0x000131, DIRECT_MAPPING, 0,
1173+
});
11651174
private static final CaseFoldTable UNICODE_15_1_0_CF_FULL = new CaseFoldTable(null, new int[]{
11661175
0x000041, 0x00005a, INTEGER_OFFSET, 32,
11671176
0x0000b5, 0x0000b5, INTEGER_OFFSET, 775,

0 commit comments

Comments
 (0)