Skip to content

Commit b97ed66

Browse files
author
Xueming Shen
committed
8365675: Add String Unicode Case-Folding Support
Reviewed-by: rriggs, naoto, ihse
1 parent 618732f commit b97ed66

File tree

13 files changed

+1245
-212
lines changed

13 files changed

+1245
-212
lines changed

make/ToolsJdk.gmk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ TOOL_GENERATEEXTRAPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_too
7979
build.tools.generateextraproperties.GenerateExtraProperties
8080

8181
TOOL_GENERATECASEFOLDING = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
82-
build.tools.generatecharacter.CaseFolding
82+
build.tools.generatecharacter.GenerateCaseFolding
8383

8484
TOOL_MAKEZIPREPRODUCIBLE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
8585
build.tools.makezipreproducible.MakeZipReproducible

make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java

Lines changed: 0 additions & 73 deletions
This file was deleted.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation. Oracle designates this
8+
* particular file as subject to the "Classpath" exception as provided
9+
* by Oracle in the LICENSE file that accompanied this code.
10+
*
11+
* This code is distributed in the hope that it will be useful, but WITHOUT
12+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
* version 2 for more details (a copy is included in the LICENSE file that
15+
* accompanied this code).
16+
*
17+
* You should have received a copy of the GNU General Public License version
18+
* 2 along with this work; if not, write to the Free Software Foundation,
19+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20+
*
21+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22+
* or visit www.oracle.com if you need additional information or have any
23+
* questions.
24+
*/
25+
package build.tools.generatecharacter;
26+
27+
import java.nio.file.Files;
28+
import java.nio.file.Paths;
29+
import java.nio.file.StandardOpenOption;
30+
import java.util.Arrays;
31+
import java.util.stream.Collectors;
32+
import java.util.stream.IntStream;
33+
34+
/**
 * Build tool that fills in a {@code CaseFolding.java} template from the
 * Unicode {@code CaseFolding.txt} data file.
 *
 * <p>Two placeholders in the template are substituted:
 * <ul>
 *   <li>a line containing {@code %%%Entries} is replaced by two parallel array
 *       declarations, {@code CASE_FOLDING_CPS} and {@code CASE_FOLDING_VALUES},
 *       built from the {@code C} and {@code F} records of the data file;</li>
 *   <li>a line containing {@code %%%Expanded_Case_Map_Entries} is replaced by
 *       a comma-separated list of {@code entry(0x.., 0x..)} items built from
 *       the {@code C}, {@code T} and {@code S} records whose folded code point
 *       does not map back to the original via
 *       {@link Character#toUpperCase(int)} / {@link Character#toLowerCase(int)}.</li>
 * </ul>
 * Every other template line is copied through unchanged.
 */
public class GenerateCaseFolding {

    /**
     * Entry point.
     *
     * @param args exactly three paths: the template file, the
     *             {@code CaseFolding.txt} data file, and the source file to
     *             generate (created, or truncated if it already exists)
     * @throws Throwable if any of the files cannot be read or written, or if
     *                   the data file contains a record that cannot be parsed
     */
    public static void main(String[] args) throws Throwable {
        if (args.length != 3) {
            System.err.println("Usage: java GenerateCaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
            System.exit(1);
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);

        // java.lang
        String[][] caseFoldings = readFoldings(caseFoldingTxt);

        // util.regex
        String expandedCaseFoldingEntries = readExpandedEntries(caseFoldingTxt);

        // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
        // 0049; T; 0131; # LATIN CAPITAL LETTER I
        final String T_0x0131_0x49 = String.format(" entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);

        // Files.lines keeps the file open until the stream is closed, so the
        // template stream is scoped to a try-with-resources block.
        try (var template = Files.lines(templateFile)) {
            var generated = template
                .map(line -> line.contains("%%%Entries") ? genFoldingEntries(caseFoldings) : line)
                .map(line -> line.contains("%%%Expanded_Case_Map_Entries")
                        ? T_0x0131_0x49 + expandedCaseFoldingEntries
                        : line)
                .collect(Collectors.toList());
            Files.write(genSrcFile, generated,
                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        }
    }

    /**
     * Reads the {@code C} and {@code F} records of the data file.
     *
     * @param caseFoldingTxt path of {@code CaseFolding.txt}
     * @return one row per record; element 0 is the source code point and the
     *         remaining elements are the folded code points, all as the hex
     *         strings found in the file
     * @throws java.io.IOException if the file cannot be read
     */
    private static String[][] readFoldings(java.nio.file.Path caseFoldingTxt)
            throws java.io.IOException {
        var supportedTypes = "^.*; [CF]; .*$";   // full/1:M case folding
        try (var lines = Files.lines(caseFoldingTxt)) {
            return lines
                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
                .map(line -> {
                    // record layout: <code>; <status>; <mapping>; # <name>
                    var fields = line.split("; ");
                    var mapping = fields[2].trim().split(" ");
                    var folding = new String[mapping.length + 1];
                    folding[0] = fields[0];
                    System.arraycopy(mapping, 0, folding, 1, mapping.length);
                    return folding;
                })
                .toArray(String[][]::new);
        }
    }

    /**
     * Builds the text that replaces the {@code %%%Expanded_Case_Map_Entries}
     * placeholder from the {@code C}, {@code T} and {@code S} records.
     *
     * @param caseFoldingTxt path of {@code CaseFolding.txt}
     * @return the selected records formatted as {@code entry(0x.., 0x..)} and
     *         joined with {@code ",\n"}; empty if no record qualifies
     * @throws java.io.IOException if the file cannot be read
     */
    private static String readExpandedEntries(java.nio.file.Path caseFoldingTxt)
            throws java.io.IOException {
        var expandedSupportedTypes = "^.*; [CTS]; .*$";
        try (var lines = Files.lines(caseFoldingTxt)) {
            return lines
                .filter(line -> !line.startsWith("#") && line.matches(expandedSupportedTypes))
                .map(line -> line.split("; "))
                .filter(cols -> {
                    // the folding case doesn't map back to the original char.
                    var cp1 = Integer.parseInt(cols[0], 16);
                    var cp2 = Integer.parseInt(cols[2], 16);
                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
                })
                .map(cols -> String.format(" entry(0x%s, 0x%s)", cols[0], cols[2]))
                .collect(Collectors.joining(",\n", "", ""));
        }
    }

    /**
     * Packs one folding record into a {@code long}.
     *
     * <p>A record with a single folded code point is stored as that code point.
     * A record with several folded values whose source code point is not
     * supplementary is stored as: first value in bits 0-15, second in bits
     * 16-31, third in bits 32-47, and the number of folded values from bit 48
     * upwards. For a supplementary source code point only the first folded
     * value is stored.
     *
     * @param folding element 0 is the source code point, the rest are the
     *                folded values, all as hex strings
     * @return the packed value
     * @throws IllegalArgumentException if a multi-value folding has more than
     *         three folded values, which the packed layout cannot hold
     */
    private static long foldingToLong(String[] folding) {
        int cp = Integer.parseInt(folding[0], 16);
        long value = (long) Integer.parseInt(folding[1], 16);
        if (!Character.isSupplementaryCodePoint(cp) && folding.length != 2) {
            if (folding.length > 4) {
                // A fourth value would land on the length field at bit 48.
                throw new IllegalArgumentException("Case folding of " + folding[0] + " has "
                        + (folding.length - 1) + " code points; at most 3 can be packed");
            }
            // Each further value occupies the next 16-bit slot.
            var shift = 16;
            for (int j = 2; j < folding.length; j++) {
                value |= (long) Integer.parseInt(folding[j], 16) << shift;
                shift += 16;
            }
            value = value | (long) (folding.length - 1) << 48;
        }
        return value;
    }

    /**
     * Renders the two parallel array declarations that replace the
     * {@code %%%Entries} placeholder.
     *
     * @param foldings rows as produced from the data file: source code point
     *                 followed by its folded code points, as hex strings
     * @return source text declaring {@code CASE_FOLDING_CPS} (10 values per
     *         line) and {@code CASE_FOLDING_VALUES} (6 values per line)
     */
    private static String genFoldingEntries(String[][] foldings) {
        StringBuilder sb = new StringBuilder();
        sb.append(" private static final int[] CASE_FOLDING_CPS = {\n");
        int width = 10;
        for (int i = 0; i < foldings.length; i++) {
            if (i % width == 0) {
                sb.append(" ");
            }
            sb.append(String.format("0X%s", foldings[i][0]));
            if (i < foldings.length - 1) {
                sb.append(", ");
            }
            if (i % width == width - 1 || i == foldings.length - 1) {
                sb.append("\n");
            }
        }
        sb.append(" };\n\n");

        sb.append(" private static final long[] CASE_FOLDING_VALUES = {\n");
        width = 6;
        for (int i = 0; i < foldings.length; i++) {
            if (i % width == 0) {
                sb.append(" "); // indent
            }
            sb.append(String.format("0x%013xL", foldingToLong(foldings[i])));
            if (i < foldings.length - 1) {
                sb.append(", ");
            }
            if (i % width == width - 1 || i == foldings.length - 1) {
                sb.append("\n");
            }
        }
        sb.append(" };\n");
        return sb.toString();
    }
}

make/modules/java.base/gensrc/GensrcCharacterData.gmk

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,22 @@ TARGETS += $(GENSRC_CHARACTERDATA)
7272

7373
################################################################################
7474

75+
76+
GENSRC_STRINGCASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/lang/CaseFolding.java
77+
78+
STRINGCASEFOLDING_TEMPLATE := $(MODULE_SRC)/share/classes/jdk/internal/lang/CaseFolding.java.template
79+
CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
80+
81+
$(GENSRC_STRINGCASEFOLDING): $(BUILD_TOOLS_JDK) $(STRINGCASEFOLDING_TEMPLATE) $(CASEFOLDINGTXT)
82+
$(call LogInfo, Generating $@)
83+
$(call MakeTargetDir)
84+
$(TOOL_GENERATECASEFOLDING) \
85+
$(STRINGCASEFOLDING_TEMPLATE) \
86+
$(CASEFOLDINGTXT) \
87+
$(GENSRC_STRINGCASEFOLDING)
88+
89+
TARGETS += $(GENSRC_STRINGCASEFOLDING)
90+
91+
7592
endif # include guard
7693
include MakeIncludeEnd.gmk

make/modules/java.base/gensrc/GensrcRegex.gmk

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,5 @@ TARGETS += $(GENSRC_INDICCONJUNCTBREAK)
5050

5151
################################################################################
5252

53-
GENSRC_CASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/CaseFolding.java
54-
55-
CASEFOLDINGTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/CaseFolding.java.template
56-
CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
57-
58-
$(GENSRC_CASEFOLDING): $(BUILD_TOOLS_JDK) $(CASEFOLDINGTEMP) $(CASEFOLDINGTXT)
59-
$(call LogInfo, Generating $@)
60-
$(call MakeTargetDir)
61-
$(TOOL_GENERATECASEFOLDING) \
62-
$(CASEFOLDINGTEMP) \
63-
$(CASEFOLDINGTXT) \
64-
$(GENSRC_CASEFOLDING)
65-
66-
TARGETS += $(GENSRC_CASEFOLDING)
67-
68-
################################################################################
69-
7053
endif # include guard
7154
include MakeIncludeEnd.gmk

0 commit comments

Comments
 (0)