|
| 1 | +/* |
| 2 | + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. |
| 3 | + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | + * |
| 5 | + * This code is free software; you can redistribute it and/or modify it |
| 6 | + * under the terms of the GNU General Public License version 2 only, as |
| 7 | + * published by the Free Software Foundation. Oracle designates this |
| 8 | + * particular file as subject to the "Classpath" exception as provided |
| 9 | + * by Oracle in the LICENSE file that accompanied this code. |
| 10 | + * |
| 11 | + * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | + * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | + * accompanied this code). |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License version |
| 18 | + * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | + * |
| 21 | + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 22 | + * or visit www.oracle.com if you need additional information or have any |
| 23 | + * questions. |
| 24 | + */ |
| 25 | +package build.tools.generatecharacter; |
| 26 | + |
| 27 | +import java.nio.file.Files; |
| 28 | +import java.nio.file.Paths; |
| 29 | +import java.nio.file.StandardOpenOption; |
| 30 | +import java.util.Arrays; |
| 31 | +import java.util.stream.Collectors; |
| 32 | +import java.util.stream.IntStream; |
| 33 | + |
/**
 * Build-time generator that expands a Java source template with case-folding
 * tables read from a Unicode {@code CaseFolding.txt} data file.
 *
 * <p>Two markers are recognised in the template; any template line that
 * contains a marker is replaced as a whole:
 * <ul>
 *   <li>{@code %%%Entries} - replaced with the {@code CASE_FOLDING_CPS} and
 *       {@code CASE_FOLDING_VALUES} array declarations built from the data
 *       lines whose status field is {@code C} or {@code F};</li>
 *   <li>{@code %%%Expanded_Case_Map_Entries} - replaced with a list of
 *       {@code entry(0x..., 0x...)} items built from the data lines whose
 *       status field is {@code C}, {@code T} or {@code S}.</li>
 * </ul>
 */
public class GenerateCaseFolding {

    /** Selects the data lines used for the java.lang tables (full/1:M case folding). */
    private static final String SUPPORTED_TYPES = "^.*; [CF]; .*$";

    /** Selects the data lines used for the util.regex expanded case map. */
    private static final String EXPANDED_SUPPORTED_TYPES = "^.*; [CTS]; .*$";

    /** Number of bits reserved for each folded code point inside a packed value. */
    private static final int BITS_PER_FOLDED_CHAR = 16;

    /** Bit position of the length field inside a packed multi-code-point value. */
    private static final int LENGTH_SHIFT = 48;

    /** Largest number of folded code points that fits below the length field. */
    private static final int MAX_FOLDED_CHARS = LENGTH_SHIFT / BITS_PER_FOLDED_CHAR;

    /**
     * Entry point.
     *
     * @param args exactly three paths: the template file, the
     *             {@code CaseFolding.txt} data file, and the source file to
     *             generate (created, or truncated if it already exists)
     * @throws Throwable if any file cannot be read or written, or if the data
     *                   file contains a field that is not valid hexadecimal
     */
    public static void main(String[] args) throws Throwable {
        if (args.length != 3) {
            System.err.println("Usage: java GenerateCaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
            System.exit(1);
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);

        // java.lang
        // Files.lines keeps the underlying file open until the stream is
        // closed, so every use is wrapped in try-with-resources.
        String[][] caseFoldings;
        try (var lines = Files.lines(caseFoldingTxt)) {
            caseFoldings = lines
                .filter(line -> isDataLine(line, SUPPORTED_TYPES))
                .map(GenerateCaseFolding::parseFolding)
                .toArray(String[][]::new);
        }

        // util.regex
        String expandedCaseFoldingEntries;
        try (var lines = Files.lines(caseFoldingTxt)) {
            expandedCaseFoldingEntries = lines
                .filter(line -> isDataLine(line, EXPANDED_SUPPORTED_TYPES))
                .map(line -> line.split("; "))
                .filter(GenerateCaseFolding::isExpandedEntry)
                .map(cols -> String.format(" entry(0x%s, 0x%s)", cols[0], cols[2]))
                .collect(Collectors.joining(",\n", "", ""));
        }

        // The selection above does not pick up the dotless-i mapping
        //     0049; T; 0131; # LATIN CAPITAL LETTER I
        // so that pair is added by hand to support the 'I's.
        final String T_0x0131_0x49 = String.format(" entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);

        // Both replacement texts are independent of the template line, so they
        // are built once up front rather than once per matching line.
        final String foldingEntries = genFoldingEntries(caseFoldings);
        final String expandedEntries = T_0x0131_0x49 + expandedCaseFoldingEntries;

        // The template is read completely (and closed) before the output file
        // is opened for writing.
        Iterable<String> generated;
        try (var template = Files.lines(templateFile)) {
            generated = template
                .map(line -> line.contains("%%%Entries") ? foldingEntries : line)
                .map(line -> line.contains("%%%Expanded_Case_Map_Entries") ? expandedEntries : line)
                .collect(Collectors.toList());
        }
        Files.write(genSrcFile, generated,
                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
    }

    /**
     * Tells whether {@code line} is a non-comment data line matching
     * {@code typePattern}.
     */
    private static boolean isDataLine(String line, String typePattern) {
        return !line.startsWith("#") && line.matches(typePattern);
    }

    /**
     * Splits one data line into {@code [codePoint, folded1, folded2, ...]},
     * all as the hexadecimal strings found in the file.
     */
    private static String[] parseFolding(String line) {
        var fields = line.split("; ");
        var mapped = fields[2].trim().split(" ");
        var folding = new String[mapped.length + 1];
        folding[0] = fields[0];
        System.arraycopy(mapped, 0, folding, 1, mapped.length);
        return folding;
    }

    /**
     * Keeps only the entries whose folded code point does not map back to the
     * original code point through {@code Character.toUpperCase} or
     * {@code Character.toLowerCase}.
     *
     * @param cols the "; "-separated columns of a data line; column 0 is the
     *             code point and column 2 the (single) folded code point
     */
    private static boolean isExpandedEntry(String[] cols) {
        var cp1 = Integer.parseInt(cols[0], 16);
        var cp2 = Integer.parseInt(cols[2], 16);
        return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
    }

    /**
     * Packs one folding into a {@code long}.
     *
     * <p>If the source code point is supplementary, or the folding consists of
     * a single code point, the result is just the first folded code point.
     * Otherwise the folded code points occupy consecutive 16-bit slots starting
     * at bit 0 and the number of folded code points is stored from bit 48 up.
     *
     * @param folding {@code [codePoint, folded1, ...]} as hexadecimal strings
     * @return the packed value
     * @throws IllegalArgumentException if a packed folding has more code
     *         points than fit below the length field
     */
    private static long foldingToLong(String[] folding) {
        int cp = Integer.parseInt(folding[0], 16);
        long value = (long) Integer.parseInt(folding[1], 16);
        int foldedCount = folding.length - 1;
        if (!Character.isSupplementaryCodePoint(cp) && foldedCount != 1) {
            if (foldedCount > MAX_FOLDED_CHARS) {
                throw new IllegalArgumentException(
                    "Folding of " + folding[0] + " has " + foldedCount
                    + " code points; at most " + MAX_FOLDED_CHARS + " can be packed");
            }
            int shift = BITS_PER_FOLDED_CHAR;
            for (int j = 2; j < folding.length; j++) {
                value |= (long) Integer.parseInt(folding[j], 16) << shift;
                // Advance to the next 16-bit slot. (Doubling the shift, as was
                // done previously, only coincides with this for 16 -> 32.)
                shift += BITS_PER_FOLDED_CHAR;
            }
            value |= (long) foldedCount << LENGTH_SHIFT;
        }
        return value;
    }

    /**
     * Renders the two parallel array declarations that replace the
     * {@code %%%Entries} marker: the source code points, ten per row, and the
     * packed folding values, six per row.
     *
     * @param foldings one {@code [codePoint, folded1, ...]} row per data line
     * @return the Java source text of both declarations
     */
    private static String genFoldingEntries(String[][] foldings) {
        var codePoints = new String[foldings.length];
        var values = new String[foldings.length];
        for (int i = 0; i < foldings.length; i++) {
            codePoints[i] = String.format("0X%s", foldings[i][0]);
            values[i] = String.format("0x%013xL", foldingToLong(foldings[i]));
        }

        StringBuilder sb = new StringBuilder();
        sb.append(" private static final int[] CASE_FOLDING_CPS = {\n");
        appendRows(sb, codePoints, 10);
        sb.append(" };\n\n");

        sb.append(" private static final long[] CASE_FOLDING_VALUES = {\n");
        appendRows(sb, values, 6);
        sb.append(" };\n");
        return sb.toString();
    }

    /**
     * Appends {@code items} separated by ", " with {@code width} items per
     * row; every row starts with an indent and ends with a newline.
     */
    private static void appendRows(StringBuilder sb, String[] items, int width) {
        for (int i = 0; i < items.length; i++) {
            boolean last = i == items.length - 1;
            if (i % width == 0) {
                sb.append(" "); // indent
            }
            sb.append(items[i]);
            if (!last) {
                sb.append(", ");
            }
            if (i % width == width - 1 || last) {
                sb.append("\n");
            }
        }
    }
}
0 commit comments