Skip to content

Commit 95149ea

Browse files
authored
CLDR-18956 cldr-json: RBNF new format (#5060)
1 parent e9dea38 commit 95149ea

File tree

2 files changed

+172
-44
lines changed

2 files changed

+172
-44
lines changed

docs/site/downloads/cldr-48.md

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ adapting software to the conventions of different languages.
1919

2020
CLDR 48 was an open submission cycle allowing contributors to supply data for their languages via the CLDR Survey Tool —
2121
data that is widely used to support much of the world’s software.
22-
This data is also a factor in determining which languages are supported on mobile phones and computer operating systems.
22+
This data is also a factor in determining which languages are supported on mobile phones and computer operating systems.
2323

2424
### Changes
2525

@@ -225,7 +225,38 @@ For a full listing, see [Transforms Delta].
225225

226226
### JSON Data Changes
227227

228-
- TBD
228+
- RBNF
229+
- Just as with the RBNF data format change in XML [CLDR-8909], the JSON data also has a change in structure [CLDR-18956].
230+
- Below is an example of the changed data format.
231+
- The new data item is the `_rbnfRulesFile` key. Its value is the name of a data file in the same directory, containing the raw rules. (Note: Do not interpret the .txt file’s name in any way.)
232+
- The previous data format is included for this release, but will be removed in a future release. At that point, the `%digits-ordinal` key in the example below (and any other such keys) will be removed.
233+
234+
235+
```js
236+
{
237+
"rbnf": {
238+
"OrdinalRules": {
239+
"%digits-ordinal": [
240+
[
241+
"-x",
242+
"−→→;"
243+
],
244+
[
245+
"0",
246+
"=#,##0=;"
247+
]
248+
],
249+
"_rbnfRulesFile": "ar-OrdinalRules.txt"
250+
},
251+
}
252+
}
253+
```
254+
255+
The `ar-OrdinalRules.txt` file contains all rules for this locale:
256+
257+
%digits-ordinal:
258+
-x: −>>;
259+
0: =#,##0=;
229260

230261
### File Changes
231262
The following files are new in the release:
@@ -258,7 +289,7 @@ The following files are new in the release:
258289
### V49 advance warnings
259290
The following changes are planned for CLDR 49. Please plan accordingly to avoid disruption.
260291
- The default week numbering changes to ISO instead of being based on the calendar week starting in CLDR 48 [CLDR-18275]. The calendar week will be more clearly targeted at matching usage in displayed month calendars.
261-
- The pre-Meiji Japanese eras will be removed: There was too much uncertainty in the exact values
292+
- The pre-Meiji Japanese eras will be removed: There was too much uncertainty in the exact values
262293
and feedback that the general practice for exact dates is to use Gregorian for pre-Meiji dates.
263294
- The major components in [supplementalData.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml) and [supplementalMetadata.xml](https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalMetadata.xml) files are slated to be organized more logically and moved into separate files.
264295
- This will make it easier for implementations to filter out data that they don't need, and make internal maintenance easier. This will not affect the data: just which file it is located in. Please plan to update XML and JSON parsers accordingly.
@@ -281,6 +312,8 @@ in particular, see [Exhibit 1](https://unicode.org/copyright.html#Exhibit1).
281312
For web pages with different views of CLDR data, see [http://cldr.unicode.org/index/charts](/index/charts).
282313

283314
[CLDR-5708]: https://unicode-org.atlassian.net/browse/CLDR-5708
315+
[CLDR-8909]: https://unicode-org.atlassian.net/browse/CLDR-8909
316+
[CLDR-11400]: https://unicode-org.atlassian.net/browse/CLDR-11400
284317
[CLDR-14479]: https://unicode-org.atlassian.net/browse/CLDR-14479
285318
[CLDR-16004]: https://unicode-org.atlassian.net/browse/CLDR-16004
286319
[CLDR-16715]: https://unicode-org.atlassian.net/browse/CLDR-16715
@@ -290,7 +323,8 @@ For web pages with different views of CLDR data, see [http://cldr.unicode.org/in
290323
[CLDR-18219]: https://unicode-org.atlassian.net/browse/CLDR-18219
291324
[CLDR-18275]: https://unicode-org.atlassian.net/browse/CLDR-18275
292325
[CLDR-18311]: https://unicode-org.atlassian.net/browse/CLDR-18311
293-
[CLDR-11400]: https://unicode-org.atlassian.net/browse/CLDR-11400
326+
[CLDR-18956]: https://unicode-org.atlassian.net/browse/CLDR-18956
327+
294328
[Delta DTDs]: https://unicode.org/cldr/charts/48/supplemental/dtd_deltas.html
295329
[BCP47 Delta]: https://unicode.org/cldr/charts/48/delta/bcp47.html
296330
[Supplemental Delta]: https://unicode.org/cldr/charts/48/delta/supplemental-data.html

tools/cldr-code/src/main/java/org/unicode/cldr/json/Ldml2JsonConverter.java

Lines changed: 134 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.unicode.cldr.util.PatternCache;
6767
import org.unicode.cldr.util.StandardCodes;
6868
import org.unicode.cldr.util.SupplementalDataInfo;
69+
import org.unicode.cldr.util.SupplementalDataInfo.RBNFGroup;
6970
import org.unicode.cldr.util.Timer;
7071
import org.unicode.cldr.util.XMLSource;
7172
import org.unicode.cldr.util.XPathParts;
@@ -90,7 +91,7 @@ public class Ldml2JsonConverter {
9091
private static final String CLDR_PKG_PREFIX = "cldr-";
9192
private static final String FULL_TIER_SUFFIX = "-full";
9293
private static final String MODERN_TIER_SUFFIX = "-modern";
93-
private static final String TRANSFORM_RAW_SUFFIX = ".txt";
94+
private static final String EXTERNAL_RAW_SUFFIX = ".txt";
9495
private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName());
9596

9697
enum RunType {
@@ -870,13 +871,45 @@ private int convertCldrItems(
870871
}
871872
if (type == RunType.rbnf) {
872873
if (item.getFullPath().contains("/rbnfRules")) {
873-
System.err.println("TODO CLDR-18956: skipping NEW rules.");
874-
continue;
875-
}
876-
try {
877-
item.adjustRbnfPath();
878-
} catch (Throwable t) {
879-
throw new RuntimeException(location + ": " + item.getPath(), t);
874+
XPathParts fullPath =
875+
XPathParts.getFrozenInstance(item.getFullPath())
876+
.cloneAsThawed();
877+
XPathParts dpath =
878+
XPathParts.getFrozenInstance(item.getPath())
879+
.cloneAsThawed();
880+
final String type = fullPath.getAttributeValue(-2, "type");
881+
if (type == null)
882+
throw new RuntimeException(
883+
location
884+
+ " Could not get rulesetGrouping type for "
885+
+ fullPath.toString());
886+
// here, write the raw data
887+
final String rawTransformFile =
888+
filename + "-" + type + EXTERNAL_RAW_SUFFIX;
889+
try (PrintWriter outf =
890+
FileUtilities.openUTF8Writer(outputDir, rawTransformFile)) {
891+
outf.println(item.getValue().trim());
892+
// note: not logging the write here- it will be logged when the
893+
// .json file is written.
894+
}
895+
dpath.setElement(-1, "ruleset");
896+
dpath.addElement("rulesFile");
897+
item.setPath(dpath.toString());
898+
// we have to do this because we want the rules file entry to sort
899+
// with the others.
900+
fullPath.setElement(-1, "ruleset");
901+
fullPath.setAttribute("ruleset", "_q", "-1");
902+
fullPath.setAttribute("ruleset", "type", "rulesFile");
903+
fullPath.addElement("rulesFile");
904+
item.setFullPath(fullPath.toString());
905+
// the value is now the raw filename
906+
item.setValue(rawTransformFile);
907+
} else {
908+
try {
909+
item.adjustRbnfPath();
910+
} catch (Throwable t) {
911+
throw new RuntimeException(location + ": " + item.getPath(), t);
912+
}
880913
}
881914
}
882915

@@ -905,7 +938,7 @@ private int convertCldrItems(
905938
if (item.getUntransformedPath()
906939
.startsWith("//supplementalData/transforms")) {
907940
// here, write the raw data
908-
final String rawTransformFile = filename + TRANSFORM_RAW_SUFFIX;
941+
final String rawTransformFile = filename + EXTERNAL_RAW_SUFFIX;
909942
try (PrintWriter outf =
910943
FileUtilities.openUTF8Writer(outputDir, rawTransformFile)) {
911944
outf.println(item.getValue().trim());
@@ -975,37 +1008,16 @@ private int convertCldrItems(
9751008
}
9761009

9771010
resolveSortingItems(out, nodesForLastItem, sortingItems);
1011+
9781012
resolveArrayItems(out, nodesForLastItem, arrayItems);
1013+
1014+
out = postProcessAfterResolveArrayItems(filename, out);
1015+
1016+
// special processing for unit preferences
9791017
if (js.section.contains("unitPreferenceData")) {
9801018
outputUnitPreferenceData(js, theItems, out, nodesForLastItem);
9811019
}
9821020

983-
// Special processing for transforms.
984-
if (type == RunType.transforms) {
985-
final JsonObject jo = out.getAsJsonObject("transforms");
986-
if (jo == null || jo.isEmpty()) {
987-
throw new RuntimeException(
988-
"Could not get transforms object in " + filename);
989-
}
990-
@SuppressWarnings("unchecked")
991-
final Entry<String, JsonElement>[] s = jo.entrySet().toArray(new Entry[0]);
992-
if (s == null || s.length != 1) {
993-
throw new RuntimeException(
994-
"Could not get 1 subelement of transforms in " + filename);
995-
}
996-
// key doesn't matter.
997-
// move subitem up
998-
out = s[0].getValue().getAsJsonObject();
999-
final Entry<String, JsonElement>[] s2 =
1000-
out.entrySet().toArray(new Entry[0]);
1001-
if (s2 == null || s2.length != 1) {
1002-
throw new RuntimeException(
1003-
"Could not get 1 sub-subelement of transforms in " + filename);
1004-
}
1005-
// move sub-subitem up.
1006-
out = s2[0].getValue().getAsJsonObject();
1007-
}
1008-
10091021
// write JSON
10101022
try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) {
10111023
outf.println(gson.toJson(out));
@@ -1051,6 +1063,67 @@ private int convertCldrItems(
10511063
return totalItemsInFile;
10521064
}
10531065

1066+
private JsonObject postProcessAfterResolveArrayItems(String filename, JsonObject out) {
1067+
// Special processing for transforms.
1068+
if (type == RunType.transforms) {
1069+
final JsonObject jo = out.getAsJsonObject("transforms");
1070+
if (jo == null || jo.isEmpty()) {
1071+
throw new RuntimeException("Could not get transforms object in " + filename);
1072+
}
1073+
@SuppressWarnings("unchecked")
1074+
final Entry<String, JsonElement>[] s = jo.entrySet().toArray(new Entry[0]);
1075+
if (s == null || s.length != 1) {
1076+
throw new RuntimeException(
1077+
"Could not get 1 subelement of transforms in " + filename);
1078+
}
1079+
// key doesn't matter.
1080+
// move subitem up
1081+
out = s[0].getValue().getAsJsonObject();
1082+
final Entry<String, JsonElement>[] s2 = out.entrySet().toArray(new Entry[0]);
1083+
if (s2 == null || s2.length != 1) {
1084+
throw new RuntimeException(
1085+
"Could not get 1 sub-subelement of transforms in " + filename);
1086+
}
1087+
// move sub-subitem up.
1088+
out = s2[0].getValue().getAsJsonObject();
1089+
} else if (type == RunType.rbnf) {
1090+
// hoist all "ruleset" children
1091+
/*
1092+
From:
1093+
"OrdinalRules": {
1094+
"ruleset": [
1095+
"ar-OrdinalRules.txt"
1096+
],
1097+
"%digits-ordinal": [
1098+
[
1099+
1100+
To:
1101+
"OrdinalRules": {
1102+
"_rbnfRulesFile": "ar-OrdinalRules.txt",
1103+
"%digits-ordinal": [
1104+
[
1105+
1106+
*/
1107+
1108+
JsonObject o = out.getAsJsonObject("rbnf");
1109+
if (o == null) return out;
1110+
o = o.getAsJsonObject("rbnf");
1111+
if (o == null) return out;
1112+
for (final Entry<String, JsonElement> e : o.entrySet()) {
1113+
final String type = e.getKey();
1114+
if (RBNFGroup.valueOf(type) != null) {
1115+
JsonObject ruleType = e.getValue().getAsJsonObject();
1116+
if (ruleType == null) continue;
1117+
JsonArray ruleset = ruleType.getAsJsonArray("ruleset");
1118+
if (ruleset == null) continue;
1119+
ruleType.addProperty("_rbnfRulesFile", ruleset.get(0).getAsString());
1120+
ruleType.remove("ruleset");
1121+
}
1122+
}
1123+
}
1124+
return out;
1125+
}
1126+
10541127
/**
10551128
* Provide an opportunity to fix up the JsonObject before write, after items were added.
10561129
*
@@ -1942,6 +2015,10 @@ private JsonArray outputStartArray(
19422015
// Get the name of the parent of the array
19432016
String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName();
19442017
JsonArray array = new JsonArray();
2018+
if (!o.isJsonObject()) {
2019+
throw new IllegalStateException(
2020+
"Not a JSON Object " + o + " when trying to add array " + objName + ": []");
2021+
}
19452022
o.getAsJsonObject().add(objName, array);
19462023

19472024
return array;
@@ -2136,10 +2213,16 @@ private void outputArrayItem(
21362213
Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap();
21372214

21382215
if (attrAsValueMap.isEmpty()) {
2139-
JsonObject o = new JsonObject();
2140-
out.add(o);
2141-
o.addProperty(objName, value);
2142-
} else if (objName.equals("rbnfrule")) {
2216+
if (item.getUntransformedPath().startsWith("//ldml/rbnf/")
2217+
&& objName.equals("rulesFile")) {
2218+
out.add(value);
2219+
} else {
2220+
JsonObject o = new JsonObject();
2221+
out.add(o);
2222+
o.addProperty(objName, value);
2223+
}
2224+
} else if (item.getUntransformedPath().startsWith("//ldml/rbnf/")
2225+
&& objName.equals("rbnfrule")) {
21432226
writeRbnfLeafNode(out, item, attrAsValueMap);
21442227
} else {
21452228
JsonObject o = new JsonObject();
@@ -2208,9 +2291,20 @@ private String attrToKey(String key) {
22082291

22092292
private void writeRbnfLeafNode(
22102293
JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException {
2294+
if (item.getUntransformedPath().contains("rbnfRules")) {
2295+
throw new RuntimeException(
2296+
item.getUntransformedPath()
2297+
+ " = "
2298+
+ item.getFullPath()
2299+
+ " // "
2300+
+ attrAsValueMap.keySet().stream().collect(Collectors.joining(", ")));
2301+
}
22112302
if (attrAsValueMap.size() != 1) {
22122303
throw new IllegalArgumentException(
2213-
"Error, attributes seem wrong for RBNF " + item.getUntransformedPath());
2304+
"Error, attributes seem wrong for RBNF "
2305+
+ item.getUntransformedPath()
2306+
+ " = "
2307+
+ item.getFullPath());
22142308
}
22152309
Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next();
22162310
JsonArray arr = new JsonArray();

0 commit comments

Comments
 (0)