@@ -40,16 +40,21 @@ enum SpecialProperty {
4040 Skip1FT ,
4141 Skip1ST ,
4242 SkipAny4 ,
43- Rational
4443 }
4544
4645 private static final String NEW_UNICODE_PROPS_DOCS =
4746 "https://github.com/unicode-org/unicodetools/blob/main/docs/newunicodeproperties.md" ;
4847 private static final VersionInfo MIN_VERSION = VersionInfo .getInstance (0 , 0 , 0 , 0 );
4948 public final UcdProperty property ;
50- public final int fieldNumber ;
5149 public final SpecialProperty special ;
5250
51+ /**
52+ * Maps from Unicode versions to field number. A property whose field number depends on the
53+ * version has more than one entry. A particular field number applies to the Unicode versions
54+ * after the previous-version entry, up to and including its own version.
55+ */
56+ TreeMap <VersionInfo , Integer > fieldNumbers ;
57+
5358 /**
5459 * Maps from Unicode versions to files. A property whose file depends on the version has more
5560 * than one entry. A particular file applies to the Unicode versions after the previous-version
@@ -102,7 +107,8 @@ public PropertyParsingInfo(
102107 this .files = new TreeMap <>();
103108 files .put (Settings .LATEST_VERSION_INFO , file );
104109 this .property = property ;
105- this .fieldNumber = fieldNumber ;
110+ this .fieldNumbers = new TreeMap <>();
111+ fieldNumbers .put (Settings .LATEST_VERSION_INFO , fieldNumber );
106112 this .special = special ;
107113 }
108114
@@ -121,6 +127,15 @@ private static void fromStrings(String... propertyInfo) {
121127 }
122128
123129 String last = propertyInfo [propertyInfo .length - 1 ];
130+
131+ int temp = 1 ;
132+ if (propertyInfo .length > 2
133+ && !propertyInfo [2 ].isEmpty ()
134+ && !VERSION .matcher (propertyInfo [2 ]).matches ()) {
135+ temp = Integer .parseInt (propertyInfo [2 ]);
136+ }
137+ int _fieldNumber = temp ;
138+
124139 if (VERSION .matcher (last ).matches ()) {
125140 propertyInfo [propertyInfo .length - 1 ] = "" ;
126141 PropertyParsingInfo result = property2PropertyInfo .get (_property );
@@ -129,16 +144,11 @@ private static void fromStrings(String... propertyInfo) {
129144 "No modern info for property with old file record: " + propName );
130145 }
131146 result .files .put (VersionInfo .getInstance (last .substring (1 )), _file );
147+ result .fieldNumbers .put (VersionInfo .getInstance (last .substring (1 )), _fieldNumber );
132148 file2PropertyInfoSet .put (_file , result );
133149 return ;
134150 }
135151
136- int temp = 1 ;
137- if (propertyInfo .length > 2 && !propertyInfo [2 ].isEmpty ()) {
138- temp = Integer .parseInt (propertyInfo [2 ]);
139- }
140- int _fieldNumber = temp ;
141-
142152 SpecialProperty _special =
143153 propertyInfo .length < 4 || propertyInfo [3 ].isEmpty ()
144154 ? SpecialProperty .None
@@ -173,7 +183,7 @@ public String toString() {
173183 + " ;\t "
174184 + property
175185 + " ;\t "
176- + fieldNumber
186+ + fieldNumbers
177187 + " ;\t "
178188 + special
179189 + " ;\t "
@@ -200,7 +210,8 @@ public int compareTo(PropertyParsingInfo arg0) {
200210 if (0 != (result = property .toString ().compareTo (arg0 .property .toString ()))) {
201211 return result ;
202212 }
203- return fieldNumber - arg0 .fieldNumber ;
213+ return fieldNumbers .get (Settings .LATEST_VERSION_INFO )
214+ - arg0 .fieldNumbers .get (Settings .LATEST_VERSION_INFO );
204215 }
205216
206217 public static String getFullFileName (UcdProperty prop , VersionInfo ucdVersion ) {
@@ -227,6 +238,20 @@ public String getFileName(VersionInfo ucdVersionRequested) {
227238 }
228239 }
229240
241+ public int getFieldNumber (VersionInfo ucdVersionRequested ) {
242+ int fieldNumber = 0 ;
243+ if (fieldNumbers .size () == 1 ) {
244+ return fieldNumbers .values ().iterator ().next ();
245+ }
246+ for (final var entry : fieldNumbers .entrySet ()) {
247+ if (ucdVersionRequested .compareTo (entry .getKey ()) <= 0 ) {
248+ fieldNumber = entry .getValue ();
249+ break ;
250+ }
251+ }
252+ return fieldNumber ;
253+ }
254+
230255 private static final VersionInfo V13 = VersionInfo .getInstance (13 );
231256
232257 public static final Normalizer2 NFD = Normalizer2 .getNFDInstance ();
@@ -595,12 +620,11 @@ static void parseSourceFile(
595620 if (propInfoSet .size () == 1
596621 && (propInfo = propInfoSet .iterator ().next ()).special
597622 == SpecialProperty .None
598- && propInfo .fieldNumber == 1 ) {
623+ && propInfo .getFieldNumber ( indexUnicodeProperties . ucdVersion ) == 1 ) {
599624 parseSimpleFieldFile (
600625 parser .withMissing (true ),
601626 propInfo ,
602- indexUnicodeProperties .property2UnicodeMap .get (propInfo .property ),
603- indexUnicodeProperties .ucdVersion ,
627+ indexUnicodeProperties ,
604628 nextProperties == null
605629 ? null
606630 : nextProperties .getProperty (propInfo .property ));
@@ -1255,18 +1279,6 @@ private static void parseFields(
12551279 switch (propInfo .special ) {
12561280 case None :
12571281 break ;
1258- case Rational :
1259- // int slashPos = string.indexOf('/');
1260- // double rational;
1261- // if (slashPos < 0) {
1262- // rational = Double.parseDouble(string);
1263- // } else {
1264- // rational =
1265- // Double.parseDouble(string.substring(0,slashPos)) /
1266- // Double.parseDouble(string.substring(slashPos+1));
1267- // }
1268- // string = Double.toString(rational);
1269- break ;
12701282 case Skip1ST :
12711283 if ("ST" .contains (parts [1 ])) {
12721284 continue ;
@@ -1286,7 +1298,9 @@ private static void parseFields(
12861298 throw new UnicodePropertyException ();
12871299 }
12881300 String value =
1289- propInfo .fieldNumber >= parts .length ? null : parts [propInfo .fieldNumber ];
1301+ propInfo .getFieldNumber (indexUnicodeProperties .ucdVersion ) >= parts .length
1302+ ? null
1303+ : parts [propInfo .getFieldNumber (indexUnicodeProperties .ucdVersion )];
12901304 if (propInfo .property == UcdProperty .Joining_Group
12911305 && indexUnicodeProperties .ucdVersion .compareTo (VersionInfo .UNICODE_4_0_1 )
12921306 <= 0
@@ -1326,7 +1340,9 @@ private static void parseFields(
13261340 } else {
13271341 for (final PropertyParsingInfo propInfo : propInfoSet ) {
13281342 final String value =
1329- propInfo .fieldNumber < parts .length ? parts [propInfo .fieldNumber ] : null ;
1343+ propInfo .getFieldNumber (indexUnicodeProperties .ucdVersion ) < parts .length
1344+ ? parts [propInfo .getFieldNumber (indexUnicodeProperties .ucdVersion )]
1345+ : null ;
13301346 setPropDefault (
13311347 propInfo .property ,
13321348 value ,
@@ -1340,9 +1356,11 @@ private static void parseFields(
13401356 private static void parseSimpleFieldFile (
13411357 UcdLineParser parser ,
13421358 PropertyParsingInfo propInfo ,
1343- UnicodeMap <String > data ,
1344- VersionInfo version ,
1359+ IndexUnicodeProperties indexUnicodeProperties ,
13451360 UnicodeProperty nextVersion ) {
1361+ final UnicodeMap <String > data =
1362+ indexUnicodeProperties .property2UnicodeMap .get (propInfo .property );
1363+ final VersionInfo version = indexUnicodeProperties .ucdVersion ;
13461364 for (UcdLineParser .UcdLine line : parser ) {
13471365 if (line .getType () == UcdLineParser .UcdLine .Contents .DATA ) {
13481366 if (propInfo .getDefaultValue (version ) == null ) {
@@ -1395,6 +1413,55 @@ private static void parseSimpleFieldFile(
13951413 false ,
13961414 nextVersion );
13971415 continue ;
1416+ } else if (propInfo .property == UcdProperty .Numeric_Value ) {
1417+ String extractedValue = line .getParts ()[1 ];
1418+ for (int cp = line .getRange ().start ; cp <= line .getRange ().end ; ++cp ) {
1419+ String unicodeDataValue =
1420+ indexUnicodeProperties
1421+ .getProperty (UcdProperty .Non_Unihan_Numeric_Value )
1422+ .getValue (cp );
1423+ var range = new IntRange ();
1424+ range .start = cp ;
1425+ range .end = cp ;
1426+ if (unicodeDataValue == null ) {
1427+ if (!extractedValue .endsWith (".0" )) {
1428+ throw new IllegalArgumentException (
1429+ "Non-integer numeric value extracted from Unihan for "
1430+ + Utility .hex (cp )
1431+ + ": "
1432+ + extractedValue );
1433+ }
1434+ propInfo .put (
1435+ data ,
1436+ line .getMissingSet (),
1437+ range ,
1438+ extractedValue .substring (0 , extractedValue .length () - 2 ),
1439+ null ,
1440+ false ,
1441+ nextVersion );
1442+ } else {
1443+ // Prior to Unicode 5.1, DerivedNumericValues.txt is useless for getting
1444+ // numeric values whose denominator is not a small power of two, as it
1445+ // only provides field 1, which is decimal with *mystery rounding* (in
1446+ // particular, not enough digits to disambiguate between binary32
1447+ // values).
1448+ // It is not normative either, so we use the value from UnicodeData.
1449+ // We use the values from DerivedNumericValues.txt when they are
1450+ // extracted from Unihan, as this avoids having to reconstruct old
1451+ // derivations here. In particular, Unihan numeric properties do *not*
1452+ // feed into the Numeric_Value until 4.0; see
1453+ // https://www.unicode.org/L2/L2003/03039.htm#94-C4.
1454+ propInfo .put (
1455+ data ,
1456+ line .getMissingSet (),
1457+ range ,
1458+ unicodeDataValue ,
1459+ null ,
1460+ false ,
1461+ nextVersion );
1462+ }
1463+ }
1464+ continue ;
13981465 } else if (line .getParts ().length != 2
13991466 && version .compareTo (VersionInfo .UNICODE_3_0_1 ) > 0 ) {
14001467 // Unicode 3.0 and earlier had name comments as an extra field.
@@ -1410,6 +1477,22 @@ private static void parseSimpleFieldFile(
14101477 false ,
14111478 nextVersion );
14121479 } else {
1480+ if (propInfo .property == UcdProperty .Numeric_Value
1481+ && line .getParts ().length == 3
1482+ && line .getParts ()[1 ].isEmpty ()
1483+ && line .getParts ()[2 ].equals ("NaN" )) {
1484+ // 5.1..6.1 have an improper line
1485+ // # @missing: 0000..10FFFF; ; NaN
1486+ // compare 6.2 and 6.3
1487+ // # @missing: 0000..10FFFF; NaN; ; NaN
1488+ // This causes the default for field 1 (which we use as the key for
1489+ // Numeric_Value, with some subsequent chicanery to actually get the data from
1490+ // UnicodeData) to be the empty string, rather than NaN.
1491+ // Before 5.1, there is no @missing line. After 6.3, the @missing line is in
1492+ // PropertyValueAliases, where it is independent of the format of the file
1493+ // specifying the property.
1494+ line .getParts ()[1 ] = "NaN" ;
1495+ }
14131496 setPropDefault (
14141497 propInfo .property ,
14151498 line .getParts ()[1 ],
0 commit comments