|
1 | 1 | package org.unicode.text.UCD; |
2 | 2 |
|
3 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; |
| 4 | +import static org.junit.jupiter.api.Assertions.assertTrue; |
4 | 5 |
|
5 | 6 | import com.ibm.icu.impl.UnicodeMap; |
6 | 7 | import com.ibm.icu.text.Normalizer2; |
7 | 8 | import com.ibm.icu.text.UTF16; |
8 | 9 | import com.ibm.icu.text.UnicodeSet; |
9 | 10 | import com.ibm.icu.text.UnicodeSet.EntryRange; |
| 11 | +import java.util.Arrays; |
10 | 12 | import java.util.Collections; |
11 | 13 | import java.util.EnumSet; |
| 14 | +import java.util.HashMap; |
| 15 | +import java.util.HashSet; |
| 16 | +import java.util.List; |
| 17 | +import java.util.Map; |
12 | 18 | import java.util.Set; |
| 19 | +import java.util.TreeMap; |
| 20 | +import java.util.function.Function; |
| 21 | +import java.util.stream.Collectors; |
13 | 22 | import org.junit.jupiter.api.Test; |
14 | 23 | import org.unicode.props.IndexUnicodeProperties; |
| 24 | +import org.unicode.props.PropertyNames.Named; |
| 25 | +import org.unicode.props.PropertyType; |
15 | 26 | import org.unicode.props.UcdProperty; |
16 | 27 | import org.unicode.props.UcdPropertyValues; |
17 | 28 | import org.unicode.props.UcdPropertyValues.Age_Values; |
18 | 29 | import org.unicode.props.UcdPropertyValues.Grapheme_Cluster_Break_Values; |
19 | 30 | import org.unicode.props.UcdPropertyValues.Script_Values; |
| 31 | +import org.unicode.props.UnicodeProperty; |
20 | 32 | import org.unicode.text.utility.Utility; |
21 | 33 |
|
22 | 34 | public class TestCodeInvariants { |
@@ -230,4 +242,132 @@ private static String showInfo( |
230 | 242 | + " ) " |
231 | 243 | + NAME.get(codePoint); |
232 | 244 | } |
| 245 | + |
| 246 | + @Test |
| 247 | + void testPropertyAliasUniqueness() { |
| 248 | + // All property aliases constitute a single namespace. Property aliases are |
| 249 | + // guaranteed to be unique within this namespace. |
| 250 | + testLM3NamespaceUniqueness( |
| 251 | + Arrays.asList(UcdProperty.values()), |
| 252 | + property -> property.getNames().getAllNames(), |
| 253 | + Set.of("Age"), |
| 254 | + "!!Stability policy violation!! (Property Alias Uniqueness)"); |
| 255 | + Set<Object> propertyNamespace = new HashSet<>(); |
| 256 | + propertyNamespace.addAll(Arrays.asList(UcdProperty.values())); |
| 257 | + propertyNamespace.add("code point"); |
| 258 | + propertyNamespace.add("none"); |
| 259 | + testLM3NamespaceUniqueness( |
| 260 | + propertyNamespace, |
| 261 | + x -> |
| 262 | + x instanceof String |
| 263 | + ? List.of((String) x) |
| 264 | + : ((UcdProperty) x).getNames().getAllNames(), |
| 265 | + Set.of("Age"), |
| 266 | + "Violation of UnicodeSet requirements: A property alias matches <code point> or <none>"); |
| 267 | + for (var property : UcdProperty.values()) { |
| 268 | + if (IndexUnicodeProperties.make() |
| 269 | + .getProperty(property) |
| 270 | + .isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { |
| 271 | + Set<String> expectedRedundant; |
| 272 | + switch (property) { |
| 273 | + case Block: |
| 274 | + expectedRedundant = Set.of("Arabic_Presentation_Forms-A"); |
| 275 | + break; |
| 276 | + case Decomposition_Type: |
| 277 | + expectedRedundant = |
| 278 | + Set.of( |
| 279 | + "can", "com", "enc", "fin", "font", "fra", "init", "iso", |
| 280 | + "med", "nar", "nb", "none", "sml", "sqr", "sub", "sup", |
| 281 | + "vert", "wide"); |
| 282 | + break; |
| 283 | + case Sentence_Break: |
| 284 | + expectedRedundant = Set.of("Sp"); |
| 285 | + break; |
| 286 | + default: |
| 287 | + expectedRedundant = Set.of(); |
| 288 | + break; |
| 289 | + } |
| 290 | + // For each property, all of its property value aliases constitute a separate |
| 291 | + // namespace, one per property. Within each of these property value alias |
| 292 | + // namespaces, property value aliases are guaranteed to be unique. |
| 293 | + testLM3NamespaceUniqueness( |
| 294 | + property.getEnums(), |
| 295 | + value -> ((Named) value).getNames().getAllNames(), |
| 296 | + expectedRedundant, |
| 297 | + "!!Stability policy violation!! (Property Alias Uniqueness for value aliases of " |
| 298 | + + property |
| 299 | + + ")"); |
| 300 | + } |
| 301 | + } |
| 302 | + Set<Object> unicodeSetUnaryQueryNames = |
| 303 | + Arrays.stream(UcdProperty.values()) |
| 304 | + .filter(p -> p.getType() == PropertyType.Binary) |
| 305 | + .collect(Collectors.toCollection(() -> new HashSet<>())); |
| 306 | + unicodeSetUnaryQueryNames.addAll( |
| 307 | + Arrays.asList(UcdPropertyValues.General_Category_Values.values())); |
| 308 | + unicodeSetUnaryQueryNames.addAll(Arrays.asList(UcdPropertyValues.Script_Values.values())); |
| 309 | + testLM3NamespaceUniqueness( |
| 310 | + unicodeSetUnaryQueryNames, |
| 311 | + x -> |
| 312 | + x instanceof UcdProperty |
| 313 | + ? ((UcdProperty) x).getNames().getAllNames() |
| 314 | + : ((Named) x).getNames().getAllNames(), |
| 315 | + Set.of("Age"), |
| 316 | + "Violation of UnicodeSet requirements: gc-sc-binary property namespace collision"); |
| 317 | + Set<Object> nonCollidingProperties = new HashSet<>(); |
| 318 | + nonCollidingProperties.addAll(Arrays.asList(UcdProperty.values())); |
| 319 | + nonCollidingProperties.addAll( |
| 320 | + Arrays.asList(UcdPropertyValues.General_Category_Values.values())); |
| 321 | + nonCollidingProperties.addAll(Arrays.asList(UcdPropertyValues.Script_Values.values())); |
| 322 | + nonCollidingProperties.remove(UcdProperty.ISO_Comment); // Collides with gc Other. |
| 323 | + nonCollidingProperties.remove(UcdProperty.Case_Folding); // Collides with gc Format. |
| 324 | + nonCollidingProperties.remove( |
| 325 | + UcdProperty.Lowercase_Mapping); // Collides with gc Cased_Letter. |
| 326 | + nonCollidingProperties.remove(UcdProperty.Script); // Collides with gc Currency_Symbol. |
| 327 | + testLM3NamespaceUniqueness( |
| 328 | + nonCollidingProperties, |
| 329 | + x -> |
| 330 | + x instanceof UcdProperty |
| 331 | + ? ((UcdProperty) x).getNames().getAllNames() |
| 332 | + : ((Named) x).getNames().getAllNames(), |
| 333 | + Set.of("Age"), |
| 334 | + "Unusual (not a violation of UnicodeSet requirement): New gc-sc-non-binary property namespace collision"); |
| 335 | + } |
| 336 | + |
| 337 | + <T> void testLM3NamespaceUniqueness( |
| 338 | + Iterable<T> namespace, |
| 339 | + Function<T, List<String>> getNames, |
| 340 | + Set<String> expectedRedundant, |
| 341 | + String message) { |
| 342 | + final Map<String, T> entitiesByAlias = new TreeMap<>(UnicodeProperty.PROPERTY_COMPARATOR); |
| 343 | + final Map<String, String> aliasesByLM3Skeleton = new HashMap<>(); |
| 344 | + for (T entity : namespace) { |
| 345 | + for (String alias : getNames.apply(entity)) { |
| 346 | + final var matchingEntity = entitiesByAlias.get(alias); |
| 347 | + final var lm3Skeleton = UnicodeProperty.toSkeleton(alias); |
| 348 | + final var matchingAlias = aliasesByLM3Skeleton.get(lm3Skeleton); |
| 349 | + assertTrue( |
| 350 | + matchingEntity == null || entity.equals(matchingEntity), |
| 351 | + message |
| 352 | + + ": alias " |
| 353 | + + alias |
| 354 | + + " for " |
| 355 | + + entity |
| 356 | + + " matches alias " |
| 357 | + + matchingAlias |
| 358 | + + " for " |
| 359 | + + matchingEntity); |
| 360 | + if (matchingEntity != null && !expectedRedundant.contains(alias)) { |
| 361 | + assertEquals( |
| 362 | + matchingAlias, |
| 363 | + alias, |
| 364 | + "Unusual (not a stability policy violation): distinct aliases for " |
| 365 | + + entity |
| 366 | + + " match each other"); |
| 367 | + } |
| 368 | + entitiesByAlias.putIfAbsent(alias, entity); |
| 369 | + aliasesByLM3Skeleton.putIfAbsent(lm3Skeleton, alias); |
| 370 | + } |
| 371 | + } |
| 372 | + } |
233 | 373 | } |
0 commit comments