Skip to content

Commit c7f0fdb

Browse files
eggrobin and markusicu authored
Test property name uniqueness (#1082)
* Something that should fail
* GenerateEnums
* Failing test
* Revert "GenerateEnums"
  This reverts commit 2d74447.
* Revert "Something that should fail"
  This reverts commit 7c13371.
* Property value namespace uniqueness + redundancies
* GenerateEnums
* Value aliases covered
* Other=ISO_Comment, et al.
* spotless
* messages
* UTS46 uses the uppercase one
* GenerateEnums
* Test none and code point
* use matchingEntity
  Co-authored-by: Markus Scherer <[email protected]>
* putIfAbsent
---------
Co-authored-by: Markus Scherer <[email protected]>
1 parent bbd085d commit c7f0fdb

File tree

3 files changed

+144
-4
lines changed

3 files changed

+144
-4
lines changed

unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -982,8 +982,8 @@ public static Identifier_Type_Values forName(String name) {
982982
}
983983

984984
public enum Idn_2008_Values implements Named {
985-
NV8("nv8"),
986-
XV8("xv8"),
985+
NV8("NV8"),
986+
XV8("XV8"),
987987
na("na");
988988
private final PropertyNames<Idn_2008_Values> names;
989989

unicodetools/src/main/resources/org/unicode/props/ExtraPropertyValueAliases.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ idns ; ds3m ; disallowed_STD3_mapped
127127

128128
# @missing: 0000..10FFFF; Idn_2008 ; na
129129

130-
idn8 ; nv8 ; NV8
131-
idn8 ; xv8 ; XV8
130+
idn8 ; NV8 ; NV8
131+
idn8 ; XV8 ; XV8
132132
idn8 ; na ; na
133133

134134
# @missing: 0000..10FFFF; Idn_Mapping ; <code point>

unicodetools/src/test/java/org/unicode/text/UCD/TestCodeInvariants.java

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,34 @@
11
package org.unicode.text.UCD;
22

33
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertTrue;
45

56
import com.ibm.icu.impl.UnicodeMap;
67
import com.ibm.icu.text.Normalizer2;
78
import com.ibm.icu.text.UTF16;
89
import com.ibm.icu.text.UnicodeSet;
910
import com.ibm.icu.text.UnicodeSet.EntryRange;
11+
import java.util.Arrays;
1012
import java.util.Collections;
1113
import java.util.EnumSet;
14+
import java.util.HashMap;
15+
import java.util.HashSet;
16+
import java.util.List;
17+
import java.util.Map;
1218
import java.util.Set;
19+
import java.util.TreeMap;
20+
import java.util.function.Function;
21+
import java.util.stream.Collectors;
1322
import org.junit.jupiter.api.Test;
1423
import org.unicode.props.IndexUnicodeProperties;
24+
import org.unicode.props.PropertyNames.Named;
25+
import org.unicode.props.PropertyType;
1526
import org.unicode.props.UcdProperty;
1627
import org.unicode.props.UcdPropertyValues;
1728
import org.unicode.props.UcdPropertyValues.Age_Values;
1829
import org.unicode.props.UcdPropertyValues.Grapheme_Cluster_Break_Values;
1930
import org.unicode.props.UcdPropertyValues.Script_Values;
31+
import org.unicode.props.UnicodeProperty;
2032
import org.unicode.text.utility.Utility;
2133

2234
public class TestCodeInvariants {
@@ -230,4 +242,132 @@ private static String showInfo(
230242
+ " ) "
231243
+ NAME.get(codePoint);
232244
}
245+
246+
@Test
247+
void testPropertyAliasUniqueness() {
248+
// All property aliases constitute a single namespace. Property aliases are
249+
// guaranteed to be unique within this namespace.
250+
testLM3NamespaceUniqueness(
251+
Arrays.asList(UcdProperty.values()),
252+
property -> property.getNames().getAllNames(),
253+
Set.of("Age"),
254+
"!!Stability policy violation!! (Property Alias Uniqueness)");
255+
Set<Object> propertyNamespace = new HashSet<>();
256+
propertyNamespace.addAll(Arrays.asList(UcdProperty.values()));
257+
propertyNamespace.add("code point");
258+
propertyNamespace.add("none");
259+
testLM3NamespaceUniqueness(
260+
propertyNamespace,
261+
x ->
262+
x instanceof String
263+
? List.of((String) x)
264+
: ((UcdProperty) x).getNames().getAllNames(),
265+
Set.of("Age"),
266+
"Violation of UnicodeSet requirements: A property alias matches <code point> or <none>");
267+
for (var property : UcdProperty.values()) {
268+
if (IndexUnicodeProperties.make()
269+
.getProperty(property)
270+
.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) {
271+
Set<String> expectedRedundant;
272+
switch (property) {
273+
case Block:
274+
expectedRedundant = Set.of("Arabic_Presentation_Forms-A");
275+
break;
276+
case Decomposition_Type:
277+
expectedRedundant =
278+
Set.of(
279+
"can", "com", "enc", "fin", "font", "fra", "init", "iso",
280+
"med", "nar", "nb", "none", "sml", "sqr", "sub", "sup",
281+
"vert", "wide");
282+
break;
283+
case Sentence_Break:
284+
expectedRedundant = Set.of("Sp");
285+
break;
286+
default:
287+
expectedRedundant = Set.of();
288+
break;
289+
}
290+
// For each property, all of its property value aliases constitute a separate
291+
// namespace, one per property. Within each of these property value alias
292+
// namespaces, property value aliases are guaranteed to be unique.
293+
testLM3NamespaceUniqueness(
294+
property.getEnums(),
295+
value -> ((Named) value).getNames().getAllNames(),
296+
expectedRedundant,
297+
"!!Stability policy violation!! (Property Alias Uniqueness for value aliases of "
298+
+ property
299+
+ ")");
300+
}
301+
}
302+
Set<Object> unicodeSetUnaryQueryNames =
303+
Arrays.stream(UcdProperty.values())
304+
.filter(p -> p.getType() == PropertyType.Binary)
305+
.collect(Collectors.toCollection(() -> new HashSet<>()));
306+
unicodeSetUnaryQueryNames.addAll(
307+
Arrays.asList(UcdPropertyValues.General_Category_Values.values()));
308+
unicodeSetUnaryQueryNames.addAll(Arrays.asList(UcdPropertyValues.Script_Values.values()));
309+
testLM3NamespaceUniqueness(
310+
unicodeSetUnaryQueryNames,
311+
x ->
312+
x instanceof UcdProperty
313+
? ((UcdProperty) x).getNames().getAllNames()
314+
: ((Named) x).getNames().getAllNames(),
315+
Set.of("Age"),
316+
"Violation of UnicodeSet requirements: gc-sc-binary property namespace collision");
317+
Set<Object> nonCollidingProperties = new HashSet<>();
318+
nonCollidingProperties.addAll(Arrays.asList(UcdProperty.values()));
319+
nonCollidingProperties.addAll(
320+
Arrays.asList(UcdPropertyValues.General_Category_Values.values()));
321+
nonCollidingProperties.addAll(Arrays.asList(UcdPropertyValues.Script_Values.values()));
322+
nonCollidingProperties.remove(UcdProperty.ISO_Comment); // Collides with gc Other.
323+
nonCollidingProperties.remove(UcdProperty.Case_Folding); // Collides with gc Format.
324+
nonCollidingProperties.remove(
325+
UcdProperty.Lowercase_Mapping); // Collides with gc Cased_Letter.
326+
nonCollidingProperties.remove(UcdProperty.Script); // Collides with gc Currency_Symbol.
327+
testLM3NamespaceUniqueness(
328+
nonCollidingProperties,
329+
x ->
330+
x instanceof UcdProperty
331+
? ((UcdProperty) x).getNames().getAllNames()
332+
: ((Named) x).getNames().getAllNames(),
333+
Set.of("Age"),
334+
"Unusual (not a violation of UnicodeSet requirement): New gc-sc-non-binary property namespace collision");
335+
}
336+
337+
<T> void testLM3NamespaceUniqueness(
338+
Iterable<T> namespace,
339+
Function<T, List<String>> getNames,
340+
Set<String> expectedRedundant,
341+
String message) {
342+
final Map<String, T> entitiesByAlias = new TreeMap<>(UnicodeProperty.PROPERTY_COMPARATOR);
343+
final Map<String, String> aliasesByLM3Skeleton = new HashMap<>();
344+
for (T entity : namespace) {
345+
for (String alias : getNames.apply(entity)) {
346+
final var matchingEntity = entitiesByAlias.get(alias);
347+
final var lm3Skeleton = UnicodeProperty.toSkeleton(alias);
348+
final var matchingAlias = aliasesByLM3Skeleton.get(lm3Skeleton);
349+
assertTrue(
350+
matchingEntity == null || entity.equals(matchingEntity),
351+
message
352+
+ ": alias "
353+
+ alias
354+
+ " for "
355+
+ entity
356+
+ " matches alias "
357+
+ matchingAlias
358+
+ " for "
359+
+ matchingEntity);
360+
if (matchingEntity != null && !expectedRedundant.contains(alias)) {
361+
assertEquals(
362+
matchingAlias,
363+
alias,
364+
"Unusual (not a stability policy violation): distinct aliases for "
365+
+ entity
366+
+ " match each other");
367+
}
368+
entitiesByAlias.putIfAbsent(alias, entity);
369+
aliasesByLM3Skeleton.putIfAbsent(lm3Skeleton, alias);
370+
}
371+
}
372+
}
233373
}

0 commit comments

Comments
 (0)