From 2804ceb1eaf4cd4f6ec4ac9a7a3f21483a481448 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 13 Feb 2025 18:42:12 +0100 Subject: [PATCH 1/5] Centralize and fix age comparison --- .../org/unicode/jsp/UnicodeSetUtilities.java | 55 +++---------------- .../props/UnicodePropertySymbolTable.java | 48 ++++++++-------- .../org/unicode/unittest/TestUnicodeSet.java | 4 +- 3 files changed, 36 insertions(+), 71 deletions(-) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java index 1b57ab096e..8ad6be51eb 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java @@ -12,9 +12,9 @@ import java.util.List; import java.util.regex.Pattern; import org.unicode.cldr.util.MultiComparator; -import org.unicode.jsp.UnicodeSetUtilities.ComparisonMatcher.Relation; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.PatternMatcher; +import org.unicode.props.UnicodePropertySymbolTable; public class UnicodeSetUtilities { @@ -292,7 +292,13 @@ private boolean applyPropertyAlias0( + prop.getValueAliases()); } if (isAge) { - set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq)); + set = + prop.getSet( + new UnicodePropertySymbolTable.ComparisonMatcher( + propertyValue, + UnicodePropertySymbolTable.Relation.geq, + UnicodePropertySymbolTable + .VERSION_STRING_COMPARATOR)); } else { if (prop.getName().equals("General_Category")) { for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) { @@ -344,49 +350,4 @@ private boolean isValid(UnicodeProperty prop, String propertyValue) { return prop.isValidValue(propertyValue); } } - ; - - public static class ComparisonMatcher implements PatternMatcher { - Relation relation; - - enum Relation { - less, - leq, - equal, - geq, - greater - } - - static Comparator comparator = new UTF16.StringComparator(true, false, 0); - - String pattern; - - public ComparisonMatcher(String pattern, Relation comparator) { - this.relation = comparator; - this.pattern = pattern; - } - - @Override - public boolean test(String value) { - int comp = comparator.compare(pattern, value); - switch (relation) { - case less: - return comp < 0; - case leq: - return comp <= 0; - default: - return comp == 0; - case geq: - return comp >= 0; - case greater: - return comp > 0; - } - } - - @Override - public PatternMatcher set(String pattern) { - this.pattern = pattern; - return this; - } - } } diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java index 8ddfe42290..c1f3766a59 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java @@ -7,8 +7,8 @@ package org.unicode.props; import com.ibm.icu.impl.UnicodeRegex; -import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -202,7 +202,9 @@ public boolean applyPropertyAlias0( set = prop.getSet( new ComparisonMatcher( - propertyValue, Relation.geq, DOUBLE_STRING_COMPARATOR)); + propertyValue, + Relation.geq, + VERSION_STRING_COMPARATOR)); } else { set = prop.getSet(propertyValue); } @@ -247,10 +249,6 @@ public static class ComparisonMatcher implements PatternMatcher { final Comparator comparator; String pattern; - public ComparisonMatcher(String pattern, Relation relation) { - this(pattern, relation, new UTF16.StringComparator(true, false, 0)); - } - public ComparisonMatcher(String pattern, Relation relation, Comparator comparator) { this.relation = relation; this.pattern = pattern; @@ -281,8 +279,11 @@ public PatternMatcher set(String pattern) { } } - /** Special parser for doubles. Anything not parsable is higher than everything else. */ - public static final Comparator DOUBLE_STRING_COMPARATOR = + /** + * Special parser for version strings. Anything not parsable is higher than everything + * parseable. + */ + public static final Comparator VERSION_STRING_COMPARATOR = new Comparator() { @Override @@ -294,22 +295,25 @@ public int compare(String o1, String o2) { } else if (o2 == null) { return 1; } else { - int f1 = o1.codePointAt(0); - int f2 = o2.codePointAt(0); - boolean n1 = f1 < '0' || f1 > '9'; - boolean n2 = f2 < '0' || f2 > '9'; - if (n1) { - return n2 ? o1.compareTo(o2) : 1; - } else if (n2) { - return -1; + boolean o1Valid = true; + boolean o2Valid = true; + VersionInfo v1 = null; + VersionInfo v2 = null; + try { + v1 = VersionInfo.getInstance(o1); + } catch (IllegalArgumentException e) { + o1Valid = false; } - double d1 = Double.parseDouble(o1); - double d2 = Double.parseDouble(o2); - if (Double.isNaN(d1) || Double.isNaN(d2)) { - throw new IllegalArgumentException(); + try { + v2 = VersionInfo.getInstance(o2); + } catch (IllegalArgumentException e) { + o2Valid = false; + } + if (o1Valid && o2Valid) { + return v1.compareTo(v2); + } else { + return o2Valid ? 1 : o1Valid ? -1 : o1.compareTo(o2); } - - return d1 > d2 ? 1 : d1 < d2 ? -1 : 0; } } }; diff --git a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java index ac83414131..cd1b96f7bf 100644 --- a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java +++ b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java @@ -33,11 +33,11 @@ private void checkOrder(String d1, String d2, int expected) { assertEquals( d1 + " ?< " + d2, expected, - UnicodePropertySymbolTable.DOUBLE_STRING_COMPARATOR.compare(d1, d2)); + UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d1, d2)); assertEquals( d2 + " ?< " + d1, -expected, - UnicodePropertySymbolTable.DOUBLE_STRING_COMPARATOR.compare(d2, d1)); + UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d2, d1)); } @Test From 7d7d448a72c877830430e801162a3668e2cc87ef Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 14 Feb 2025 01:39:13 +0100 Subject: [PATCH 2/5] Use the right props --- .../src/test/java/org/unicode/unittest/TestUnicodeSet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java index cd1b96f7bf..e4e9556be6 100644 --- a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java +++ b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java @@ -5,7 +5,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.junit.jupiter.api.Test; -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.text.utility.UnicodeSetParser; import org.unicode.text.utility.Utility; From 353713cf08e1d450e52ad48247b38e962f0b8c2c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 17 Feb 2025 17:24:08 +0100 Subject: [PATCH 3/5] sign --- .../org/unicode/unittest/TestUnicodeSet.java | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java index e4e9556be6..fa1022e63b 100644 --- a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java +++ b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java @@ -13,12 +13,12 @@ public class TestUnicodeSet extends TestFmwkMinusMinus { @Test public void TestAge() { - checkOrder("3.1", "3.2", -1); - checkOrder("3.2", "3.2", 0); - checkOrder("4.0", "3.2", 1); - checkOrder("10.0", "3.2", 1); - checkOrder("11.0", "3.2", 1); - checkOrder("NA", "11.0", 1); + checkOrder("3.1", "3.2", Comparison.SMALLER); + checkOrder("3.2", "3.2", Comparison.EQUAL); + checkOrder("4.0", "3.2", Comparison.GREATER); + checkOrder("10.0", "3.2", Comparison.GREATER); + checkOrder("11.0", "3.2", Comparison.GREATER); + checkOrder("NA", "11.0", Comparison.GREATER); final UnicodeSet U32 = new UnicodeSet("[:age=3.2:]").freeze(); if (!U32.contains(0x01F6) || !U32.contains(0x0220)) { @@ -29,15 +29,31 @@ public void TestAge() { } } - private void checkOrder(String d1, String d2, int expected) { + private static enum Comparison { + EQUAL, + GREATER, + SMALLER; + + public Comparison opposite() { + return this == GREATER ? SMALLER : this == SMALLER ? GREATER : this; + } + + public static Comparison fromCompareResult(int compareResult) { + return compareResult == 0 ? EQUAL : compareResult > 0 ? GREATER : SMALLER; + } + } + + private void checkOrder(String d1, String d2, Comparison expected) { assertEquals( d1 + " ?< " + d2, expected, - UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d1, d2)); + Comparison.fromCompareResult( + UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d1, d2))); assertEquals( d2 + " ?< " + d1, - -expected, - UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d2, d1)); + expected.opposite(), + Comparison.fromCompareResult( + UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d2, d1))); } @Test From 60a2c8126f449386a878d0f6040d0a07a799c884 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 17 Feb 2025 17:40:17 +0100 Subject: [PATCH 4/5] Just say no to strings --- .../org/unicode/jsp/UnicodeSetUtilities.java | 13 ++- .../props/UnicodePropertySymbolTable.java | 79 ++++++++----------- .../org/unicode/unittest/TestUnicodeSet.java | 12 ++- 3 files changed, 50 insertions(+), 54 deletions(-) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java index 8ad6be51eb..554554d10e 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java @@ -7,6 +7,7 @@ import com.ibm.icu.text.UTF16.StringComparator; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.VersionInfo; import java.text.ParsePosition; import java.util.Comparator; import java.util.List; @@ -294,11 +295,15 @@ private boolean applyPropertyAlias0( if (isAge) { set = prop.getSet( - new UnicodePropertySymbolTable.ComparisonMatcher( - propertyValue, + new UnicodePropertySymbolTable.ComparisonMatcher< + VersionInfo>( + UnicodePropertySymbolTable.parseVersionInfoOrMax( + propertyValue), UnicodePropertySymbolTable.Relation.geq, - UnicodePropertySymbolTable - .VERSION_STRING_COMPARATOR)); + Comparator.nullsFirst(Comparator.naturalOrder()), + (s) -> + UnicodePropertySymbolTable + .parseVersionInfoOrMax(s))); } else { if (prop.getName().equals("General_Category")) { for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) { diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java index c1f3766a59..958326d25f 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java @@ -12,6 +12,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.List; +import java.util.function.Function; import org.unicode.props.UnicodeProperty.PatternMatcher; /** @@ -201,10 +202,14 @@ public boolean applyPropertyAlias0( if (isAge) { set = prop.getSet( - new ComparisonMatcher( - propertyValue, + new ComparisonMatcher( + UnicodePropertySymbolTable.parseVersionInfoOrMax( + propertyValue), Relation.geq, - VERSION_STRING_COMPARATOR)); + Comparator.nullsFirst(Comparator.naturalOrder()), + (s) -> + UnicodePropertySymbolTable + .parseVersionInfoOrMax(s))); } else { set = prop.getSet(propertyValue); } @@ -244,20 +249,26 @@ public enum Relation { greater } - public static class ComparisonMatcher implements PatternMatcher { + public static class ComparisonMatcher implements PatternMatcher { final Relation relation; - final Comparator comparator; - String pattern; + final Comparator comparator; + final Function parser; + T expected; - public ComparisonMatcher(String pattern, Relation relation, Comparator comparator) { + public ComparisonMatcher( + T expected, + Relation relation, + Comparator comparator, + Function parser) { this.relation = relation; - this.pattern = pattern; + this.expected = expected; this.comparator = comparator; + this.parser = parser; } @Override public boolean test(String value) { - int comp = comparator.compare(pattern, value); + int comp = comparator.compare(expected, parser.apply(value)); switch (relation) { case less: return comp < 0; @@ -274,47 +285,19 @@ public boolean test(String value) { @Override public PatternMatcher set(String pattern) { - this.pattern = pattern; + this.expected = parser.apply(pattern); return this; } } - /** - * Special parser for version strings. Anything not parsable is higher than everything - * parseable. - */ - public static final Comparator VERSION_STRING_COMPARATOR = - new Comparator() { - - @Override - public int compare(String o1, String o2) { - if (o1 == o2) { - return 0; - } else if (o1 == null) { - return -1; - } else if (o2 == null) { - return 1; - } else { - boolean o1Valid = true; - boolean o2Valid = true; - VersionInfo v1 = null; - VersionInfo v2 = null; - try { - v1 = VersionInfo.getInstance(o1); - } catch (IllegalArgumentException e) { - o1Valid = false; - } - try { - v2 = VersionInfo.getInstance(o2); - } catch (IllegalArgumentException e) { - o2Valid = false; - } - if (o1Valid && o2Valid) { - return v1.compareTo(v2); - } else { - return o2Valid ? 1 : o1Valid ? -1 : o1.compareTo(o2); - } - } - } - }; + public static VersionInfo parseVersionInfoOrMax(String s) { + if (s == null) { + return null; + } + try { + return VersionInfo.getInstance(s); + } catch (IllegalArgumentException e) { + return VersionInfo.getInstance(255, 255, 255, 255); + } + } } diff --git a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java index fa1022e63b..648a30a461 100644 --- a/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java +++ b/unicodetools/src/test/java/org/unicode/unittest/TestUnicodeSet.java @@ -2,6 +2,8 @@ import com.ibm.icu.impl.UnicodeRegex; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; +import java.util.Comparator; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.junit.jupiter.api.Test; @@ -48,12 +50,18 @@ private void checkOrder(String d1, String d2, Comparison expected) { d1 + " ?< " + d2, expected, Comparison.fromCompareResult( - UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d1, d2))); + Comparator.nullsFirst(Comparator.naturalOrder()) + .compare( + UnicodePropertySymbolTable.parseVersionInfoOrMax(d1), + UnicodePropertySymbolTable.parseVersionInfoOrMax(d2)))); assertEquals( d2 + " ?< " + d1, expected.opposite(), Comparison.fromCompareResult( - UnicodePropertySymbolTable.VERSION_STRING_COMPARATOR.compare(d2, d1))); + Comparator.nullsFirst(Comparator.naturalOrder()) + .compare( + UnicodePropertySymbolTable.parseVersionInfoOrMax(d2), + UnicodePropertySymbolTable.parseVersionInfoOrMax(d1)))); } @Test From 60379b572650ada41c2de4503849e9c69415be39 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 19 Feb 2025 00:44:25 +0100 Subject: [PATCH 5/5] :: --- .../src/main/java/org/unicode/jsp/UnicodeSetUtilities.java | 4 +--- .../java/org/unicode/props/UnicodePropertySymbolTable.java | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java index 554554d10e..34eed8b30d 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java @@ -301,9 +301,7 @@ private boolean applyPropertyAlias0( propertyValue), UnicodePropertySymbolTable.Relation.geq, Comparator.nullsFirst(Comparator.naturalOrder()), - (s) -> - UnicodePropertySymbolTable - .parseVersionInfoOrMax(s))); + UnicodePropertySymbolTable::parseVersionInfoOrMax)); } else { if (prop.getName().equals("General_Category")) { for (String[] coarseValue : COARSE_GENERAL_CATEGORIES) { diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java index 958326d25f..5a495783e3 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java @@ -207,9 +207,7 @@ public boolean applyPropertyAlias0( propertyValue), Relation.geq, Comparator.nullsFirst(Comparator.naturalOrder()), - (s) -> - UnicodePropertySymbolTable - .parseVersionInfoOrMax(s))); + UnicodePropertySymbolTable::parseVersionInfoOrMax)); } else { set = prop.getSet(propertyValue); }