77import com .ibm .icu .text .UTF16 .StringComparator ;
88import com .ibm .icu .text .UnicodeSet ;
99import com .ibm .icu .util .ULocale ;
10- import com .ibm .icu .util .VersionInfo ;
1110import java .text .ParsePosition ;
1211import java .util .Comparator ;
13- import java .util .List ;
14- import java .util .Map ;
1512import java .util .regex .Pattern ;
1613import org .unicode .cldr .util .MultiComparator ;
17- import org .unicode .props .IndexUnicodeProperties ;
18- import org .unicode .props .UcdProperty ;
19- import org .unicode .props .UcdPropertyValues ;
20- import org .unicode .props .UnicodeProperty ;
21- import org .unicode .props .UnicodeProperty .PatternMatcher ;
22- import org .unicode .props .UnicodePropertySymbolTable ;
14+ import org .unicode .text .UCD .VersionedSymbolTable ;
2315
2416public class UnicodeSetUtilities {
2517
@@ -114,7 +106,12 @@ public static UnicodeSet parseUnicodeSet(String input) {
114106 input = input .trim () + "]]]]]" ;
115107 String parseInput = "[" + input + "]]]]]" ;
116108 ParsePosition parsePosition = new ParsePosition (0 );
117- UnicodeSet result = new UnicodeSet (parseInput , parsePosition , fullSymbolTable );
109+ UnicodeSet result =
110+ new UnicodeSet (
111+ parseInput ,
112+ parsePosition ,
113+ VersionedSymbolTable .forReview (UcdLoader ::getOldestLoadedUcd )
114+ .setUnversionedExtensions (XPropertyFactory .make ()));
118115 int parseEnd = parsePosition .getIndex ();
119116 if (parseEnd != parseInput .length ()
120117 && !UnicodeSetUtilities .OK_AT_END .containsAll (parseInput .substring (parseEnd ))) {
@@ -129,262 +126,4 @@ public static UnicodeSet parseUnicodeSet(String input) {
129126 }
130127 return result ;
131128 }
132-
// Shared symbol-table instance; parseUnicodeSet passed it to the UnicodeSet constructor
// so that property queries inside the pattern were resolved by MySymbolTable.
133- static UnicodeSet .XSymbolTable fullSymbolTable = new MySymbolTable ();
134-
135- private static class MySymbolTable extends UnicodeSet .XSymbolTable {
136- UnicodeRegex unicodeRegex ;
137- XPropertyFactory factory ;
138-
139- public MySymbolTable () {
140- unicodeRegex = new UnicodeRegex ().setSymbolTable (this );
141- }
142-
143- // public boolean applyPropertyAlias0(String propertyName,
144- // String propertyValue, UnicodeSet result) {
145- // if (!propertyName.contains("*")) {
146- // return applyPropertyAlias(propertyName, propertyValue, result);
147- // }
148- // String[] propertyNames = propertyName.split("[*]");
149- // for (int i = propertyNames.length - 1; i >= 0; ++i) {
150- // String pname = propertyNames[i];
151- //
152- // }
153- // return null;
154- // }
155-
156- @ Override
157- public boolean applyPropertyAlias (
158- String propertyName , String propertyValue , UnicodeSet result ) {
159- boolean status = false ;
160- boolean invert = false ;
161- int posNotEqual = propertyName .indexOf ('\u2260' );
162- if (posNotEqual >= 0 ) {
163- propertyValue =
164- propertyValue .length () == 0
165- ? propertyName .substring (posNotEqual + 1 )
166- : propertyName .substring (posNotEqual + 1 ) + "=" + propertyValue ;
167- propertyName = propertyName .substring (0 , posNotEqual );
168- invert = true ;
169- }
170- if (propertyName .endsWith ("!" )) {
171- propertyName = propertyName .substring (0 , propertyName .length () - 1 );
172- invert = !invert ;
173- }
174- int posColon = propertyName .indexOf (':' );
175- String versionPrefix = "" ;
176- String versionlessPropertyName = propertyName ;
177- if (posColon >= 0 ) {
178- versionPrefix = propertyName .substring (0 , posColon + 1 );
179- versionlessPropertyName = propertyName .substring (posColon + 1 );
180- }
181-
182- if (factory == null ) {
183- factory = XPropertyFactory .make ();
184- }
185-
186- var gcProp = factory .getProperty (versionPrefix + "gc" );
187- var scProp = factory .getProperty (versionPrefix + "sc" );
188-
189- UnicodeProperty prop = factory .getProperty (propertyName );
190- if (propertyValue .length () != 0 ) {
191- if (prop == null ) {
192- propertyValue = propertyValue .trim ();
193- } else if (prop .isTrimmable ()) {
194- propertyValue = propertyValue .trim ();
195- } else {
196- int debug = 0 ;
197- }
198- status = applyPropertyAlias0 (prop , propertyValue , result , invert );
199- } else {
200- try {
201- status = applyPropertyAlias0 (gcProp , versionlessPropertyName , result , invert );
202- } catch (Exception e ) {
203- }
204- ;
205- if (!status ) {
206- try {
207- status =
208- applyPropertyAlias0 (
209- scProp , versionlessPropertyName , result , invert );
210- } catch (Exception e ) {
211- }
212- if (!status ) {
213- if (prop .isType (UnicodeProperty .BINARY_OR_ENUMERATED_OR_CATALOG_MASK )) {
214- try {
215- status = applyPropertyAlias0 (prop , "No" , result , !invert );
216- } catch (Exception e ) {
217- }
218- }
219- if (!status ) {
220- status = applyPropertyAlias0 (prop , "" , result , invert );
221- }
222- }
223- }
224- }
225- return status ;
226- }
227-
228- private static Map <UcdPropertyValues .General_Category_Values , String []>
229- COARSE_GENERAL_CATEGORIES =
230- Map .of (
231- UcdPropertyValues .General_Category_Values .Other ,
232- new String [] {"Cc" , "Cf" , "Cn" , "Co" , "Cs" },
233- UcdPropertyValues .General_Category_Values .Letter ,
234- new String [] {"Ll" , "Lm" , "Lo" , "Lt" , "Lu" },
235- UcdPropertyValues .General_Category_Values .Cased_Letter ,
236- new String [] {"Ll" , "Lt" , "Lu" },
237- UcdPropertyValues .General_Category_Values .Mark ,
238- new String [] {"Mc" , "Me" , "Mn" },
239- UcdPropertyValues .General_Category_Values .Number ,
240- new String [] {"Nd" , "Nl" , "No" },
241- UcdPropertyValues .General_Category_Values .Punctuation ,
242- new String [] {"Pc" , "Pd" , "Pe" , "Pf" , "Pi" , "Po" , "Ps" },
243- UcdPropertyValues .General_Category_Values .Symbol ,
244- new String [] {"Sc" , "Sk" , "Sm" , "So" },
245- UcdPropertyValues .General_Category_Values .Separator ,
246- new String [] {"Zl" , "Zp" , "Zs" });
247-
248- // TODO(eggrobin): I think this function only ever returns true; might as well make it void.
249- private boolean applyPropertyAlias0 (
250- UnicodeProperty prop , String propertyValue , UnicodeSet result , boolean invert ) {
251- result .clear ();
252- String propertyName = prop .getName ();
253- String trimmedPropertyValue = propertyValue .trim ();
254- PatternMatcher patternMatcher = null ;
255- if (trimmedPropertyValue .length () > 1
256- && trimmedPropertyValue .startsWith ("/" )
257- && trimmedPropertyValue .endsWith ("/" )) {
258- String fixedRegex =
259- unicodeRegex .transform (
260- trimmedPropertyValue .substring (
261- 1 , trimmedPropertyValue .length () - 1 ));
262- patternMatcher = new UnicodeProperty .RegexMatcher ().set (fixedRegex );
263- }
264- UnicodeProperty otherProperty = null ;
265- boolean testCp = false ;
266- boolean testNone = false ;
267- if (trimmedPropertyValue .length () > 1
268- && trimmedPropertyValue .startsWith ("@" )
269- && trimmedPropertyValue .endsWith ("@" )) {
270- String otherPropName =
271- trimmedPropertyValue .substring (1 , trimmedPropertyValue .length () - 1 ).trim ();
272- if (UnicodeProperty .equalNames ("code point" , otherPropName )) {
273- testCp = true ;
274- } else if (UnicodeProperty .equalNames ("none" , otherPropName )) {
275- testNone = true ;
276- } else {
277- otherProperty = factory .getProperty (otherPropName );
278- }
279- }
280- // TODO(egg): Name and Name_Alias require special handling (UAX44-LM2), and
281- // treating Name_Alias as aliases for Name.
282- boolean isAge = UnicodeProperty .equalNames ("age" , propertyName );
283- if (prop != null ) {
284- UnicodeSet set ;
285- if (testCp ) {
286- set = new UnicodeSet ();
287- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
288- if (invert != UnicodeProperty .equals (i , prop .getValue (i ))) {
289- set .add (i );
290- }
291- }
292- invert = false ;
293- } else if (testNone ) {
294- set = prop .getSet (UnicodeProperty .NULL_MATCHER );
295- } else if (otherProperty != null ) {
296- System .err .println (otherProperty + ", " + invert );
297- set = new UnicodeSet ();
298- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
299- String v1 = prop .getValue (i );
300- String v2 = otherProperty .getValue (i );
301- if (invert != UnicodeProperty .equals (v1 , v2 )) {
302- set .add (i );
303- }
304- }
305- invert = false ;
306- } else if (patternMatcher == null ) {
307- if (!isValid (prop , propertyValue )) {
308- throw new IllegalArgumentException (
309- "The value '"
310- + propertyValue
311- + "' is illegal. Values for "
312- + propertyName
313- + " must be in "
314- + prop .getAvailableValues ()
315- + " or in "
316- + prop .getValueAliases ());
317- }
318- if (isAge ) {
319- set =
320- prop .getSet (
321- new UnicodePropertySymbolTable .ComparisonMatcher <
322- VersionInfo >(
323- UnicodePropertySymbolTable .parseVersionInfoOrMax (
324- propertyValue ),
325- UnicodePropertySymbolTable .Relation .geq ,
326- Comparator .nullsFirst (Comparator .naturalOrder ()),
327- UnicodePropertySymbolTable ::parseVersionInfoOrMax ));
328- } else {
329- if (prop .getName ().equals ("General_Category" )) {
330- for (var entry : COARSE_GENERAL_CATEGORIES .entrySet ()) {
331- final var aliases = entry .getKey ().getNames ().getAllNames ();
332- if (aliases .stream ()
333- .anyMatch (
334- a ->
335- UnicodeProperty .equalNames (
336- propertyValue , a ))) {
337- for (var value : entry .getValue ()) {
338- prop .getSet (value , result );
339- }
340- return true ;
341- }
342- }
343- }
344- set = prop .getSet (propertyValue );
345- if (set .isEmpty ()
346- && prop instanceof IndexUnicodeProperties .IndexUnicodeProperty
347- && prop .getName ().equals ("Name" )) {
348- set =
349- ((IndexUnicodeProperties .IndexUnicodeProperty ) prop )
350- .getFactory ()
351- .getProperty (UcdProperty .Name_Alias )
352- .getSet (propertyValue );
353- }
354- }
355- } else if (isAge ) {
356- set = new UnicodeSet ();
357- List <String > values = prop .getAvailableValues ();
358- for (String value : values ) {
359- if (patternMatcher .test (value )) {
360- for (String other : values ) {
361- if (other .compareTo (value ) <= 0 ) {
362- set .addAll (prop .getSet (other ));
363- }
364- }
365- }
366- }
367- } else {
368- set = prop .getSet (patternMatcher );
369- }
370- if (invert ) {
371- if (isAge ) {
372- set .complement ();
373- } else {
374- set = prop .getUnicodeMap ().keySet ().removeAll (set );
375- }
376- }
377- result .addAll (set );
378- return true ;
379- }
380- throw new IllegalArgumentException ("Illegal property: " + propertyName );
381- }
382-
383- private boolean isValid (UnicodeProperty prop , String propertyValue ) {
384- // if (prop.getName().equals("General_Category")) {
385- // if (propertyValue)
386- // }
387- return prop .isValidValue (propertyValue );
388- }
389- }
390129}
0 commit comments