77import com .ibm .icu .text .UTF16 .StringComparator ;
88import com .ibm .icu .text .UnicodeSet ;
99import com .ibm .icu .util .ULocale ;
10- import com .ibm .icu .util .VersionInfo ;
1110import java .text .ParsePosition ;
1211import java .util .Comparator ;
13- import java .util .List ;
1412import java .util .regex .Pattern ;
1513import org .unicode .cldr .util .MultiComparator ;
16- import org .unicode .props .UnicodeProperty ;
17- import org .unicode .props .UnicodeProperty .PatternMatcher ;
18- import org .unicode .props .UnicodePropertySymbolTable ;
14+ import org .unicode .text .UCD .VersionedSymbolTable ;
1915
2016public class UnicodeSetUtilities {
2117
@@ -110,7 +106,12 @@ public static UnicodeSet parseUnicodeSet(String input) {
110106 input = input .trim () + "]]]]]" ;
111107 String parseInput = "[" + input + "]]]]]" ;
112108 ParsePosition parsePosition = new ParsePosition (0 );
113- UnicodeSet result = new UnicodeSet (parseInput , parsePosition , fullSymbolTable );
109+ UnicodeSet result =
110+ new UnicodeSet (
111+ parseInput ,
112+ parsePosition ,
113+ VersionedSymbolTable .forReview (UcdLoader ::getOldestLoadedUcd )
114+ .setUnversionedExtensions (XPropertyFactory .make ()));
114115 int parseEnd = parsePosition .getIndex ();
115116 if (parseEnd != parseInput .length ()
116117 && !UnicodeSetUtilities .OK_AT_END .containsAll (parseInput .substring (parseEnd ))) {
@@ -125,232 +126,4 @@ public static UnicodeSet parseUnicodeSet(String input) {
125126 }
126127 return result ;
127128 }
128-
// Single shared MySymbolTable instance; parseUnicodeSet passes it to the UnicodeSet constructor
// so that property expressions in the parsed pattern are resolved by MySymbolTable.
129- static UnicodeSet .XSymbolTable fullSymbolTable = new MySymbolTable ();
130-
131- private static class MySymbolTable extends UnicodeSet .XSymbolTable {
132- UnicodeRegex unicodeRegex ;
133- XPropertyFactory factory ;
134-
135- public MySymbolTable () {
136- unicodeRegex = new UnicodeRegex ().setSymbolTable (this );
137- }
138-
139- // public boolean applyPropertyAlias0(String propertyName,
140- // String propertyValue, UnicodeSet result) {
141- // if (!propertyName.contains("*")) {
142- // return applyPropertyAlias(propertyName, propertyValue, result);
143- // }
144- // String[] propertyNames = propertyName.split("[*]");
145- // for (int i = propertyNames.length - 1; i >= 0; ++i) {
146- // String pname = propertyNames[i];
147- //
148- // }
149- // return null;
150- // }
151-
152- @ Override
153- public boolean applyPropertyAlias (
154- String propertyName , String propertyValue , UnicodeSet result ) {
155- boolean status = false ;
156- boolean invert = false ;
157- int posNotEqual = propertyName .indexOf ('\u2260' );
158- if (posNotEqual >= 0 ) {
159- propertyValue =
160- propertyValue .length () == 0
161- ? propertyName .substring (posNotEqual + 1 )
162- : propertyName .substring (posNotEqual + 1 ) + "=" + propertyValue ;
163- propertyName = propertyName .substring (0 , posNotEqual );
164- invert = true ;
165- }
166- if (propertyName .endsWith ("!" )) {
167- propertyName = propertyName .substring (0 , propertyName .length () - 1 );
168- invert = !invert ;
169- }
170- int posColon = propertyName .indexOf (':' );
171- String versionPrefix = "" ;
172- String versionlessPropertyName = propertyName ;
173- if (posColon >= 0 ) {
174- versionPrefix = propertyName .substring (0 , posColon + 1 );
175- versionlessPropertyName = propertyName .substring (posColon + 1 );
176- }
177-
178- if (factory == null ) {
179- factory = XPropertyFactory .make ();
180- }
181-
182- var gcProp = factory .getProperty (versionPrefix + "gc" );
183- var scProp = factory .getProperty (versionPrefix + "sc" );
184-
185- UnicodeProperty prop = factory .getProperty (propertyName );
186- if (propertyValue .length () != 0 ) {
187- if (prop == null ) {
188- propertyValue = propertyValue .trim ();
189- } else if (prop .isTrimmable ()) {
190- propertyValue = propertyValue .trim ();
191- } else {
192- int debug = 0 ;
193- }
194- status = applyPropertyAlias0 (prop , propertyValue , result , invert );
195- } else {
196- try {
197- status = applyPropertyAlias0 (gcProp , versionlessPropertyName , result , invert );
198- } catch (Exception e ) {
199- }
200- ;
201- if (!status ) {
202- try {
203- status =
204- applyPropertyAlias0 (
205- scProp , versionlessPropertyName , result , invert );
206- } catch (Exception e ) {
207- }
208- if (!status ) {
209- if (prop .isType (UnicodeProperty .BINARY_OR_ENUMERATED_OR_CATALOG_MASK )) {
210- try {
211- status = applyPropertyAlias0 (prop , "No" , result , !invert );
212- } catch (Exception e ) {
213- }
214- }
215- if (!status ) {
216- status = applyPropertyAlias0 (prop , "" , result , invert );
217- }
218- }
219- }
220- }
221- return status ;
222- }
223-
224- private static String [][] COARSE_GENERAL_CATEGORIES = {
225- {"Other" , "C" , "Cc" , "Cf" , "Cn" , "Co" , "Cs" },
226- {"Letter" , "L" , "Ll" , "Lm" , "Lo" , "Lt" , "Lu" },
227- {"Cased_Letter" , "LC" , "Ll" , "Lt" , "Lu" },
228- {"Mark" , "M" , "Mc" , "Me" , "Mn" },
229- {"Number" , "N" , "Nd" , "Nl" , "No" },
230- {"Punctuation" , "P" , "Pc" , "Pd" , "Pe" , "Pf" , "Pi" , "Po" , "Ps" },
231- {"Symbol" , "S" , "Sc" , "Sk" , "Sm" , "So" },
232- {"Separator" , "Z" , "Zl" , "Zp" , "Zs" },
233- };
234-
235- // TODO(eggrobin): I think this function only ever returns true; might as well make it void.
236- private boolean applyPropertyAlias0 (
237- UnicodeProperty prop , String propertyValue , UnicodeSet result , boolean invert ) {
238- result .clear ();
239- String propertyName = prop .getName ();
240- String trimmedPropertyValue = propertyValue .trim ();
241- PatternMatcher patternMatcher = null ;
242- if (trimmedPropertyValue .length () > 1
243- && trimmedPropertyValue .startsWith ("/" )
244- && trimmedPropertyValue .endsWith ("/" )) {
245- String fixedRegex =
246- unicodeRegex .transform (
247- trimmedPropertyValue .substring (
248- 1 , trimmedPropertyValue .length () - 1 ));
249- patternMatcher = new UnicodeProperty .RegexMatcher ().set (fixedRegex );
250- }
251- UnicodeProperty otherProperty = null ;
252- boolean testCp = false ;
253- if (trimmedPropertyValue .length () > 1
254- && trimmedPropertyValue .startsWith ("@" )
255- && trimmedPropertyValue .endsWith ("@" )) {
256- String otherPropName =
257- trimmedPropertyValue .substring (1 , trimmedPropertyValue .length () - 1 ).trim ();
258- if ("cp" .equalsIgnoreCase (otherPropName )) {
259- testCp = true ;
260- } else {
261- otherProperty = factory .getProperty (otherPropName );
262- }
263- }
264- boolean isAge = UnicodeProperty .equalNames ("age" , propertyName );
265- if (prop != null ) {
266- UnicodeSet set ;
267- if (testCp ) {
268- set = new UnicodeSet ();
269- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
270- if (invert != UnicodeProperty .equals (i , prop .getValue (i ))) {
271- set .add (i );
272- }
273- }
274- } else if (otherProperty != null ) {
275- set = new UnicodeSet ();
276- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
277- String v1 = prop .getValue (i );
278- String v2 = otherProperty .getValue (i );
279- if (invert != UnicodeProperty .equals (v1 , v2 )) {
280- set .add (i );
281- }
282- }
283- } else if (patternMatcher == null ) {
284- if (!isValid (prop , propertyValue )) {
285- throw new IllegalArgumentException (
286- "The value '"
287- + propertyValue
288- + "' is illegal. Values for "
289- + propertyName
290- + " must be in "
291- + prop .getAvailableValues ()
292- + " or in "
293- + prop .getValueAliases ());
294- }
295- if (isAge ) {
296- set =
297- prop .getSet (
298- new UnicodePropertySymbolTable .ComparisonMatcher <
299- VersionInfo >(
300- UnicodePropertySymbolTable .parseVersionInfoOrMax (
301- propertyValue ),
302- UnicodePropertySymbolTable .Relation .geq ,
303- Comparator .nullsFirst (Comparator .naturalOrder ()),
304- UnicodePropertySymbolTable ::parseVersionInfoOrMax ));
305- } else {
306- if (prop .getName ().equals ("General_Category" )) {
307- for (String [] coarseValue : COARSE_GENERAL_CATEGORIES ) {
308- final String longName = coarseValue [0 ];
309- final String shortName = coarseValue [1 ];
310- if (UnicodeProperty .equalNames (propertyValue , longName )
311- || UnicodeProperty .equalNames (propertyValue , shortName )) {
312- for (int i = 2 ; i < coarseValue .length ; ++i ) {
313- prop .getSet (coarseValue [i ], result );
314- }
315- return true ;
316- }
317- }
318- }
319- set = prop .getSet (propertyValue );
320- }
321- } else if (isAge ) {
322- set = new UnicodeSet ();
323- List <String > values = prop .getAvailableValues ();
324- for (String value : values ) {
325- if (patternMatcher .test (value )) {
326- for (String other : values ) {
327- if (other .compareTo (value ) <= 0 ) {
328- set .addAll (prop .getSet (other ));
329- }
330- }
331- }
332- }
333- } else {
334- set = prop .getSet (patternMatcher );
335- }
336- if (invert ) {
337- if (isAge ) {
338- set .complement ();
339- } else {
340- set = prop .getUnicodeMap ().keySet ().removeAll (set );
341- }
342- }
343- result .addAll (set );
344- return true ;
345- }
346- throw new IllegalArgumentException ("Illegal property: " + propertyName );
347- }
348-
349- private boolean isValid (UnicodeProperty prop , String propertyValue ) {
350- // if (prop.getName().equals("General_Category")) {
351- // if (propertyValue)
352- // }
353- return prop .isValidValue (propertyValue );
354- }
355- }
356129}
0 commit comments