Skip to content

Commit 4a96595

Browse files
committed
Use ICU4j for isidentifier
1 parent: 7a1f3ab · commit: 4a96595

File tree

1 file changed

+4
-30
lines changed
  • graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str

1 file changed

+4
-30
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java

Lines changed: 4 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -362,40 +362,14 @@ public static boolean isIdentifier(String value) {
362362
return false;
363363
}
364364
int c = value.codePointAt(pos);
365-
int type = Character.getType(c);
366-
if (c != '_') {
367-
// Unicode XID_Start
368-
switch (type) {
369-
case Character.UPPERCASE_LETTER:
370-
case Character.LOWERCASE_LETTER:
371-
case Character.TITLECASE_LETTER:
372-
case Character.MODIFIER_LETTER:
373-
case Character.OTHER_LETTER:
374-
case Character.LETTER_NUMBER:
375-
break;
376-
default:
377-
return false;
378-
}
365+
if (c != '_' && !UCharacter.hasBinaryProperty(c, UProperty.XID_START)) {
366+
return false;
379367
}
380368
pos += Character.charCount(c);
381369
while (pos < value.length()) {
382370
c = value.codePointAt(pos);
383-
type = Character.getType(c);
384-
// Unicode XID_Continue
385-
switch (type) {
386-
case Character.UPPERCASE_LETTER:
387-
case Character.LOWERCASE_LETTER:
388-
case Character.TITLECASE_LETTER:
389-
case Character.MODIFIER_LETTER:
390-
case Character.OTHER_LETTER:
391-
case Character.LETTER_NUMBER:
392-
case Character.NON_SPACING_MARK:
393-
case Character.COMBINING_SPACING_MARK:
394-
case Character.DECIMAL_DIGIT_NUMBER:
395-
case Character.CONNECTOR_PUNCTUATION:
396-
break;
397-
default:
398-
return false;
371+
if (!UCharacter.hasBinaryProperty(c, UProperty.XID_CONTINUE)) {
372+
return false;
399373
}
400374
pos += Character.charCount(c);
401375
}

0 commit comments

Comments
 (0)