@@ -901,8 +901,52 @@ private static boolean isIdentifierStart(char ch) {
901
901
| (ch >= 0x03B1 & ch <= 0x03C9 ); // Greek lowercase letters
902
902
}
903
903
904
+ /**
905
+ Implement ECMAScript grammar for isIdentifierPart.
906
+ */
907
+ private static boolean isCombiningMark (char ch ) {
908
+ return Character .getType (ch ) == Character .NON_SPACING_MARK ;
909
+ }
910
+
911
+ // TODO (ctjl): Implement
912
+ private static boolean isConnectorPunctuation () {
913
+ return true ;
914
+ }
915
+
916
+ // TODO (ctjl): Implement
917
+ private static boolean isZeroWidthJoiner () {
918
+ return true ;
919
+ }
920
+
921
+ // TODO (ctjl): Implement
922
+ private static boolean isZeroWidthNonJoiner () {
923
+ return true ;
924
+ }
925
+
904
926
@ SuppressWarnings ("ShortCircuitBoolean" ) // Intentional to minimize branches in this code
905
927
private static boolean isIdentifierPart (char ch ) {
928
+ /**
929
+ https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
930
+ IdentifierPart ::
931
+ IdentifierStart
932
+ ✓ isIdentifierStart()
933
+
934
+ UnicodeCombiningMark
935
+ ✓ isCombiningMark()
936
+
937
+ UnicodeDigit
938
+ ✓ Character.isDigit()
939
+
940
+ UnicodeConnectorPunctuation
941
+ ✓ isConnectorPunctuation()
942
+
943
+ <ZWNJ>
944
+ ✓ isZeroWidthNonJoiner()
945
+
946
+ <ZWJ>
947
+ ✓ isZeroWidthJoiner()
948
+ */
949
+
906
950
// Most code is written in pure ASCII, so create a fast path here.
907
951
if (ch <= 127 ) {
908
952
return ((ch >= 'A' & ch <= 'Z' )
@@ -913,8 +957,10 @@ private static boolean isIdentifierPart(char ch) {
913
957
914
958
// Handle non-ASCII characters.
915
959
// TODO(tjgq): This should include all characters with the ID_Continue property, plus
916
- // Zero Width Non-Joiner and Zero Width Joiner.
917
- return isIdentifierStart (ch ) || Character .isDigit (ch );
960
+ // TODO(ctjl): Implement remaining grammar (zero-width joiners, etc.)
961
+ return isIdentifierStart (ch )
962
+ || isCombiningMark (ch )
963
+ || Character .isDigit (ch );
918
964
}
919
965
920
966
private Token scanStringLiteral (int beginIndex , char terminator ) {
0 commit comments