Skip to content

Commit 5ad0744

Browse files
committed
Added support for UnicodeCombiningMark, fixes #3639.
1 parent 76afa8e commit 5ad0744

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed

build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
2+
# Build with Maven, skipping test execution (-DskipTests) and restricting the
# reactor (-pl) to the externs, main, and shaded-main project files.
# NOTE(review): no goal/phase is given on the command line, so this presumably
# relies on a defaultGoal declared in the project's POM — TODO confirm.
mvn -DskipTests -pl externs/pom.xml,pom-main.xml,pom-main-shaded.xml

src/com/google/javascript/jscomp/parsing/parser/Scanner.java

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,8 +901,52 @@ private static boolean isIdentifierStart(char ch) {
901901
| (ch >= 0x03B1 & ch <= 0x03C9); // Greek lowercase letters
902902
}
903903

904+
/**
905+
Implement ECMAScript grammar for isIdentifierPart.
906+
*/
907+
private static boolean isCombiningMark(char ch) {
908+
return Character.getType(ch) == Character.NON_SPACING_MARK;
909+
}
910+
911+
// TODO (ctjl): Implement
912+
private static boolean isConnectorPunctuation() {
913+
return true;
914+
}
915+
916+
// TODO (ctjl): Implement
917+
private static boolean isZeroWidthJoiner() {
918+
return true;
919+
}
920+
921+
// TODO (ctjl): Implement
922+
private static boolean isZeroWidthNonJoiner() {
923+
return true;
924+
}
925+
904926
@SuppressWarnings("ShortCircuitBoolean") // Intentional to minimize branches in this code
905927
private static boolean isIdentifierPart(char ch) {
928+
/**
929+
https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
930+
IdentifierPart ::
931+
IdentifierStart
932+
✓ isIdentifierStart()
933+
934+
UnicodeCombiningMark
935+
✓ isCombiningMark()
936+
937+
UnicodeDigit
938+
✓ Character.isDigit()
939+
940+
UnicodeConnectorPunctuation
941+
✗ isConnectorPunctuation() (TODO: stub only; not yet checked by the return statement below)
942+
943+
<ZWNJ>
944+
✗ isZeroWidthNonJoiner() (TODO: stub only; not yet checked by the return statement below)
945+
946+
<ZWJ>
947+
✗ isZeroWidthJoiner() (TODO: stub only; not yet checked by the return statement below)
948+
*/
949+
906950
// Most code is written in pure ASCII, so create a fast path here.
907951
if (ch <= 127) {
908952
return ((ch >= 'A' & ch <= 'Z')
@@ -913,8 +957,10 @@ private static boolean isIdentifierPart(char ch) {
913957

914958
// Handle non-ASCII characters.
915959
// TODO(tjgq): This should include all characters with the ID_Continue property, plus
916-
// Zero Width Non-Joiner and Zero Width Joiner.
917-
return isIdentifierStart(ch) || Character.isDigit(ch);
960+
// TODO(ctjl): Implement remaining grammar (zero-width joiners, etc.)
961+
return isIdentifierStart(ch)
962+
|| isCombiningMark(ch)
963+
|| Character.isDigit(ch);
918964
}
919965

920966
private Token scanStringLiteral(int beginIndex, char terminator) {

test.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
var bar = {
2+
: "foo"
3+
};

0 commit comments

Comments
 (0)