Skip to content

Commit dc9c3e1

Browse files
Merge pull request #731 from Microsoft/acknowledgeTrivia
Use the 'skipTrivia' scanner flag for lexical classification
2 parents 8be8e1f + 25170ef commit dc9c3e1

File tree

3 files changed

+71
-51
lines changed

3 files changed

+71
-51
lines changed

src/compiler/types.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,9 @@ module ts {
228228
FirstPunctuation = OpenBraceToken,
229229
LastPunctuation = CaretEqualsToken,
230230
FirstToken = EndOfFileToken,
231-
LastToken = StringKeyword
231+
LastToken = StringKeyword,
232+
FirstTriviaToken = SingleLineCommentTrivia,
233+
LastTriviaToken = WhitespaceTrivia
232234
}
233235

234236
export enum NodeFlags {

src/services/services.ts

Lines changed: 28 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4378,13 +4378,13 @@ module ts {
43784378

43794379
/// Classifier
43804380
export function createClassifier(host: Logger): Classifier {
4381-
var scanner: Scanner;
4382-
var noRegexTable: boolean[];
4381+
var scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ false);
43834382

43844383
/// We do not have full parser support to know when we should parse a regex or not.
43854384
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
43864385
/// we have a series of divide operators. This list allows us to be more accurate by ruling out
43874386
/// locations where a regexp cannot exist.
4387+
var noRegexTable: boolean[];
43884388
if (!noRegexTable) {
43894389
noRegexTable = [];
43904390
noRegexTable[SyntaxKind.Identifier] = true;
@@ -4404,8 +4404,7 @@ module ts {
44044404
function getClassificationsForLine(text: string, lexState: EndOfLineState): ClassificationResult {
44054405
var offset = 0;
44064406
var lastTokenOrCommentEnd = 0;
4407-
var lastToken = SyntaxKind.Unknown;
4408-
var inUnterminatedMultiLineComment = false;
4407+
var lastNonTriviaToken = SyntaxKind.Unknown;
44094408

44104409
// If we're in a string literal, then prepend: "\
44114410
// (and a newline). That way when we lex we'll think we're still in a string literal.
@@ -4427,63 +4426,49 @@ module ts {
44274426
break;
44284427
}
44294428

4429+
scanner.setText(text);
4430+
44304431
var result: ClassificationResult = {
44314432
finalLexState: EndOfLineState.Start,
44324433
entries: []
44334434
};
44344435

4435-
scanner = createScanner(ScriptTarget.ES5, /*skipTrivia*/ true, text, onError, processComment);
4436-
4436+
44374437
var token = SyntaxKind.Unknown;
44384438
do {
44394439
token = scanner.scan();
44404440

4441-
if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastToken]) {
4441+
if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) {
44424442
if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
44434443
token = SyntaxKind.RegularExpressionLiteral;
44444444
}
44454445
}
4446-
else if (lastToken === SyntaxKind.DotToken) {
4446+
else if (lastNonTriviaToken === SyntaxKind.DotToken) {
44474447
token = SyntaxKind.Identifier;
44484448
}
44494449

4450-
lastToken = token;
4450+
// Only recall the token if it was *not* trivia.
4451+
if (!(SyntaxKind.FirstTriviaToken <= token && token <= SyntaxKind.LastTriviaToken)) {
4452+
lastNonTriviaToken = token;
4453+
}
44514454

44524455
processToken();
44534456
}
44544457
while (token !== SyntaxKind.EndOfFileToken);
44554458

44564459
return result;
44574460

4458-
4459-
function onError(message: DiagnosticMessage): void {
4460-
inUnterminatedMultiLineComment = message.key === Diagnostics.Asterisk_Slash_expected.key;
4461-
}
4462-
4463-
function processComment(start: number, end: number) {
4464-
// add Leading white spaces
4465-
addLeadingWhiteSpace(start, end);
4466-
4467-
// add the comment
4468-
addResult(end - start, TokenClass.Comment);
4469-
}
4470-
44714461
function processToken(): void {
44724462
var start = scanner.getTokenPos();
44734463
var end = scanner.getTextPos();
44744464

4475-
// add Leading white spaces
4476-
addLeadingWhiteSpace(start, end);
4477-
44784465
// add the token
44794466
addResult(end - start, classFromKind(token));
44804467

44814468
if (end >= text.length) {
44824469
// We're at the end.
4483-
if (inUnterminatedMultiLineComment) {
4484-
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
4485-
}
4486-
else if (token === SyntaxKind.StringLiteral) {
4470+
if (token === SyntaxKind.StringLiteral) {
4471+
// Check to see if we finished up on a multiline string literal.
44874472
var tokenText = scanner.getTokenText();
44884473
if (tokenText.length > 0 && tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.backslash) {
44894474
var quoteChar = tokenText.charCodeAt(0);
@@ -4492,18 +4477,18 @@ module ts {
44924477
: EndOfLineState.InSingleQuoteStringLiteral;
44934478
}
44944479
}
4480+
else if (token === SyntaxKind.MultiLineCommentTrivia) {
4481+
// Check to see if the multiline comment was unclosed.
4482+
// Raw text of the comment token; its last characters are inspected for a closing '*/'.
var tokenText = scanner.getTokenText();
4483+
if (!(tokenText.length > 3 && // need to avoid catching '/*/'
4484+
tokenText.charCodeAt(tokenText.length - 2) === CharacterCodes.asterisk &&
4485+
tokenText.charCodeAt(tokenText.length - 1) === CharacterCodes.slash)) {
4486+
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
4487+
}
4488+
}
44954489
}
44964490
}
44974491

4498-
function addLeadingWhiteSpace(start: number, end: number): void {
4499-
if (start > lastTokenOrCommentEnd) {
4500-
addResult(start - lastTokenOrCommentEnd, TokenClass.Whitespace);
4501-
}
4502-
4503-
// Remember the end of the last token
4504-
lastTokenOrCommentEnd = end;
4505-
}
4506-
45074492
function addResult(length: number, classification: TokenClass): void {
45084493
if (length > 0) {
45094494
// If this is the first classification we're adding to the list, then remove any
@@ -4596,6 +4581,11 @@ module ts {
45964581
return TokenClass.StringLiteral;
45974582
case SyntaxKind.RegularExpressionLiteral:
45984583
return TokenClass.RegExpLiteral;
4584+
case SyntaxKind.MultiLineCommentTrivia:
4585+
case SyntaxKind.SingleLineCommentTrivia:
4586+
return TokenClass.Comment;
4587+
case SyntaxKind.WhitespaceTrivia:
4588+
return TokenClass.Whitespace;
45994589
case SyntaxKind.Identifier:
46004590
default:
46014591
return TokenClass.Identifier;

tests/cases/unittests/services/colorization.ts

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ describe('Colorization', function () {
3636
}
3737
var finalEndOfLineState = classResult[classResult.length - 1];
3838

39-
assert.equal(position, code.length, "Expected accumilative length of all entries to match the length of the source. expected: " + code.length + ", but got: " + position);
39+
assert.equal(position, code.length, "Expected cumulative length of all entries to match the length of the source. expected: " + code.length + ", but got: " + position);
4040

4141
return {
4242
tuples: tuples,
@@ -84,8 +84,8 @@ describe('Colorization', function () {
8484
var actualEntry = getEntryAtPosistion(result, actualEntryPosition);
8585

8686
assert(actualEntry, "Could not find classification entry for '" + expectedEntry.value + "' at position: " + actualEntryPosition);
87-
assert.equal(actualEntry.length, expectedEntry.value.length, "Classification class does not match expected.");
88-
assert.equal(actualEntry.class, expectedEntry.class, "Classification class does not match expected.");
87+
assert.equal(actualEntry.class, expectedEntry.class, "Classification class does not match expected. Expected: " + ts.TokenClass[expectedEntry.class] + ", Actual: " + ts.TokenClass[actualEntry.class]);
88+
// Lengths are plain numbers, so report them directly rather than looking them up in the TokenClass enum.
assert.equal(actualEntry.length, expectedEntry.value.length, "Classification length does not match expected. Expected: " + expectedEntry.value.length + ", Actual: " + actualEntry.length);
8989
}
9090
}
9191
}
@@ -105,7 +105,7 @@ describe('Colorization', function () {
105105
punctuation(";"));
106106
});
107107

108-
it("classifies correctelly a comment after a divide operator", function () {
108+
it("correctly classifies a comment after a divide operator", function () {
109109
test("1 / 2 // comment",
110110
ts.EndOfLineState.Start,
111111
numberLiteral("1"),
@@ -115,7 +115,7 @@ describe('Colorization', function () {
115115
comment("// comment"));
116116
});
117117

118-
it("classifies correctelly a literal after a divide operator", function () {
118+
it("correctly classifies a literal after a divide operator", function () {
119119
test("1 / 2, 3 / 4",
120120
ts.EndOfLineState.Start,
121121
numberLiteral("1"),
@@ -127,48 +127,76 @@ describe('Colorization', function () {
127127
operator(","));
128128
});
129129

130-
it("classifies correctelly an unterminated multi-line string", function () {
130+
it("correctly classifies an unterminated multi-line string", function () {
131131
test("'line1\\",
132132
ts.EndOfLineState.Start,
133133
stringLiteral("'line1\\"),
134134
finalEndOfLineState(ts.EndOfLineState.InSingleQuoteStringLiteral));
135135
});
136136

137-
it("classifies correctelly the second line of an unterminated multi-line string", function () {
137+
it("correctly classifies the second line of an unterminated multi-line string", function () {
138138
test("\\",
139139
ts.EndOfLineState.InDoubleQuoteStringLiteral,
140140
stringLiteral("\\"),
141141
finalEndOfLineState(ts.EndOfLineState.InDoubleQuoteStringLiteral));
142142
});
143143

144-
it("classifies correctelly the last line of a multi-line string", function () {
144+
it("correctly classifies the last line of a multi-line string", function () {
145145
test("'",
146146
ts.EndOfLineState.InSingleQuoteStringLiteral,
147147
stringLiteral("'"),
148148
finalEndOfLineState(ts.EndOfLineState.Start));
149149
});
150150

151-
it("classifies correctelly an unterminated multiline comment", function () {
151+
it("correctly classifies an unterminated multiline comment", function () {
152152
test("/*",
153153
ts.EndOfLineState.Start,
154154
comment("/*"),
155155
finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
156156
});
157157

158-
it("classifies correctelly an unterminated multiline comment with trailing space", function () {
158+
it("correctly classifies the termination of a multiline comment", function () {
159+
test(" */ ",
160+
ts.EndOfLineState.InMultiLineCommentTrivia,
161+
comment(" */"),
162+
finalEndOfLineState(ts.EndOfLineState.Start));
163+
});
164+
165+
it("correctly classifies the continuation of a multiline comment", function () {
166+
test("LOREM IPSUM DOLOR ",
167+
ts.EndOfLineState.InMultiLineCommentTrivia,
168+
comment("LOREM IPSUM DOLOR "),
169+
finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
170+
});
171+
172+
it("correctly classifies an unterminated multiline comment on a line ending in '/*/'", function () {
173+
test(" /*/",
174+
ts.EndOfLineState.Start,
175+
comment("/*/"),
176+
finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
177+
});
178+
179+
it("correctly classifies an unterminated multiline comment with trailing space", function () {
159180
test("/* ",
160181
ts.EndOfLineState.Start,
161182
comment("/* "),
162183
finalEndOfLineState(ts.EndOfLineState.InMultiLineCommentTrivia));
163184
});
164185

165-
it("classifies correctelly a keyword after a dot", function () {
186+
it("correctly classifies a keyword after a dot", function () {
166187
test("a.var",
167188
ts.EndOfLineState.Start,
168189
identifier("var"));
169190
});
170191

171-
it("classifies keyword after a dot on previous line", function () {
192+
it("classifies a property access with whitespace around the dot", function () {
193+
test(" x .\tfoo ()",
194+
ts.EndOfLineState.Start,
195+
identifier("x"),
196+
identifier("foo"));
197+
});
198+
199+
it("classifies a keyword after a dot on previous line", function () {
172200
test("var",
173201
ts.EndOfLineState.Start,
174202
keyword("var"),

0 commit comments

Comments
 (0)