Skip to content

Commit 089fd4d

Browse files
Add a common, dense format for classification operations to lower the cost of processing on the host side.
We now just return an array of triples to represent classified results. Each triple contains: 1) the start of the classification, 2) the length of the classification, and 3) the type of the classification. We also encode this as a comma-separated string when passing it over to the managed side (as opposed to a JSON array). That way we don't pay such a high JSON parsing cost. Instead, we can just do a string.split(",") on the encoded triples and process each element ourselves.
1 parent c35f348 commit 089fd4d

File tree

5 files changed

+232
-115
lines changed

5 files changed

+232
-115
lines changed

src/harness/harnessLanguageService.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,9 @@ module Harness.LanguageService {
235235
class ClassifierShimProxy implements ts.Classifier {
236236
constructor(private shim: ts.ClassifierShim) {
237237
}
238+
getLexicalClassifications2(text: string, lexState: ts.EndOfLineState, classifyKeywordsInGenerics?: boolean): ts.Classifications {
239+
throw new Error("NYI");
240+
}
238241
getClassificationsForLine(text: string, lexState: ts.EndOfLineState, classifyKeywordsInGenerics?: boolean): ts.ClassificationResult {
239242
var result = this.shim.getClassificationsForLine(text, lexState, classifyKeywordsInGenerics).split('\n');
240243
var entries: ts.ClassificationInfo[] = [];
@@ -300,10 +303,10 @@ module Harness.LanguageService {
300303
getSemanticClassifications(fileName: string, span: ts.TextSpan): ts.ClassifiedSpan[] {
301304
return unwrapJSONCallResult(this.shim.getSemanticClassifications(fileName, span.start, span.length));
302305
}
303-
getSyntacticClassifications2(fileName: string, span: ts.TextSpan): number[] {
306+
getSyntacticClassifications2(fileName: string, span: ts.TextSpan): ts.Classifications {
304307
return unwrapJSONCallResult(this.shim.getSyntacticClassifications2(fileName, span.start, span.length));
305308
}
306-
getSemanticClassifications2(fileName: string, span: ts.TextSpan): number[] {
309+
getSemanticClassifications2(fileName: string, span: ts.TextSpan): ts.Classifications {
307310
return unwrapJSONCallResult(this.shim.getSemanticClassifications2(fileName, span.start, span.length));
308311
}
309312
getCompletionsAtPosition(fileName: string, position: number): ts.CompletionInfo {

src/server/client.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,11 +533,11 @@ module ts.server {
533533
throw new Error("Not Implemented Yet.");
534534
}
535535

536-
getSyntacticClassifications2(fileName: string, span: TextSpan): number[] {
536+
getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications {
537537
throw new Error("Not Implemented Yet.");
538538
}
539539

540-
getSemanticClassifications2(fileName: string, span: TextSpan): number[] {
540+
getSemanticClassifications2(fileName: string, span: TextSpan): Classifications {
541541
throw new Error("Not Implemented Yet.");
542542
}
543543

src/services/services.ts

Lines changed: 140 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -972,12 +972,19 @@ module ts {
972972
getSemanticDiagnostics(fileName: string): Diagnostic[];
973973
getCompilerOptionsDiagnostics(): Diagnostic[];
974974

975+
/**
976+
* @deprecated Use getSyntacticClassifications2 instead.
977+
*/
975978
getSyntacticClassifications(fileName: string, span: TextSpan): ClassifiedSpan[];
979+
980+
/**
981+
* @deprecated Use getSemanticClassifications2 instead.
982+
*/
976983
getSemanticClassifications(fileName: string, span: TextSpan): ClassifiedSpan[];
977984

978985
// Encoded as triples of [start, length, ClassificationType].
979-
getSyntacticClassifications2(fileName: string, span: TextSpan): number[];
980-
getSemanticClassifications2(fileName: string, span: TextSpan): number[];
986+
getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications;
987+
getSemanticClassifications2(fileName: string, span: TextSpan): Classifications;
981988

982989
getCompletionsAtPosition(fileName: string, position: number): CompletionInfo;
983990
getCompletionEntryDetails(fileName: string, position: number, entryName: string): CompletionEntryDetails;
@@ -1022,6 +1029,11 @@ module ts {
10221029
dispose(): void;
10231030
}
10241031

1032+
export interface Classifications {
1033+
spans: number[],
1034+
endOfLineState: EndOfLineState
1035+
}
1036+
10251037
export interface ClassifiedSpan {
10261038
textSpan: TextSpan;
10271039
classificationType: string; // ClassificationTypeNames
@@ -1265,7 +1277,7 @@ module ts {
12651277
}
12661278

12671279
export const enum EndOfLineState {
1268-
Start,
1280+
None,
12691281
InMultiLineCommentTrivia,
12701282
InSingleQuoteStringLiteral,
12711283
InDoubleQuoteStringLiteral,
@@ -1315,8 +1327,10 @@ module ts {
13151327
* classifications which may be incorrectly categorized will be given
13161328
* back as Identifiers in order to allow the syntactic classifier to
13171329
* subsume the classification.
1330+
* @deprecated Use getLexicalClassifications2 instead.
13181331
*/
13191332
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
1333+
getLexicalClassifications2(text: string, endOfLineState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications;
13201334
}
13211335

13221336
/**
@@ -1501,15 +1515,17 @@ module ts {
15011515
numericLiteral = 4,
15021516
operator = 5,
15031517
stringLiteral = 6,
1504-
whiteSpace = 7,
1505-
text = 8,
1506-
punctuation = 9,
1507-
className = 10,
1508-
enumName = 11,
1509-
interfaceName = 12,
1510-
moduleName = 13,
1511-
typeParameterName = 14,
1512-
typeAlias = 15,
1518+
regularExpressionLiteral = 7,
1519+
whiteSpace = 8,
1520+
text = 9,
1521+
punctuation = 10,
1522+
className = 11,
1523+
enumName = 12,
1524+
interfaceName = 13,
1525+
moduleName = 14,
1526+
typeParameterName = 15,
1527+
typeAlias = 16,
1528+
parameterName = 17
15131529
}
15141530

15151531
/// Language Service
@@ -5830,7 +5846,7 @@ module ts {
58305846
return convertClassifications(getSemanticClassifications2(fileName, span));
58315847
}
58325848

5833-
function getSemanticClassifications2(fileName: string, span: TextSpan): number[] {
5849+
function getSemanticClassifications2(fileName: string, span: TextSpan): Classifications {
58345850
synchronizeHostData();
58355851

58365852
let sourceFile = getValidSourceFile(fileName);
@@ -5839,7 +5855,7 @@ module ts {
58395855
let result: number[] = [];
58405856
processNode(sourceFile);
58415857

5842-
return result;
5858+
return { spans: result, endOfLineState: EndOfLineState.None };
58435859

58445860
function pushClassification(start: number, length: number, type: ClassificationType) {
58455861
result.push(start);
@@ -5927,8 +5943,9 @@ module ts {
59275943
}
59285944
}
59295945

5930-
function convertClassifications(dense: number[]): ClassifiedSpan[] {
5931-
Debug.assert(dense.length % 3 === 0);
5946+
function convertClassifications(classifications: Classifications): ClassifiedSpan[] {
5947+
Debug.assert(classifications.spans.length % 3 === 0);
5948+
let dense = classifications.spans;
59325949
let result: ClassifiedSpan[] = [];
59335950
for (let i = 0, n = dense.length; i < n; i += 3) {
59345951
result.push({
@@ -5944,7 +5961,7 @@ module ts {
59445961
return convertClassifications(getSyntacticClassifications2(fileName, span));
59455962
}
59465963

5947-
function getSyntacticClassifications2(fileName: string, span: TextSpan): number[] {
5964+
function getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications {
59485965
// doesn't use compiler - no need to synchronize with host
59495966
let sourceFile = syntaxTreeCache.getCurrentSourceFile(fileName);
59505967

@@ -5955,7 +5972,7 @@ module ts {
59555972
let result: number[] = [];
59565973
processElement(sourceFile);
59575974

5958-
return result;
5975+
return { spans: result, endOfLineState: EndOfLineState.None };
59595976

59605977
function pushClassification(start: number, length: number, type: ClassificationType) {
59615978
result.push(start);
@@ -6603,10 +6620,67 @@ module ts {
66036620
// if there are more cases we want the classifier to be better at.
66046621
return true;
66056622
}
6606-
6623+
6624+
function convertClassifications(classifications: Classifications, text: string): ClassificationResult {
6625+
var entries: ClassificationInfo[] = [];
6626+
let dense = classifications.spans;
6627+
let lastEnd = 0;
6628+
6629+
for (let i = 0, n = dense.length; i < n; i += 3) {
6630+
let start = dense[i];
6631+
let length = dense[i + 1];
6632+
let type = <ClassificationType>dense[i + 2];
6633+
6634+
// Make a whitespace entry between the last item and this one.
6635+
if (lastEnd >= 0) {
6636+
let whitespaceLength = start - lastEnd;
6637+
if (whitespaceLength > 0) {
6638+
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
6639+
}
6640+
}
6641+
6642+
entries.push({ length, classification: convertClassification(type) });
6643+
lastEnd = start + length;
6644+
}
6645+
6646+
let whitespaceLength = text.length - lastEnd;
6647+
if (whitespaceLength > 0) {
6648+
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
6649+
}
6650+
6651+
return { entries, finalLexState: classifications.endOfLineState };
6652+
}
6653+
6654+
function convertClassification(type: ClassificationType): TokenClass {
6655+
switch (type) {
6656+
case ClassificationType.comment: return TokenClass.Comment;
6657+
case ClassificationType.keyword: return TokenClass.Keyword;
6658+
case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
6659+
case ClassificationType.operator: return TokenClass.Operator;
6660+
case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
6661+
case ClassificationType.whiteSpace: return TokenClass.Whitespace;
6662+
case ClassificationType.punctuation: return TokenClass.Punctuation;
6663+
case ClassificationType.identifier:
6664+
case ClassificationType.className:
6665+
case ClassificationType.enumName:
6666+
case ClassificationType.interfaceName:
6667+
case ClassificationType.moduleName:
6668+
case ClassificationType.typeParameterName:
6669+
case ClassificationType.typeAlias:
6670+
case ClassificationType.text:
6671+
case ClassificationType.parameterName:
6672+
default:
6673+
return TokenClass.Identifier;
6674+
}
6675+
}
6676+
6677+
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
6678+
return convertClassifications(getLexicalClassifications2(text, lexState, syntacticClassifierAbsent), text);
6679+
}
6680+
66076681
// If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
66086682
// we will be more conservative in order to avoid conflicting with the syntactic classifier.
6609-
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
6683+
function getLexicalClassifications2(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
66106684
let offset = 0;
66116685
let token = SyntaxKind.Unknown;
66126686
let lastNonTriviaToken = SyntaxKind.Unknown;
@@ -6649,9 +6723,9 @@ module ts {
66496723

66506724
scanner.setText(text);
66516725

6652-
let result: ClassificationResult = {
6653-
finalLexState: EndOfLineState.Start,
6654-
entries: []
6726+
let result: Classifications = {
6727+
endOfLineState: EndOfLineState.None,
6728+
spans: []
66556729
};
66566730

66576731
// We can run into an unfortunate interaction between the lexical and syntactic classifier
@@ -6764,7 +6838,7 @@ module ts {
67646838
let start = scanner.getTokenPos();
67656839
let end = scanner.getTextPos();
67666840

6767-
addResult(end - start, classFromKind(token));
6841+
addResult(start, end, classFromKind(token));
67686842

67696843
if (end >= text.length) {
67706844
if (token === SyntaxKind.StringLiteral) {
@@ -6781,7 +6855,7 @@ module ts {
67816855
// If we have an odd number of backslashes, then the multiline string is unclosed
67826856
if (numBackslashes & 1) {
67836857
let quoteChar = tokenText.charCodeAt(0);
6784-
result.finalLexState = quoteChar === CharacterCodes.doubleQuote
6858+
result.endOfLineState = quoteChar === CharacterCodes.doubleQuote
67856859
? EndOfLineState.InDoubleQuoteStringLiteral
67866860
: EndOfLineState.InSingleQuoteStringLiteral;
67876861
}
@@ -6790,37 +6864,51 @@ module ts {
67906864
else if (token === SyntaxKind.MultiLineCommentTrivia) {
67916865
// Check to see if the multiline comment was unclosed.
67926866
if (scanner.isUnterminated()) {
6793-
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
6867+
result.endOfLineState = EndOfLineState.InMultiLineCommentTrivia;
67946868
}
67956869
}
67966870
else if (isTemplateLiteralKind(token)) {
67976871
if (scanner.isUnterminated()) {
67986872
if (token === SyntaxKind.TemplateTail) {
6799-
result.finalLexState = EndOfLineState.InTemplateMiddleOrTail;
6873+
result.endOfLineState = EndOfLineState.InTemplateMiddleOrTail;
68006874
}
68016875
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
6802-
result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
6876+
result.endOfLineState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
68036877
}
68046878
else {
68056879
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
68066880
}
68076881
}
68086882
}
68096883
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
6810-
result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition;
6884+
result.endOfLineState = EndOfLineState.InTemplateSubstitutionPosition;
68116885
}
68126886
}
68136887
}
68146888

6815-
function addResult(length: number, classification: TokenClass): void {
6816-
if (length > 0) {
6817-
// If this is the first classification we're adding to the list, then remove any
6818-
// offset we have if we were continuing a construct from the previous line.
6819-
if (result.entries.length === 0) {
6820-
length -= offset;
6821-
}
6889+
function addResult(start: number, end: number, classification: ClassificationType): void {
6890+
if (classification === ClassificationType.whiteSpace) {
6891+
// Don't bother with whitespace classifications. They're not needed.
6892+
return;
6893+
}
6894+
6895+
if (start === 0 && offset > 0) {
6896+
// We're classifying the first token, and this was a case where we prepended
6897+
// text. We should consider the start of this token to be at the start of
6898+
// the original text.
6899+
start += offset;
6900+
}
6901+
6902+
// All our tokens are in relation to the augmented text. Move them back to be
6903+
// relative to the original text.
6904+
start -= offset;
6905+
end -= offset;
6906+
let length = end - start;
68226907

6823-
result.entries.push({ length: length, classification: classification });
6908+
if (length > 0) {
6909+
result.spans.push(start);
6910+
result.spans.push(length);
6911+
result.spans.push(classification);
68246912
}
68256913
}
68266914
}
@@ -6887,41 +6975,44 @@ module ts {
68876975
return token >= SyntaxKind.FirstKeyword && token <= SyntaxKind.LastKeyword;
68886976
}
68896977

6890-
function classFromKind(token: SyntaxKind) {
6978+
function classFromKind(token: SyntaxKind): ClassificationType {
68916979
if (isKeyword(token)) {
6892-
return TokenClass.Keyword;
6980+
return ClassificationType.keyword;
68936981
}
68946982
else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
6895-
return TokenClass.Operator;
6983+
return ClassificationType.operator;
68966984
}
68976985
else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
6898-
return TokenClass.Punctuation;
6986+
return ClassificationType.punctuation;
68996987
}
69006988

69016989
switch (token) {
69026990
case SyntaxKind.NumericLiteral:
6903-
return TokenClass.NumberLiteral;
6991+
return ClassificationType.numericLiteral;
69046992
case SyntaxKind.StringLiteral:
6905-
return TokenClass.StringLiteral;
6993+
return ClassificationType.stringLiteral;
69066994
case SyntaxKind.RegularExpressionLiteral:
6907-
return TokenClass.RegExpLiteral;
6995+
return ClassificationType.regularExpressionLiteral;
69086996
case SyntaxKind.ConflictMarkerTrivia:
69096997
case SyntaxKind.MultiLineCommentTrivia:
69106998
case SyntaxKind.SingleLineCommentTrivia:
6911-
return TokenClass.Comment;
6999+
return ClassificationType.comment;
69127000
case SyntaxKind.WhitespaceTrivia:
69137001
case SyntaxKind.NewLineTrivia:
6914-
return TokenClass.Whitespace;
7002+
return ClassificationType.whiteSpace;
69157003
case SyntaxKind.Identifier:
69167004
default:
69177005
if (isTemplateLiteralKind(token)) {
6918-
return TokenClass.StringLiteral;
7006+
return ClassificationType.stringLiteral;
69197007
}
6920-
return TokenClass.Identifier;
7008+
return ClassificationType.identifier;
69217009
}
69227010
}
69237011

6924-
return { getClassificationsForLine };
7012+
return {
7013+
getClassificationsForLine,
7014+
getLexicalClassifications2
7015+
};
69257016
}
69267017

69277018
/// getDefaultLibraryFilePath

0 commit comments

Comments
 (0)