Skip to content

Commit 591b352

Browse files
authored
Add semantic token LSP support for Quarto files (#868)
* First draft of semantic token support for Quarto files * Actually, let's put this in the `quarto-utils` package * Better to use `virtualDocForLanguage()` for this provider * Add some tests * Update CHANGELOG
1 parent 9838c73 commit 591b352

File tree

8 files changed

+514
-9
lines changed

8 files changed

+514
-9
lines changed

apps/lsp/src/middleware.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* middleware.ts
33
*
4-
* Copyright (C) 2023 by Posit Software, PBC
4+
* Copyright (C) 2023-2025 by Posit Software, PBC
55
* Copyright (c) Microsoft Corporation. All rights reserved.
66
*
77
* Unless you have received this program directly from Posit Software pursuant
@@ -14,7 +14,8 @@
1414
*
1515
*/
1616

17-
import { Connection, ServerCapabilities } from "vscode-languageserver"
17+
import { Connection, ServerCapabilities } from "vscode-languageserver";
18+
import { QUARTO_SEMANTIC_TOKEN_LEGEND } from "quarto-utils";
1819

1920

2021
// capabilities provided just so we can intercept them w/ middleware on the client
@@ -28,8 +29,12 @@ export function middlewareCapabilities(): ServerCapabilities {
2829
},
2930
documentFormattingProvider: true,
3031
documentRangeFormattingProvider: true,
31-
definitionProvider: true
32-
}
32+
definitionProvider: true,
33+
semanticTokensProvider: {
34+
legend: QUARTO_SEMANTIC_TOKEN_LEGEND,
35+
full: true
36+
}
37+
};
3338
};
3439

3540
// methods provided just so we can intercept them w/ middleware on the client
@@ -51,4 +56,8 @@ export function middlewareRegister(connection: Connection) {
5156
return null;
5257
});
5358

59+
connection.languages.semanticTokens.on(async () => {
60+
return { data: [] };
61+
});
62+
5463
}

apps/quarto-utils/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
*/
1515

1616
export * from './r-utils';
17+
export * from './semantic-tokens-legend';
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* semantic-tokens-legend.ts
3+
*
4+
* Copyright (C) 2025 by Posit Software, PBC
5+
*
6+
* Unless you have received this program directly from Posit Software pursuant
7+
* to the terms of a commercial license agreement with Posit Software, then
8+
* this program is licensed to you under the terms of version 3 of the
9+
* GNU Affero General Public License. This program is distributed WITHOUT
10+
* ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11+
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12+
* AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13+
*
14+
*/
15+
16+
/**
 * Universal semantic token legend for Quarto documents.
 *
 * Built from the standard VS Code semantic token types and modifiers:
 * https://code.visualstudio.com/api/language-extensions/semantic-highlight-guide#standard-token-types-and-modifiers
 *
 * Shared by the LSP server (which advertises it in its capabilities) and the
 * VS Code extension (which remaps tokens coming from embedded language
 * providers onto it).
 *
 * Entries are looked up by array position, so the order of both lists is
 * significant: append new entries rather than reordering existing ones.
 */
export const QUARTO_SEMANTIC_TOKEN_LEGEND = {
  tokenTypes: [
    // Standard token types
    'namespace',
    'class',
    'enum',
    'interface',
    'struct',
    'typeParameter',
    'type',
    'parameter',
    'variable',
    'property',
    'enumMember',
    'decorator',
    'event',
    'function',
    'method',
    'macro',
    'label',
    'comment',
    'string',
    'keyword',
    'number',
    'regexp',
    'operator',
    // Commonly used by language servers, widely supported by themes
    'module'
  ],
  tokenModifiers: [
    'declaration',
    'definition',
    'readonly',
    'static',
    'deprecated',
    'abstract',
    'async',
    'modification',
    'documentation',
    'defaultLibrary'
  ]
};

apps/vscode/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- Added a new setting `quarto.useBundledQuartoInPositron` to prefer the Quarto CLI bundled with Positron when available. This setting has precedence _between_ `quarto.path` and `quarto.usePipQuarto`, and has no effect outside of Positron (<https://github.com/quarto-dev/quarto/pull/841>).
77
- Visual Editor: uses a text box for alternative text and captions in the callout, image, and table interfaces. (<https://github.com/quarto-dev/quarto/pull/644>)
88
- Fixed a bug where previewing showed "Not Found" on Quarto files with spaces in the name in subfolders of projects (<https://github.com/quarto-dev/quarto/pull/853>).
9+
- Added support for semantic highlighting in Quarto documents when using a language server that supports it (for example, Pylance) (<https://github.com/quarto-dev/quarto/pull/868>).
910

1011
## 1.126.0 (Release on 2025-10-08)
1112

apps/vscode/src/lsp/client.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* client.ts
33
*
4-
* Copyright (C) 2022 by Posit Software, PBC
4+
* Copyright (C) 2022-2025 by Posit Software, PBC
55
*
66
* Unless you have received this program directly from Posit Software pursuant
77
* to the terms of a commercial license agreement with Posit Software, then
@@ -64,6 +64,7 @@ import {
6464
embeddedDocumentFormattingProvider,
6565
embeddedDocumentRangeFormattingProvider,
6666
} from "../providers/format";
67+
import { embeddedSemanticTokensProvider } from "../providers/semantic-tokens";
6768
import { getHover, getSignatureHelpHover } from "../core/hover";
6869
import { imageHover } from "../providers/hover-image";
6970
import { LspInitializationOptions, QuartoContext } from "quarto-core";
@@ -109,6 +110,7 @@ export async function activateLsp(
109110
provideDocumentRangeFormattingEdits: embeddedDocumentRangeFormattingProvider(
110111
engine
111112
),
113+
provideDocumentSemanticTokens: embeddedSemanticTokensProvider(engine),
112114
};
113115
if (config.get("cells.hoverHelp.enabled", true)) {
114116
middleware.provideHover = embeddedHoverProvider(engine);
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
/*
2+
* semantic-tokens.ts
3+
*
4+
* Copyright (C) 2025 by Posit Software, PBC
5+
*
6+
* Unless you have received this program directly from Posit Software pursuant
7+
* to the terms of a commercial license agreement with Posit Software, then
8+
* this program is licensed to you under the terms of version 3 of the
9+
* GNU Affero General Public License. This program is distributed WITHOUT
10+
* ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11+
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12+
* AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13+
*
14+
*/
15+
16+
import {
17+
CancellationToken,
18+
commands,
19+
Position,
20+
SemanticTokens,
21+
SemanticTokensBuilder,
22+
TextDocument,
23+
Uri,
24+
window,
25+
} from "vscode";
26+
import { DocumentSemanticsTokensSignature } from "vscode-languageclient";
27+
import { MarkdownEngine } from "../markdown/engine";
28+
import { isQuartoDoc } from "../core/doc";
29+
import {
30+
unadjustedSemanticTokens,
31+
virtualDocForLanguage,
32+
withVirtualDocUri,
33+
languageAtPosition,
34+
mainLanguage
35+
} from "../vdoc/vdoc";
36+
import { EmbeddedLanguage } from "../vdoc/languages";
37+
import { QUARTO_SEMANTIC_TOKEN_LEGEND } from "quarto-utils";
38+
39+
/**
 * Decode semantic tokens from delta-encoded format to absolute positions.
 *
 * Semantic tokens are encoded as a flat array of 5-tuples:
 * [deltaLine, deltaStartChar, length, tokenType, tokenModifiers, ...].
 * `deltaLine` is relative to the previous token's line; `deltaStartChar` is
 * relative to the previous token's start character when both tokens are on
 * the same line, and absolute otherwise.
 *
 * @param tokens Delta-encoded tokens; only `tokens.data` is read.
 * @returns One entry per complete 5-tuple, in input order, with absolute
 *   `line` and `startChar`. A trailing partial tuple (data length not a
 *   multiple of 5) is ignored rather than decoded into a token with
 *   `undefined`/`NaN` fields.
 */
export function decodeSemanticTokens(tokens: SemanticTokens): Array<{
  line: number;
  startChar: number;
  length: number;
  tokenType: number;
  tokenModifiers: number;
}> {
  const FIELDS_PER_TOKEN = 5;
  const { data } = tokens;
  const decoded: ReturnType<typeof decodeSemanticTokens> = [];

  let currentLine = 0;
  let currentChar = 0;

  // Only walk complete tuples: stopping at `i + 5 <= length` prevents reading
  // past the end of malformed data.
  for (let i = 0; i + FIELDS_PER_TOKEN <= data.length; i += FIELDS_PER_TOKEN) {
    const deltaLine = data[i];
    const deltaStartChar = data[i + 1];

    // Update absolute position: a line change resets the character offset.
    currentLine += deltaLine;
    currentChar = deltaLine > 0 ? deltaStartChar : currentChar + deltaStartChar;

    decoded.push({
      line: currentLine,
      startChar: currentChar,
      length: data[i + 2],
      tokenType: data[i + 3],
      tokenModifiers: data[i + 4]
    });
  }

  return decoded;
}
89+
90+
/**
 * Encode semantic tokens from absolute positions to delta-encoded format.
 *
 * Delegates the delta encoding to VS Code's `SemanticTokensBuilder`: every
 * token is pushed in input order and the builder produces the final result.
 *
 * @param tokens Tokens with absolute `line` / `startChar` positions.
 * @param resultId Optional result id forwarded to `SemanticTokensBuilder.build`.
 * @returns The `SemanticTokens` produced by the builder.
 */
export function encodeSemanticTokens(
  tokens: Array<{
    line: number;
    startChar: number;
    length: number;
    tokenType: number;
    tokenModifiers: number;
  }>,
  resultId?: string
): SemanticTokens {
  const tokensBuilder = new SemanticTokensBuilder();

  for (const { line, startChar, length, tokenType, tokenModifiers } of tokens) {
    tokensBuilder.push(line, startChar, length, tokenType, tokenModifiers);
  }

  return tokensBuilder.build(resultId);
}
119+
120+
/**
 * Build a lookup from source legend indices to target legend indices.
 *
 * For every name in `sourceNames`, its position is mapped to the position of
 * the first identical name in `targetNames`. Names that do not occur in
 * `targetNames` get no entry in the map.
 *
 * @param sourceNames Type or modifier names of the source legend.
 * @param targetNames Type or modifier names of the target legend.
 * @returns Map keyed by source index with the matching target index as value.
 */
function buildLegendMap(
  sourceNames: string[],
  targetNames: string[]
): Map<number, number> {
  const indexMap = new Map<number, number>();

  for (const [sourceIndex, name] of sourceNames.entries()) {
    const targetIndex = targetNames.indexOf(name);
    if (targetIndex !== -1) {
      indexMap.set(sourceIndex, targetIndex);
    }
  }

  return indexMap;
}
138+
139+
/**
 * Remap a modifier bitfield from source legend indices to target legend indices.
 *
 * Bit `n` of `sourceModifiers` means "the modifier at index `n` of the source
 * legend is set". For every `[sourceBit, targetBit]` entry in `modifierMap`
 * whose source bit is set, the corresponding target bit is set in the result.
 * Source bits without an entry in the map are dropped.
 *
 * @param sourceModifiers Modifier bitfield expressed in source legend indices.
 * @param modifierMap Source modifier index -> target modifier index.
 * @returns The bitfield expressed in target legend indices, as an unsigned
 *   32-bit value.
 */
function remapModifierBitfield(
  sourceModifiers: number,
  modifierMap: Map<number, number>
): number {
  const BITFIELD_WIDTH = 32;
  let targetModifiers = 0;

  // Check each mapped bit in the source bitfield
  for (const [sourceBit, targetBit] of modifierMap) {
    // JavaScript shift counts are taken modulo 32 (`1 << 32 === 1`), so an
    // index outside 0..31 would alias a low bit and report a modifier that
    // is not actually set. Such indices cannot be represented; skip them.
    if (
      sourceBit < 0 || sourceBit >= BITFIELD_WIDTH ||
      targetBit < 0 || targetBit >= BITFIELD_WIDTH
    ) {
      continue;
    }

    if (sourceModifiers & (1 << sourceBit)) {
      targetModifiers |= (1 << targetBit);
    }
  }

  // Bitwise operators yield signed 32-bit integers; `>>> 0` keeps the result
  // non-negative when bit 31 is set.
  return targetModifiers >>> 0;
}
157+
158+
/**
 * Translate token type and modifier indices from one legend to another.
 *
 * Tokens whose type name does not exist in `targetLegend` are removed;
 * modifiers that do not exist in `targetLegend` are cleared from the
 * bitfield. Token positions and lengths are left untouched, and the
 * `resultId` of the input is carried over to the result.
 *
 * @param tokens Delta-encoded tokens whose indices refer to `sourceLegend`.
 * @param sourceLegend Legend the incoming indices refer to.
 * @param targetLegend Legend the returned indices refer to.
 * @returns Re-encoded tokens whose indices refer to `targetLegend`.
 */
export function remapTokenIndices(
  tokens: SemanticTokens,
  sourceLegend: { tokenTypes: string[]; tokenModifiers: string[]; },
  targetLegend: { tokenTypes: string[]; tokenModifiers: string[]; }
): SemanticTokens {
  // Index translation tables, computed once per call
  const typeIndexMap = buildLegendMap(sourceLegend.tokenTypes, targetLegend.tokenTypes);
  const modifierBitMap = buildLegendMap(sourceLegend.tokenModifiers, targetLegend.tokenModifiers);

  const remapped: Parameters<typeof encodeSemanticTokens>[0] = [];
  for (const decodedToken of decodeSemanticTokens(tokens)) {
    const targetType = typeIndexMap.get(decodedToken.tokenType);
    if (targetType === undefined) {
      // Token type unknown to the target legend: drop the token
      continue;
    }

    remapped.push({
      ...decodedToken,
      tokenType: targetType,
      tokenModifiers: remapModifierBitfield(decodedToken.tokenModifiers, modifierBitMap)
    });
  }

  return encodeSemanticTokens(remapped, tokens.resultId);
}
183+
184+
/**
 * Create the `provideDocumentSemanticTokens` middleware for Quarto documents.
 *
 * The returned handler delegates to `next` unless `document` is a Quarto
 * document shown in the active text editor. Otherwise it:
 *
 * 1. picks the embedded language on the cursor's line, falling back to the
 *    document's main language (delegating to `next` if neither exists);
 * 2. builds a virtual document for that language and asks VS Code for the
 *    semantic tokens and legend of that virtual document;
 * 3. remaps the token indices onto `QUARTO_SEMANTIC_TOKEN_LEGEND` (when a
 *    legend was returned) and converts the positions back with
 *    `unadjustedSemanticTokens`.
 *
 * @param engine Markdown engine used to parse the document.
 * @returns A handler resolving to the tokens for the real document. An empty
 *   or missing result from the embedded provider is returned as-is, and
 *   `undefined` is returned when querying or remapping throws.
 */
export function embeddedSemanticTokensProvider(engine: MarkdownEngine) {
  return async (
    document: TextDocument,
    token: CancellationToken,
    next: DocumentSemanticsTokensSignature
  ): Promise<SemanticTokens | null | undefined> => {
    // Only handle Quarto documents
    if (!isQuartoDoc(document, true)) {
      return await next(document, token);
    }

    // The cursor position is needed below, so only the active document is handled
    const editor = window.activeTextEditor;
    if (!editor || editor.document.uri.toString() !== document.uri.toString()) {
      // Not the active document, delegate to default
      return await next(document, token);
    }

    // Parse the document to get all markdown tokens
    const markdownTokens = engine.parse(document);

    // Language at the start of the cursor's line, otherwise the main language
    const cursorLineStart = new Position(editor.selection.active.line, 0);
    const language =
      languageAtPosition(markdownTokens, cursorLineStart) || mainLanguage(markdownTokens);
    if (!language) {
      // No language found, delegate to default
      return await next(document, token);
    }

    // Create virtual doc for all blocks of this language
    const vdoc = virtualDocForLanguage(document, markdownTokens, language);

    return await withVirtualDocUri(vdoc, document.uri, "semanticTokens", async (uri: Uri) => {
      try {
        // Get the legend from the embedded language provider
        const legend = await commands.executeCommand<
          { tokenTypes: string[]; tokenModifiers: string[] } | undefined
        >(
          "vscode.provideDocumentSemanticTokensLegend",
          uri
        );

        const embeddedTokens = await commands.executeCommand<SemanticTokens>(
          "vscode.provideDocumentSemanticTokens",
          uri
        );

        // Nothing to remap or adjust: hand back whatever the provider returned
        if (!embeddedTokens || embeddedTokens.data.length === 0) {
          return embeddedTokens;
        }

        // Remap token indices from embedded provider's legend to our universal legend;
        // without a legend the indices are passed through unchanged
        const remappedTokens = legend
          ? remapTokenIndices(embeddedTokens, legend, QUARTO_SEMANTIC_TOKEN_LEGEND)
          : embeddedTokens;

        // Adjust token positions from virtual doc to real doc coordinates
        return unadjustedSemanticTokens(vdoc.language, remappedTokens);
      } catch {
        // Best effort: any failure while querying or remapping yields "no tokens"
        return undefined;
      }
    });
  };
}

0 commit comments

Comments
 (0)