-
Notifications
You must be signed in to change notification settings - Fork 47
Add semantic token LSP support for Quarto files #868
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
d082ba8
347d8f9
3fc179b
739e525
05d70a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,3 +14,4 @@ | |
| */ | ||
|
|
||
| export * from './r-utils'; | ||
| export * from './semantic-tokens-legend'; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| /* | ||
| * semantic-tokens-legend.ts | ||
| * | ||
| * Copyright (C) 2025 by Posit Software, PBC | ||
| * | ||
| * Unless you have received this program directly from Posit Software pursuant | ||
| * to the terms of a commercial license agreement with Posit Software, then | ||
| * this program is licensed to you under the terms of version 3 of the | ||
| * GNU Affero General Public License. This program is distributed WITHOUT | ||
| * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT, | ||
| * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the | ||
| * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details. | ||
| * | ||
| */ | ||
|
|
||
/**
 * Shared semantic token legend for Quarto documents.
 *
 * The names follow VS Code's standard semantic token types and modifiers:
 * https://code.visualstudio.com/api/language-extensions/semantic-highlight-guide#standard-token-types-and-modifiers
 *
 * The LSP server advertises this legend in its capabilities, and the VS Code
 * extension uses it as the target legend when remapping tokens produced by
 * embedded language providers. Entries are looked up by array index during
 * remapping, so the order of both arrays matters.
 */
export const QUARTO_SEMANTIC_TOKEN_LEGEND = {
  tokenTypes: [
    'namespace',
    'class',
    'enum',
    'interface',
    'struct',
    'typeParameter',
    'type',
    'parameter',
    'variable',
    'property',
    'enumMember',
    'decorator',
    'event',
    'function',
    'method',
    'macro',
    'label',
    'comment',
    'string',
    'keyword',
    'number',
    'regexp',
    'operator',
    // Commonly used by language servers, widely supported by themes
    'module',
  ],
  tokenModifiers: [
    'declaration',
    'definition',
    'readonly',
    'static',
    'deprecated',
    'abstract',
    'async',
    'modification',
    'documentation',
    'defaultLibrary',
  ],
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,252 @@ | ||
| /* | ||
| * semantic-tokens.ts | ||
| * | ||
| * Copyright (C) 2025 by Posit Software, PBC | ||
| * | ||
| * Unless you have received this program directly from Posit Software pursuant | ||
| * to the terms of a commercial license agreement with Posit Software, then | ||
| * this program is licensed to you under the terms of version 3 of the | ||
| * GNU Affero General Public License. This program is distributed WITHOUT | ||
| * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT, | ||
| * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the | ||
| * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details. | ||
| * | ||
| */ | ||
|
|
||
| import { | ||
| CancellationToken, | ||
| commands, | ||
| Position, | ||
| SemanticTokens, | ||
| SemanticTokensBuilder, | ||
| TextDocument, | ||
| Uri, | ||
| window, | ||
| } from "vscode"; | ||
| import { DocumentSemanticsTokensSignature } from "vscode-languageclient"; | ||
| import { MarkdownEngine } from "../markdown/engine"; | ||
| import { isQuartoDoc } from "../core/doc"; | ||
| import { | ||
| unadjustedSemanticTokens, | ||
| virtualDocForLanguage, | ||
| withVirtualDocUri, | ||
| languageAtPosition, | ||
| mainLanguage | ||
| } from "../vdoc/vdoc"; | ||
| import { EmbeddedLanguage } from "../vdoc/languages"; | ||
| import { QUARTO_SEMANTIC_TOKEN_LEGEND } from "quarto-utils"; | ||
|
|
||
| /** | ||
| * Decode semantic tokens from delta-encoded format to absolute positions | ||
| * | ||
| * Semantic tokens are encoded as [deltaLine, deltaStartChar, length, tokenType, tokenModifiers, ...] | ||
|
Comment on lines
+40
to
+42
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does "delta-encoded" mean that, e.g., if we imagine we extracted line and delta line data to their own arrays, then
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, from what I understand, "delta-encoded" means the token positions are stored as relative offsets from the previous token, rather than absolute positions, so |
||
| * This function converts them to absolute line/character positions for easier manipulation. | ||
| */ | ||
export function decodeSemanticTokens(tokens: SemanticTokens): Array<{
  line: number;
  startChar: number;
  length: number;
  tokenType: number;
  tokenModifiers: number;
}> {
  // Every token occupies exactly five consecutive integers in `tokens.data`:
  // [deltaLine, deltaStartChar, length, tokenType, tokenModifiers].
  const TUPLE_SIZE = 5;
  const { data } = tokens;
  const decoded: ReturnType<typeof decodeSemanticTokens> = [];

  // Absolute position of the most recently decoded token.
  let currentLine = 0;
  let currentChar = 0;

  // Only walk complete tuples. A truncated trailing tuple would otherwise be
  // read as `undefined` fields, producing a bogus token and turning the
  // running position into NaN.
  for (let i = 0; i + TUPLE_SIZE <= data.length; i += TUPLE_SIZE) {
    const deltaLine = data[i];
    const deltaStartChar = data[i + 1];

    currentLine += deltaLine;
    // The start character is relative to the previous token only while we
    // stay on the same line; moving to a new line makes it absolute.
    currentChar = deltaLine > 0 ? deltaStartChar : currentChar + deltaStartChar;

    decoded.push({
      line: currentLine,
      startChar: currentChar,
      length: data[i + 2],
      tokenType: data[i + 3],
      tokenModifiers: data[i + 4],
    });
  }

  return decoded;
}
|
|
||
/**
 * Turn absolutely positioned tokens back into a `SemanticTokens` result.
 *
 * Each token is pushed into VS Code's `SemanticTokensBuilder`, which is relied
 * on to produce the delta encoding. The optional `resultId` is forwarded to
 * `build()` unchanged.
 */
export function encodeSemanticTokens(
  tokens: Array<{
    line: number;
    startChar: number;
    length: number;
    tokenType: number;
    tokenModifiers: number;
  }>,
  resultId?: string
): SemanticTokens {
  const builder = new SemanticTokensBuilder();

  tokens.forEach(({ line, startChar, length, tokenType, tokenModifiers }) => {
    builder.push(line, startChar, length, tokenType, tokenModifiers);
  });

  return builder.build(resultId);
}
|
|
||
/**
 * Build an index translation table between two legends.
 *
 * For every name in `sourceNames` that also occurs in `targetNames`, the
 * result maps the name's index in the source list to the index of its first
 * occurrence in the target list. Names missing from the target list get no
 * entry.
 */
function buildLegendMap(
  sourceNames: string[],
  targetNames: string[]
): Map<number, number> {
  const indexMap = new Map<number, number>();

  sourceNames.forEach((name, sourceIndex) => {
    const targetIndex = targetNames.indexOf(name);
    if (targetIndex !== -1) {
      indexMap.set(sourceIndex, targetIndex);
    }
  });

  return indexMap;
}
|
|
||
/**
 * Translate a modifier bitfield from source legend indices to target legend
 * indices.
 *
 * @param sourceModifiers Bitfield in which a set bit `i` selects the source
 *   modifier with index `i`.
 * @param modifierMap Source modifier index → target modifier index. Source
 *   modifiers without an entry are dropped.
 * @returns The equivalent bitfield for the target legend, as an unsigned
 *   32-bit value.
 */
function remapModifierBitfield(
  sourceModifiers: number,
  modifierMap: Map<number, number>
): number {
  // JavaScript shift counts wrap modulo 32 (`1 << 32 === 1`), so an index
  // outside 0..31 would silently alias onto a low bit. Such indices cannot be
  // represented in a 32-bit bitfield anyway, so they are skipped.
  const isBitIndex = (bit: number): boolean =>
    Number.isInteger(bit) && bit >= 0 && bit < 32;

  let targetModifiers = 0;

  for (const [sourceBit, targetBit] of modifierMap) {
    if (!isBitIndex(sourceBit) || !isBitIndex(targetBit)) {
      continue;
    }
    if (sourceModifiers & (1 << sourceBit)) {
      targetModifiers |= 1 << targetBit;
    }
  }

  // Bitwise operators yield signed 32-bit integers; `>>> 0` keeps the result
  // non-negative when bit 31 is set.
  return targetModifiers >>> 0;
}
|
|
||
/**
 * Re-express semantic tokens encoded against `sourceLegend` in terms of
 * `targetLegend`.
 *
 * Tokens whose type name does not exist in the target legend are dropped;
 * modifiers without a counterpart in the target legend are cleared. The
 * incoming `resultId` is carried over to the returned tokens.
 */
export function remapTokenIndices(
  tokens: SemanticTokens,
  sourceLegend: { tokenTypes: string[]; tokenModifiers: string[]; },
  targetLegend: { tokenTypes: string[]; tokenModifiers: string[]; }
): SemanticTokens {
  // Index translation tables, computed once per call
  const typeIndexMap = buildLegendMap(sourceLegend.tokenTypes, targetLegend.tokenTypes);
  const modifierIndexMap = buildLegendMap(sourceLegend.tokenModifiers, targetLegend.tokenModifiers);

  const translated: ReturnType<typeof decodeSemanticTokens> = [];
  for (const entry of decodeSemanticTokens(tokens)) {
    const mappedType = typeIndexMap.get(entry.tokenType);
    if (mappedType === undefined) {
      // Type is unknown to the target legend: drop the token
      continue;
    }
    translated.push({
      ...entry,
      tokenType: mappedType,
      tokenModifiers: remapModifierBitfield(entry.tokenModifiers, modifierIndexMap),
    });
  }

  return encodeSemanticTokens(translated, tokens.resultId);
}
|
|
||
/**
 * Create semantic token middleware that serves tokens from the provider of a
 * language embedded in a Quarto document.
 *
 * The returned handler delegates to `next` when the document is not a Quarto
 * document, is not shown in the active editor, or has no detectable language.
 * Otherwise it:
 *  1. picks the language at the start of the cursor line, falling back to the
 *     document's main language;
 *  2. builds a virtual document for that language;
 *  3. asks VS Code for the semantic tokens (and their legend) of the virtual
 *     document;
 *  4. remaps the token indices onto `QUARTO_SEMANTIC_TOKEN_LEGEND` and converts
 *     positions back to the real document.
 *
 * Only one language is handled per request, because the virtual document is
 * built for a single language.
 *
 * @param engine Markdown engine used to parse the document.
 * @returns The middleware function. It resolves to `undefined` when the
 *   embedded request throws.
 */
export function embeddedSemanticTokensProvider(engine: MarkdownEngine) {
  return async (
    document: TextDocument,
    token: CancellationToken,
    next: DocumentSemanticsTokensSignature
  ): Promise<SemanticTokens | null | undefined> => {
    // Only handle Quarto documents
    if (!isQuartoDoc(document, true)) {
      return await next(document, token);
    }

    // The cursor position is needed below, so only the active document can be handled
    const editor = window.activeTextEditor;
    if (!editor || editor.document?.uri.toString() !== document.uri.toString()) {
      return await next(document, token);
    }

    // Markdown parse result; named to keep it distinct from the semantic
    // tokens requested further down
    const markdownTokens = engine.parse(document);

    // Prefer the language at the cursor line, otherwise use the main language
    const cursorLineStart = new Position(editor.selection.active.line, 0);
    const language =
      languageAtPosition(markdownTokens, cursorLineStart) || mainLanguage(markdownTokens);
    if (!language) {
      // No language found, delegate to default
      return await next(document, token);
    }

    // Create virtual doc for all blocks of this language
    const vdoc = virtualDocForLanguage(document, markdownTokens, language);

    return await withVirtualDocUri(vdoc, document.uri, "semanticTokens", async (uri: Uri) => {
      try {
        // The legend and the tokens are independent requests against the same
        // virtual document, so issue them concurrently
        const [legend, embeddedTokens] = await Promise.all([
          commands.executeCommand<
            { tokenTypes: string[]; tokenModifiers: string[] } | undefined
          >("vscode.provideDocumentSemanticTokensLegend", uri),
          commands.executeCommand<SemanticTokens | undefined>(
            "vscode.provideDocumentSemanticTokens",
            uri
          ),
        ]);

        if (!embeddedTokens || embeddedTokens.data.length === 0) {
          return embeddedTokens;
        }

        // Remap token indices from the embedded provider's legend to our universal legend.
        // NOTE(review): without a legend the indices are passed through as-is,
        // i.e. interpreted against the Quarto legend; confirm this is intended.
        const remappedTokens = legend
          ? remapTokenIndices(embeddedTokens, legend, QUARTO_SEMANTIC_TOKEN_LEGEND)
          : embeddedTokens;

        // Adjust token positions from virtual doc to real doc coordinates
        return unadjustedSemanticTokens(vdoc.language, remappedTokens);
      } catch (error: unknown) {
        // Best effort: a failing embedded provider must not break the editor,
        // but leave a trace so the failure can be diagnosed
        console.warn("Failed to provide embedded semantic tokens", error);
        return undefined;
      }
    });
  };
}
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this supposed to return with an empty array? If so, why?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe so, based on how semantic tokens work. Returning
`{ data: [] }` says, "I'm handling this request successfully, but have no tokens to provide", while `null` would mean "capability not available" or "error". It's different from the other handlers where `null` is the standard way to say "no result". This is the first time I've worked with semantic tokens, but I did find the spec helpful: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens