// SPDX-FileCopyrightText: © 2025 TU Wien <[email protected]>
// SPDX-License-Identifier: GPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package vadl.ast;

import java.io.ByteArrayInputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.eclipse.lsp4j.SemanticTokenTypes;

/**
 * Tokenizer used by the language server to provide LSP semantic tokens.
 * It must live in this package, as it could not otherwise access the package-private
 * {@code Token} class.
 */
public final class LspTokenizer {

  private final Map<String, Integer> tokenTypesMap;
  // Token modifiers are not emitted yet (every token gets modifier bitset 0), so this map
  // is currently unused.
  @SuppressWarnings("unused")
  private final Map<String, Integer> tokenModifiersMap;

  /**
   * Maps VADL scanner token kinds to LSP semantic token types.
   */
  private static final String[] tokenKindsMap;

  static {
    // Generate tokenKindsMap
    tokenKindsMap = new String[Parser.maxT + 1];
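    // Every token kind produced by the generated scanner lies in 0..Parser.maxT, so each kind
    // gets exactly one slot in this lookup table.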

    // Hardcoded mappings:
    tokenKindsMap[Parser._hexLit] = SemanticTokenTypes.Number;
    tokenKindsMap[Parser._binLit] = SemanticTokenTypes.Number;
    tokenKindsMap[Parser._decLit] = SemanticTokenTypes.Number;
    tokenKindsMap[Parser._identifierToken] = SemanticTokenTypes.Variable;
    tokenKindsMap[Parser._string] = SemanticTokenTypes.String;

    // Look through all known token kinds
    for (Field field : Parser.class.getDeclaredFields()) {
      var m = field.getModifiers();
      if (!Modifier.isPublic(m) || !Modifier.isStatic(m) || !Modifier.isFinal(m)
          || !int.class.isAssignableFrom(field.getType())) {
        continue;
      }
      var name = field.getName();
      if (!name.startsWith("_")) {
        continue;
      }
      int kind;
      try {
        kind = field.getInt(null);
      } catch (IllegalAccessException e) {
        continue;
      }

      if (tokenKindsMap[kind] != null) {
        // This mapping has already been set above
        continue;
      }
      // Operators according to ParserUtils / Token name "SYM_*"
      if (ParserUtils.BIN_OPS[kind] || ParserUtils.UN_OPS[kind] || name.startsWith("_SYM_")) {
        tokenKindsMap[kind] = SemanticTokenTypes.Operator;
        continue;
      }
      // Token name "T_*"
      if (name.startsWith("_T_")) {
        // Don't know what to map these to
        continue;
      }
      // Everything else should be Keyword
      tokenKindsMap[kind] = SemanticTokenTypes.Keyword;
    }
  }

  /**
   * Creates a new tokenizer.
   *
   * @param tokenTypesMap     Maps semantic token types to their integer index in the legend
   *                          (which is part of the server capabilities). This is required for
   *                          encoding semanticTokens responses. Should only contain types that
   *                          the client supports.
   * @param tokenModifiersMap Maps semantic token modifiers to their integer index in the legend
   *                          (which is part of the server capabilities). This is required for
   *                          encoding semanticTokens responses. Should only contain modifiers
   *                          that the client supports.
   */
  public LspTokenizer(Map<String, Integer> tokenTypesMap, Map<String, Integer> tokenModifiersMap) {
    this.tokenTypesMap = tokenTypesMap;
    this.tokenModifiersMap = tokenModifiersMap;
  }

  /**
   * Returns LSP semantic tokens for the given source code.
   *
   * @param content the content of a VADL source file
   * @return token list encoded for a semanticTokens response. Note: deltaStart and length are
   *         calculated for UTF-8 encoding.
   */
  public List<Integer> getTokens(String content) {
    Scanner scanner = new Scanner(
        new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8))
    );
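    // The Coco/R-generated scanner reads from an InputStream, so the content is passed in as
    // UTF-8 encoded bytes.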

    // Note: We assume that all tokens are single-line, i.e. there is no need to split them up
    // at line boundaries for LSP. So far, the only elements in OpenVADL that may span multiple
    // lines are comments, but the Coco/R scanner doesn't produce tokens for those anyway.

    List<Integer> lspTokens = new ArrayList<>();
    int previousLine = 1; // Token.line starts at 1
    int previousCol = 1; // Same for Token.col
    for (Token t = scanner.Scan(); t.kind != Parser._EOF; t = scanner.Scan()) {
      int tokenType = getTokenTypeFromScannerKind(t.kind);
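      // Skip token kinds that have no LSP mapping or whose type the client does not support.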
      if (tokenType < 0) {
        continue;
      }

      int deltaLine = t.line - previousLine;
      previousLine = t.line;

      if (deltaLine != 0) {
        previousCol = 1;
      }
      int deltaStart = t.col - previousCol;
      previousCol = t.col;

      // Each token is encoded as: deltaLine, deltaStart, length, tokenType, tokenModifiers
      lspTokens.add(deltaLine);
      lspTokens.add(deltaStart);
      lspTokens.add(t.val.length());
      lspTokens.add(tokenType);
      lspTokens.add(0); // no token modifiers are emitted
    }
    return lspTokens;
  }

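  /**
   * Maps a scanner token kind to its index in the token type legend.
   *
   * @param kind token kind as produced by the scanner
   * @return the legend index, or -1 if the kind has no LSP token type or the client does not
   *         support the mapped type (such tokens are skipped)
   */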
  private int getTokenTypeFromScannerKind(int kind) {
    return tokenTypesMap.getOrDefault(tokenKindsMap[kind], -1);
  }
}
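
// Usage sketch (hypothetical caller, not part of this file): a language server would derive
// both legend maps from the negotiated client capabilities and then answer
// textDocument/semanticTokens/full requests roughly like this:
//
//   LspTokenizer tokenizer = new LspTokenizer(tokenTypesLegend, tokenModifiersLegend);
//   List<Integer> data = tokenizer.getTokens(documentText);
//   SemanticTokens response = new SemanticTokens(data);
//
// where tokenTypesLegend maps e.g. "keyword" -> 0, "number" -> 1, ... in the order announced in
// the server's SemanticTokensLegend, and tokenModifiersLegend does the same for modifiers.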