|
| 1 | +import * as vscode from "vscode"; |
| 2 | + |
/**
 * Error thrown when the CMake source cannot be tokenized or parsed.
 *
 * The `name` property is set explicitly so that stack traces, logs and
 * `String(err)` show `ParserError` instead of the generic `Error`.
 */
class ParserError extends Error {
  constructor(message?: string) {
    super(message);
    this.name = 'ParserError';
  }
}
| 4 | + |
/**
 * Result of parsing a whole CMake document.
 */
export interface CMakeAST {
  /** The editor document the invocations were parsed from. */
  document: vscode.TextDocument;
  /** Every command invocation found, in source order. */
  invocations: Array<CommandInvocationAST>;
}
| 9 | + |
/**
 * One command invocation, i.e. `command(arg arg ...)`.
 */
export interface CommandInvocationAST {
  /** The command name token. */
  command: Token;
  /** The parenthesis that opens the argument list. */
  lparen: Token;
  /** The argument tokens, in source order. */
  args: Array<Token>;
  /** The parenthesis that closes the argument list. */
  rparen: Token;
}
| 16 | + |
/**
 * A single lexical token together with its location in the source document.
 */
export class Token {
  /** The kind of token that was matched. */
  public type: TokenType;
  /** The exact source text of the token. */
  public raw: string;
  /** The document the token was read from. */
  public document: vscode.TextDocument;
  /** Offset of the first character of `raw` within the document text. */
  public offset: number;
  /** The token's value; may differ from `raw` after post-processing. */
  public value: string;

  constructor(
    type: TokenType,
    raw: string,
    document: vscode.TextDocument,
    offset: number,
    value: string
  ) {
    this.type = type;
    this.raw = raw;
    this.document = document;
    this.offset = offset;
    this.value = value;
  }

  /** Offset just past the last character of the token. */
  public get endOffset(): number {
    const { offset, raw } = this;
    return offset + raw.length;
  }
}
| 30 | + |
/**
 * Describes one kind of token the parser can recognise.
 */
interface TokenType {
  /** Label for this kind of token; shown in parser error messages. */
  name: string;
  /** Pattern that the parser matches at its current position in the text. */
  re: RegExp;
}
| 35 | + |
/**
 * Bracket argument / bracket comment body: `[`, N `=` signs, `[`, content,
 * `]`, the same N `=` signs, `]`.
 *
 * The content is matched lazily so the token ends at the FIRST matching
 * closing bracket; a greedy match would run on to the last closing bracket
 * in the file and swallow everything in between.
 */
const BRACKETED_RE = /\[(=*)\[.*?\]\1\]/s;
const SPACE: TokenType = { name: 'SPACE', re: /[ \t]+/ };
// Accept CRLF as well as LF so documents with Windows line endings parse.
const NEWLINE: TokenType = { name: 'NEWLINE', re: /\r?\n/ };
const IDENT: TokenType = { name: 'IDENT', re: /[A-Za-z_][A-Za-z0-9_]*/ };
const LPAREN: TokenType = { name: 'LPAREN', re: /\(/ };
const RPAREN: TokenType = { name: 'RPAREN', re: /\)/ };
const BRACKETED: TokenType = { name: 'BRACKETED', re: BRACKETED_RE };
// A backslash may only appear as the start of an escape pair. Excluding it
// from the character class keeps the two alternatives disjoint, which avoids
// exponential backtracking on an unterminated string full of backslashes.
const QUOTED: TokenType = { name: 'QUOTED', re: /"(?:\\.|[^"\\])*"/s };
const UNQUOTED: TokenType = { name: 'UNQUOTED', re: /(?:\\.|[^\s()#"\\'])+/s };
// TODO: "legacy" identifiers with quotes in them
// A line comment runs to the end of the line. The terminator is consumed as
// part of the token and may be LF, CRLF, or the end of the input (a comment on
// the last line of a file that has no trailing newline).
const LINE_COMMENT: TokenType = { name: 'LINE_COMMENT', re: /#.*(?:\r?\n|$)/ };
const BRACKETED_COMMENT: TokenType = { name: 'BRACKETED_COMMENT', re: regexpPrepend('#', BRACKETED_RE) };
const EOF: TokenType = { name: 'EOF', re: /$/ };
const SPACE_TYPES: TokenType[] = [SPACE, NEWLINE];
// Order matters: token types are tried in sequence and the first match wins.
// BRACKETED_COMMENT must precede LINE_COMMENT, otherwise `#[[ ... ]]` is cut
// off at the first newline and the remainder is parsed as code.
const COMMENT_TYPES: TokenType[] = [BRACKETED_COMMENT, LINE_COMMENT];
// Order matters here too: `[` is a legal UNQUOTED character, so BRACKETED has
// to be tried first or a bracket argument is never recognised as such.
const ARG_TYPES: TokenType[] = [
  LPAREN, RPAREN, BRACKETED, QUOTED, UNQUOTED
];
| 54 | + |
/**
 * Parser that turns the text of a CMake document into command invocations.
 *
 * There is no separate lexing pass: each parsing step asks for the next token
 * out of an explicit list of acceptable token types, and those types are tried
 * in order at the current offset (first match wins).
 */
export class CMakeParser {
  /**
   * Sticky (`y` flag) copies of the token regexes, built once per token type
   * so that matching neither recompiles a RegExp nor slices the text.
   */
  private static readonly stickyRegexps = new WeakMap<TokenType, RegExp>();

  private readonly text: string;
  private offset: number;
  private pushbackBuffer: Token[] = [];

  /**
   * @param document Document whose text is parsed.
   * @param offset Offset in the document text at which to start; defaults to
   *   0 and is clamped to the range `0..text.length`.
   */
  constructor(private document: vscode.TextDocument, offset?: number) {
    this.text = document.getText();
    this.offset = Math.max(0, Math.min(offset ?? 0, this.text.length));
  }

  /**
   * Parse every command invocation from the current offset to the end of the
   * document.
   *
   * @throws ParserError on the first piece of text that cannot be parsed.
   */
  public parseDocument(): CMakeAST {
    return {
      document: this.document,
      invocations: Array.from(this.parseCommandInvocations())
    };
  }

  /** Yield command invocations one at a time until the end of input. */
  private *parseCommandInvocations(): Generator<CommandInvocationAST> {
    // Slightly more permissive in terms of comment placement than the
    // official grammar.
    while (true) {
      const next = this.skipSpaceAndComments(IDENT, EOF);
      if (next.type === EOF) {
        return;
      }
      // Hand the identifier back so parseCommandInvocation() reads it as the
      // command name.
      this.pushbackBuffer.push(next);
      yield this.parseCommandInvocation();
    }
  }

  /**
   * Parse one command invocation: an identifier, `(`, arguments, and the
   * matching `)`.
   *
   * Only whitespace and newlines are skipped before the command name, so the
   * parser must be positioned at (or at blank space before) an identifier.
   * Parentheses nested inside the argument list are balanced but not recorded
   * in `args`.
   *
   * @returns The invocation, with each argument's `value` already decoded.
   * @throws ParserError if the text at the current offset is not a
   *   well-formed invocation, including when the input ends before the
   *   closing parenthesis.
   */
  public parseCommandInvocation(): CommandInvocationAST {
    const command = this.skipSpace(IDENT);
    const lparen = this.skipSpace(LPAREN);
    const args: Token[] = [];
    let depth = 1;
    while (true) {
      const token = this.skipSpaceAndComments(...ARG_TYPES);
      switch (token.type) {
        case LPAREN:
          depth++;
          break;
        case RPAREN:
          depth--;
          if (depth === 0) {
            // This is the parenthesis that closes the invocation.
            this.assignArgumentValues(args);
            return { command, args, lparen, rparen: token };
          }
          break;
        case UNQUOTED:
        case QUOTED:
        case BRACKETED:
          args.push(token);
          break;
        default:
          this.error(`unexpected ${token.type.name} ${token.raw}`);
      }
    }
  }

  /**
   * Replace each argument's `value` (initially its raw text) with the decoded
   * value for its kind of argument.
   */
  private assignArgumentValues(args: Token[]) {
    for (const arg of args) {
      switch (arg.type) {
        case QUOTED:
          // Drop the surrounding quotes, then resolve escapes.
          arg.value = unescape(arg.raw.slice(1, -1));
          break;
        case BRACKETED:
          // Keep only the content between the brackets (capture group 2;
          // group 1 is the run of `=` signs). The `s` flag lets the content
          // span lines, and one newline directly after the opening bracket is
          // not part of the value. Bracket content is taken literally, so no
          // escape processing is done.
          arg.value = arg.raw.replace(/^\[(=*)\[(?:\r?\n)?(.*)\]\1\]$/s, '$2');
          break;
        case UNQUOTED:
        default:
          arg.value = unescape(arg.raw);
          break;
      }
    }
  }

  /** Return the next token of one of the `expect` types, skipping blank space. */
  private skipSpace(...expect: TokenType[]): Token {
    return this.skipTokens(SPACE_TYPES, expect);
  }

  /**
   * Return the next token of one of the `expect` types, skipping blank space
   * and comments.
   */
  private skipSpaceAndComments(...expect: TokenType[]): Token {
    return this.skipTokens([...SPACE_TYPES, ...COMMENT_TYPES], expect);
  }

  /**
   * Read tokens until one is found whose type is not in `skip`.
   *
   * The `expect` types are tried before the `skip` types at every position.
   */
  private skipTokens(skip: TokenType[], expect: TokenType[]): Token {
    const candidates = [...expect, ...skip];
    let token: Token;
    do {
      token = this.nextToken(...candidates);
    } while (skip.includes(token.type));

    return token;
  }

  /**
   * Return the next token, taking it from the pushback buffer if one is
   * waiting there and scanning the text otherwise.
   *
   * @throws ParserError if the next token is not one of the `expect` types.
   */
  private nextToken(...expect: TokenType[]): Token {
    const pushedBack = this.pushbackBuffer.pop();
    if (pushedBack !== undefined) {
      if (expect.includes(pushedBack.type)) {
        return pushedBack;
      }
      // The scan offset is already past this token; rewind so the error
      // names the rejected token and points at where it starts.
      this.offset = pushedBack.offset;
      this.error(`unexpected ${pushedBack.type.name} ${pushedBack.raw}`);
    }

    const token = this.scanToken(...expect);
    if (token !== null) {
      return token;
    }
    if (this.offset >= this.text.length) {
      this.error(`unexpected EOF`);
    }
    this.error(`unexpected ${this.text[this.offset]}`);
  }

  /**
   * Try each token type in order at the current offset.
   *
   * @returns The first token that matches, or `null` if none does.
   */
  private scanToken(...expect: TokenType[]): Token | null {
    for (const matcher of expect) {
      const token = this.tryMatch(matcher);
      if (token !== null) {
        return token;
      }
    }
    return null;
  }

  /**
   * Match a single token type exactly at the current offset, advancing the
   * offset past the token on success.
   */
  private tryMatch(matcher: TokenType): Token | null {
    let re = CMakeParser.stickyRegexps.get(matcher);
    if (re === undefined) {
      const flags = matcher.re.flags.includes('y')
        ? matcher.re.flags
        : matcher.re.flags + 'y';
      re = new RegExp(matcher.re.source, flags);
      CMakeParser.stickyRegexps.set(matcher, re);
    }
    // A sticky regex only matches starting exactly at lastIndex.
    re.lastIndex = this.offset;
    const match = re.exec(this.text);
    if (match === null) {
      return null;
    }
    const token = new Token(
      matcher,
      match[0],
      this.document,
      this.offset,
      match[0] // may be overwritten later with a post-processed value
    );
    this.offset += match[0].length;
    return token;
  }

  /**
   * Throw a ParserError whose message starts with `file:line:column: ` for
   * the current offset (line and column are reported 1-based).
   */
  private error(msg: string): never {
    const pos = this.document.positionAt(this.offset);
    throw new ParserError(
      `${this.document.fileName}:${pos.line + 1}:${pos.character + 1}: ${msg}`);
  }
}
| 195 | + |
/**
 * Build a new regular expression that matches `prefix` followed by `re`.
 *
 * The original pattern is wrapped in a non-capturing group so the prefix
 * applies to the whole pattern even when it has a top-level alternation:
 * prepending `^` to `a|b` gives `^(?:a|b)`, not `^a|b`. Non-capturing groups
 * do not shift capture-group numbers, so backreferences in `re` still work.
 *
 * @param prefix Regular-expression source text to put in front of the pattern.
 * @param re The pattern to extend; it is not modified.
 * @returns A new RegExp with the same flags as `re`.
 */
export function regexpPrepend(prefix: string, re: RegExp): RegExp {
  return new RegExp(`${prefix}(?:${re.source})`, re.flags);
}
| 199 | + |
/**
 * Resolve backslash escape sequences in the text of a CMake argument.
 *
 * - `\n`, `\t` and `\r` become a line feed, a tab and a carriage return.
 * - A backslash directly followed by a line break (LF or CRLF) is a line
 *   continuation: both the backslash and the line break are removed.
 * - A backslash followed by any other character yields that character.
 * - A lone backslash at the very end of the string is left as is.
 *
 * NOTE(review): `\;` is decoded to `;` like any other escaped character.
 * Confirm whether callers need the escaped form preserved for list handling.
 *
 * @param s Argument text without surrounding quotes.
 * @returns The text with escape sequences resolved.
 */
function unescape(s: string): string {
  return s.replace(/\\(\r?\n|[\s\S])/g, (_match: string, escaped: string) => {
    switch (escaped) {
      case 'n':
        return '\n';
      case 't':
        return '\t';
      case 'r':
        return '\r';
      case '\n':
      case '\r\n':
        return '';
      default:
        return escaped;
    }
  });
}
0 commit comments