Skip to content

Commit c87888b

Browse files
committed
Add cmakeParser.ts to parse CMakeLists.txt files
This parser accepts the grammar described in [cmake-language(7)](https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#syntax). It is somewhat more lenient than the official grammar, in that it permits comments in places where the official grammar would reject them.
1 parent 91d42f6 commit c87888b

File tree

1 file changed

+202
-0
lines changed

1 file changed

+202
-0
lines changed

src/cmakeParser.ts

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
import * as vscode from "vscode";
2+
3+
/**
 * Error type thrown by the parser for malformed input. The message is built
 * by `CMakeParser.error` and carries a `file:line:column:` prefix.
 */
class ParserError extends Error {
    constructor(message?: string) {
        super(message);
        // Without this the error reports itself as a plain "Error" in logs
        // and stack traces.
        this.name = 'ParserError';
    }
}
4+
5+
/**
 * Result of parsing a whole document, as returned by
 * `CMakeParser.parseDocument`.
 */
export interface CMakeAST {
    /** The document the invocations were parsed from. */
    document: vscode.TextDocument;
    /** Every command invocation found, in the order it was parsed. */
    invocations: CommandInvocationAST[];
}
9+
10+
/**
 * One parsed command invocation: `command ( args... )`.
 */
export interface CommandInvocationAST {
    /** The identifier token naming the command. */
    command: Token;
    /** The parenthesis that opens the argument list. */
    lparen: Token;
    /** The unquoted, quoted and bracket argument tokens, in source order. */
    args: Token[];
    /** The parenthesis that closes the argument list. */
    rparen: Token;
}
16+
17+
/**
 * A single lexical token together with its location in the source document.
 */
export class Token {
    /**
     * @param type     the token type whose pattern produced this token
     * @param raw      the exact source text that was matched
     * @param document the document the token was read from
     * @param offset   offset of the first character of `raw` in the document text
     * @param value    the token's value; initially the raw text, and replaced
     *                 with a post-processed value for argument tokens
     */
    constructor(
        public type: TokenType,
        public raw: string,
        public document: vscode.TextDocument,
        public offset: number,
        public value: string
    ) { }

    /** Offset just past the last character of the raw token text. */
    public get endOffset(): number {
        return this.offset + this.raw.length;
    }
}
30+
31+
/**
 * Describes one kind of token the lexer can produce.
 */
interface TokenType {
    /** Human-readable name, used in parser error messages. */
    name: string;
    /** Pattern for the token; the lexer matches it anchored at the current offset. */
    re: RegExp;
}
35+
36+
// Token definitions. Each pattern is matched anchored at the current offset.

// Bracket argument body: "[", N "=", "[", content, "]", N "=", "]".
// The content match is lazy so the token ends at the FIRST closer with the
// same number of "=" signs; a greedy match would run to the last closer in
// the file and swallow everything in between.
const BRACKETED_RE = /\[(=*)\[.*?\]\1\]/s;
const SPACE: TokenType = { name: 'SPACE', re: /[ \t]+/ };
// Accept CRLF as well as LF line endings.
const NEWLINE: TokenType = { name: 'NEWLINE', re: /\r?\n/ };
const IDENT: TokenType = { name: 'IDENT', re: /[A-Za-z_][A-Za-z0-9_]*/ };
const LPAREN: TokenType = { name: 'LPAREN', re: /\(/ };
const RPAREN: TokenType = { name: 'RPAREN', re: /\)/ };
const BRACKETED: TokenType = { name: 'BRACKETED', re: BRACKETED_RE };
// The plain-character class excludes the backslash so that every backslash
// must be consumed as part of an escape pair. This keeps `\"` from ending the
// string early and removes the ambiguous alternation that could backtrack
// exponentially on an unterminated string.
const QUOTED: TokenType = { name: 'QUOTED', re: /"(?:\\.|[^"\\])*"/s };
const UNQUOTED: TokenType = { name: 'UNQUOTED', re: /(?:\\.|[^\s()#"\\'])+/s };
// TODO: "legacy" identifiers with quotes in them
// A line comment runs up to, but does not include, the end of the line. The
// newline is left for NEWLINE so a comment on the last line of a file that
// has no trailing newline is still recognised.
const LINE_COMMENT: TokenType = { name: 'LINE_COMMENT', re: /#[^\n]*/ };
const BRACKETED_COMMENT: TokenType = { name: 'BRACKETED_COMMENT', re: regexpPrepend('#', BRACKETED_RE) };
const EOF: TokenType = { name: 'EOF', re: /$/ };
const SPACE_TYPES: TokenType[] = [SPACE, NEWLINE];
// Order matters: token types are tried in sequence and the first match wins.
// A bracket comment also starts with "#", so it must be tried before the
// line comment or `#[[ ... ]]` would be cut off at the first newline.
const COMMENT_TYPES: TokenType[] = [BRACKETED_COMMENT, LINE_COMMENT];
// Order matters here too: "[" is a legal unquoted character, so BRACKETED
// must be tried before UNQUOTED or bracket arguments are never recognised.
const ARG_TYPES: TokenType[] = [
    LPAREN, RPAREN, BRACKETED, QUOTED, UNQUOTED
];
54+
55+
/**
 * Parser for CMake list files.
 *
 * The document text is read once at construction. Tokens are then scanned on
 * demand from the current offset, and command invocations are assembled from
 * them. Malformed input is reported by throwing a `ParserError` whose message
 * starts with `file:line:column:`.
 */
export class CMakeParser {
    /**
     * Matches an entire bracket argument. Group 1 is the run of `=` signs and
     * group 2 is the content. One newline directly after the opening bracket
     * is not part of the content (cmake-language(7), "Bracket Argument").
     */
    private static readonly BRACKET_ARGUMENT_RE = /^\[(=*)\[(?:\r?\n)?([\s\S]*)\]\1\]$/;

    /** Sticky (`y`) version of each token pattern, built on first use. */
    private static readonly stickyPatterns = new WeakMap<TokenType, RegExp>();

    private text: string;
    private offset: number;
    private pushbackBuffer: Token[] = [];

    /**
     * @param document the document to parse; its full text is read immediately
     * @param offset   offset in the document text at which scanning starts
     *                 (defaults to 0)
     */
    constructor(private document: vscode.TextDocument, offset?: number) {
        this.offset = offset ?? 0;
        this.text = document.getText();
    }

    /**
     * Parse every command invocation from the current offset to the end of
     * the document.
     *
     * @throws ParserError on the first token that does not fit the grammar
     */
    public parseDocument(): CMakeAST {
        return {
            document: this.document,
            invocations: Array.from(this.parseCommandInvocations())
        };
    }

    /** Yield command invocations one at a time until end of input. */
    private *parseCommandInvocations(): Generator<CommandInvocationAST> {
        // Slightly more permissive in terms of comment placement than the
        // official grammar.
        while (true) {
            const next = this.skipSpaceAndComments(IDENT, EOF);
            if (next.type === EOF) {
                return;
            }
            // Hand the identifier back so parseCommandInvocation() sees the
            // invocation from its first token.
            this.pushbackBuffer.push(next);
            yield this.parseCommandInvocation();
        }
    }

    /**
     * Parse one Command Invocation. Call in a loop to parse an entire file.
     *
     * Nested parentheses inside the argument list are balanced but are not
     * recorded in `args`; only unquoted, quoted and bracket arguments are.
     *
     * @throws ParserError if the input at the current offset is not a
     *         well-formed command invocation
     */
    public parseCommandInvocation(): CommandInvocationAST {
        const command = this.skipSpace(IDENT);
        const lparen = this.skipSpace(LPAREN);
        const args: Token[] = [];
        let depth = 1;
        let token: Token;
        // depth starts at 1, so the body always runs at least once and
        // `token` ends up holding the RPAREN that brought depth back to 0.
        do {
            token = this.skipSpaceAndComments(...ARG_TYPES);
            switch (token.type) {
                case LPAREN:
                    depth++; break;
                case RPAREN:
                    depth--; break;
                case UNQUOTED: case QUOTED: case BRACKETED:
                    args.push(token); break;
                default:
                    this.error(`unexpected ${token.type.name} ${token.raw}`);
            }
        } while (depth > 0);
        const rparen = token;
        this.assignArgumentValues(args);

        return { command, args, lparen, rparen };
    }

    /**
     * Replace each argument token's `value` with its post-processed form:
     * quotes or brackets stripped, and escapes resolved where the argument
     * kind supports them.
     */
    private assignArgumentValues(args: Token[]) {
        for (const arg of args) {
            switch (arg.type) {
                case QUOTED:
                    arg.value = unescape(arg.raw.slice(1, -1)); break;
                case BRACKETED: {
                    // Bracket content is taken literally: no escape
                    // processing, and it may span several lines.
                    const match = CMakeParser.BRACKET_ARGUMENT_RE.exec(arg.raw);
                    arg.value = match?.[2] ?? arg.raw;
                    break;
                }
                case UNQUOTED: default:
                    arg.value = unescape(arg.raw); break;
            }
        }
    }

    /** Return the next token of an expected type, skipping whitespace. */
    private skipSpace(...expect: TokenType[]): Token {
        return this.skipTokens(SPACE_TYPES, expect);
    }

    /** Return the next token of an expected type, skipping whitespace and comments. */
    private skipSpaceAndComments(...expect: TokenType[]): Token {
        return this.skipTokens([...SPACE_TYPES, ...COMMENT_TYPES], expect);
    }

    /**
     * Read tokens until one that is not in `skip` is found, and return it.
     * The expected types are tried before the skippable ones.
     */
    private skipTokens(skip: TokenType[], expect: TokenType[]): Token {
        const accepted = [...expect, ...skip];
        let token: Token;
        do {
            token = this.nextToken(...accepted);
        } while (skip.includes(token.type));

        return token;
    }

    /**
     * Return the next token, which must be one of `expect`. A pushed-back
     * token takes priority over scanning new text.
     *
     * @throws ParserError if the next token is not of an expected type
     */
    private nextToken(...expect: TokenType[]): Token {
        const pushedBack = this.pushbackBuffer.pop();
        if (pushedBack !== undefined) {
            if (expect.includes(pushedBack.type)) {
                return pushedBack;
            }
        } else {
            const scanned = this.scanToken(...expect);
            if (scanned !== null) {
                return scanned;
            }
        }
        // `>=` rather than `===`: a start offset past the end of the text
        // would otherwise be reported as "unexpected undefined".
        if (this.offset >= this.text.length) {
            this.error(`unexpected EOF`);
        }
        this.error(`unexpected ${this.text[this.offset]}`);
    }

    /** Try each token type in order; the first one that matches wins. */
    private scanToken(...expect: TokenType[]): Token | null {
        for (const matcher of expect) {
            const token = this.tryMatch(matcher);
            if (token !== null) {
                return token;
            }
        }
        return null;
    }

    /**
     * Try to match one token type exactly at the current offset. On success
     * the offset advances past the match; on failure nothing changes.
     */
    private tryMatch(matcher: TokenType): Token | null {
        // A sticky regex matches only at lastIndex, so the full text can be
        // matched in place instead of copying the remaining text and
        // recompiling the pattern for every attempt.
        let pattern = CMakeParser.stickyPatterns.get(matcher);
        if (pattern === undefined) {
            const flags = matcher.re.flags.includes('y') ? matcher.re.flags : matcher.re.flags + 'y';
            pattern = new RegExp(matcher.re.source, flags);
            CMakeParser.stickyPatterns.set(matcher, pattern);
        }
        pattern.lastIndex = this.offset;
        const match = pattern.exec(this.text);
        if (!match) {
            return null;
        }
        const token = new Token(
            matcher,
            match[0],
            this.document,
            this.offset,
            match[0] // may be overwritten later with a post-processed value
        );
        this.offset += match[0].length;
        return token;
    }

    /** Throw a ParserError for the current offset, prefixed with `file:line:column:` (1-based). */
    private error(msg: string): never {
        const pos = this.document.positionAt(this.offset);
        throw new ParserError(
            `${this.document.fileName}:${pos.line + 1}:${pos.character + 1}: ${msg}`);
    }
}
195+
196+
/**
 * Build a new regular expression that matches `prefix` followed by whatever
 * `re` matches, keeping the flags of `re`.
 *
 * The original source is wrapped in a non-capturing group so the prefix
 * applies to the whole pattern even when it contains a top-level
 * alternation; without the group `^` + `a|b` would anchor only `a`.
 * Capture-group numbering is unchanged.
 *
 * @param prefix regular-expression source text to put in front of `re`
 * @param re     the expression to extend; it is not modified
 * @returns a new RegExp with the combined source and the flags of `re`
 */
export function regexpPrepend(prefix: string, re: RegExp): RegExp {
    return new RegExp(`${prefix}(?:${re.source})`, re.flags);
}
199+
200+
/**
 * Resolve backslash escapes in the text of a quoted or unquoted argument.
 *
 * - `\n`, `\r` and `\t` become newline, carriage return and tab.
 * - A backslash followed by a line ending is a line continuation and is
 *   removed along with the line ending.
 * - Any other escaped character stands for itself (`\"` gives `"`, `\\`
 *   gives `\`).
 *
 * A lone backslash at the very end of the input is left unchanged.
 *
 * @param s argument text with any surrounding quotes already removed
 * @returns the text with escape sequences replaced
 */
function unescape(s: string): string {
    // `\r?\n` is tried first so a CRLF continuation is consumed as a unit.
    return s.replace(/\\(\r?\n|[\s\S])/g, (_sequence: string, escaped: string): string => {
        switch (escaped) {
            case 'n': return '\n';
            case 'r': return '\r';
            case 't': return '\t';
            case '\n': case '\r\n': return '';
            default: return escaped;
        }
    });
}

0 commit comments

Comments
 (0)