Skip to content

Commit 518d5eb

Browse files
committed
Regenerate the c++ keywords classifier
1 parent 8adc36e commit 518d5eb

23 files changed

+1002
-692
lines changed

CMakeLists.txt

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -143,26 +143,6 @@ if (CXX_BUILD_TESTS)
143143
endif()
144144

145145

146-
if (NOT KWGEN_EXECUTABLE AND CMAKE_SYSTEM_NAME STREQUAL "WASI")
147-
148-
ExternalProject_Add(
149-
kwgen_host
150-
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/kwgen
151-
INSTALL_COMMAND ""
152-
)
153-
154-
ExternalProject_Get_property(kwgen_host BINARY_DIR)
155-
156-
add_executable(kwgen IMPORTED)
157-
set_target_properties(kwgen PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/kwgen")
158-
add_dependencies(kwgen kwgen_host)
159-
160-
else()
161-
162-
add_subdirectory(tools/kwgen)
163-
164-
endif()
165-
166146
add_subdirectory(src)
167147

168148
if (CXX_BUILD_TESTS)

Dockerfile.emsdk

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ RUN apt-get update && apt-get install -y \
55
wget \
66
unzip
77

8-
COPY tools/kwgen/kwgen.cc /tmp/kwgen.cc
9-
10-
RUN g++ -std=c++20 /tmp/kwgen.cc -o /usr/bin/kwgen
11-
128
RUN wget https://github.com/google/flatbuffers/releases/download/v25.1.24/Linux.flatc.binary.clang++-18.zip && \
139
unzip Linux.flatc.binary.clang++-18.zip && \
1410
mv flatc /usr/bin/flatc && \

Dockerfile.wasi

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@ RUN apt-get update && apt-get install -y \
88
unzip \
99
ninja-build
1010

11-
COPY tools/kwgen/kwgen.cc /tmp/kwgen.cc
12-
13-
RUN g++ -std=c++20 /tmp/kwgen.cc -o /usr/bin/kwgen
14-
1511
RUN wget https://github.com/google/flatbuffers/releases/download/v25.1.24/Linux.flatc.binary.clang++-18.zip && \
1612
unzip Linux.flatc.binary.clang++-18.zip && \
1713
mv flatc /usr/bin/flatc && \

packages/cxx-frontend/.prettierrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

packages/cxx-frontend/scripts/build.js

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ async function dockerBuild() {
104104

105105
const cmakeOptions = [
106106
"-DCMAKE_INSTALL_PREFIX=build.em/install/usr",
107-
"-DKWGEN_EXECUTABLE=/usr/bin/kwgen",
108107
"-DFLATBUFFERS_FLATC_EXECUTABLE=/usr/bin/flatc",
109108
"-S .",
110109
"-B build.em",
@@ -136,13 +135,12 @@ async function dockerBuild() {
136135
await $`${docker} run -t --rm -u ${user} -v ${emscriptenCacheDir}:/emsdk/upstream/emscripten/cache/ -v ${projectRootSourcePath}:/code -w /code cxx-emsdk cmake --build build.em`;
137136
}
138137

139-
async function emsdkBuild({ cmake, emcmake, flatc, kwgen }) {
138+
async function emsdkBuild({ cmake, emcmake, flatc }) {
140139
const CMAKE_INTERPROCEDURAL_OPTIMIZATION =
141140
$.env.CMAKE_INTERPROCEDURAL_OPTIMIZATION ?? "ON";
142141

143142
const cmakeOptions = [
144143
`-DCMAKE_INSTALL_PREFIX=${projectRootSourcePath}/build.em/install/usr`,
145-
`-DKWGEN_EXECUTABLE=${kwgen}`,
146144
`-DFLATBUFFERS_FLATC_EXECUTABLE=${flatc}`,
147145
`-S ${projectRootSourcePath}`,
148146
`-B ${projectRootSourcePath}/build.em`,
@@ -193,16 +191,9 @@ async function detectEmsdk() {
193191
(await which("flatc", { nothrow: true }));
194192
if (!flatc) return null;
195193

196-
const kwgen =
197-
argv.kwgen ??
198-
$.env.KWGEN_EXECUTABLE ??
199-
(await which("kwgen", { nothrow: true }));
200-
if (!kwgen) return null;
201-
202194
return {
203195
cmake,
204196
emcmake,
205197
flatc,
206-
kwgen,
207198
};
208199
}

packages/cxx-gen-ast/.prettierrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

packages/cxx-gen-ast/src/gen.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import { gen_ast_slot_ts } from "./gen_ast_slot_ts.ts";
4444
import { gen_token_fwd_h } from "./gen_token_fwd_h.ts";
4545
import { gen_tokenkind_ts } from "./gen_tokenkind_ts.ts";
4646
import { gen_keywords_kwgen } from "./gen_keywords_kwgen.ts";
47+
import { gen_pp_keywords_kwgen } from "./gen_pp_keywords_kwgen.ts";
4748
import { gen_ast_pretty_printer_h } from "./gen_ast_pretty_printer_h.ts";
4849
import { gen_ast_pretty_printer_cc } from "./gen_ast_pretty_printer_cc.ts";
4950

@@ -150,7 +151,10 @@ gen_tokenkind_ts({
150151
output: path.join(outdir, "packages/cxx-frontend/src/TokenKind.ts"),
151152
});
152153
gen_keywords_kwgen({
153-
output: path.join(outdir, "src/parser/cxx/keywords.kwgen"),
154+
output: path.join(outdir, "src/parser/cxx/private/keywords-priv.h"),
155+
});
156+
gen_pp_keywords_kwgen({
157+
output: path.join(outdir, "src/parser/cxx/private/pp_directives-priv.h"),
154158
});
155159

156160
child_process.execSync("clang-format -i *.h *.cc", {

packages/cxx-gen-ast/src/gen_keywords_kwgen.ts

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,32 +18,34 @@
1818
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1919
// SOFTWARE.
2020

21-
import * as fs from "fs";
2221
import * as tokens from "./tokens.ts";
22+
import kwgen from "./kwgen.ts";
23+
import { cpy_header } from "./cpy_header.ts";
2324

2425
export function gen_keywords_kwgen({ output }: { output: string }) {
25-
const code: string[] = [];
26-
const emit = (line = "") => code.push(line);
27-
2826
const isContextKeyword = (kw: string) => {
2927
return ["final", "override", "import", "module"].includes(kw);
3028
};
3129

30+
const keywords: string[] = [];
31+
3232
tokens.KEYWORDS.filter((kw) => !isContextKeyword(kw)).forEach((tk) =>
33-
emit(tk),
33+
keywords.push(tk),
3434
);
3535

36-
emit();
37-
tokens.TOKEN_ALIASES.forEach(([tk]) => emit(tk));
38-
39-
const out = `%no-enums
40-
%token-prefix=cxx::TokenKind::T_
41-
%token-type=cxx::TokenKind
42-
%toupper
43-
44-
%%
45-
${code.join("\n")}
46-
`;
47-
48-
fs.writeFileSync(output, out);
36+
tokens.TOKEN_ALIASES.forEach(([tk]) => {
37+
keywords.push(tk);
38+
});
39+
40+
kwgen({
41+
copyright: cpy_header,
42+
output,
43+
keywords,
44+
tokenPrefix: "cxx::TokenKind::T_",
45+
tokenType: "cxx::TokenKind",
46+
toUpper: true,
47+
noEnums: true,
48+
defaultToken: "cxx::TokenKind::T_IDENTIFIER",
49+
classifier: "classify",
50+
});
4951
}
packages/cxx-gen-ast/src/gen_pp_keywords_kwgen.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2025 Roberto Raggi <roberto.raggi@gmail.com>
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19+
// SOFTWARE.
20+
21+
import kwgen from "./kwgen.ts";
22+
import { cpy_header } from "./cpy_header.ts";
23+
24+
export function gen_pp_keywords_kwgen({ output }: { output: string }) {
25+
kwgen({
26+
copyright: cpy_header,
27+
output,
28+
keywords: [
29+
"define",
30+
"elif",
31+
"elifdef",
32+
"elifndef",
33+
"else",
34+
"endif",
35+
"error",
36+
"if",
37+
"ifdef",
38+
"ifndef",
39+
"include_next",
40+
"include",
41+
"line",
42+
"pragma",
43+
"undef",
44+
"warning",
45+
],
46+
tokenPrefix: "PreprocessorDirectiveKind::T_",
47+
tokenType: "PreprocessorDirectiveKind",
48+
toUpper: true,
49+
noEnums: false,
50+
defaultToken: "PreprocessorDirectiveKind::T_IDENTIFIER",
51+
classifier: "classifyDirective",
52+
});
53+
}

packages/cxx-gen-ast/src/kwgen.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { writeFileSync } from "node:fs";
33
interface Options {
44
keywords: string[];
55
output: string | ((code: string) => void);
6+
copyright?: string;
67
noEnums: boolean;
78
tokenPrefix: string;
89
tokenType: string;
@@ -12,16 +13,50 @@ interface Options {
1213
}
1314

1415
export default function kwgen(options: Options) {
15-
const { keywords, output, tokenType, defaultToken, classifier } = options;
16+
const {
17+
copyright,
18+
keywords,
19+
output,
20+
classifier,
21+
defaultToken,
22+
noEnums,
23+
tokenPrefix,
24+
tokenType,
25+
} = options;
1626

1727
let out: string[] = [];
1828
const emit = (s: string) => out.push(s);
1929

30+
if (copyright !== undefined) {
31+
emit(copyright);
32+
}
33+
2034
const keywordsByLength = Map.groupBy(keywords, (s) => s.length);
2135

2236
const lengths = Array.from(keywordsByLength.keys());
2337
lengths.sort((a, b) => a - b);
2438

39+
const getTokenName = (s: string) => {
40+
const x = options.toUpper ? s.toUpperCase() : s;
41+
let name = `${tokenPrefix}${x}`;
42+
const scopeIndex = name.lastIndexOf("::");
43+
if (scopeIndex !== -1) {
44+
name = `${name.slice(scopeIndex + 2)}`;
45+
}
46+
return name;
47+
};
48+
49+
if (!noEnums) {
50+
emit(`enum class ${tokenType} {`);
51+
emit(` ${getTokenName(defaultToken)},`);
52+
keywords.forEach((kw) => {
53+
const name = getTokenName(kw);
54+
emit(` ${name},`);
55+
});
56+
emit(`};`);
57+
emit("");
58+
}
59+
2560
for (const length of lengths) {
2661
const items = keywordsByLength.get(length)!;
2762

0 commit comments

Comments
 (0)