Skip to content

Commit 4da9adc

Browse files
committed
chore: Add tokens and source files to the serialized AST
1 parent 84d7aef commit 4da9adc

File tree

11 files changed

+1498
-2021
lines changed

11 files changed

+1498
-2021
lines changed

packages/cxx-gen-ast/src/gen_ast_decoder_cc.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ export function gen_ast_decoder_cc({
4848
const className = makeClassName(base);
4949
emit();
5050
emit(
51-
` auto ASTDecoder::decode${className}(const void* ptr, io::${className} type) -> ${base}* {`,
51+
` auto ASTDecoder::decode${className}(const void* ptr, io::${className} type) -> ${base}* {`
5252
);
5353
emit(` switch (type) {`);
5454
nodes.forEach(({ name }) => {
5555
const className = makeClassName(name);
5656
emit(` case io::${baseClassName}_${className}:`);
5757
emit(
58-
` return decode${className}(reinterpret_cast<const io::${className}*>(ptr));`,
58+
` return decode${className}(reinterpret_cast<const io::${className}*>(ptr));`
5959
);
6060
});
6161
emit(` default:`);
@@ -70,7 +70,7 @@ export function gen_ast_decoder_cc({
7070
const className = makeClassName(name);
7171
emit();
7272
emit(
73-
` auto ASTDecoder::decode${className}(const io::${className}* node) -> ${name}* {`,
73+
` auto ASTDecoder::decode${className}(const io::${className}* node) -> ${name}* {`
7474
);
7575
emit(` if (!node) return nullptr;`);
7676
emit();
@@ -135,6 +135,8 @@ export function gen_ast_decoder_cc({
135135
} else if (m.kind == "attribute" && m.type === "TokenKind") {
136136
emit(` ast->${m.name} = static_cast<TokenKind>(`);
137137
emit(` node->${snakeName}());`);
138+
} else if (m.kind == "token") {
139+
emit(` ast->${m.name} = SourceLocation(node->${snakeName}());`);
138140
}
139141
});
140142
emit(` return ast;`);

packages/cxx-gen-ast/src/gen_ast_encoder_cc.ts

Lines changed: 24 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,8 @@ export function gen_ast_encoder_cc({
172172
emit(` static_cast<std::uint32_t>(ast->${m.name}));`);
173173
});
174174
} else if (m.kind == "token") {
175-
emit(` auto ${m.name} = encodeSourceLocation(ast->${m.name});`);
176175
finalizers.push(() => {
177-
emit(` builder.add_${fieldName}(${m.name}.o);`);
176+
emit(` builder.add_${fieldName}(ast->${m.name}.index());`);
178177
});
179178
}
180179
});
@@ -201,8 +200,9 @@ export function gen_ast_encoder_cc({
201200
#include <cxx/literals.h>
202201
#include <cxx/names.h>
203202
#include <cxx/translation_unit.h>
204-
#include <format>
203+
#include <cxx/preprocessor.h>
205204
205+
#include <format>
206206
#include <algorithm>
207207
208208
namespace cxx {
@@ -214,17 +214,32 @@ auto ASTEncoder::operator()(TranslationUnit* unit) -> std::span<const std::uint8
214214
Table<StringLiteral> stringLiterals;
215215
Table<IntegerLiteral> integerLiterals;
216216
Table<FloatLiteral> floatLiterals;
217-
SourceFiles sourceFiles;
218-
SourceLines sourceLines;
219217
220218
std::swap(unit_, unit);
221219
std::swap(identifiers_, identifiers);
222220
std::swap(charLiterals_, charLiterals);
223221
std::swap(stringLiterals_, stringLiterals);
224222
std::swap(integerLiterals_, integerLiterals);
225223
std::swap(floatLiterals_, floatLiterals);
226-
std::swap(sourceFiles_, sourceFiles);
227-
std::swap(sourceLines_, sourceLines);
224+
225+
std::vector<flatbuffers::Offset<io::Source>> sources;
226+
for (const auto& source : unit_->preprocessor()->sources()) {
227+
auto file_name = fbb_.CreateString(source.fileName);
228+
std::vector<int> lineOffsets(source.lineOffsets.begin(),
229+
source.lineOffsets.end());
230+
auto line_offsets = fbb_.CreateVector(lineOffsets);
231+
sources.push_back(io::CreateSource(fbb_, file_name, line_offsets));
232+
}
233+
234+
auto source_list = fbb_.CreateVector(sources);
235+
236+
std::vector<std::uint64_t> tokens;
237+
for (std::uint32_t i = 0; i < unit_->tokenCount(); ++i) {
238+
const auto& token = unit_->tokenAt(SourceLocation(i));
239+
tokens.push_back(token.raw());
240+
}
241+
242+
auto token_list = fbb_.CreateVector(tokens);
228243
229244
auto [unitOffset, unitType] = acceptUnit(unit_->ast());
230245
@@ -234,15 +249,15 @@ auto ASTEncoder::operator()(TranslationUnit* unit) -> std::span<const std::uint8
234249
builder.add_unit(unitOffset);
235250
builder.add_unit_type(static_cast<io::Unit>(unitType));
236251
builder.add_file_name(file_name);
252+
builder.add_source_list(source_list);
253+
builder.add_token_list(token_list);
237254
238255
std::swap(unit_, unit);
239256
std::swap(identifiers_, identifiers);
240257
std::swap(charLiterals_, charLiterals);
241258
std::swap(stringLiterals_, stringLiterals);
242259
std::swap(integerLiterals_, integerLiterals);
243260
std::swap(floatLiterals_, floatLiterals);
244-
std::swap(sourceFiles_, sourceFiles);
245-
std::swap(sourceLines_, sourceLines);
246261
247262
fbb_.Finish(builder.Finish(), io::SerializedUnitIdentifier());
248263
@@ -258,47 +273,6 @@ auto ASTEncoder::accept(AST* ast) -> flatbuffers::Offset<> {
258273
return offset;
259274
}
260275
261-
auto ASTEncoder::encodeSourceLocation(const SourceLocation& loc)
262-
-> flatbuffers::Offset<> {
263-
if (!loc) {
264-
return {};
265-
}
266-
267-
const auto start = unit_->tokenStartPosition(loc);
268-
269-
flatbuffers::Offset<io::SourceLine> sourceLineOffset;
270-
271-
auto key = std::tuple(start.fileName, start.line);
272-
273-
if (sourceLines_.contains(key)) {
274-
sourceLineOffset = sourceLines_.at(key).o;
275-
} else {
276-
flatbuffers::Offset<flatbuffers::String> fileNameOffset;
277-
278-
if (sourceFiles_.contains(start.fileName)) {
279-
fileNameOffset = sourceFiles_.at(start.fileName);
280-
} else {
281-
fileNameOffset = fbb_.CreateString(start.fileName);
282-
sourceFiles_.emplace(start.fileName, fileNameOffset.o);
283-
}
284-
285-
io::SourceLineBuilder sourceLineBuilder{fbb_};
286-
sourceLineBuilder.add_file_name(fileNameOffset);
287-
sourceLineBuilder.add_line(start.line);
288-
sourceLineOffset = sourceLineBuilder.Finish();
289-
sourceLines_.emplace(std::move(key), sourceLineOffset.o);
290-
}
291-
292-
io::SourceLocationBuilder sourceLocationBuilder{fbb_};
293-
sourceLocationBuilder.add_source_line(sourceLineOffset);
294-
sourceLocationBuilder.add_column(start.column);
295-
296-
auto offset = sourceLocationBuilder.Finish();
297-
298-
return offset.Union();
299-
}
300-
301-
302276
${code.join("\n")}
303277
304278
} // namespace cxx

packages/cxx-gen-ast/src/gen_ast_encoder_h.ts

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,22 +43,12 @@ export function gen_ast_encoder_h({
4343
emit(` using Table = std::unordered_map<const T*,`);
4444
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
4545
emit();
46-
emit(` using SourceFiles = std::unordered_map<`);
47-
emit(` std::string_view,`);
48-
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
49-
emit();
50-
emit(` using SourceLines = std::map<`);
51-
emit(` std::tuple<std::string_view, std::uint32_t>,`);
52-
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
53-
emit();
5446
emit(` TranslationUnit* unit_ = nullptr;`);
5547
emit(` Table<Identifier> identifiers_;`);
5648
emit(` Table<CharLiteral> charLiterals_;`);
5749
emit(` Table<StringLiteral> stringLiterals_;`);
5850
emit(` Table<IntegerLiteral> integerLiterals_;`);
5951
emit(` Table<FloatLiteral> floatLiterals_;`);
60-
emit(` SourceFiles sourceFiles_;`);
61-
emit(` SourceLines sourceLines_;`);
6252
emit(` flatbuffers::FlatBufferBuilder fbb_;`);
6353
emit(` flatbuffers::Offset<> offset_;`);
6454
emit(` std::uint32_t type_ = 0;`);
@@ -70,9 +60,6 @@ export function gen_ast_encoder_h({
7060
emit(` -> std::span<const std::uint8_t>;`);
7161

7262
emit(`private:`);
73-
emit(` auto encodeSourceLocation(const SourceLocation& loc)`);
74-
emit(` -> flatbuffers::Offset<>;`);
75-
emit();
7663
emit(` auto accept(AST* ast) -> flatbuffers::Offset<>;`);
7764
by_base.forEach((_nodes, base) => {
7865
if (base === "AST") return;

packages/cxx-gen-ast/src/gen_ast_fbs.ts

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ export function gen_ast_fbs({ ast, output }: { ast: AST; output: string }) {
9595
case "node-list":
9696
break;
9797
case "token":
98-
emit(` ${fieldName}: SourceLocation;`);
98+
emit(` ${fieldName}: uint32;`);
9999
break;
100100
case "token-list":
101-
emit(` ${fieldName}: [SourceLocation];`);
101+
throw new Error("unexpected token-list");
102102
break;
103103
case "attribute": {
104104
break;
@@ -115,14 +115,9 @@ export function gen_ast_fbs({ ast, output }: { ast: AST; output: string }) {
115115
const out = `${cpy_header}
116116
namespace cxx.io;
117117
118-
table SourceLine {
118+
table Source {
119119
file_name: string;
120-
line: uint32;
121-
}
122-
123-
table SourceLocation {
124-
source_line: SourceLine;
125-
column: uint32;
120+
line_offsets: [int];
126121
}
127122
128123
${code.join("\n")}
@@ -131,6 +126,8 @@ table SerializedUnit {
131126
version: uint32;
132127
unit: Unit;
133128
file_name: string;
129+
token_list: [uint64];
130+
source_list: [Source];
134131
}
135132
136133
root_type SerializedUnit;

0 commit comments

Comments
 (0)