Skip to content

Commit 133717c

Browse files
authored
Eliminate NodeLocConverter (#4870)
I'm looking at eliminating `DiagnosticConverter`. This change removes `NodeLocConverter` (albeit adding `UnitAndImportsDiagnosticConverter`), and in doing so, refactors lex conversion functions to extract them out from the `DiagnosticConverter` functions. I'll be following up with changes that collapse `DiagnosticConverter` logic into `DiagnosticEmitter` locations. The intent is that we shouldn't need separate ownership of both types.
1 parent 16b2caf commit 133717c

15 files changed

+153
-178
lines changed

toolchain/check/BUILD

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ cc_library(
8989
"//toolchain/lex:tokenized_buffer",
9090
"//toolchain/parse:node_kind",
9191
"//toolchain/parse:tree",
92-
"//toolchain/parse:tree_node_diagnostic_converter",
9392
"//toolchain/sem_ir:file",
9493
"//toolchain/sem_ir:formatter",
9594
"//toolchain/sem_ir:inst",
@@ -158,7 +157,6 @@ cc_library(
158157
"//toolchain/lex:tokenized_buffer",
159158
"//toolchain/parse:node_kind",
160159
"//toolchain/parse:tree",
161-
"//toolchain/parse:tree_node_diagnostic_converter",
162160
"//toolchain/sem_ir:entry_point",
163161
"//toolchain/sem_ir:file",
164162
"//toolchain/sem_ir:inst",
@@ -273,7 +271,7 @@ cc_library(
273271
"//common:raw_string_ostream",
274272
"//toolchain/diagnostics:diagnostic_emitter",
275273
"//toolchain/lex:token_index",
276-
"//toolchain/parse:tree_node_diagnostic_converter",
274+
"//toolchain/parse:tree",
277275
"//toolchain/sem_ir:file",
278276
"//toolchain/sem_ir:stringify_type",
279277
"@llvm-project//llvm:Support",

toolchain/check/check.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ struct Unit {
3030
SemIR::File* sem_ir;
3131

3232
// Diagnostic converters.
33-
Parse::NodeLocConverter* node_converter;
3433
SemIRDiagnosticConverter* sem_ir_converter;
3534
};
3635

toolchain/check/check_unit.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,8 @@ auto CheckUnit::ProcessNodeIds() -> bool {
363363

364364
// On crash, report which token we were handling.
365365
PrettyStackTraceFunction node_dumper([&](llvm::raw_ostream& output) {
366-
auto converted = unit_and_imports_->unit->node_converter->ConvertLoc(
367-
node_id, [](DiagnosticLoc, const DiagnosticBase<>&) {});
366+
const auto& tree = unit_and_imports_->unit->get_parse_tree_and_subtrees();
367+
auto converted = tree.NodeToDiagnosticLoc(node_id, /*token_only=*/false);
368368
converted.loc.FormatLocation(output);
369369
output << "checking " << context_.parse_tree().node_kind(node_id) << "\n";
370370
// Crash output has a tab indent; try to indent slightly past that.

toolchain/check/check_unit.h

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#include "llvm/ADT/SmallVector.h"
1010
#include "toolchain/check/check.h"
1111
#include "toolchain/check/context.h"
12-
#include "toolchain/parse/tree_node_diagnostic_converter.h"
1312
#include "toolchain/sem_ir/ids.h"
1413

1514
namespace Carbon::Check {
@@ -44,14 +43,35 @@ struct PackageImports {
4443
llvm::SmallVector<Import> imports;
4544
};
4645

46+
// Converts a `NodeId` to a diagnostic location for `UnitAndImports`.
47+
class UnitAndImportsDiagnosticConverter
48+
: public DiagnosticConverter<Parse::NodeId> {
49+
public:
50+
explicit UnitAndImportsDiagnosticConverter(
51+
llvm::function_ref<const Parse::TreeAndSubtrees&()>
52+
get_parse_tree_and_subtrees)
53+
: get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees) {}
54+
55+
auto ConvertLoc(Parse::NodeId node_id, ContextFnT /*context_fn*/) const
56+
-> ConvertedDiagnosticLoc override {
57+
return get_parse_tree_and_subtrees_().NodeToDiagnosticLoc(
58+
node_id, /*token_only=*/false);
59+
}
60+
61+
private:
62+
llvm::function_ref<const Parse::TreeAndSubtrees&()>
63+
get_parse_tree_and_subtrees_;
64+
};
65+
4766
// Contains information accumulated while checking a `Unit` (primarily import
4867
// information), in addition to the `Unit` itself.
4968
struct UnitAndImports {
5069
explicit UnitAndImports(SemIR::CheckIRId check_ir_id, Unit& unit)
5170
: check_ir_id(check_ir_id),
5271
unit(&unit),
5372
err_tracker(*unit.consumer),
54-
emitter(*unit.node_converter, err_tracker) {}
73+
converter(unit.get_parse_tree_and_subtrees),
74+
emitter(converter, err_tracker) {}
5575

5676
auto parse_tree() -> const Parse::Tree& { return unit->sem_ir->parse_tree(); }
5777
auto source() -> const SourceBuffer& {
@@ -63,7 +83,8 @@ struct UnitAndImports {
6383

6484
// Emitter information.
6585
ErrorTrackingDiagnosticConsumer err_tracker;
66-
DiagnosticEmitter<Parse::NodeLoc> emitter;
86+
UnitAndImportsDiagnosticConverter converter;
87+
DiagnosticEmitter<Parse::NodeId> emitter;
6788

6889
// List of the outgoing imports. If a package includes unavailable library
6990
// imports, it has an entry with has_load_error set. Invalid imports (for

toolchain/check/diagnostic_helpers.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
#include "llvm/ADT/APSInt.h"
99
#include "toolchain/parse/node_ids.h"
10-
#include "toolchain/parse/tree_node_diagnostic_converter.h"
1110
#include "toolchain/sem_ir/ids.h"
1211

1312
namespace Carbon::Check {

toolchain/check/sem_ir_diagnostic_converter.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,11 @@ auto SemIRDiagnosticConverter::ConvertArg(llvm::Any arg) const -> llvm::Any {
198198
auto SemIRDiagnosticConverter::ConvertLocInFile(const SemIR::File* sem_ir,
199199
Parse::NodeId node_id,
200200
bool token_only,
201-
ContextFnT context_fn) const
201+
ContextFnT /*context_fn*/) const
202202
-> ConvertedDiagnosticLoc {
203-
return node_converters_[sem_ir->check_ir_id().index]->ConvertLoc(
204-
Parse::NodeLoc(node_id, token_only), context_fn);
203+
const auto& tree_and_subtrees =
204+
imported_trees_and_subtrees_[sem_ir->check_ir_id().index]();
205+
return tree_and_subtrees.NodeToDiagnosticLoc(node_id, token_only);
205206
}
206207

207208
} // namespace Carbon::Check

toolchain/check/sem_ir_diagnostic_converter.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,21 @@
99
#include "toolchain/check/diagnostic_helpers.h"
1010
#include "toolchain/diagnostics/diagnostic_converter.h"
1111
#include "toolchain/lex/token_index.h"
12-
#include "toolchain/parse/tree_node_diagnostic_converter.h"
12+
#include "toolchain/parse/tree_and_subtrees.h"
1313
#include "toolchain/sem_ir/file.h"
1414

1515
namespace Carbon::Check {
1616

1717
// Handles the transformation of a SemIRLoc to a DiagnosticLoc.
1818
class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {
1919
public:
20+
using TreeFnT = llvm::function_ref<const Parse::TreeAndSubtrees&()>;
21+
2022
explicit SemIRDiagnosticConverter(
21-
llvm::ArrayRef<Parse::NodeLocConverter*> node_converters,
23+
llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees,
2224
const SemIR::File* sem_ir)
23-
: node_converters_(node_converters), sem_ir_(sem_ir) {}
25+
: imported_trees_and_subtrees_(imported_trees_and_subtrees),
26+
sem_ir_(sem_ir) {}
2427

2528
// Implements `DiagnosticConverter::ConvertLoc`. Adds context for any imports
2629
// used in the current SemIR to get to the underlying code.
@@ -52,7 +55,7 @@ class SemIRDiagnosticConverter : public DiagnosticConverter<SemIRLoc> {
5255
-> ConvertedDiagnosticLoc;
5356

5457
// Converters for each SemIR.
55-
llvm::ArrayRef<Parse::NodeLocConverter*> node_converters_;
58+
llvm::ArrayRef<TreeFnT> imported_trees_and_subtrees_;
5659

5760
// The current SemIR being processed.
5861
const SemIR::File* sem_ir_;

toolchain/driver/compile_subcommand.cpp

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -333,12 +333,15 @@ class CompilationUnit {
333333
// Parses tokens. Returns true on success.
334334
auto RunParse() -> void;
335335

336-
auto PreCheck() -> Parse::NodeLocConverter&;
336+
// Prepares per-IR lazy fetch functions which may come up in cross-IR
337+
// diagnostics.
338+
auto PreCheck() -> llvm::function_ref<const Parse::TreeAndSubtrees&()>;
337339

338340
// Returns information needed to check this unit.
339-
auto GetCheckUnit(SemIR::CheckIRId check_ir_id,
340-
llvm::ArrayRef<Parse::NodeLocConverter*> node_converters)
341-
-> Check::Unit;
341+
auto GetCheckUnit(
342+
SemIR::CheckIRId check_ir_id,
343+
llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
344+
all_trees_and_subtrees) -> Check::Unit;
342345

343346
// Runs post-check logic. Returns true if checking succeeded for the IR.
344347
auto PostCheck() -> void;
@@ -411,7 +414,6 @@ class CompilationUnit {
411414
std::optional<Parse::TreeAndSubtrees> parse_tree_and_subtrees_;
412415
std::optional<std::function<const Parse::TreeAndSubtrees&()>>
413416
get_parse_tree_and_subtrees_;
414-
std::optional<Parse::NodeLocConverter> node_converter_;
415417
std::optional<Check::SemIRDiagnosticConverter> sem_ir_converter_;
416418
std::optional<SemIR::File> sem_ir_;
417419
std::unique_ptr<llvm::LLVMContext> llvm_context_;
@@ -497,33 +499,32 @@ auto CompilationUnit::RunParse() -> void {
497499
}
498500
}
499501

500-
auto CompilationUnit::PreCheck() -> Parse::NodeLocConverter& {
502+
auto CompilationUnit::PreCheck()
503+
-> llvm::function_ref<const Parse::TreeAndSubtrees&()> {
501504
CARBON_CHECK(parse_tree_, "Must call RunParse first");
502-
CARBON_CHECK(!node_converter_, "Called PreCheck twice");
505+
CARBON_CHECK(!get_parse_tree_and_subtrees_, "Called PreCheck twice");
503506

504507
get_parse_tree_and_subtrees_ = [this]() -> const Parse::TreeAndSubtrees& {
505508
return this->GetParseTreeAndSubtrees();
506509
};
507-
node_converter_.emplace(&*tokens_, source_->filename(),
508-
*get_parse_tree_and_subtrees_);
509-
return *node_converter_;
510+
return *get_parse_tree_and_subtrees_;
510511
}
511512

512513
auto CompilationUnit::GetCheckUnit(
513514
SemIR::CheckIRId check_ir_id,
514-
llvm::ArrayRef<Parse::NodeLocConverter*> node_converters) -> Check::Unit {
515-
CARBON_CHECK(node_converter_, "Must call PreCheck first");
515+
llvm::ArrayRef<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
516+
all_trees_and_subtrees) -> Check::Unit {
517+
CARBON_CHECK(get_parse_tree_and_subtrees_, "Must call PreCheck first");
516518
CARBON_CHECK(!sem_ir_converter_, "Called GetCheckUnit twice");
517519

518520
sem_ir_.emplace(&*parse_tree_, check_ir_id, parse_tree_->packaging_decl(),
519521
value_stores_, input_filename_);
520-
sem_ir_converter_.emplace(node_converters, &*sem_ir_);
522+
sem_ir_converter_.emplace(all_trees_and_subtrees, &*sem_ir_);
521523
return {.consumer = consumer_,
522524
.value_stores = &value_stores_,
523525
.timings = timings_ ? &*timings_ : nullptr,
524526
.get_parse_tree_and_subtrees = *get_parse_tree_and_subtrees_,
525527
.sem_ir = &*sem_ir_,
526-
.node_converter = &*node_converter_,
527528
.sem_ir_converter = &*sem_ir_converter_};
528529
}
529530

@@ -841,23 +842,25 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult {
841842
}
842843

843844
// Pre-check assigns IR IDs and constructs node converters.
844-
llvm::SmallVector<Parse::NodeLocConverter*> node_converters;
845+
llvm::SmallVector<llvm::function_ref<const Parse::TreeAndSubtrees&()>>
846+
all_trees_and_subtrees;
845847
// This size may not match due to units that are missing source, but that's an
846848
// error case and not worth extra work.
847-
node_converters.reserve(units.size());
849+
all_trees_and_subtrees.reserve(units.size());
848850
for (auto& unit : units) {
849851
if (unit->has_source()) {
850-
node_converters.push_back(&unit->PreCheck());
852+
all_trees_and_subtrees.push_back(unit->PreCheck());
851853
}
852854
}
853855

854856
// Gather Check::Units.
855857
llvm::SmallVector<Check::Unit> check_units;
856-
check_units.reserve(node_converters.size());
858+
check_units.reserve(all_trees_and_subtrees.size());
857859
for (auto& unit : units) {
858860
if (unit->has_source()) {
859861
SemIR::CheckIRId check_ir_id(check_units.size());
860-
check_units.push_back(unit->GetCheckUnit(check_ir_id, node_converters));
862+
check_units.push_back(
863+
unit->GetCheckUnit(check_ir_id, all_trees_and_subtrees));
861864
}
862865
}
863866

toolchain/lex/tokenized_buffer.cpp

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -377,30 +377,29 @@ auto TokenizedBuffer::CollectMemUsage(MemUsage& mem_usage,
377377
mem_usage.Collect(MemUsage::ConcatLabel(label, "comments_"), comments_);
378378
}
379379

380-
auto TokenizedBuffer::SourceBufferDiagnosticConverter::ConvertLoc(
381-
const char* loc, ContextFnT /*context_fn*/) const
380+
auto TokenizedBuffer::SourcePointerToDiagnosticLoc(const char* loc) const
382381
-> ConvertedDiagnosticLoc {
383-
CARBON_CHECK(StringRefContainsPointer(tokens_->source_->text(), loc),
382+
CARBON_CHECK(StringRefContainsPointer(source_->text(), loc),
384383
"location not within buffer");
385-
int32_t offset = loc - tokens_->source_->text().begin();
384+
int32_t offset = loc - source_->text().begin();
386385

387386
// Find the first line starting after the given location.
388387
const auto* next_line_it = llvm::partition_point(
389-
tokens_->line_infos_,
388+
line_infos_,
390389
[offset](const LineInfo& line) { return line.start <= offset; });
391390

392391
// Step back one line to find the line containing the given position.
393-
CARBON_CHECK(next_line_it != tokens_->line_infos_.begin(),
392+
CARBON_CHECK(next_line_it != line_infos_.begin(),
394393
"location precedes the start of the first line");
395394
const auto* line_it = std::prev(next_line_it);
396-
int line_number = line_it - tokens_->line_infos_.begin();
395+
int line_number = line_it - line_infos_.begin();
397396
int column_number = offset - line_it->start;
398397

399398
// Grab the line from the buffer by slicing from this line to the next
400399
// minus the newline. When on the last line, instead use the start to the end
401400
// of the buffer.
402-
llvm::StringRef text = tokens_->source_->text();
403-
llvm::StringRef line = next_line_it != tokens_->line_infos_.end()
401+
llvm::StringRef text = source_->text();
402+
llvm::StringRef line = next_line_it != line_infos_.end()
404403
? text.slice(line_it->start, next_line_it->start)
405404
: text.substr(line_it->start);
406405

@@ -409,29 +408,37 @@ auto TokenizedBuffer::SourceBufferDiagnosticConverter::ConvertLoc(
409408
// tail of the line such as CR+LF, etc.
410409
line.consume_back("\n");
411410

412-
return {.loc = {.filename = tokens_->source_->filename(),
411+
return {.loc = {.filename = source_->filename(),
413412
.line = line,
414413
.line_number = line_number + 1,
415414
.column_number = column_number + 1},
416415
.last_byte_offset = offset};
417416
}
418417

419-
auto TokenDiagnosticConverter::ConvertLoc(TokenIndex token,
420-
ContextFnT context_fn) const
418+
auto TokenizedBuffer::SourceBufferDiagnosticConverter::ConvertLoc(
419+
const char* loc, ContextFnT /*context_fn*/) const
420+
-> ConvertedDiagnosticLoc {
421+
return tokens_->SourcePointerToDiagnosticLoc(loc);
422+
}
423+
424+
auto TokenizedBuffer::TokenToDiagnosticLoc(TokenIndex token) const
421425
-> ConvertedDiagnosticLoc {
422426
// Map the token location into a position within the source buffer.
423-
const auto& token_info = tokens_->GetTokenInfo(token);
424427
const char* token_start =
425-
tokens_->source_->text().begin() + token_info.byte_offset();
428+
source_->text().begin() + GetTokenInfo(token).byte_offset();
426429

427430
// Find the corresponding file location.
428431
// TODO: Should we somehow indicate in the diagnostic location if this token
429432
// is a recovery token that doesn't correspond to the original source?
430-
auto converted =
431-
TokenizedBuffer::SourceBufferDiagnosticConverter(tokens_).ConvertLoc(
432-
token_start, context_fn);
433-
converted.loc.length = tokens_->GetTokenText(token).size();
433+
auto converted = SourcePointerToDiagnosticLoc(token_start);
434+
converted.loc.length = GetTokenText(token).size();
434435
return converted;
435436
}
436437

438+
auto TokenDiagnosticConverter::ConvertLoc(TokenIndex token,
439+
ContextFnT /*context_fn*/) const
440+
-> ConvertedDiagnosticLoc {
441+
return tokens_->TokenToDiagnosticLoc(token);
442+
}
443+
437444
} // namespace Carbon::Lex

toolchain/lex/tokenized_buffer.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,9 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
194194
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
195195
-> void;
196196

197+
// Converts a token to a diagnostic location.
198+
auto TokenToDiagnosticLoc(TokenIndex token) const -> ConvertedDiagnosticLoc;
199+
197200
// Returns true if the buffer has errors that were detected at lexing time.
198201
auto has_errors() const -> bool { return has_errors_; }
199202

@@ -221,7 +224,6 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
221224

222225
private:
223226
friend class Lexer;
224-
friend class TokenDiagnosticConverter;
225227

226228
// A diagnostic location converter that maps token locations into source
227229
// buffer locations.
@@ -239,6 +241,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
239241
const TokenizedBuffer* tokens_;
240242
};
241243

244+
// Converts a pointer into the source to a diagnostic location.
245+
auto SourcePointerToDiagnosticLoc(const char* loc) const
246+
-> ConvertedDiagnosticLoc;
247+
242248
// Specifies minimum widths to use when printing a token's fields via
243249
// `printToken`.
244250
struct PrintWidths {

0 commit comments

Comments
 (0)