Skip to content

Commit 2ebb15e

Browse files
committed
Add a --print-terminal-tokens option
1 parent d38bff4 commit 2ebb15e

File tree

4 files changed

+38
-13
lines changed

4 files changed

+38
-13
lines changed

clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class Fuzzer {
4646
glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS},
4747
*Lang.G.findNonterminal("translation-unit"), Lang);
4848
if (Print)
49-
llvm::outs() << Root.dumpRecursive(Lang.G);
49+
llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt);
5050
}
5151
};
5252

clang-tools-extra/pseudo/include/clang-pseudo/Forest.h

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@
2626
#include "llvm/ADT/STLExtras.h"
2727
#include "llvm/Support/Allocator.h"
2828
#include <cstdint>
29+
#include <functional>
30+
#include <optional>
2931

3032
namespace clang {
3133
namespace pseudo {
@@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode {
112114
// Iteration over all nodes in the forest, including this.
113115
llvm::iterator_range<RecursiveIterator> descendants() const;
114116

115-
std::string dump(const Grammar &) const;
116-
std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
117+
std::string
118+
dump(const Grammar &,
119+
std::optional<std::reference_wrapper<const TokenStream>>) const;
120+
std::string
121+
dumpRecursive(const Grammar &,
122+
std::optional<std::reference_wrapper<const TokenStream>>,
123+
bool Abbreviated = false) const;
117124

118125
private:
119126
friend class ForestArena;

clang-tools-extra/pseudo/lib/Forest.cpp

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -45,13 +45,21 @@ ForestNode::descendants() const {
4545
return {RecursiveIterator(this), RecursiveIterator()};
4646
}
4747

48-
std::string ForestNode::dump(const Grammar &G) const {
48+
std::string ForestNode::dump(
49+
const Grammar &G,
50+
std::optional<std::reference_wrapper<const TokenStream>> Code) const {
4951
switch (kind()) {
5052
case Ambiguous:
5153
return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
5254
case Terminal:
53-
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
54-
startTokenIndex());
55+
if (Code) {
56+
return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()),
57+
startTokenIndex(),
58+
Code->get().tokens()[startTokenIndex()]);
59+
} else {
60+
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
61+
startTokenIndex());
62+
}
5563
case Sequence:
5664
return G.dumpRule(rule());
5765
case Opaque:
@@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const {
6068
llvm_unreachable("Unhandled node kind!");
6169
}
6270

63-
std::string ForestNode::dumpRecursive(const Grammar &G,
64-
bool Abbreviated) const {
71+
std::string ForestNode::dumpRecursive(
72+
const Grammar &G,
73+
std::optional<std::reference_wrapper<const TokenStream>> Code,
74+
bool Abbreviated) const {
6575
using llvm::formatv;
6676
Token::Index MaxToken = 0;
6777
// Count visits of nodes so we can mark those seen multiple times.
@@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
95105
std::string Result;
96106
constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
97107
std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>,
98-
LineDecoration &LineDec)>
108+
LineDecoration LineDec)>
99109
Dump = [&](const ForestNode *P, Token::Index End,
100110
std::optional<SymbolID> ElidedParent, LineDecoration LineDec) {
101111
bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
@@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
145155

146156
// The first time, print as #1. Later, =#1.
147157
if (First) {
148-
Result += formatv("{0} #{1}", P->dump(G), ID);
158+
Result += formatv("{0} #{1}", P->dump(G, Code), ID);
149159
} else {
150160
Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
151161
Children = {}; // Don't walk the children again.
152162
}
153163
} else {
154-
Result.append(P->dump(G));
164+
Result.append(P->dump(G, Code));
155165
}
156166
Result.push_back('\n');
157167

clang-tools-extra/pseudo/tool/ClangPseudo.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate",
5151
desc("Choose best tree from parse forest"));
5252
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
5353
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
54+
static opt<bool>
55+
PrintTerminalTokens("print-terminal-tokens",
56+
desc("Print terminal tokens in parse forest"));
5457
static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"),
5558
init(true));
5659
static opt<std::string> HTMLForest("html-forest",
@@ -161,9 +164,14 @@ int main(int argc, char *argv[]) {
161164
auto &Root =
162165
glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
163166
*StartSymID, Lang);
167+
std::optional<std::reference_wrapper<const TokenStream>> Code;
168+
if (PrintTerminalTokens) {
169+
Code = *ParseableStream;
170+
}
164171
// If we're disambiguating, we'll print at the end instead.
165172
if (PrintForest && !Disambiguate)
166-
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev);
173+
llvm::outs() << Root.dumpRecursive(Lang.G, Code,
174+
/*Abbreviated=*/ForestAbbrev);
167175
clang::pseudo::Disambiguation Disambig;
168176
if (Disambiguate)
169177
Disambig = clang::pseudo::disambiguate(&Root, {});
@@ -234,7 +242,7 @@ int main(int argc, char *argv[]) {
234242
ForestNode *DisambigRoot = &Root;
235243
removeAmbiguities(DisambigRoot, Disambig);
236244
llvm::outs() << "Disambiguated tree:\n";
237-
llvm::outs() << DisambigRoot->dumpRecursive(Lang.G,
245+
llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code,
238246
/*Abbreviated=*/ForestAbbrev);
239247
}
240248
}

0 commit comments

Comments
 (0)