diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64..0735e1fae0801 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include <cstdint> +#include <functional> +#include <optional> namespace clang { namespace pseudo { @@ -112,8 +114,18 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. llvm::iterator_range<RecursiveIterator> descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + // Dump forest node to text. If Code is std::nullopt, terminals will be + // displayed as token indexes. + std::string dump(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>> + Code = std::nullopt) const; + // Dump forest node recursively to text. If Code is std::nullopt, terminals + // will be displayed as token indexes. 
+ std::string + dumpRecursive(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>> Code = + std::nullopt, + bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a..adce731d6c1e1 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar &G) const { +std::string ForestNode::dump( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol())); case Terminal: - return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); + if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); + } else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); + } case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar &G, - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code, + bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark those seen multiple times. 
@@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G, std::string Result; constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max(); std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>, - LineDecoration &LineDec)> + LineDecoration LineDec)> Dump = [&](const ForestNode *P, Token::Index End, std::optional<SymbolID> ElidedParent, LineDecoration LineDec) { bool SharedNode = VisitCounts.find(P)->getSecond() > 1; @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G, // The first time, print as #1. Later, =#1. if (First) { - Result += formatv("{0} #{1}", P->dump(G), ID); + Result += formatv("{0} #{1}", P->dump(G, Code), ID); } else { Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); Children = {}; // Don't walk the children again. } } else { - Result.append(P->dump(G)); + Result.append(P->dump(G, Code)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 6a64760749cef..ec5e6bdf7bb28 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate", desc("Choose best tree from parse forest")); static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics")); static opt<bool> PrintForest("print-forest", desc("Print parse forest")); +static opt<bool> + PrintTerminalTokens("print-terminal-tokens", + desc("Print terminal tokens in parse forest")); static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"), init(true)); static opt<std::string> HTMLForest("html-forest", @@ -161,9 +164,14 @@ int main(int argc, char *argv[]) { auto &Root = glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS}, *StartSymID, Lang); + std::optional<std::reference_wrapper<const TokenStream>> Code; + if (PrintTerminalTokens) { + Code = *ParseableStream; + } // If we're disambiguating, we'll print at the end instead. 
if (PrintForest && !Disambiguate) - llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev); + llvm::outs() << Root.dumpRecursive(Lang.G, Code, + /*Abbreviated=*/ForestAbbrev); clang::pseudo::Disambiguation Disambig; if (Disambiguate) Disambig = clang::pseudo::disambiguate(&Root, {}); @@ -234,7 +242,7 @@ int main(int argc, char *argv[]) { ForestNode *DisambigRoot = &Root; removeAmbiguities(DisambigRoot, Disambig); llvm::outs() << "Disambiguated tree:\n"; - llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, + llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code, /*Abbreviated=*/ForestAbbrev); } } diff --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp index 36af896148209..d959b69ecdc94 100644 --- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp +++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp @@ -73,12 +73,12 @@ TEST_F(ForestTest, DumpBasic) { const auto *Add = &Arena.createSequence(symbol("add-expression"), ruleFor("add-expression"), {Left, &T[1], Right}); - EXPECT_EQ(Add->dumpRecursive(G, true), + EXPECT_EQ(Add->dumpRecursive(G, std::nullopt, true), "[ 0, end) add-expression := id-expression + id-expression\n" "[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n" "[ 1, 2) ├─+ := tok[1]\n" "[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n"); - EXPECT_EQ(Add->dumpRecursive(G, false), + EXPECT_EQ(Add->dumpRecursive(G, std::nullopt, false), "[ 0, end) add-expression := id-expression + id-expression\n" "[ 0, 1) ├─id-expression := IDENTIFIER\n" "[ 0, 1) │ └─IDENTIFIER := tok[0]\n" @@ -144,7 +144,7 @@ TEST_F(ForestTest, DumpAbbreviatedShared) { // We must not abbreviate away shared nodes: if we show A~* there's no way to // show that the intermediate B node is shared between A1 and A2. - EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true), + EXPECT_EQ(A->dumpRecursive(G, std::nullopt, /*Abbreviate=*/true), "[ 0, end) A := <ambiguous>\n" "[ 0, end) ├─A~B := * #1\n" "[ 0, end) │ └─* := tok[0]\n"