Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>
#include <functional>
#include <optional>

namespace clang {
namespace pseudo {
Expand Down Expand Up @@ -112,8 +114,18 @@ class alignas(class ForestNode *) ForestNode {
// Iteration over all nodes in the forest, including this.
llvm::iterator_range<RecursiveIterator> descendants() const;

std::string dump(const Grammar &) const;
std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
// Dump forest node to text.
// Terminal nodes are always printed with their token index ("tok[N]"). If Code
// is provided, the token found at that index in Code is appended in
// parentheses; if Code is std::nullopt, only the index is shown.
// Code is indexed with this node's start token index, so it must be the token
// stream those indexes refer to. It is held by reference (not copied), so the
// referenced stream must stay alive for the duration of the call.
std::string dump(const Grammar &,
std::optional<std::reference_wrapper<const TokenStream>>
Code = std::nullopt) const;
// Dump forest node recursively to text, one line per printed node.
// Code is forwarded unchanged to dump() for each printed node: if it is
// std::nullopt, terminals will be displayed as token indexes only; otherwise
// the corresponding token from Code is shown alongside the index.
// Code precedes Abbreviated in the parameter list, so callers that pass
// Abbreviated positionally must pass Code (e.g. std::nullopt) explicitly.
// NOTE(review): judging by the unit tests, Abbreviated folds a parent and its
// child into a single "parent~child" line; confirm against the implementation.
std::string
dumpRecursive(const Grammar &,
std::optional<std::reference_wrapper<const TokenStream>> Code =
std::nullopt,
bool Abbreviated = false) const;

private:
friend class ForestArena;
Expand Down
26 changes: 18 additions & 8 deletions clang-tools-extra/pseudo/lib/Forest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,21 @@ ForestNode::descendants() const {
return {RecursiveIterator(this), RecursiveIterator()};
}

std::string ForestNode::dump(const Grammar &G) const {
std::string ForestNode::dump(
const Grammar &G,
std::optional<std::reference_wrapper<const TokenStream>> Code) const {
switch (kind()) {
case Ambiguous:
return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
case Terminal:
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
startTokenIndex());
if (Code) {
return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()),
startTokenIndex(),
Code->get().tokens()[startTokenIndex()]);
} else {
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
startTokenIndex());
}
case Sequence:
return G.dumpRule(rule());
case Opaque:
Expand All @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const {
llvm_unreachable("Unhandled node kind!");
}

std::string ForestNode::dumpRecursive(const Grammar &G,
bool Abbreviated) const {
std::string ForestNode::dumpRecursive(
const Grammar &G,
std::optional<std::reference_wrapper<const TokenStream>> Code,
bool Abbreviated) const {
using llvm::formatv;
Token::Index MaxToken = 0;
// Count visits of nodes so we can mark those seen multiple times.
Expand Down Expand Up @@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
std::string Result;
constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>,
LineDecoration &LineDec)>
LineDecoration LineDec)>
Dump = [&](const ForestNode *P, Token::Index End,
std::optional<SymbolID> ElidedParent, LineDecoration LineDec) {
bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
Expand Down Expand Up @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G,

// The first time, print as #1. Later, =#1.
if (First) {
Result += formatv("{0} #{1}", P->dump(G), ID);
Result += formatv("{0} #{1}", P->dump(G, Code), ID);
} else {
Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
Children = {}; // Don't walk the children again.
}
} else {
Result.append(P->dump(G));
Result.append(P->dump(G, Code));
}
Result.push_back('\n');

Expand Down
12 changes: 10 additions & 2 deletions clang-tools-extra/pseudo/tool/ClangPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate",
desc("Choose best tree from parse forest"));
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
// Opt-in flag: when set, main() hands the parsed token stream to
// ForestNode::dumpRecursive() so that terminal nodes in the printed forest
// show their tokens in addition to their token indexes. When unset, the
// forest dump receives no token stream.
static opt<bool>
PrintTerminalTokens("print-terminal-tokens",
desc("Print terminal tokens in parse forest"));
static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"),
init(true));
static opt<std::string> HTMLForest("html-forest",
Expand Down Expand Up @@ -161,9 +164,14 @@ int main(int argc, char *argv[]) {
auto &Root =
glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
*StartSymID, Lang);
std::optional<std::reference_wrapper<const TokenStream>> Code;
if (PrintTerminalTokens) {
Code = *ParseableStream;
}
// If we're disambiguating, we'll print at the end instead.
if (PrintForest && !Disambiguate)
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev);
llvm::outs() << Root.dumpRecursive(Lang.G, Code,
/*Abbreviated=*/ForestAbbrev);
clang::pseudo::Disambiguation Disambig;
if (Disambiguate)
Disambig = clang::pseudo::disambiguate(&Root, {});
Expand Down Expand Up @@ -234,7 +242,7 @@ int main(int argc, char *argv[]) {
ForestNode *DisambigRoot = &Root;
removeAmbiguities(DisambigRoot, Disambig);
llvm::outs() << "Disambiguated tree:\n";
llvm::outs() << DisambigRoot->dumpRecursive(Lang.G,
llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code,
/*Abbreviated=*/ForestAbbrev);
}
}
Expand Down
6 changes: 3 additions & 3 deletions clang-tools-extra/pseudo/unittests/ForestTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ TEST_F(ForestTest, DumpBasic) {
const auto *Add =
&Arena.createSequence(symbol("add-expression"), ruleFor("add-expression"),
{Left, &T[1], Right});
EXPECT_EQ(Add->dumpRecursive(G, true),
EXPECT_EQ(Add->dumpRecursive(G, std::nullopt, true),
"[ 0, end) add-expression := id-expression + id-expression\n"
"[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n"
"[ 1, 2) ├─+ := tok[1]\n"
"[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n");
EXPECT_EQ(Add->dumpRecursive(G, false),
EXPECT_EQ(Add->dumpRecursive(G, std::nullopt, false),
"[ 0, end) add-expression := id-expression + id-expression\n"
"[ 0, 1) ├─id-expression := IDENTIFIER\n"
"[ 0, 1) │ └─IDENTIFIER := tok[0]\n"
Expand Down Expand Up @@ -144,7 +144,7 @@ TEST_F(ForestTest, DumpAbbreviatedShared) {

// We must not abbreviate away shared nodes: if we show A~* there's no way to
// show that the intermediate B node is shared between A1 and A2.
EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true),
EXPECT_EQ(A->dumpRecursive(G, std::nullopt, /*Abbreviate=*/true),
"[ 0, end) A := <ambiguous>\n"
"[ 0, end) ├─A~B := * #1\n"
"[ 0, end) │ └─* := tok[0]\n"
Expand Down