-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[clang-pseudo] Add a --print-terminal-tokens option #87898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang-pseudo] Add a --print-terminal-tokens option #87898
Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. If you have received no comments on your PR for a week, you can request a review by pinging the PR with a comment. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
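@llvm/pr-subscribers-clang-tools-extra Author: Jeremy Rifkin (jeremy-rifkin) Changes: This PR adds a `--print-terminal-tokens` option to clang-pseudo which prints tokens in a parse forest in addition to providing the token index. Full diff: https://github.com/llvm/llvm-project/pull/87898.diff 4 Files Affected: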
diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
index 87b9d15480cc35..33b3da1ed6ea9f 100644
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -46,7 +46,7 @@ class Fuzzer {
glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS},
*Lang.G.findNonterminal("translation-unit"), Lang);
if (Print)
- llvm::outs() << Root.dumpRecursive(Lang.G);
+ llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt);
}
};
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
index e9edb40e02b64e..642c489b3fba41 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h
@@ -26,6 +26,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>
+#include <functional>
+#include <optional>
namespace clang {
namespace pseudo {
@@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode {
// Iteration over all nodes in the forest, including this.
llvm::iterator_range<RecursiveIterator> descendants() const;
- std::string dump(const Grammar &) const;
- std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
+ std::string
+ dump(const Grammar &,
+ std::optional<std::reference_wrapper<const TokenStream>>) const;
+ std::string
+ dumpRecursive(const Grammar &,
+ std::optional<std::reference_wrapper<const TokenStream>>,
+ bool Abbreviated = false) const;
private:
friend class ForestArena;
diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index e8e60e5ec475a4..adce731d6c1e1c 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -45,13 +45,21 @@ ForestNode::descendants() const {
return {RecursiveIterator(this), RecursiveIterator()};
}
-std::string ForestNode::dump(const Grammar &G) const {
+std::string ForestNode::dump(
+ const Grammar &G,
+ std::optional<std::reference_wrapper<const TokenStream>> Code) const {
switch (kind()) {
case Ambiguous:
return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
case Terminal:
- return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
- startTokenIndex());
+ if (Code) {
+ return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()),
+ startTokenIndex(),
+ Code->get().tokens()[startTokenIndex()]);
+ } else {
+ return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
+ startTokenIndex());
+ }
case Sequence:
return G.dumpRule(rule());
case Opaque:
@@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const {
llvm_unreachable("Unhandled node kind!");
}
-std::string ForestNode::dumpRecursive(const Grammar &G,
- bool Abbreviated) const {
+std::string ForestNode::dumpRecursive(
+ const Grammar &G,
+ std::optional<std::reference_wrapper<const TokenStream>> Code,
+ bool Abbreviated) const {
using llvm::formatv;
Token::Index MaxToken = 0;
// Count visits of nodes so we can mark those seen multiple times.
@@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
std::string Result;
constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>,
- LineDecoration &LineDec)>
+ LineDecoration LineDec)>
Dump = [&](const ForestNode *P, Token::Index End,
std::optional<SymbolID> ElidedParent, LineDecoration LineDec) {
bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
@@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
// The first time, print as #1. Later, =#1.
if (First) {
- Result += formatv("{0} #{1}", P->dump(G), ID);
+ Result += formatv("{0} #{1}", P->dump(G, Code), ID);
} else {
Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
Children = {}; // Don't walk the children again.
}
} else {
- Result.append(P->dump(G));
+ Result.append(P->dump(G, Code));
}
Result.push_back('\n');
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 6a64760749cefe..4797dc01cdc13b 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate",
desc("Choose best tree from parse forest"));
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
+static opt<bool>
+ PrintTerminalTokens("print-terminal-tokens",
+ desc("Print terminal tokens in parse forest"));
static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"),
init(true));
static opt<std::string> HTMLForest("html-forest",
@@ -161,9 +164,14 @@ int main(int argc, char *argv[]) {
auto &Root =
glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
*StartSymID, Lang);
+ std::optional<std::reference_wrapper<const TokenStream>> Code;
+ if (PrintTerminalTokens) {
+ Code = *ParseableStream;
+ }
// If we're disambiguating, we'll print at the end instead.
if (PrintForest && !Disambiguate)
- llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev);
+ llvm::outs() << Root.dumpRecursive(Lang.G, Code,
+ /*Abbreviated=*/ForestAbbrev);
clang::pseudo::Disambiguation Disambig;
if (Disambiguate)
Disambig = clang::pseudo::disambiguate(&Root, {});
@@ -234,7 +242,7 @@ int main(int argc, char *argv[]) {
ForestNode *DisambigRoot = &Root;
removeAmbiguities(DisambigRoot, Disambig);
llvm::outs() << "Disambiguated tree:\n";
- llvm::outs() << DisambigRoot->dumpRecursive(Lang.G,
+ llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code,
/*Abbreviated=*/ForestAbbrev);
}
}
|
|
Clang pseudo has apparently been removed |
This PR adds a `--print-terminal-tokens` option to clang-pseudo which prints tokens in a parse forest in addition to providing the token index: