-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[llvm][mustache] Support setting delimiters in templates #159187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
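@llvm/pr-subscribers-llvm-support Author: Paul Kirth (ilovepi) Changes: The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters. Full diff: https://github.com/llvm/llvm-project/pull/159187.diff 3 Files Affected: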
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 9c71d6a510056..43ce6adbba41a 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -7,9 +7,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Mustache.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
+#include <cctype>
#include <sstream>
+#define DEBUG_TYPE "mustache"
+
using namespace llvm;
using namespace llvm::mustache;
@@ -62,6 +67,7 @@ class Token {
InvertSectionOpen,
UnescapeVariable,
Comment,
+ SetDelimiter,
};
Token(std::string Str)
@@ -102,6 +108,8 @@ class Token {
return Type::Partial;
case '&':
return Type::UnescapeVariable;
+ case '=':
+ return Type::SetDelimiter;
default:
return Type::Variable;
}
@@ -189,14 +197,14 @@ class ASTNode {
};
// A wrapper for arena allocator for ASTNodes
-AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
+static AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
EscapeMap &Escapes) {
return std::make_unique<ASTNode>(Partials, Lambdas, SectionLambdas, Escapes);
}
-AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
+static AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
@@ -205,7 +213,7 @@ AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
SectionLambdas, Escapes);
}
-AstPtr createTextNode(std::string Body, ASTNode *Parent,
+static AstPtr createTextNode(std::string Body, ASTNode *Parent,
llvm::StringMap<AstPtr> &Partials,
llvm::StringMap<Lambda> &Lambdas,
llvm::StringMap<SectionLambda> &SectionLambdas,
@@ -226,7 +234,7 @@ AstPtr createTextNode(std::string Body, ASTNode *Parent,
// and the current token is the second token.
// For example:
// "{{#Section}}"
-bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx == 0)
return true;
@@ -242,7 +250,7 @@ bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
// Function to check if there's no meaningful text ahead.
// We determine if a token has text ahead if the left of previous
// token does not start with a newline.
-bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx >= Tokens.size() - 1)
return true;
@@ -255,11 +263,11 @@ bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
return !TokenBody.starts_with("\r\n") && !TokenBody.starts_with("\n");
}
-bool requiresCleanUp(Token::Type T) {
+static bool requiresCleanUp(Token::Type T) {
// We must clean up all the tokens that could contain child nodes.
return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen ||
T == Token::Type::SectionClose || T == Token::Type::Comment ||
- T == Token::Type::Partial;
+ T == Token::Type::Partial || T == Token::Type::SetDelimiter;
}
// Adjust next token body if there is no text ahead.
@@ -268,7 +276,7 @@ bool requiresCleanUp(Token::Type T) {
// "{{! Comment }} \nLine 2"
// would be considered as no text ahead and should be rendered as
// " Line 2"
-void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
+static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
Token &NextToken = Tokens[Idx + 1];
StringRef NextTokenBody = NextToken.TokenBody;
// Cut off the leading newline which could be \n or \r\n.
@@ -286,7 +294,7 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
// "A"
// The exception for this is partial tag which requires us to
// keep track of the indentation once it's rendered.
-void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
+static void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
Token &CurrentToken, Token::Type CurrentType) {
Token &PrevToken = Tokens[Idx - 1];
StringRef PrevTokenBody = PrevToken.TokenBody;
@@ -296,57 +304,129 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
CurrentToken.setIndentation(Indentation);
}
+struct Tag { // One mustache tag located in the template by findNextTag().
+ enum class Kind {
+ None, // No complete tag was found.
+ Normal, // Uses the active open/close delimiters, {{...}} by default.
+ Triple, // {{{...}}}
+ };
+
+ Kind TagKind = Kind::None; // Stays None until a complete tag is matched.
+ StringRef Content; // The text between the delimiters, untrimmed.
+ StringRef FullMatch; // The entire tag, including delimiters.
+ size_t StartPosition = StringRef::npos; // Offset of the tag's first character in the template; npos when no tag was found.
+};
+
+/// Locates the first tag in \p Template at or after \p StartPos.
+///
+/// Two forms are recognised: the fixed triple-mustache form {{{...}}} and the
+/// form bounded by the currently active \p Open / \p Close delimiters. The
+/// form whose opener appears first is chosen; if both openers start at the
+/// same offset, the triple form wins.
+///
+/// \returns the located tag, or a Tag whose kind is Kind::None when no opener
+/// exists or when the chosen opener has no matching closer after it.
+static Tag findNextTag(StringRef Template, size_t StartPos,
+                       const SmallString<8> &Open,
+                       const SmallString<8> &Close) {
+  const StringLiteral TripleOpen("{{{");
+  const StringLiteral TripleClose("}}}");
+
+  Tag Found;
+
+  const size_t RegularAt = Template.find(Open, StartPos);
+  const size_t TripleAt = Template.find(TripleOpen, StartPos);
+  const bool HaveRegular = RegularAt != StringRef::npos;
+  const bool HaveTriple = TripleAt != StringRef::npos;
+
+  // Decide which opener comes first; ties go to the triple form.
+  const bool UseTriple =
+      HaveTriple && (!HaveRegular || TripleAt <= RegularAt);
+  if (!UseTriple && !HaveRegular)
+    return Found; // No opener of either form.
+
+  // Both forms share the same matching logic once the delimiter pair and
+  // the opener position are fixed.
+  const StringRef Opener = UseTriple ? StringRef(TripleOpen) : Open.str();
+  const StringRef Closer = UseTriple ? StringRef(TripleClose) : Close.str();
+  const size_t TagStart = UseTriple ? TripleAt : RegularAt;
+  const size_t BodyStart = TagStart + Opener.size();
+
+  const size_t CloserAt = Template.find(Closer, BodyStart);
+  if (CloserAt == StringRef::npos)
+    return Found; // No closing tag found.
+
+  Found.TagKind = UseTriple ? Tag::Kind::Triple : Tag::Kind::Normal;
+  Found.StartPosition = TagStart;
+  Found.Content = Template.substr(BodyStart, CloserAt - BodyStart);
+  Found.FullMatch =
+      Template.substr(TagStart, (CloserAt + Closer.size()) - TagStart);
+  return Found;
+}
+
+/// Appends the Token for an already-located tag and, when the tag is a
+/// Set Delimiter tag ({{=<open> <close>=}}), installs the new delimiters.
+///
+/// \param T      Tag to convert; expected to be a Normal or Triple tag.
+/// \param Tokens Token list the new token is appended to.
+/// \param Open   Active opening delimiter. Overwritten only by a well-formed
+///               Set Delimiter tag.
+/// \param Close  Active closing delimiter. Overwritten only by a well-formed
+///               Set Delimiter tag.
+static void processTag(const Tag &T, SmallVectorImpl<Token> &Tokens,
+                       SmallString<8> &Open, SmallString<8> &Close) {
+  LLVM_DEBUG(dbgs() << " Found tag: \"" << T.FullMatch << "\", Content: \""
+                    << T.Content << "\"\n");
+  if (T.TagKind == Tag::Kind::Triple) {
+    // {{{name}}} is tokenized exactly like {{&name}}.
+    Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&');
+    LLVM_DEBUG(dbgs() << " Created UnescapeVariable token.\n");
+    return;
+  }
+  StringRef Interpolated = T.Content;
+  StringRef Trimmed = Interpolated.trim();
+  std::string RawBody = T.FullMatch.str();
+  if (!Trimmed.starts_with("=")) {
+    // The emptiness check must be made on the trimmed text: a tag holding
+    // only whitespace (e.g. "{{ }}") trims to an empty string, and calling
+    // front() on an empty StringRef is invalid.
+    char Front = Trimmed.empty() ? ' ' : Trimmed.front();
+    Tokens.emplace_back(RawBody, Interpolated.str(), Front);
+    LLVM_DEBUG(dbgs() << " Created tag token of type '" << Front << "'\n");
+    return;
+  }
+  Tokens.emplace_back(RawBody, Interpolated.str(), '=');
+
+  // Reduce "=<open> <close>=" to "<open> <close>".
+  StringRef DelimSpec = Trimmed.drop_front(1);
+  DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
+  DelimSpec = DelimSpec.trim();
+
+  auto [NewOpen, NewClose] = DelimSpec.split(' ');
+  // Any extra padding between the two delimiters must not become part of
+  // the closing delimiter.
+  NewClose = NewClose.ltrim();
+
+  if (NewOpen.empty() || NewClose.empty()) {
+    // Malformed tag such as "{{= =}}" or "{{=|=}}". An empty delimiter would
+    // make later searches produce zero-width matches, so keep the delimiters
+    // that are currently active.
+    LLVM_DEBUG(dbgs() << " Ignoring malformed Set Delimiter tag.\n");
+    return;
+  }
+
+  Open = NewOpen;
+  Close = NewClose;
+
+  LLVM_DEBUG(dbgs() << " Found Set Delimiter tag. NewOpen='" << Open
+                    << "', NewClose='" << Close << "'\n");
+}
+
// Simple tokenizer that splits the template into tokens.
// The mustache spec allows {{{ }}} to unescape variables. Such a tag
// is tokenized as an unescaped variable, the same way as
// {{& variable}}.
-SmallVector<Token> tokenize(StringRef Template) {
+static SmallVector<Token> tokenize(StringRef Template) {
+ LLVM_DEBUG(dbgs() << "Tokenizing template: \"" << Template << "\"\n");
SmallVector<Token> Tokens;
- StringLiteral Open("{{");
- StringLiteral Close("}}");
- StringLiteral TripleOpen("{{{");
- StringLiteral TripleClose("}}}");
+ SmallString<8> Open("{{");
+ SmallString<8> Close("}}");
size_t Start = 0;
- size_t DelimiterStart = Template.find(Open);
- if (DelimiterStart == StringRef::npos) {
- Tokens.emplace_back(Template.str());
- return Tokens;
- }
- while (DelimiterStart != StringRef::npos) {
- if (DelimiterStart != Start)
- Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
-
- if (Template.substr(DelimiterStart).starts_with(TripleOpen)) {
- size_t DelimiterEnd = Template.find(TripleClose, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
- size_t BodyStart = DelimiterStart + TripleOpen.size();
- std::string Body =
- Template.substr(BodyStart, DelimiterEnd - BodyStart).str();
- std::string RawBody =
- Template.substr(DelimiterStart, DelimiterEnd - DelimiterStart + 3)
- .str();
- Tokens.emplace_back(RawBody, "&" + Body, '&');
- Start = DelimiterEnd + TripleClose.size();
- } else {
- size_t DelimiterEnd = Template.find(Close, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
-
- // Extract the Interpolated variable without delimiters.
- size_t InterpolatedStart = DelimiterStart + Open.size();
- size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size();
- std::string Interpolated =
- Template.substr(InterpolatedStart, InterpolatedEnd).str();
- std::string RawBody = Open.str() + Interpolated + Close.str();
- Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
- Start = DelimiterEnd + Close.size();
+
+ while (Start < Template.size()) {
+ LLVM_DEBUG(dbgs() << "Loop start. Start=" << Start << ", Open='" << Open
+ << "', Close='" << Close << "'\n");
+ Tag T = findNextTag(Template, Start, Open, Close);
+
+ if (T.TagKind == Tag::Kind::None) {
+ // No more tags, the rest is text.
+ Tokens.emplace_back(Template.substr(Start).str());
+ LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \""
+ << Template.substr(Start) << "\"\n");
+ break;
+ }
+
+ // Add the text before the tag.
+ if (T.StartPosition > Start) {
+ StringRef Text = Template.substr(Start, T.StartPosition - Start);
+ Tokens.emplace_back(Text.str());
+ LLVM_DEBUG(dbgs() << " Created Text token: \"" << Text << "\"\n");
}
- DelimiterStart = Template.find(Open, Start);
- }
- if (Start < Template.size())
- Tokens.emplace_back(Template.substr(Start).str());
+ processTag(T, Tokens, Open, Close);
+
+ // Move past the tag.
+ Start = T.StartPosition + T.FullMatch.size();
+ }
// Fix up white spaces for:
// - open sections
@@ -388,6 +468,7 @@ SmallVector<Token> tokenize(StringRef Template) {
if ((!HasTextBehind && !HasTextAhead) || (!HasTextBehind && Idx == LastIdx))
stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType);
}
+ LLVM_DEBUG(dbgs() << "Tokenizing finished.\n");
return Tokens;
}
@@ -551,13 +632,14 @@ void Parser::parseMustache(ASTNode *Parent, llvm::StringMap<AstPtr> &Partials,
break;
}
case Token::Type::Comment:
+ case Token::Type::SetDelimiter:
break;
case Token::Type::SectionClose:
return;
}
}
}
-void toMustacheString(const json::Value &Data, raw_ostream &OS) {
+static void toMustacheString(const json::Value &Data, raw_ostream &OS) {
switch (Data.kind()) {
case json::Value::Null:
return;
@@ -590,6 +672,8 @@ void toMustacheString(const json::Value &Data, raw_ostream &OS) {
}
void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
+ if (Ty != Root && Ty != Text && AccessorValue.empty())
+ return;
// Set the parent context to the incoming context so that we
// can walk up the context tree correctly in findContext().
ParentContext = &CurrentCtx;
@@ -789,3 +873,5 @@ Template &Template::operator=(Template &&Other) noexcept {
return *this;
}
} // namespace llvm::mustache
+
+#undef DEBUG_TYPE
diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp
index f613fde072cde..addf0355c4d0a 100644
--- a/llvm/unittests/Support/MustacheTest.cpp
+++ b/llvm/unittests/Support/MustacheTest.cpp
@@ -1335,7 +1335,7 @@ TEST(MustacheDelimiters, PairBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(Hey!)", Out);
+ EXPECT_EQ("(Hey!)", Out);
}
TEST(MustacheDelimiters, SpecialCharacters) {
@@ -1344,7 +1344,7 @@ TEST(MustacheDelimiters, SpecialCharacters) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(It worked!)", Out);
+ EXPECT_EQ("(It worked!)", Out);
}
TEST(MustacheDelimiters, Sections) {
@@ -1354,7 +1354,7 @@ TEST(MustacheDelimiters, Sections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1366,7 +1366,7 @@ TEST(MustacheDelimiters, InvertedSections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1378,7 +1378,7 @@ TEST(MustacheDelimiters, PartialInheritence) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. ]\n[ .yes. ]\n", Out);
+ EXPECT_EQ("[ .yes. ]\n[ .yes. ]\n", Out);
}
TEST(MustacheDelimiters, PostPartialBehavior) {
@@ -1388,7 +1388,7 @@ TEST(MustacheDelimiters, PostPartialBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
+ EXPECT_EQ("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
}
TEST(MustacheDelimiters, SurroundingWhitespace) {
@@ -1415,7 +1415,7 @@ TEST(MustacheDelimiters, StandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, IndentedStandaloneTag) {
@@ -1424,7 +1424,7 @@ TEST(MustacheDelimiters, IndentedStandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, StandaloneLineEndings) {
@@ -1433,7 +1433,7 @@ TEST(MustacheDelimiters, StandaloneLineEndings) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("|\r\n|", Out);
+ EXPECT_EQ("|\r\n|", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
@@ -1442,7 +1442,7 @@ TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=", Out);
+ EXPECT_EQ("=", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutNewline) {
@@ -1451,7 +1451,7 @@ TEST(MustacheDelimiters, StandaloneWithoutNewline) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=\n", Out);
+ EXPECT_EQ("=\n", Out);
}
TEST(MustacheDelimiters, PairwithPadding) {
@@ -1462,4 +1462,3 @@ TEST(MustacheDelimiters, PairwithPadding) {
T.render(D, OS);
EXPECT_EQ("||", Out);
}
-
diff --git a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
index ea1395b2646f6..bdcef376547fb 100644
--- a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
+++ b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
@@ -54,20 +54,6 @@ static int NumXFail = 0;
static int NumSuccess = 0;
static const StringMap<StringSet<>> XFailTestNames = {{
- {"delimiters.json",
- {
- "Pair Behavior",
- "Special Characters",
- "Sections",
- "Inverted Sections",
- "Partial Inheritence",
- "Post-Partial Behavior",
- "Standalone Tag",
- "Indented Standalone Tag",
- "Standalone Line Endings",
- "Standalone Without Previous Line",
- "Standalone Without Newline",
- }},
{"~dynamic-names.json",
{
"Basic Behavior - Partial",
@@ -113,7 +99,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Block reindentation",
"Intrinsic indentation",
"Nested block reindentation",
-
}},
{"~lambdas.json",
{
@@ -126,7 +111,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Section - Expansion",
"Section - Alternate Delimiters",
"Section - Multiple Calls",
-
}},
{"partials.json", {"Standalone Indentation"}},
}};
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
97fa89f
to
fbd8894
Compare
293e89e
to
c3b7fae
Compare
7cea784
to
630801a
Compare
6ba6cd7
to
78a29c3
Compare
630801a
to
794217c
Compare
78a29c3
to
dc36a30
Compare
794217c
to
87331c5
Compare
87331c5
to
bd4ebfd
Compare
The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters.
bd4ebfd
to
9ff3fba
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/32168 Here is the relevant piece of the build log for reference:
|
The base mustache spec allows setting custom delimiters, which slightly change parsing of partials. This patch implements that feature by adding a new token type, and changing the tokenizer's behavior to allow setting custom delimiters.
The base mustache spec allows setting custom delimiters, which slightly
change parsing of partials. This patch implements that feature by adding
a new token type, and changing the tokenizer's behavior to allow setting
custom delimiters.