Skip to content

Commit 1fcf481

Browse files
authored
[llvm][mustache] Support setting delimiters in templates (#159187)
The base mustache spec allows setting custom delimiters, which slightly changes the parsing of partials. This patch implements that feature by adding a new token type and changing the tokenizer's behavior to allow setting custom delimiters.
1 parent 18136c2 commit 1fcf481

File tree

3 files changed

+158
-90
lines changed

3 files changed

+158
-90
lines changed

llvm/lib/Support/Mustache.cpp

Lines changed: 147 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,13 @@
77
//===----------------------------------------------------------------------===//
88
#include "llvm/Support/Mustache.h"
99
#include "llvm/ADT/SmallVector.h"
10+
#include "llvm/Support/Debug.h"
1011
#include "llvm/Support/raw_ostream.h"
12+
#include <cctype>
1113
#include <sstream>
1214

15+
#define DEBUG_TYPE "mustache"
16+
1317
using namespace llvm;
1418
using namespace llvm::mustache;
1519

@@ -62,6 +66,7 @@ class Token {
6266
InvertSectionOpen,
6367
UnescapeVariable,
6468
Comment,
69+
SetDelimiter,
6570
};
6671

6772
Token(std::string Str)
@@ -102,6 +107,8 @@ class Token {
102107
return Type::Partial;
103108
case '&':
104109
return Type::UnescapeVariable;
110+
case '=':
111+
return Type::SetDelimiter;
105112
default:
106113
return Type::Variable;
107114
}
@@ -189,27 +196,27 @@ class ASTNode {
189196
};
190197

191198
// A wrapper for arena allocator for ASTNodes
192-
AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
193-
llvm::StringMap<Lambda> &Lambdas,
194-
llvm::StringMap<SectionLambda> &SectionLambdas,
195-
EscapeMap &Escapes) {
199+
static AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
200+
llvm::StringMap<Lambda> &Lambdas,
201+
llvm::StringMap<SectionLambda> &SectionLambdas,
202+
EscapeMap &Escapes) {
196203
return std::make_unique<ASTNode>(Partials, Lambdas, SectionLambdas, Escapes);
197204
}
198205

199-
AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
200-
llvm::StringMap<AstPtr> &Partials,
201-
llvm::StringMap<Lambda> &Lambdas,
202-
llvm::StringMap<SectionLambda> &SectionLambdas,
203-
EscapeMap &Escapes) {
206+
static AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
207+
llvm::StringMap<AstPtr> &Partials,
208+
llvm::StringMap<Lambda> &Lambdas,
209+
llvm::StringMap<SectionLambda> &SectionLambdas,
210+
EscapeMap &Escapes) {
204211
return std::make_unique<ASTNode>(T, std::move(A), Parent, Partials, Lambdas,
205212
SectionLambdas, Escapes);
206213
}
207214

208-
AstPtr createTextNode(std::string Body, ASTNode *Parent,
209-
llvm::StringMap<AstPtr> &Partials,
210-
llvm::StringMap<Lambda> &Lambdas,
211-
llvm::StringMap<SectionLambda> &SectionLambdas,
212-
EscapeMap &Escapes) {
215+
static AstPtr createTextNode(std::string Body, ASTNode *Parent,
216+
llvm::StringMap<AstPtr> &Partials,
217+
llvm::StringMap<Lambda> &Lambdas,
218+
llvm::StringMap<SectionLambda> &SectionLambdas,
219+
EscapeMap &Escapes) {
213220
return std::make_unique<ASTNode>(std::move(Body), Parent, Partials, Lambdas,
214221
SectionLambdas, Escapes);
215222
}
@@ -226,7 +233,7 @@ AstPtr createTextNode(std::string Body, ASTNode *Parent,
226233
// and the current token is the second token.
227234
// For example:
228235
// "{{#Section}}"
229-
bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
236+
static bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
230237
if (Idx == 0)
231238
return true;
232239

@@ -242,7 +249,7 @@ bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
242249
// Function to check if there's no meaningful text ahead.
243250
// We determine if a token has text ahead if the left of previous
244251
// token does not start with a newline.
245-
bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
252+
static bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
246253
if (Idx >= Tokens.size() - 1)
247254
return true;
248255

@@ -255,11 +262,11 @@ bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
255262
return !TokenBody.starts_with("\r\n") && !TokenBody.starts_with("\n");
256263
}
257264

258-
bool requiresCleanUp(Token::Type T) {
265+
static bool requiresCleanUp(Token::Type T) {
259266
// We must clean up whitespace around section open/close, inverted section,
// comment, partial and set-delimiter tags.
260267
return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen ||
261268
T == Token::Type::SectionClose || T == Token::Type::Comment ||
262-
T == Token::Type::Partial;
269+
T == Token::Type::Partial || T == Token::Type::SetDelimiter;
263270
}
264271

265272
// Adjust next token body if there is no text ahead.
@@ -268,7 +275,7 @@ bool requiresCleanUp(Token::Type T) {
268275
// "{{! Comment }} \nLine 2"
269276
// would be considered as no text ahead and should be rendered as
270277
// " Line 2"
271-
void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
278+
static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
272279
Token &NextToken = Tokens[Idx + 1];
273280
StringRef NextTokenBody = NextToken.TokenBody;
274281
// Cut off the leading newline which could be \n or \r\n.
@@ -294,57 +301,128 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
294301
CurrentToken.setIndentation(Indentation);
295302
}
296303

304+
struct Tag {
305+
enum class Kind {
306+
None,
307+
Normal, // {{...}}
308+
Triple, // {{{...}}}
309+
};
310+
311+
Kind TagKind = Kind::None;
312+
StringRef Content; // The content between the delimiters.
313+
StringRef FullMatch; // The entire tag, including delimiters.
314+
size_t StartPosition = StringRef::npos;
315+
};
316+
317+
// Locates the first tag in Template at or after StartPos.
//
// Two kinds of tags are searched for: a "normal" tag enclosed by the currently
// active Open/Close delimiters, and a triple-mustache tag enclosed by the
// literal "{{{" / "}}}" (looked up regardless of what Open/Close are).
// Whichever opening delimiter occurs first wins; on a tie the triple form is
// chosen, which is what makes "{{{x}}}" a triple tag while Open is "{{".
//
// Returns a Tag whose Content/FullMatch are views into Template. If no opening
// delimiter exists, or the chosen opening delimiter has no matching closing
// delimiter after it, a Tag of Kind::None is returned.
static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open,
                       StringRef Close) {
  const StringLiteral TripleOpen("{{{");
  const StringLiteral TripleClose("}}}");

  const size_t NormalAt = Template.find(Open, StartPos);
  const size_t TripleAt = Template.find(TripleOpen, StartPos);

  const bool HasNormal = NormalAt != StringRef::npos;
  const bool HasTriple = TripleAt != StringRef::npos;
  const bool UseTriple = HasTriple && (!HasNormal || TripleAt <= NormalAt);

  // Neither kind of opening delimiter is present.
  if (!UseTriple && !HasNormal)
    return Tag();

  // Select the delimiter pair of the tag that comes first.
  const size_t OpenAt = UseTriple ? TripleAt : NormalAt;
  const StringRef OpenDelim = UseTriple ? StringRef(TripleOpen) : Open;
  const StringRef CloseDelim = UseTriple ? StringRef(TripleClose) : Close;

  const size_t BodyAt = OpenAt + OpenDelim.size();
  const size_t CloseAt = Template.find(CloseDelim, BodyAt);
  if (CloseAt == StringRef::npos)
    return Tag(); // No closing tag found.

  Tag Found;
  Found.TagKind = UseTriple ? Tag::Kind::Triple : Tag::Kind::Normal;
  Found.StartPosition = OpenAt;
  Found.Content = Template.substr(BodyAt, CloseAt - BodyAt);
  Found.FullMatch =
      Template.substr(OpenAt, (CloseAt + CloseDelim.size()) - OpenAt);
  return Found;
}
358+
359+
static void processTag(const Tag &T, SmallVectorImpl<Token> &Tokens,
360+
SmallString<8> &Open, SmallString<8> &Close) {
361+
LLVM_DEBUG(dbgs() << " Found tag: \"" << T.FullMatch << "\", Content: \""
362+
<< T.Content << "\"\n");
363+
if (T.TagKind == Tag::Kind::Triple) {
364+
Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&');
365+
LLVM_DEBUG(dbgs() << " Created UnescapeVariable token.\n");
366+
return;
367+
}
368+
StringRef Interpolated = T.Content;
369+
std::string RawBody = T.FullMatch.str();
370+
if (!Interpolated.trim().starts_with("=")) {
371+
char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
372+
Tokens.emplace_back(RawBody, Interpolated.str(), Front);
373+
LLVM_DEBUG(dbgs() << " Created tag token of type '" << Front << "'\n");
374+
return;
375+
}
376+
Tokens.emplace_back(RawBody, Interpolated.str(), '=');
377+
StringRef DelimSpec = Interpolated.trim();
378+
DelimSpec = DelimSpec.drop_front(1);
379+
DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
380+
DelimSpec = DelimSpec.trim();
381+
382+
auto [NewOpen, NewClose] = DelimSpec.split(' ');
383+
Open = NewOpen;
384+
Close = NewClose;
385+
386+
LLVM_DEBUG(dbgs() << " Found Set Delimiter tag. NewOpen='" << Open
387+
<< "', NewClose='" << Close << "'\n");
388+
}
389+
297390
// Simple tokenizer that splits the template into tokens.
298391
// The mustache spec allows {{{ }}} to unescape variables; such a tag
299392
// is tokenized as an UnescapeVariable token, exactly as if it had
300393
// been written as {{& variable}}.
301-
SmallVector<Token> tokenize(StringRef Template) {
394+
static SmallVector<Token> tokenize(StringRef Template) {
395+
LLVM_DEBUG(dbgs() << "Tokenizing template: \"" << Template << "\"\n");
302396
SmallVector<Token> Tokens;
303-
StringLiteral Open("{{");
304-
StringLiteral Close("}}");
305-
StringLiteral TripleOpen("{{{");
306-
StringLiteral TripleClose("}}}");
397+
SmallString<8> Open("{{");
398+
SmallString<8> Close("}}");
307399
size_t Start = 0;
308-
size_t DelimiterStart = Template.find(Open);
309-
if (DelimiterStart == StringRef::npos) {
310-
Tokens.emplace_back(Template.str());
311-
return Tokens;
312-
}
313-
while (DelimiterStart != StringRef::npos) {
314-
if (DelimiterStart != Start)
315-
Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
316-
317-
if (Template.substr(DelimiterStart).starts_with(TripleOpen)) {
318-
size_t DelimiterEnd = Template.find(TripleClose, DelimiterStart);
319-
if (DelimiterEnd == StringRef::npos)
320-
break;
321-
size_t BodyStart = DelimiterStart + TripleOpen.size();
322-
std::string Body =
323-
Template.substr(BodyStart, DelimiterEnd - BodyStart).str();
324-
std::string RawBody =
325-
Template.substr(DelimiterStart, DelimiterEnd - DelimiterStart + 3)
326-
.str();
327-
Tokens.emplace_back(RawBody, "&" + Body, '&');
328-
Start = DelimiterEnd + TripleClose.size();
329-
} else {
330-
size_t DelimiterEnd = Template.find(Close, DelimiterStart);
331-
if (DelimiterEnd == StringRef::npos)
332-
break;
333-
334-
// Extract the Interpolated variable without delimiters.
335-
size_t InterpolatedStart = DelimiterStart + Open.size();
336-
size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size();
337-
std::string Interpolated =
338-
Template.substr(InterpolatedStart, InterpolatedEnd).str();
339-
std::string RawBody = Open.str() + Interpolated + Close.str();
340-
Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
341-
Start = DelimiterEnd + Close.size();
400+
401+
while (Start < Template.size()) {
402+
LLVM_DEBUG(dbgs() << "Loop start. Start=" << Start << ", Open='" << Open
403+
<< "', Close='" << Close << "'\n");
404+
Tag T = findNextTag(Template, Start, Open, Close);
405+
406+
if (T.TagKind == Tag::Kind::None) {
407+
// No more tags, the rest is text.
408+
Tokens.emplace_back(Template.substr(Start).str());
409+
LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \""
410+
<< Template.substr(Start) << "\"\n");
411+
break;
412+
}
413+
414+
// Add the text before the tag.
415+
if (T.StartPosition > Start) {
416+
StringRef Text = Template.substr(Start, T.StartPosition - Start);
417+
Tokens.emplace_back(Text.str());
418+
LLVM_DEBUG(dbgs() << " Created Text token: \"" << Text << "\"\n");
342419
}
343-
DelimiterStart = Template.find(Open, Start);
344-
}
345420

346-
if (Start < Template.size())
347-
Tokens.emplace_back(Template.substr(Start).str());
421+
processTag(T, Tokens, Open, Close);
422+
423+
// Move past the tag.
424+
Start = T.StartPosition + T.FullMatch.size();
425+
}
348426

349427
// Fix up white spaces for:
350428
// - open sections
@@ -386,6 +464,7 @@ SmallVector<Token> tokenize(StringRef Template) {
386464
if ((!HasTextBehind && !HasTextAhead) || (!HasTextBehind && Idx == LastIdx))
387465
stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType);
388466
}
467+
LLVM_DEBUG(dbgs() << "Tokenizing finished.\n");
389468
return Tokens;
390469
}
391470

@@ -563,13 +642,14 @@ void Parser::parseMustache(ASTNode *Parent, llvm::StringMap<AstPtr> &Partials,
563642
break;
564643
}
565644
case Token::Type::Comment:
645+
case Token::Type::SetDelimiter:
566646
break;
567647
case Token::Type::SectionClose:
568648
return;
569649
}
570650
}
571651
}
572-
void toMustacheString(const json::Value &Data, raw_ostream &OS) {
652+
static void toMustacheString(const json::Value &Data, raw_ostream &OS) {
573653
switch (Data.kind()) {
574654
case json::Value::Null:
575655
return;
@@ -602,6 +682,8 @@ void toMustacheString(const json::Value &Data, raw_ostream &OS) {
602682
}
603683

604684
void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
685+
if (Ty != Root && Ty != Text && AccessorValue.empty())
686+
return;
605687
// Set the parent context to the incoming context so that we
606688
// can walk up the context tree correctly in findContext().
607689
ParentContext = &CurrentCtx;
@@ -801,3 +883,5 @@ Template &Template::operator=(Template &&Other) noexcept {
801883
return *this;
802884
}
803885
} // namespace llvm::mustache
886+
887+
#undef DEBUG_TYPE

0 commit comments

Comments
 (0)