Skip to content

Commit 55d0587

Browse files
committed
[llvm][mustache] Specialize delimiter search
Delimiters in mustache are generally 2-4 character sequences. While find() is good for general-purpose search, we can beat it for these short sequences by using memchr() to locate the first character of the delimiter and then checking the next few characters directly.
1 parent a34af38 commit 55d0587

File tree

1 file changed

+53
-3
lines changed

1 file changed

+53
-3
lines changed

llvm/lib/Support/Mustache.cpp

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,50 @@ namespace {
1717

1818
using Accessor = SmallVector<std::string>;
1919

20+
// A more generic specialized find for needles of length 1-3.
21+
[[maybe_unused]]
22+
static size_t findDelimiters(StringRef Haystack, StringRef Needle,
23+
size_t Offset = 0) {
24+
const size_t N = Needle.size();
25+
if (N == 0)
26+
return Offset;
27+
if (N > 3) {
28+
// Fallback for longer needles where more advanced algorithms are better.
29+
return Haystack.find(Needle, Offset);
30+
}
31+
32+
const char *HaystackStart = Haystack.data();
33+
const size_t HaystackSize = Haystack.size();
34+
if (HaystackSize < N + Offset)
35+
return StringRef::npos;
36+
37+
const char *NeedleStart = Needle.data();
38+
const char *Current = HaystackStart + Offset;
39+
const char *End = HaystackStart + HaystackSize;
40+
41+
while (Current + N <= End) {
42+
// Stage 1: Find the first character of the needle.
43+
Current = (const char *)::memchr(Current, NeedleStart[0], End - Current);
44+
if (!Current || Current + N > End)
45+
return StringRef::npos;
46+
47+
// Stage 2: Validate the rest of the sequence.
48+
if (N == 1)
49+
return Current - HaystackStart;
50+
if (N == 2 && Current[1] == NeedleStart[1])
51+
return Current - HaystackStart;
52+
if (N == 3 && Current[1] == NeedleStart[1] && Current[2] == NeedleStart[2])
53+
return Current - HaystackStart;
54+
55+
// Mismatch, advance and continue the search.
56+
++Current;
57+
}
58+
59+
return StringRef::npos;
60+
}
61+
62+
63+
2064
static bool isFalsey(const json::Value &V) {
2165
return V.getAsNull() || (V.getAsBoolean() && !V.getAsBoolean().value()) ||
2266
(V.getAsArray() && V.getAsArray()->empty());
@@ -306,15 +350,18 @@ SmallVector<Token> tokenize(StringRef Template) {
306350
StringLiteral Open("{{");
307351
StringLiteral Close("}}");
308352
size_t Start = 0;
309-
size_t DelimiterStart = Template.find(Open);
353+
// size_t DelimiterStart = Template.find(Open);
354+
size_t DelimiterStart = findDelimiters(Template, Open);
355+
310356
if (DelimiterStart == StringRef::npos) {
311357
Tokens.emplace_back(Template.str());
312358
return Tokens;
313359
}
314360
while (DelimiterStart != StringRef::npos) {
315361
if (DelimiterStart != Start)
316362
Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
317-
size_t DelimiterEnd = Template.find(Close, DelimiterStart);
363+
// size_t DelimiterEnd = Template.find(Close, DelimiterStart);
364+
size_t DelimiterEnd = findDelimiters(Template, Close, DelimiterStart);
318365
if (DelimiterEnd == StringRef::npos)
319366
break;
320367

@@ -326,7 +373,8 @@ SmallVector<Token> tokenize(StringRef Template) {
326373
std::string RawBody = Open.str() + Interpolated + Close.str();
327374
Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
328375
Start = DelimiterEnd + Close.size();
329-
DelimiterStart = Template.find(Open, Start);
376+
// DelimiterStart = Template.find(Open, Start);
377+
DelimiterStart = findDelimiters(Template, Open, Start);
330378
}
331379

332380
if (Start < Template.size())
@@ -572,6 +620,8 @@ void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
572620
ParentContext = &CurrentCtx;
573621
const json::Value *ContextPtr = Ty == Root ? ParentContext : findContext();
574622

623+
if (AccessorValue.empty() && (Ty != Root && Ty != Text))
624+
return;
575625
switch (Ty) {
576626
case Root:
577627
renderChild(CurrentCtx, OS);

0 commit comments

Comments
 (0)