Skip to content

Commit 5080bb3

Browse files
vitalybuka authored and Lukacma committed
[SpecialCaseList] Add RadixTree for substring matching (llvm#164545)
This commit adds a new RadixTree to `SpecialCaseList` for handling substring matches. Previously, `SpecialCaseList` only supported prefix and suffix matching. With this change, patterns that have neither a prefix nor a suffix can now be filtered efficiently. According to SpecialCaseListBM, lookup benchmarks improve significantly: ``` OVERALL_GEOMEAN -0.7809 ``` Lookup benchmarks for `*test*`-like patterns improve hugely: ``` OVERALL_GEOMEAN -0.9947 ``` Full results: https://gist.github.com/vitalybuka/ee7f681b448eb18974386ab35e2d4d27
1 parent 2022416 commit 5080bb3

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

llvm/include/llvm/Support/SpecialCaseList.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ class SpecialCaseList {
170170
RadixTree<iterator_range<StringRef::const_reverse_iterator>,
171171
SmallVector<const GlobMatcher::Glob *, 1>>>
172172
PrefixSuffixToGlob;
173+
174+
RadixTree<iterator_range<StringRef::const_iterator>,
175+
SmallVector<const GlobMatcher::Glob *, 1>>
176+
SubstrToGlob;
173177
};
174178

175179
/// Represents a set of patterns and their line numbers

llvm/lib/Support/SpecialCaseList.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,19 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
9494
StringRef Prefix = G.Pattern.prefix();
9595
StringRef Suffix = G.Pattern.suffix();
9696

97+
if (Suffix.empty() && Prefix.empty()) {
98+
// If both prefix and suffix are empty put into special tree to search by
99+
// substring in a middle.
100+
StringRef Substr = G.Pattern.longest_substr();
101+
if (!Substr.empty()) {
102+
// But only if substring is not empty. Searching this tree is more
103+
// expensive.
104+
auto &V = SubstrToGlob.emplace(Substr).first->second;
105+
V.emplace_back(&G);
106+
continue;
107+
}
108+
}
109+
97110
auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
98111
auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
99112
V.emplace_back(&G);
@@ -119,6 +132,25 @@ void SpecialCaseList::GlobMatcher::match(
119132
}
120133
}
121134
}
135+
136+
if (!SubstrToGlob.empty()) {
137+
// As we don't know when substring exactly starts, we will try all
138+
// possibilities. In most cases search will fail on first characters.
139+
for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
140+
for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
141+
for (const auto *G : V) {
142+
if (G->Pattern.match(Query)) {
143+
Cb(G->Name, G->LineNo);
144+
// As soon as we find a match in the vector, we can break for this
145+
// vector, since the globs are already sorted by priority within the
146+
// prefix group. However, we continue searching other prefix groups
147+
// in the map, as they may contain a better match overall.
148+
break;
149+
}
150+
}
151+
}
152+
}
153+
}
122154
}
123155

124156
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)

0 commit comments

Comments
 (0)