Skip to content

Commit 21f467e

Browse files
vitalybuka and Copilot
authored and committed
[SpecialCaseList] Filtering Globs with matching prefix (llvm#164531)
This commit optimizes `SpecialCaseList` by using a `RadixTree` to filter glob patterns based on their prefixes. When matching a query, the `RadixTree` quickly identifies all glob patterns whose literal prefix is a prefix of the query. This significantly reduces the number of glob patterns that need to be fully evaluated, leading to performance improvements, especially when dealing with a large number of patterns.

According to SpecialCaseListBM:

Lookup benchmarks (significant improvements):
```
OVERALL_GEOMEAN -0.8177
```

Lookup like `prefix*` benchmarks (huge improvements):
```
OVERALL_GEOMEAN -0.9819
```

https://gist.github.com/vitalybuka/824884bcbc1713e815068c279159dafe

---------

Co-authored-by: Copilot <[email protected]>
1 parent 22937a9 commit 21f467e

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

llvm/include/llvm/Support/SpecialCaseList.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
#define LLVM_SUPPORT_SPECIALCASELIST_H
1414

1515
#include "llvm/ADT/ArrayRef.h"
16+
#include "llvm/ADT/RadixTree.h"
17+
#include "llvm/ADT/SmallVector.h"
1618
#include "llvm/ADT/StringMap.h"
19+
#include "llvm/ADT/iterator_range.h"
1720
#include "llvm/Support/Allocator.h"
1821
#include "llvm/Support/Compiler.h"
1922
#include "llvm/Support/GlobPattern.h"
@@ -162,6 +165,10 @@ class SpecialCaseList {
162165
};
163166

164167
std::vector<GlobMatcher::Glob> Globs;
168+
169+
RadixTree<iterator_range<StringRef::const_iterator>,
170+
SmallVector<const GlobMatcher::Glob *, 1>>
171+
PrefixToGlob;
165172
};
166173

167174
/// Represents a set of patterns and their line numbers

llvm/lib/Support/SpecialCaseList.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,32 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
8989
return A.Name.size() < B.Name.size();
9090
});
9191
}
92+
93+
for (const auto &G : reverse(Globs)) {
94+
StringRef Prefix = G.Pattern.prefix();
95+
96+
auto &V = PrefixToGlob.emplace(Prefix).first->second;
97+
V.emplace_back(&G);
98+
}
9299
}
93100

94101
void SpecialCaseList::GlobMatcher::match(
95102
StringRef Query,
96103
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
97-
for (const auto &G : reverse(Globs))
98-
if (G.Pattern.match(Query))
99-
return Cb(G.Name, G.LineNo);
104+
if (!PrefixToGlob.empty()) {
105+
for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) {
106+
for (const auto *G : V) {
107+
if (G->Pattern.match(Query)) {
108+
Cb(G->Name, G->LineNo);
109+
// As soon as we find a match in the vector, we can break for this
110+
// vector, since the globs are already sorted by priority within the
111+
// prefix group. However, we continue searching other prefix groups in
112+
// the map, as they may contain a better match overall.
113+
break;
114+
}
115+
}
116+
}
117+
}
100118
}
101119

102120
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)

0 commit comments

Comments
 (0)