Skip to content

Commit 5b24b55

Browse files
authored
[Support] Extract simple suffix from GlobPattern (#162118)
The existing glob is already optimized with a prefix for patterns like "src:/dir1/dir2/*", but I noticed we often use patterns like "src:*dir1/dir2/file.h", so extracting a suffix will help as well. The difference will be hard to notice in most cases, but I use an ignore list to bisect some failures, e.g. put 100k entries in the file, and build/test as needed. On one of the hard compilation units glob matching took 400s; after the change it takes 20s. Still, there is a higher-level inefficiency in ignore list matching, which I will address in follow-up patches to remove the remaining 20s.
1 parent de9b3ca commit 5b24b55

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed

llvm/include/llvm/Support/GlobPattern.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,19 @@ class GlobPattern {
6565
/// Returns true only when the pattern has no literal prefix, no literal
/// suffix, and exactly one sub-glob whose pattern text is "*".
bool isTrivialMatchAll() const {
6666
// A non-empty literal prefix restricts what can match.
if (!Prefix.empty())
6767
return false;
68+
// Likewise, a non-empty literal suffix restricts what can match.
if (!Suffix.empty())
69+
return false;
6870
if (SubGlobs.size() != 1)
6971
return false;
7072
return SubGlobs[0].getPat() == "*";
7173
}
7274

75+
/// Returns the stored literal prefix; match() consumes it from the front of
/// the input before the sub-globs are tried.
StringRef prefix() const { return Prefix; }
76+
/// Returns the stored literal suffix; match() consumes it from the back of
/// the input before the sub-globs are tried.
StringRef suffix() const { return Suffix; }
77+
7378
private:
7479
StringRef Prefix;
80+
StringRef Suffix;
7581

7682
struct SubGlobPattern {
7783
/// \param Pat the pattern to match against

llvm/lib/Support/GlobPattern.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,15 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
143143
return Pat;
144144
S = S.substr(PrefixSize);
145145

146+
// Just in case we stop on unmatched opening brackets.
147+
size_t SuffixStart = S.find_last_of("?*[]{}\\");
148+
assert(SuffixStart != std::string::npos);
149+
if (S[SuffixStart] == '\\')
150+
++SuffixStart;
151+
++SuffixStart;
152+
Pat.Suffix = S.substr(SuffixStart);
153+
S = S.substr(0, SuffixStart);
154+
146155
SmallVector<std::string, 1> SubPats;
147156
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
148157
return std::move(Err);
@@ -193,6 +202,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
193202
bool GlobPattern::match(StringRef S) const {
194203
if (!S.consume_front(Prefix))
195204
return false;
205+
if (!S.consume_back(Suffix))
206+
return false;
196207
if (SubGlobs.empty() && S.empty())
197208
return true;
198209
for (auto &Glob : SubGlobs)

llvm/unittests/Support/GlobPatternTest.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,78 @@ TEST_F(GlobPatternTest, NUL) {
257257
}
258258
}
259259

260+
// Checks which literal prefix() and suffix() GlobPattern::create() extracts
// for a range of patterns: no metacharacters, a single metacharacter at
// various positions, unmatched brackets, and backslash escapes.
TEST_F(GlobPatternTest, PrefixSuffix) {
261+
// Empty pattern: nothing to extract on either side.
auto Pat = GlobPattern::create("");
262+
ASSERT_TRUE((bool)Pat);
263+
EXPECT_EQ("", Pat->prefix());
264+
EXPECT_EQ("", Pat->suffix());
265+
266+
// No metacharacters: the whole pattern is reported as the prefix.
Pat = GlobPattern::create("abcd");
267+
ASSERT_TRUE((bool)Pat);
268+
EXPECT_EQ("abcd", Pat->prefix());
269+
EXPECT_EQ("", Pat->suffix());
270+
271+
Pat = GlobPattern::create("*abcd");
272+
ASSERT_TRUE((bool)Pat);
273+
EXPECT_EQ("", Pat->prefix());
274+
EXPECT_EQ("abcd", Pat->suffix());
275+
276+
Pat = GlobPattern::create("abcd*");
277+
ASSERT_TRUE((bool)Pat);
278+
EXPECT_EQ("abcd", Pat->prefix());
279+
EXPECT_EQ("", Pat->suffix());
280+
281+
Pat = GlobPattern::create("ab*cd");
282+
ASSERT_TRUE((bool)Pat);
283+
EXPECT_EQ("ab", Pat->prefix());
284+
EXPECT_EQ("cd", Pat->suffix());
285+
286+
Pat = GlobPattern::create("ab?cd");
287+
ASSERT_TRUE((bool)Pat);
288+
EXPECT_EQ("ab", Pat->prefix());
289+
EXPECT_EQ("cd", Pat->suffix());
290+
291+
Pat = GlobPattern::create("ab[n]cd");
292+
ASSERT_TRUE((bool)Pat);
293+
EXPECT_EQ("ab", Pat->prefix());
294+
EXPECT_EQ("cd", Pat->suffix());
295+
296+
Pat = GlobPattern::create("ab{}cd");
297+
ASSERT_TRUE((bool)Pat);
298+
EXPECT_EQ("ab", Pat->prefix());
299+
EXPECT_EQ("cd", Pat->suffix());
300+
301+
// An unmatched opening brace still ends the prefix and starts the suffix.
Pat = GlobPattern::create("ab{cd");
302+
ASSERT_TRUE((bool)Pat);
303+
EXPECT_EQ("ab", Pat->prefix());
304+
EXPECT_EQ("cd", Pat->suffix());
305+
306+
// A lone closing bracket does not end the prefix: the whole pattern is
// expected back as the prefix.
Pat = GlobPattern::create("ab]cd");
307+
ASSERT_TRUE((bool)Pat);
308+
EXPECT_EQ("ab]cd", Pat->prefix());
309+
EXPECT_EQ("", Pat->suffix());
310+
311+
// Pattern text is `ab\cd`: the character following the backslash is not
// included in the suffix, so only "d" is expected.
Pat = GlobPattern::create("ab\\cd");
312+
ASSERT_TRUE((bool)Pat);
313+
EXPECT_EQ("ab", Pat->prefix());
314+
EXPECT_EQ("d", Pat->suffix());
315+
316+
// Pattern text is `ab\\cd`: the expected suffix is again only "d".
Pat = GlobPattern::create("ab\\\\cd");
317+
ASSERT_TRUE((bool)Pat);
318+
EXPECT_EQ("ab", Pat->prefix());
319+
EXPECT_EQ("d", Pat->suffix());
320+
321+
// Trailing metacharacter: no literal suffix remains.
Pat = GlobPattern::create("ab?cd?");
322+
ASSERT_TRUE((bool)Pat);
323+
EXPECT_EQ("ab", Pat->prefix());
324+
EXPECT_EQ("", Pat->suffix());
325+
326+
// Leading metacharacter: no literal prefix; the suffix starts after the
// last metacharacter.
Pat = GlobPattern::create("?ab?cd");
327+
ASSERT_TRUE((bool)Pat);
328+
EXPECT_EQ("", Pat->prefix());
329+
EXPECT_EQ("cd", Pat->suffix());
330+
}
331+
260332
TEST_F(GlobPatternTest, Pathological) {
261333
std::string P, S(40, 'a');
262334
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};

0 commit comments

Comments
 (0)