Skip to content

Commit 6fdef0b

Browse files
vitalybuka and lukel97 authored
[NFC][GlobPattern] Add GlobPattern::longest_substr() (#164512)
Finds the longest (almost) plain substring in the pattern. The implementation is conservative to avoid false positives. The result is not used to optimize `GlobPattern::match()`, so it is calculated on request. For #164545. Co-authored-by: Luke Lau <[email protected]>
1 parent 3c2dae6 commit 6fdef0b

File tree

3 files changed

+117
-0
lines changed

3 files changed

+117
-0
lines changed

llvm/include/llvm/Support/GlobPattern.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class GlobPattern {
7979
StringRef prefix() const { return Pattern.take_front(PrefixSize); }
8080
// Returns plain suffix of the pattern.
8181
StringRef suffix() const { return Pattern.take_back(SuffixSize); }
82+
// Returns the longest plain substring of the pattern between prefix and
83+
// suffix.
84+
StringRef longest_substr() const;
8285

8386
private:
8487
StringRef Pattern;

llvm/lib/Support/GlobPattern.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,49 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
132132
return std::move(SubPatterns);
133133
}
134134

135+
// Scans S and returns the longest run of characters that contains none of the
// glob metacharacters '?', '*', '[', '{' or '\\'. Ties are resolved in favor of
// the earliest run. Escaped characters and bracket expressions are skipped
// rather than treated as plain text, and scanning stops at the first '{', so
// the result may be shorter than the true longest literal run.
static StringRef maxPlainSubstring(StringRef S) {
  StringRef Longest;
  while (!S.empty()) {
    // Length of the literal run at the head of what is left to scan.
    size_t PlainLen = S.find_first_of("?*[{\\");
    if (PlainLen == std::string::npos)
      PlainLen = S.size();

    // Only a strictly longer run replaces the current candidate.
    if (PlainLen > Longest.size())
      Longest = S.take_front(PlainLen);
    S = S.drop_front(PlainLen);

    // The input is expected to begin and end with a glob special character;
    // anything else would have been absorbed into the prefix or the suffix.
    // Hence a literal run can never be the last thing in S.
    assert(!S.empty());

    const char Meta = S.front();

    if (Meta == '{') {
      // TODO: implement.
      // Brace expansions are not analyzed yet; settle for what we have.
      return Longest;
    }

    if (Meta == '\\') {
      // Skip the backslash together with the character it escapes.
      S = S.drop_front(2);
      continue;
    }

    if (Meta == '[') {
      // Skip '[' and the character right after it, which is allowed to be
      // ']' without closing the bracket expression.
      S = S.drop_front(2);
      size_t CloseIdx = S.find(']');
      // An unterminated bracket expression should already have been rejected
      // by SubGlobPattern::create before this function is reached.
      assert(CloseIdx != std::string::npos);
      S = S.drop_front(CloseIdx + 1);
      continue;
    }

    // '?' or '*': a single-character metacharacter.
    S = S.drop_front(1);
  }

  return Longest;
}
177+
135178
Expected<GlobPattern>
136179
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
137180
GlobPattern Pat;
@@ -202,6 +245,11 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
202245
return Pat;
203246
}
204247

248+
// Returns the longest plain substring found in the part of the pattern that
// remains after stripping PrefixSize leading and SuffixSize trailing
// characters. Computed on every call; nothing is cached.
StringRef GlobPattern::longest_substr() const {
  StringRef Middle = Pattern.drop_front(PrefixSize).drop_back(SuffixSize);
  return maxPlainSubstring(Middle);
}
252+
205253
bool GlobPattern::match(StringRef S) const {
206254
if (!S.consume_front(prefix()))
207255
return false;

llvm/unittests/Support/GlobPatternTest.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
329329
EXPECT_EQ("cd", Pat->suffix());
330330
}
331331

332+
// Checks GlobPattern::longest_substr() against a table of patterns. Each entry
// pairs a glob with the substring the accessor is expected to report.
TEST_F(GlobPatternTest, Substr) {
  const struct {
    const char *Glob;
    StringRef Expected;
  } Cases[] = {
      // Patterns for which an empty result is expected.
      {"", ""},
      {"abcd", ""},
      {"a*bcd", ""},
      {"*abcd", ""},
      {"abcd*", ""},
      // Literal runs separated by '*'; the earliest of equal-length runs wins.
      {"a*bc*d", "bc"},
      {"a*bc*def*g", "def"},
      {"a*bcd*ef*g", "bcd"},
      {"a*bcd*efg*h", "bcd"},
      // Bracket expressions, including one whose first member is ']'.
      {"a*bcd[ef]g*h", "bcd"},
      {"a*bc[d]efg*h", "efg"},
      {"a*bc[]]efg*h", "efg"},
      // Escaped characters are not counted as part of a plain run.
      {"a*bcde\\fg*h", "bcde"},
      {"a*bcde\\[fg*h", "bcde"},
      // '?' splits a run just like '*'.
      {"a*bcde?fg*h", "bcde"},
      // Scanning stops at a brace expansion.
      {"a*bcdef{g}*h", "bcdef"},
  };

  for (const auto &C : Cases) {
    auto Pat = GlobPattern::create(C.Glob);
    ASSERT_TRUE((bool)Pat);
    EXPECT_EQ(C.Expected, Pat->longest_substr());
  }
}
397+
332398
TEST_F(GlobPatternTest, Pathological) {
333399
std::string P, S(40, 'a');
334400
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};

0 commit comments

Comments
 (0)