Skip to content
Merged
22 changes: 15 additions & 7 deletions llvm/include/llvm/Support/GlobPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,30 @@ class GlobPattern {
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
//
// The pattern is considered trivial only when there is no plain prefix, no
// plain suffix, and exactly one sub-glob whose text is "*".
//
// NOTE(review): the paired checks below (`Prefix.empty()` next to `PrefixSize`,
// `Suffix.empty()` next to `SuffixSize`) look like the before/after forms of
// the same test taken from a rendered diff -- confirm that only one of each
// pair is meant to remain.
bool isTrivialMatchAll() const {
// A plain prefix means the pattern is more than a bare "*".
if (!Prefix.empty())
if (PrefixSize)
return false;
// Likewise for a plain suffix.
if (!Suffix.empty())
if (SuffixSize)
return false;
// Anything other than exactly one sub-glob cannot be the single pattern "*".
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}

// Returns the leading part of the pattern that precedes its first
// metacharacter.
StringRef prefix() const { return Prefix; }
// Returns the trailing part of the pattern that follows its last
// metacharacter (and the escaped character, when that metacharacter is a
// backslash).
StringRef suffix() const { return Suffix; }
// The following functions are just shortcuts for faster matching. They are
// conservative to simplify implementations.

private:
StringRef Prefix;
StringRef Suffix;
// Returns the plain prefix of the pattern: its first PrefixSize characters,
// none of which is a glob metacharacter.
StringRef prefix() const { return Pattern.take_front(PrefixSize); }
// Returns the plain suffix of the pattern: its last SuffixSize characters.
StringRef suffix() const { return Pattern.take_back(SuffixSize); }
// Returns the longest plain substring of the pattern between prefix and
// suffix. The result is empty when there is no such substring. The scan is
// conservative: it stops at the first '{', so a longer plain run that appears
// after a brace expression is not reported.
StringRef longest_substr() const;

private:
// The pattern text passed to create(). prefix(), suffix() and
// longest_substr() all return pieces of it.
StringRef Pattern;
// Number of leading characters of Pattern that precede the first
// metacharacter (the whole length when the pattern has none).
size_t PrefixSize = 0;
// Number of trailing characters of Pattern that form the plain suffix.
size_t SuffixSize = 0;
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
Expand Down
67 changes: 59 additions & 8 deletions llvm/lib/Support/GlobPattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}

/// Returns the longest run of plain (metacharacter-free) characters in \p S.
///
/// \p S is the part of an already validated glob pattern that lies between
/// its plain prefix and its plain suffix. Runs are separated by '?', '*',
/// bracket expressions and backslash escapes. When several runs share the
/// maximum length the earliest one is returned. The result is empty when \p S
/// contains no plain run.
///
/// The scan is conservative: it gives up at the first '{' and returns the best
/// run found so far.
static StringRef maxPlainSubstring(StringRef S) {
  StringRef Best;
  while (!S.empty()) {
    size_t PlainSize = S.find_first_of("?*[{\\");
    if (PlainSize == std::string::npos)
      PlainSize = S.size();

    // Strict comparison keeps the earliest run on ties.
    if (Best.size() < PlainSize)
      Best = S.take_front(PlainSize);

    S = S.drop_front(PlainSize);

    // The input can end with a plain run: GlobPattern::create also ends the
    // middle section at ']' and '}', which this scan treats as ordinary
    // characters (for "a*b]c" the middle section is "*b]"). Stop here instead
    // of inspecting the front of an empty string.
    if (S.empty())
      break;

    switch (S.front()) {
    case '\\':
      // Skip the backslash and the character it escapes.
      S = S.drop_front(2);
      break;
    case '[': {
      // Drop '[' and the first character which can be ']'.
      S = S.drop_front(2);
      size_t EndBracket = S.find_first_of("]");
      // Should not be possible, SubGlobPattern::create should fail on invalid
      // pattern before we get here.
      assert(EndBracket != std::string::npos);
      S = S.drop_front(EndBracket + 1);
      break;
    }
    case '{':
      // TODO: implement.
      // Fallback to whatever is best for now.
      return Best;
    default:
      // '?' or '*': a single-character metacharacter.
      S = S.drop_front(1);
      break;
    }
  }

  return Best;
}

Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
Pat.Pattern = S;

// Store the prefix that does not contain any metacharacter.
size_t PrefixSize = S.find_first_of("?*[{\\");
Pat.Prefix = S.substr(0, PrefixSize);
if (PrefixSize == std::string::npos)
Pat.PrefixSize = S.find_first_of("?*[{\\");
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
S = S.substr(PrefixSize);
}
S = S.substr(Pat.PrefixSize);

// Just in case we stop on unmatched opening brackets.
size_t SuffixStart = S.find_last_of("?*[]{}\\");
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
++SuffixStart;
Pat.Suffix = S.substr(SuffixStart);
if (SuffixStart < S.size())
++SuffixStart;
Pat.SuffixSize = S.size() - SuffixStart;
S = S.substr(0, SuffixStart);

SmallVector<std::string, 1> SubPats;
Expand Down Expand Up @@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}

// Only the section of the pattern between the plain prefix and the plain
// suffix is scanned for the longest plain run.
StringRef GlobPattern::longest_substr() const {
  const StringRef Middle = Pattern.drop_front(PrefixSize).drop_back(SuffixSize);
  return maxPlainSubstring(Middle);
}

bool GlobPattern::match(StringRef S) const {
if (!S.consume_front(Prefix))
if (!S.consume_front(prefix()))
return false;
if (!S.consume_back(Suffix))
if (!S.consume_back(suffix()))
return false;
if (SubGlobs.empty() && S.empty())
return true;
Expand Down
66 changes: 66 additions & 0 deletions llvm/unittests/Support/GlobPatternTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
EXPECT_EQ("cd", Pat->suffix());
}

// Checks longest_substr() against a table of patterns. Each row pairs a glob
// with the plain run expected between its plain prefix and plain suffix.
TEST_F(GlobPatternTest, Substr) {
  const struct {
    StringRef Glob;
    StringRef Longest;
  } Cases[] = {
      // Nothing lies between the prefix and the suffix.
      {"", ""},
      {"abcd", ""},
      {"a*bcd", ""},
      {"*abcd", ""},
      {"abcd*", ""},
      // Runs separated by '*'; on equal length the earliest run is reported.
      {"a*bc*d", "bc"},
      {"a*bc*def*g", "def"},
      {"a*bcd*ef*g", "bcd"},
      {"a*bcd*efg*h", "bcd"},
      // Bracket expressions split runs.
      {"a*bcd[ef]g*h", "bcd"},
      {"a*bc[d]efg*h", "efg"},
      {"a*bc[]]efg*h", "efg"},
      // Escapes and '?' split runs.
      {"a*bcde\\fg*h", "bcde"},
      {"a*bcde\\[fg*h", "bcde"},
      {"a*bcde?fg*h", "bcde"},
      // The scan stops at '{'.
      {"a*bcdef{g}*h", "bcdef"},
  };

  for (const auto &Case : Cases) {
    auto Pat = GlobPattern::create(Case.Glob);
    ASSERT_TRUE((bool)Pat);
    EXPECT_EQ(Case.Longest, Pat->longest_substr());
  }
}

TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
Expand Down
Loading