From 65427808c03811ef488d96c076860a9eb59ccd67 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 15:55:20 -0700 Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6 --- llvm/include/llvm/Support/GlobPattern.h | 22 +++++--- llvm/lib/Support/GlobPattern.cpp | 57 ++++++++++++++++++--- llvm/unittests/Support/GlobPatternTest.cpp | 58 ++++++++++++++++++++++ 3 files changed, 122 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index c1b44849b9794..4824f3fa01e5b 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -63,22 +63,30 @@ class GlobPattern { // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). bool isTrivialMatchAll() const { - if (!Prefix.empty()) + if (PrefixSize) return false; - if (!Suffix.empty()) + if (SuffixSize) return false; if (SubGlobs.size() != 1) return false; return SubGlobs[0].getPat() == "*"; } - StringRef prefix() const { return Prefix; } - StringRef suffix() const { return Suffix; } + // The following functions are just shortcuts for faster matching. They are + // conservative to simplify implementations. -private: - StringRef Prefix; - StringRef Suffix; + // Returns plain prefix of the pattern. + StringRef prefix() const { return Pattern.take_front(PrefixSize); } + // Returns plain suffix of the pattern. + StringRef suffix() const { return Pattern.take_back(SuffixSize); } + // Returns the longest plain substring of the pattern between prefix and + // suffix. 
+ StringRef longest_substr() const; +private: + StringRef Pattern; + size_t PrefixSize = 0; + size_t SuffixSize = 0; struct SubGlobPattern { /// \param Pat the pattern to match against LLVM_ABI static Expected create(StringRef Pat); diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47dc1d3d1..dfc1508ce63af 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -132,24 +132,60 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { return std::move(SubPatterns); } +static StringRef maxPlainSubstring(StringRef S) { + StringRef R; + while (!S.empty()) { + size_t PrefixSize = S.find_first_of("?*[{\\"); + if (PrefixSize == std::string::npos) + PrefixSize = S.size(); + + if (R.size() < PrefixSize) + R = S.take_front(PrefixSize); + S = S.drop_front(PrefixSize); + + switch (S.front()) { + case '\\': + S = S.drop_front(2); + break; + case '[': { + size_t EndBracket = S.find_first_of("]"); + if (EndBracket == std::string::npos) + return R; // Incorrect, but let SubGlobPattern::create handle it. + S = S.drop_front(EndBracket + 1); + break; + } + case '{': + // TODO: implement. + return {}; + default: + S = S.drop_front(1); + } + } + + return R; +} + Expected GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. 
size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector SubPats; @@ -199,10 +235,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) { return Pat; } +StringRef GlobPattern::longest_substr() const { + return maxPlainSubstring( + Pattern.drop_front(PrefixSize).drop_back(SuffixSize)); +} + bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index 58fd7678131c6..a0e0d1415f383 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -329,6 +329,64 @@ TEST_F(GlobPatternTest, PrefixSuffix) { EXPECT_EQ("cd", Pat->suffix()); } +TEST_F(GlobPatternTest, Substr) { + auto Pat = GlobPattern::create(""); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("abcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("*abcd"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("abcd*"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc*d"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bc", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bc*def*g"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("def", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd*ef*g"); + ASSERT_TRUE((bool)Pat); 
+ EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd*efg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcd[ef]g*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcd", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde\\fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde\\[fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcde?fg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("bcde", Pat->longest_substr()); + + Pat = GlobPattern::create("a*bcdef{g}*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("", Pat->longest_substr()); +} + TEST_F(GlobPatternTest, Pathological) { std::string P, S(40, 'a'); StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"}; From 8a3b8b6b84aa01cc1668ac07deb737f8800588c8 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:10:05 -0700 Subject: [PATCH 2/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6 [skip ci] --- llvm/include/llvm/Support/GlobPattern.h | 18 ++++++++++++------ llvm/lib/Support/GlobPattern.cpp | 19 +++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index c1b44849b9794..6ebf64565559b 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -63,21 +63,27 @@ class GlobPattern { // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). 
bool isTrivialMatchAll() const { - if (!Prefix.empty()) + if (PrefixSize) return false; - if (!Suffix.empty()) + if (SuffixSize) return false; if (SubGlobs.size() != 1) return false; return SubGlobs[0].getPat() == "*"; } - StringRef prefix() const { return Prefix; } - StringRef suffix() const { return Suffix; } + // The following functions are just shortcuts for faster matching. They are + // conservative to simplify implementations. + + // Returns plain prefix of the pattern. + StringRef prefix() const { return Pattern.take_front(PrefixSize); } + // Returns plain suffix of the pattern. + StringRef suffix() const { return Pattern.take_back(SuffixSize); } private: - StringRef Prefix; - StringRef Suffix; + StringRef Pattern; + size_t PrefixSize = 0; + size_t SuffixSize = 0; struct SubGlobPattern { /// \param Pat the pattern to match against diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47dc1d3d1..f56a8fcf4bf9d 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -135,21 +135,24 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { Expected GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. 
size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector SubPats; @@ -200,9 +203,9 @@ GlobPattern::SubGlobPattern::create(StringRef S) { } bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; From 260c56f38d95ca1678402399955dea8e11d8cd89 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:42:08 -0700 Subject: [PATCH 3/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6 [skip ci] --- llvm/include/llvm/Support/GlobPattern.h | 18 ++++++++++++------ llvm/lib/Support/GlobPattern.cpp | 19 +++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index c1b44849b9794..6ebf64565559b 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -63,21 +63,27 @@ class GlobPattern { // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). bool isTrivialMatchAll() const { - if (!Prefix.empty()) + if (PrefixSize) return false; - if (!Suffix.empty()) + if (SuffixSize) return false; if (SubGlobs.size() != 1) return false; return SubGlobs[0].getPat() == "*"; } - StringRef prefix() const { return Prefix; } - StringRef suffix() const { return Suffix; } + // The following functions are just shortcuts for faster matching. 
They are + // conservative to simplify implementations. + + // Returns plain prefix of the pattern. + StringRef prefix() const { return Pattern.take_front(PrefixSize); } + // Returns plain suffix of the pattern. + StringRef suffix() const { return Pattern.take_back(SuffixSize); } private: - StringRef Prefix; - StringRef Suffix; + StringRef Pattern; + size_t PrefixSize = 0; + size_t SuffixSize = 0; struct SubGlobPattern { /// \param Pat the pattern to match against diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47dc1d3d1..f56a8fcf4bf9d 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -135,21 +135,24 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { Expected GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. 
size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector SubPats; @@ -200,9 +203,9 @@ GlobPattern::SubGlobPattern::create(StringRef S) { } bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; From 83248227e1fa44ca3830d0203b7b674e97bfcb54 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:48:16 -0700 Subject: [PATCH 4/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip ci] --- llvm/include/llvm/Support/GlobPattern.h | 18 ++++++++++++------ llvm/lib/Support/GlobPattern.cpp | 19 +++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index c1b44849b9794..6ebf64565559b 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -63,21 +63,27 @@ class GlobPattern { // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). bool isTrivialMatchAll() const { - if (!Prefix.empty()) + if (PrefixSize) return false; - if (!Suffix.empty()) + if (SuffixSize) return false; if (SubGlobs.size() != 1) return false; return SubGlobs[0].getPat() == "*"; } - StringRef prefix() const { return Prefix; } - StringRef suffix() const { return Suffix; } + // The following functions are just shortcuts for faster matching. 
They are + // conservative to simplify implementations. + + // Returns plain prefix of the pattern. + StringRef prefix() const { return Pattern.take_front(PrefixSize); } + // Returns plain suffix of the pattern. + StringRef suffix() const { return Pattern.take_back(SuffixSize); } private: - StringRef Prefix; - StringRef Suffix; + StringRef Pattern; + size_t PrefixSize = 0; + size_t SuffixSize = 0; struct SubGlobPattern { /// \param Pat the pattern to match against diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 0ecf47dc1d3d1..f56a8fcf4bf9d 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -135,21 +135,24 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { Expected GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; + Pat.Pattern = S; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); - Pat.Prefix = S.substr(0, PrefixSize); - if (PrefixSize == std::string::npos) + Pat.PrefixSize = S.find_first_of("?*[{\\"); + if (Pat.PrefixSize == std::string::npos) { + Pat.PrefixSize = S.size(); return Pat; - S = S.substr(PrefixSize); + } + S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. 
size_t SuffixStart = S.find_last_of("?*[]{}\\"); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; - ++SuffixStart; - Pat.Suffix = S.substr(SuffixStart); + if (SuffixStart < S.size()) + ++SuffixStart; + Pat.SuffixSize = S.size() - SuffixStart; S = S.substr(0, SuffixStart); SmallVector SubPats; @@ -200,9 +203,9 @@ GlobPattern::SubGlobPattern::create(StringRef S) { } bool GlobPattern::match(StringRef S) const { - if (!S.consume_front(Prefix)) + if (!S.consume_front(prefix())) return false; - if (!S.consume_back(Suffix)) + if (!S.consume_back(suffix())) return false; if (SubGlobs.empty() && S.empty()) return true; From d30cbdfbea542d82d7d0ec33a378763772e6b954 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:53:50 -0700 Subject: [PATCH 5/8] another test Created using spr 1.3.7 --- llvm/unittests/Support/GlobPatternTest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index 5c38f84669c89..872a21e948d7a 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -374,6 +374,10 @@ TEST_F(GlobPatternTest, Substr) { ASSERT_TRUE((bool)Pat); EXPECT_EQ("efg", Pat->longest_substr()); + Pat = GlobPattern::create("a*bc[]]efg*h"); + ASSERT_TRUE((bool)Pat); + EXPECT_EQ("efg", Pat->longest_substr()); + Pat = GlobPattern::create("a*bcde\\fg*h"); ASSERT_TRUE((bool)Pat); EXPECT_EQ("bcde", Pat->longest_substr()); From d5a2930a19e06b4ef4aa0d2137da3a03c2b6cf20 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:56:04 -0700 Subject: [PATCH 6/8] spelling Created using spr 1.3.7 --- llvm/lib/Support/GlobPattern.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index ed35463b24074..6a4664ee6ae9c 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -146,7 
+146,7 @@ static StringRef maxPlainSubstring(StringRef S) { S = S.drop_front(PrefixSize); // It's impossible, as the first and last characters of the input string - // must be Glob special characters, otherwise the would be parts of + // must be Glob special characters, otherwise they would be parts of // the prefix or the suffix. assert(!S.empty()); @@ -166,7 +166,7 @@ static StringRef maxPlainSubstring(StringRef S) { } case '{': // TODO: implement. - // Fallback to what ever is best for now. + // Fallback to whatever is best for now. return Best; default: S = S.drop_front(1); From 7bd8de4e7dd0decb442ed6ee19885df9b5feae3b Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Oct 2025 16:58:30 -0700 Subject: [PATCH 7/8] nostream Created using spr 1.3.7 --- llvm/lib/Support/GlobPattern.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 6a4664ee6ae9c..2715229c65be1 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/GlobPattern.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; From 67ae39b77599d7646f76f1242338163a5314d65d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Oct 2025 20:12:25 -0700 Subject: [PATCH 8/8] rebase Created using spr 1.3.7 --- llvm/include/llvm/Support/GlobPattern.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 8b8ac89304e31..8cae6a38d8326 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -87,6 +87,7 @@ class GlobPattern { StringRef Pattern; size_t PrefixSize = 0; size_t SuffixSize = 0; + struct SubGlobPattern { /// \param Pat the pattern to match against LLVM_ABI static Expected create(StringRef Pat);