Skip to content

Commit f109c4b

Browse files
vitalybuka authored and github-actions[bot] committed
Automerge: [NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher (#162303)
Glob will be optimized; Regex we will keep intact. Using std::variant to avoid virtual methods, and to allow switching from unique_ptr to move in the future.
2 parents 4c6ba8a + a2723dd commit f109c4b

File tree

2 files changed

+113
-64
lines changed

2 files changed

+113
-64
lines changed

llvm/include/llvm/Support/SpecialCaseList.h

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <memory>
2121
#include <string>
2222
#include <utility>
23+
#include <variant>
2324
#include <vector>
2425

2526
namespace llvm {
@@ -120,20 +121,33 @@ class SpecialCaseList {
120121
SpecialCaseList &operator=(SpecialCaseList const &) = delete;
121122

122123
private:
123-
/// Represents a set of globs and their line numbers
124-
class Matcher {
124+
// Legacy v1 matcher.
125+
class RegexMatcher {
125126
public:
126-
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
127-
bool UseRegex);
127+
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
128128
LLVM_ABI void
129129
match(StringRef Query,
130130
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
131131

132-
LLVM_ABI bool matchAny(StringRef Query) const {
133-
bool R = false;
134-
match(Query, [&](StringRef, unsigned) { R = true; });
135-
return R;
136-
}
132+
struct Reg {
133+
Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
134+
: Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
135+
std::string Name;
136+
unsigned LineNo;
137+
Regex Rg;
138+
Reg(Reg &&) = delete;
139+
Reg() = default;
140+
};
141+
142+
std::vector<std::unique_ptr<Reg>> RegExes;
143+
};
144+
145+
class GlobMatcher {
146+
public:
147+
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
148+
LLVM_ABI void
149+
match(StringRef Query,
150+
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
137151

138152
struct Glob {
139153
Glob(StringRef Name, unsigned LineNo) : Name(Name), LineNo(LineNo) {}
@@ -146,27 +160,37 @@ class SpecialCaseList {
146160
Glob() = default;
147161
};
148162

149-
struct Reg {
150-
Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
151-
: Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
152-
std::string Name;
153-
unsigned LineNo;
154-
Regex Rg;
155-
Reg(Reg &&) = delete;
156-
Reg() = default;
157-
};
163+
std::vector<std::unique_ptr<Glob>> Globs;
164+
};
158165

159-
std::vector<std::unique_ptr<Matcher::Glob>> Globs;
160-
std::vector<std::unique_ptr<Reg>> RegExes;
161-
bool RemoveDotSlash = false;
166+
/// Represents a set of patterns and their line numbers
167+
class Matcher {
168+
public:
169+
LLVM_ABI Matcher(bool UseGlobs, bool RemoveDotSlash);
170+
171+
LLVM_ABI void
172+
match(StringRef Query,
173+
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
174+
175+
LLVM_ABI bool matchAny(StringRef Query) const {
176+
bool R = false;
177+
match(Query, [&](StringRef, unsigned) { R = true; });
178+
return R;
179+
}
180+
181+
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
182+
183+
std::variant<RegexMatcher, GlobMatcher> M;
184+
bool RemoveDotSlash;
162185
};
163186

164187
using SectionEntries = StringMap<StringMap<Matcher>>;
165188

166189
protected:
167190
struct Section {
168-
Section(StringRef Str, unsigned FileIdx)
169-
: SectionStr(Str), FileIdx(FileIdx) {};
191+
Section(StringRef Str, unsigned FileIdx, bool UseGlobs)
192+
: SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false), SectionStr(Str),
193+
FileIdx(FileIdx) {}
170194

171195
Section(Section &&) = default;
172196

@@ -197,7 +221,7 @@ class SpecialCaseList {
197221

198222
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
199223
unsigned FileIdx, unsigned LineNo,
200-
bool UseGlobs = true);
224+
bool UseGlobs);
201225

202226
/// Parses just-constructed SpecialCaseList entries from a memory buffer.
203227
LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,

llvm/lib/Support/SpecialCaseList.cpp

Lines changed: 65 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -30,57 +30,82 @@
3030

3131
namespace llvm {
3232

33-
Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
34-
bool UseGlobs) {
33+
Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
34+
unsigned LineNumber) {
3535
if (Pattern.empty())
3636
return createStringError(errc::invalid_argument,
37-
Twine("Supplied ") +
38-
(UseGlobs ? "glob" : "regex") + " was blank");
39-
40-
if (!UseGlobs) {
41-
// Replace * with .*
42-
auto Regexp = Pattern.str();
43-
for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
44-
pos += strlen(".*")) {
45-
Regexp.replace(pos, strlen("*"), ".*");
46-
}
37+
"Supplied regex was blank");
38+
39+
// Replace * with .*
40+
auto Regexp = Pattern.str();
41+
for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
42+
pos += strlen(".*")) {
43+
Regexp.replace(pos, strlen("*"), ".*");
44+
}
4745

48-
Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
46+
Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
4947

50-
// Check that the regexp is valid.
51-
Regex CheckRE(Regexp);
52-
std::string REError;
53-
if (!CheckRE.isValid(REError))
54-
return createStringError(errc::invalid_argument, REError);
48+
// Check that the regexp is valid.
49+
Regex CheckRE(Regexp);
50+
std::string REError;
51+
if (!CheckRE.isValid(REError))
52+
return createStringError(errc::invalid_argument, REError);
5553

56-
auto Rg =
57-
std::make_unique<Matcher::Reg>(Pattern, LineNumber, std::move(CheckRE));
58-
RegExes.emplace_back(std::move(Rg));
54+
auto Rg = std::make_unique<Reg>(Pattern, LineNumber, std::move(CheckRE));
55+
RegExes.emplace_back(std::move(Rg));
5956

60-
return Error::success();
61-
}
57+
return Error::success();
58+
}
59+
60+
void SpecialCaseList::RegexMatcher::match(
61+
StringRef Query,
62+
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
63+
for (const auto &Regex : reverse(RegExes))
64+
if (Regex->Rg.match(Query))
65+
Cb(Regex->Name, Regex->LineNo);
66+
}
67+
68+
Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
69+
unsigned LineNumber) {
70+
if (Pattern.empty())
71+
return createStringError(errc::invalid_argument, "Supplied glob was blank");
6272

63-
auto Glob = std::make_unique<Matcher::Glob>(Pattern, LineNumber);
73+
auto G = std::make_unique<Glob>(Pattern, LineNumber);
6474
// We must be sure to use the string in `Glob` rather than the provided
6575
// reference which could be destroyed before match() is called
66-
if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
67-
.moveInto(Glob->Pattern))
76+
if (auto Err = GlobPattern::create(G->Name, /*MaxSubPatterns=*/1024)
77+
.moveInto(G->Pattern))
6878
return Err;
69-
Globs.push_back(std::move(Glob));
79+
Globs.emplace_back(std::move(G));
7080
return Error::success();
7181
}
7282

73-
void SpecialCaseList::Matcher::match(
83+
void SpecialCaseList::GlobMatcher::match(
7484
StringRef Query,
7585
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
76-
if (RemoveDotSlash)
77-
Query = llvm::sys::path::remove_leading_dotslash(Query);
7886
for (const auto &Glob : reverse(Globs))
7987
if (Glob->Pattern.match(Query))
8088
Cb(Glob->Name, Glob->LineNo);
81-
for (const auto &Regex : reverse(RegExes))
82-
if (Regex->Rg.match(Query))
83-
Cb(Regex->Name, Regex->LineNo);
89+
}
90+
91+
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
92+
: RemoveDotSlash(RemoveDotSlash) {
93+
if (UseGlobs)
94+
M.emplace<GlobMatcher>();
95+
else
96+
M.emplace<RegexMatcher>();
97+
}
98+
99+
void SpecialCaseList::Matcher::match(
100+
StringRef Query,
101+
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
102+
if (RemoveDotSlash)
103+
Query = llvm::sys::path::remove_leading_dotslash(Query);
104+
return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
105+
}
106+
107+
Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
108+
return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
84109
}
85110

86111
// TODO: Refactor this to return Expected<...>
@@ -139,10 +164,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
139164
Expected<SpecialCaseList::Section *>
140165
SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
141166
unsigned LineNo, bool UseGlobs) {
142-
Sections.emplace_back(SectionStr, FileNo);
167+
Sections.emplace_back(SectionStr, FileNo, UseGlobs);
143168
auto &Section = Sections.back();
144169

145-
if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
170+
if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
146171
return createStringError(errc::invalid_argument,
147172
"malformed section at line " + Twine(LineNo) +
148173
": '" + SectionStr +
@@ -170,7 +195,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
170195
bool RemoveDotSlash = Version > 2;
171196

172197
Section *CurrentSection;
173-
if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
198+
if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
174199
Error = toString(std::move(Err));
175200
return false;
176201
}
@@ -213,10 +238,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
213238
}
214239

215240
auto [Pattern, Category] = Postfix.split("=");
216-
auto &Entry = CurrentSection->Entries[Prefix][Category];
217-
Entry.RemoveDotSlash =
218-
RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix);
219-
if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
241+
auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
242+
Category, UseGlobs,
243+
RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
244+
if (auto Err = It->second.insert(Pattern, LineNo)) {
220245
Error =
221246
(Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
222247
Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))

0 commit comments

Comments
 (0)