Skip to content

Commit d46bce9

Browse files
committed
Canonicalize special case list filenames when loading
1 parent 47236f1 commit d46bce9

File tree

11 files changed

+92
-61
lines changed

11 files changed

+92
-61
lines changed

clang/docs/SanitizerSpecialCaseList.rst

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,6 @@ tool-specific docs.
174174
# Lines starting with # are ignored.
175175
# Turn off checks for the source file
176176
# Entries without sections are placed into [*] and apply to all sanitizers
177-
# On windows, "/" also matches "\" in filenames
178177
src:path/to/source/file.c
179178
src:*/source/file.c
180179
# Turn off checks for this main file, including files included by it.
@@ -197,6 +196,23 @@ tool-specific docs.
197196
[{cfi-vcall,cfi-icall}]
198197
fun:*BadCfiCall
199198
199+
200+
.. note::
201+
202+
By default, ``src`` and ``mainfile`` are matched against the filename as seen
203+
by LLVM. On Windows, this might involve a mix of forward and backslashes as
204+
file separators, and writing patterns to match both variants can be
205+
inconvenient.
206+
207+
If the special case list file begins with ``#!canonical-paths``, then paths
208+
will be canonicalized before patterns are matched against them. This involves
209+
stripping any leading dots and slashes, and (on Windows only) converting all
210+
backslashes to forward slashes.
211+
212+
If the file uses both ``#!special-case-list-v1`` and ``#!canonical-paths``,
213+
then they should occupy the first two lines, and ``#!canonical-paths`` must
214+
appear on the second line.
215+
200216
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
201217
does not need plumbing into the build system. This works well for internal
202218
linkage functions but has a caveat for C++ vague linkage functions.

clang/lib/Basic/Diagnostic.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -612,18 +612,24 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId,
612612
SrcEntriesIt->getValue();
613613
// We also use presumed locations here to improve reproducibility for
614614
// preprocessed inputs.
615-
if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid())
616-
return globsMatches(
617-
CategoriesToMatchers,
618-
llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()));
615+
if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) {
616+
if (CanonicalizePaths) {
617+
return globsMatches(
618+
CategoriesToMatchers,
619+
llvm::sys::path::convert_to_slash(
620+
llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())));
621+
} else {
622+
return globsMatches(
623+
CategoriesToMatchers,
624+
llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()));
625+
}
626+
}
619627
return false;
620628
}
621629

622630
bool WarningsSpecialCaseList::globsMatches(
623631
const llvm::StringMap<Matcher> &CategoriesToMatchers,
624632
StringRef FilePath) const {
625-
static bool HaveWindowsPathStyle =
626-
llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
627633
StringRef LongestMatch;
628634
bool LongestIsPositive = false;
629635
for (const auto &Entry : CategoriesToMatchers) {
@@ -633,8 +639,7 @@ bool WarningsSpecialCaseList::globsMatches(
633639
for (const auto &Glob : Matcher.Globs) {
634640
if (Glob->Name.size() < LongestMatch.size())
635641
continue;
636-
if (!Glob->Pattern.match(FilePath,
637-
/*IsSlashAgnostic=*/HaveWindowsPathStyle))
642+
if (!Glob->Pattern.match(FilePath))
638643
continue;
639644
LongestMatch = Glob->Name;
640645
LongestIsPositive = IsPositive;

clang/lib/Basic/SanitizerSpecialCaseList.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() {
4242
SanitizerMask Mask;
4343

4444
#define SANITIZER(NAME, ID) \
45-
if (S.SectionMatcher->match(NAME, /*IsFilename=*/false)) \
45+
if (S.SectionMatcher->match(NAME)) \
4646
Mask |= SanitizerKind::ID;
4747
#define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID)
4848

clang/unittests/Basic/DiagnosticTest.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,8 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
362362
}
363363

364364
#ifdef _WIN32
365-
// We're only slash-agnostic on windows hosts
366-
TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) {
367-
llvm::StringLiteral SuppressionMappingFile = R"(
365+
TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) {
366+
llvm::StringLiteral SuppressionMappingFile = R"(#!canonical-paths
368367
[unused]
369368
src:*clang/*
370369
src:*clang/lib/Sema/*=emit
@@ -378,14 +377,21 @@ TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) {
378377

379378
EXPECT_TRUE(Diags.isSuppressedViaMapping(
380379
diag::warn_unused_function, locForFile(R"(clang/lib/Basic/bar.h)")));
380+
EXPECT_TRUE(Diags.isSuppressedViaMapping(
381+
diag::warn_unused_function, locForFile(R"(clang/lib/Basic\bar.h)")));
382+
EXPECT_TRUE(Diags.isSuppressedViaMapping(
383+
diag::warn_unused_function, locForFile(R"(clang\lib/Basic/bar.h)")));
384+
EXPECT_FALSE(Diags.isSuppressedViaMapping(
385+
diag::warn_unused_function, locForFile(R"(clang/lib/Sema/baz.h)")));
381386
EXPECT_FALSE(Diags.isSuppressedViaMapping(
382387
diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)")));
383388

384-
// We require a literal backslash before "Sema"
385-
EXPECT_TRUE(Diags.isSuppressedViaMapping(
389+
// The backslash gets canonicalized so we never match the third pattern
390+
EXPECT_FALSE(Diags.isSuppressedViaMapping(
386391
diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)")));
387392
EXPECT_FALSE(Diags.isSuppressedViaMapping(
388393
diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)")));
389394
}
390395
#endif
396+
391397
} // namespace

llvm/docs/ReleaseNotes.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,10 @@ Changes to BOLT
174174
Changes to Sanitizers
175175
---------------------
176176

177-
* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
178-
now treats forward slashes in filenames as matching either a forward or a
179-
backslash, to accommodate paths with mixed unix and windows styles.
177+
* (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html]
178+
may now be prefixed with ``#!canonical-paths`` to specify that filename patterns
179+
should be matched against canonicalized paths, without leading dots or slashes
180+
and (on Windows only) without any backslashes.
180181

181182
Other Changes
182183
-------------

llvm/include/llvm/Support/GlobPattern.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ namespace llvm {
3636
/// brace expansions are not supported and characters `{,}` are treated as
3737
/// literals.
3838
/// * `\` escapes the next character so it is treated as a literal.
39-
/// * If \p IsSlashAgnostic is passed to the match function, then forward
40-
/// slashes `/` also match backslashes `\`.
4139
///
4240
/// Some known edge cases are:
4341
/// * The literal `]` is allowed as the first character in a character class,
@@ -59,9 +57,8 @@ class GlobPattern {
5957
/// brace expansion
6058
LLVM_ABI static Expected<GlobPattern>
6159
create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
62-
/// \param IsSlashAgnostic whether to treat '/' as also matching '\'
6360
/// \returns \p true if \p S matches this glob pattern
64-
LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic = false) const;
61+
LLVM_ABI bool match(StringRef S) const;
6562

6663
// Returns true for glob pattern "*". Can be used to avoid expensive
6764
// preparation/acquisition of the input for match().
@@ -79,9 +76,8 @@ class GlobPattern {
7976
struct SubGlobPattern {
8077
/// \param Pat the pattern to match against
8178
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
82-
/// \param IsSlashAgnostic whether to treat '/' as also matching '\'
8379
/// \returns \p true if \p S matches this glob pattern
84-
LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic) const;
80+
LLVM_ABI bool match(StringRef S) const;
8581
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
8682

8783
// Brackets with their end position and matched bytes.

llvm/include/llvm/Support/SpecialCaseList.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,10 @@ class SpecialCaseList {
122122
class Matcher {
123123
public:
124124
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
125-
bool UseRegex);
125+
bool UseGlobs);
126126
// Returns the line number in the source file that this query matches to.
127-
// On windows, treat '/' as also matching '\' in filenames when using globs.
128-
// Returns zero if no match is found
129-
LLVM_ABI unsigned match(StringRef Query, bool IsFilename) const;
127+
// Returns zero if no match is found.
128+
LLVM_ABI unsigned match(StringRef Query) const;
130129

131130
struct Glob {
132131
std::string Name;
@@ -155,6 +154,7 @@ class SpecialCaseList {
155154
};
156155

157156
std::vector<Section> Sections;
157+
bool CanonicalizePaths = false;
158158

159159
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
160160
unsigned FileIdx, unsigned LineNo,

llvm/lib/Support/GlobPattern.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
137137
GlobPattern Pat;
138138

139139
// Store the prefix that does not contain any metacharacter.
140-
size_t PrefixSize = S.find_first_of("?*[{\\/");
140+
size_t PrefixSize = S.find_first_of("?*[{\\");
141141
Pat.Prefix = S.substr(0, PrefixSize);
142142
if (PrefixSize == std::string::npos)
143143
return Pat;
@@ -190,22 +190,21 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
190190
return Pat;
191191
}
192192

193-
bool GlobPattern::match(StringRef S, bool IsSlashAgnostic) const {
193+
bool GlobPattern::match(StringRef S) const {
194194
if (!S.consume_front(Prefix))
195195
return false;
196196
if (SubGlobs.empty() && S.empty())
197197
return true;
198198
for (auto &Glob : SubGlobs)
199-
if (Glob.match(S, IsSlashAgnostic))
199+
if (Glob.match(S))
200200
return true;
201201
return false;
202202
}
203203

204204
// Factor the pattern into segments split by '*'. The segment is matched
205-
// sequentially by finding the first occurrence past the end of the previous
205+
// sequentianlly by finding the first occurrence past the end of the previous
206206
// match.
207-
bool GlobPattern::SubGlobPattern::match(StringRef Str,
208-
bool IsSlashAgnostic) const {
207+
bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
209208
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
210209
*SavedS = S;
211210
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -232,10 +231,6 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str,
232231
++S;
233232
continue;
234233
}
235-
} else if (IsSlashAgnostic && *P == '/' && *S == '\\') {
236-
++P;
237-
++S;
238-
continue;
239234
} else if (*P == *S || *P == '?') {
240235
++P;
241236
++S;

llvm/lib/Support/SpecialCaseList.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "llvm/ADT/STLExtras.h"
1818
#include "llvm/Support/LineIterator.h"
1919
#include "llvm/Support/MemoryBuffer.h"
20-
#include "llvm/Support/Path.h"
2120
#include "llvm/Support/VirtualFileSystem.h"
2221
#include <stdio.h>
2322
#include <string>
@@ -67,13 +66,9 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
6766
return Error::success();
6867
}
6968

70-
unsigned SpecialCaseList::Matcher::match(StringRef Query,
71-
bool IsFilename) const {
72-
static bool HaveWindowsPathStyle =
73-
llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
69+
unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
7470
for (const auto &Glob : reverse(Globs))
75-
if (Glob->Pattern.match(
76-
Query, /*IsSlashAgnostic=*/(HaveWindowsPathStyle && IsFilename)))
71+
if (Glob->Pattern.match(Query))
7772
return Glob->LineNo;
7873
for (const auto &[Regex, LineNumber] : reverse(RegExes))
7974
if (Regex->match(Query))
@@ -158,12 +153,17 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
158153
return false;
159154
}
160155

156+
// Scan the start of the file for special comments. These don't appear when
157+
// iterating below because comment lines are automatically skipped.
158+
StringRef Buffer = MB->getBuffer();
161159
// In https://reviews.llvm.org/D154014 we added glob support and planned to
162160
// remove regex support in patterns. We temporarily support the original
163-
// behavior using regexes if "#!special-case-list-v1" is the first line of the
164-
// file. For more details, see
161+
// behavior using regexes if "#!special-case-list-v1" is the first line of
162+
// the file. For more details, see
165163
// https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
166-
bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
164+
bool UseGlobs = !Buffer.consume_front("#!special-case-list-v1\n");
165+
// Specifies that patterns should be matched against canonicalized filepaths.
166+
CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n");
167167

168168
for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
169169
!LineIt.is_at_eof(); LineIt++) {
@@ -223,8 +223,7 @@ std::pair<unsigned, unsigned>
223223
SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
224224
StringRef Query, StringRef Category) const {
225225
for (const auto &S : reverse(Sections)) {
226-
bool IsFilename = Prefix == "src" || Prefix == "mainfile";
227-
if (S.SectionMatcher->match(Section, IsFilename)) {
226+
if (S.SectionMatcher->match(Section)) {
228227
unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
229228
if (Blame)
230229
return {S.FileIdx, Blame};
@@ -243,8 +242,12 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
243242
if (II == I->second.end())
244243
return 0;
245244

246-
bool IsFilename = Prefix == "src" || Prefix == "mainfile";
247-
return II->getValue().match(Query, IsFilename);
245+
if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) {
246+
return II->getValue().match(llvm::sys::path::convert_to_slash(
247+
llvm::sys::path::remove_leading_dotslash(Query)));
248+
} else {
249+
return II->getValue().match(Query);
250+
}
248251
}
249252

250253
} // namespace llvm

llvm/unittests/Support/GlobPatternTest.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,4 @@ TEST_F(GlobPatternTest, Pathological) {
271271
EXPECT_FALSE(Pat->match(S));
272272
EXPECT_TRUE(Pat->match(S + 'b'));
273273
}
274-
275-
TEST_F(GlobPatternTest, SlashAgnostic) {
276-
auto Pat = GlobPattern::create("clang/*");
277-
ASSERT_TRUE((bool)Pat);
278-
EXPECT_TRUE(Pat->match("clang/foo"));
279-
EXPECT_FALSE(Pat->match(R"(clang\foo)"));
280-
EXPECT_TRUE(Pat->match("clang/foo", /*isSlashAgnostic=*/true));
281-
EXPECT_TRUE(Pat->match(R"(clang\foo)", /*isSlashAgnostic=*/true));
282-
}
283274
}

0 commit comments

Comments
 (0)