Skip to content

Commit 5cbaf55

Browse files
authored
[analyzer] Show element count in ArrayBound underflow reports (#158639)
The underflow reports of checker security.ArrayBound previously displayed the (negative) byte offset of the accessed location; but those numbers were a bit hard to decipher (especially when the elements were large structs), so this commit replaces the byte offset with an index value (if the offset is an integer multiple of the size of the accessed element). This change only affects the content of the messages; the checker will find the same issues before and after this commit.
1 parent 385c9f5 commit 5cbaf55

File tree

3 files changed

+77
-34
lines changed

3 files changed

+77
-34
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,20 @@ struct Messages {
130130
std::string Short, Full;
131131
};
132132

133+
enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
134+
135+
constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
136+
"a negative or overflowing"};
137+
static StringRef asAdjective(BadOffsetKind Problem) {
138+
return Adjectives[static_cast<int>(Problem)];
139+
}
140+
141+
constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
142+
"around"};
143+
static StringRef asPreposition(BadOffsetKind Problem) {
144+
return Prepositions[static_cast<int>(Problem)];
145+
}
146+
133147
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
134148
// instead of `PreStmt` because the current implementation passes the whole
135149
// expression to `CheckerContext::getSVal()` which only works after the
@@ -388,18 +402,6 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
388402
return SV ? getConcreteValue(*SV) : std::nullopt;
389403
}
390404

391-
static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
392-
const SubRegion *Region, NonLoc Offset) {
393-
std::string RegName = getRegionName(Space, Region), OffsetStr = "";
394-
395-
if (auto ConcreteOffset = getConcreteValue(Offset))
396-
OffsetStr = formatv(" {0}", ConcreteOffset);
397-
398-
return {
399-
formatv("Out of bound access to memory preceding {0}", RegName),
400-
formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
401-
}
402-
403405
/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
404406
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
405407
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
@@ -419,10 +421,11 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
419421
return true;
420422
}
421423

422-
static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
423-
const SubRegion *Region, NonLoc Offset,
424-
NonLoc Extent, SVal Location,
425-
bool AlsoMentionUnderflow) {
424+
static Messages getNonTaintMsgs(const ASTContext &ACtx,
425+
const MemSpaceRegion *Space,
426+
const SubRegion *Region, NonLoc Offset,
427+
std::optional<NonLoc> Extent, SVal Location,
428+
BadOffsetKind Problem) {
426429
std::string RegName = getRegionName(Space, Region);
427430
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
428431
assert(EReg && "this checker only handles element access");
@@ -439,15 +442,21 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
439442
SmallString<256> Buf;
440443
llvm::raw_svector_ostream Out(Buf);
441444
Out << "Access of ";
442-
if (!ExtentN && !UseByteOffsets)
445+
if (OffsetN && !ExtentN && !UseByteOffsets) {
446+
// If the offset is reported as an index, then the report must mention the
447+
// element type (because it is not always clear from the code). It's more
448+
// natural to mention the element type later where the extent is described,
449+
// but if the extent is unknown/irrelevant, then the element type can be
450+
// inserted into the message at this point.
443451
Out << "'" << ElemType.getAsString() << "' element in ";
452+
}
444453
Out << RegName << " at ";
445-
if (AlsoMentionUnderflow) {
446-
Out << "a negative or overflowing " << OffsetOrIndex;
447-
} else if (OffsetN) {
454+
if (OffsetN) {
455+
if (Problem == BadOffsetKind::Negative)
456+
Out << "negative ";
448457
Out << OffsetOrIndex << " " << *OffsetN;
449458
} else {
450-
Out << "an overflowing " << OffsetOrIndex;
459+
Out << asAdjective(Problem) << " " << OffsetOrIndex;
451460
}
452461
if (ExtentN) {
453462
Out << ", while it holds only ";
@@ -465,8 +474,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
465474
}
466475

467476
return {formatv("Out of bound access to memory {0} {1}",
468-
AlsoMentionUnderflow ? "around" : "after the end of",
469-
RegName),
477+
asPreposition(Problem), RegName),
470478
std::string(Buf)};
471479
}
472480

@@ -635,7 +643,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
635643
} else {
636644
if (!WithinLowerBound) {
637645
// ...and it cannot be valid (>= 0), so report an error.
638-
Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
646+
Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
647+
ByteOffset, /*Extent=*/std::nullopt,
648+
Location, BadOffsetKind::Negative);
639649
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
640650
return;
641651
}
@@ -677,9 +687,12 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
677687
return;
678688
}
679689

690+
BadOffsetKind Problem = AlsoMentionUnderflow
691+
? BadOffsetKind::Indeterminate
692+
: BadOffsetKind::Overflowing;
680693
Messages Msgs =
681-
getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
682-
*KnownSize, Location, AlsoMentionUnderflow);
694+
getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
695+
*KnownSize, Location, Problem);
683696
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
684697
return;
685698
}

clang/test/Analysis/ArrayBound/assumption-reporting.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ int assumingUpper(int arg) {
7070
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
7171
int b = TenElements[arg - 10];
7272
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
73-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
73+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
7474
return a + b;
7575
}
7676

@@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) {
9999
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
100100
int b = TenElements[(int)arg - 10];
101101
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
102-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
102+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
103103
return a + b;
104104
}
105105

@@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) {
111111
int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds
112112
int b = TenElements[(int)arg - 10];
113113
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
114-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
114+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
115115
return a + b;
116116
}
117117

@@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) {
145145
// expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}}
146146
int c = TenElements[arg-2];
147147
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
148-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
148+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
149149
return a + b + c;
150150
}
151151

clang/test/Analysis/ArrayBound/verbose-tests.c

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,47 @@ int TenElements[10];
1111
void arrayUnderflow(void) {
1212
TenElements[-3] = 5;
1313
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
14-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
14+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}}
1515
}
1616

1717
int underflowWithDeref(void) {
1818
int *p = TenElements;
1919
--p;
2020
return *p;
2121
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
22-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
22+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}}
23+
}
24+
25+
char underflowReportedAsChar(void) {
26+
// Underflow is reported with the type of the accessed element (here 'char'),
27+
// not the type that appears in the declaration of the original array (which
28+
// would be 'int').
29+
return ((char *)TenElements)[-1];
30+
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
31+
// expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}}
2332
}
2433

34+
struct TwoInts {
35+
int a, b;
36+
};
37+
38+
struct TwoInts underflowReportedAsStruct(void) {
39+
// Another case where the accessed type is used for reporting the offset.
40+
return *(struct TwoInts*)(TenElements - 4);
41+
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
42+
// expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}}
43+
}
44+
45+
struct TwoInts underflowOnlyByteOffset(void) {
46+
// In this case the negative byte offset is not a multiple of the size of the
47+
// accessed element, so the part "= -... * sizeof(type)" is omitted at the
48+
// end of the message.
49+
return *(struct TwoInts*)(TenElements - 3);
50+
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
51+
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
52+
}
53+
54+
2555
int rng(void);
2656
int getIndex(void) {
2757
switch (rng()) {
@@ -40,10 +70,10 @@ void gh86959(void) {
4070
while (rng())
4171
TenElements[getIndex()] = 10;
4272
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
43-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
73+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}}
4474
}
4575

46-
int scanf(const char *restrict fmt, ...);
76+
int scanf(const char *fmt, ...);
4777

4878
void taintedIndex(void) {
4979
int index;

0 commit comments

Comments
 (0)