Skip to content

Commit 78be939

Browse files
committed
Reimplement with different approach
This unifies the code of `getPrecedesMsg` and `getExceedsMsg` and avoids the `-count * sizeof(type)` multiplications that would evaluate to nonsense due to C type conversion footguns.
1 parent db0723c commit 78be939

File tree

3 files changed

+51
-57
lines changed

3 files changed

+51
-57
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp

Lines changed: 37 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,18 @@ struct Messages {
130130
std::string Short, Full;
131131
};
132132

133+
enum class BadOffsetKind {Negative, Overflowing, Indeterminate};
134+
135+
constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing", "a negative or overflowing"};
136+
StringRef asAdjective(BadOffsetKind Problem) {
137+
return Adjectives[static_cast<int>(Problem)];
138+
}
139+
140+
constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of", "around"};
141+
StringRef asPreposition(BadOffsetKind Problem) {
142+
return Prepositions[static_cast<int>(Problem)];
143+
}
144+
133145
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
134146
// instead of `PreStmt` because the current implementation passes the whole
135147
// expression to `CheckerContext::getSVal()` which only works after the
@@ -388,29 +400,6 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
388400
return SV ? getConcreteValue(*SV) : std::nullopt;
389401
}
390402

391-
static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
392-
const SubRegion *Region, NonLoc Offset,
393-
QualType ElemType, int64_t ElemSize) {
394-
std::string RegName = getRegionName(Space, Region);
395-
396-
std::string OffsetStr = "", ElemInfoStr = "";
397-
if (std::optional<int64_t> ConcreteOffset = getConcreteValue(Offset)) {
398-
OffsetStr = formatv(" {0}", ConcreteOffset);
399-
if (*ConcreteOffset % ElemSize == 0) {
400-
int64_t Count = *ConcreteOffset / ElemSize;
401-
if (Count != -1)
402-
ElemInfoStr =
403-
formatv(" = {0} * sizeof({1})", Count, ElemType.getAsString());
404-
else
405-
ElemInfoStr = formatv(" = -sizeof({0})", ElemType.getAsString());
406-
}
407-
}
408-
409-
return {formatv("Out of bound access to memory preceding {0}", RegName),
410-
formatv("Access of {0} at negative byte offset{1}{2}", RegName,
411-
OffsetStr, ElemInfoStr)};
412-
}
413-
414403
/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
415404
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
416405
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
@@ -430,30 +419,41 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
430419
return true;
431420
}
432421

433-
static Messages getExceedsMsgs(const MemSpaceRegion *Space,
422+
static Messages getNonTaintMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
434423
const SubRegion *Region, NonLoc Offset,
435-
NonLoc Extent, bool AlsoMentionUnderflow,
436-
QualType ElemType, int64_t ElemSize) {
424+
std::optional<NonLoc> Extent, SVal Location,
425+
BadOffsetKind Problem) {
437426
std::string RegName = getRegionName(Space, Region);
427+
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
428+
assert(EReg && "this checker only handles element access");
429+
QualType ElemType = EReg->getElementType();
438430

439431
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
440432
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
441433

434+
int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
435+
442436
bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
443437
const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
444438

445439
SmallString<256> Buf;
446440
llvm::raw_svector_ostream Out(Buf);
447441
Out << "Access of ";
448-
if (!ExtentN && !UseByteOffsets)
442+
if (OffsetN && !ExtentN && !UseByteOffsets) {
443+
// If the offset is reported as an index, then the report must mention the
444+
// element type (because it is not always clear from the code). It's more
445+
// natural to mention the element type later where the extent is described,
446+
// but if the extent is unknown/irrelevant, then the element type can be
447+
// inserted into the message at this point.
449448
Out << "'" << ElemType.getAsString() << "' element in ";
449+
}
450450
Out << RegName << " at ";
451-
if (AlsoMentionUnderflow) {
452-
Out << "a negative or overflowing " << OffsetOrIndex;
453-
} else if (OffsetN) {
451+
if (OffsetN) {
452+
if (Problem == BadOffsetKind::Negative)
453+
Out << "negative ";
454454
Out << OffsetOrIndex << " " << *OffsetN;
455455
} else {
456-
Out << "an overflowing " << OffsetOrIndex;
456+
Out << asAdjective(Problem) << " " << OffsetOrIndex;
457457
}
458458
if (ExtentN) {
459459
Out << ", while it holds only ";
@@ -471,7 +471,7 @@ static Messages getExceedsMsgs(const MemSpaceRegion *Space,
471471
}
472472

473473
return {formatv("Out of bound access to memory {0} {1}",
474-
AlsoMentionUnderflow ? "around" : "after the end of",
474+
asPreposition(Problem),
475475
RegName),
476476
std::string(Buf)};
477477
}
@@ -591,13 +591,6 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
591591
if (!RawOffset)
592592
return;
593593

594-
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
595-
assert(EReg && "this checker only handles element access");
596-
QualType ElemType = EReg->getElementType();
597-
598-
int64_t ElemSize =
599-
C.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
600-
601594
auto [Reg, ByteOffset] = *RawOffset;
602595

603596
// The state updates will be reported as a single note tag, which will be
@@ -648,8 +641,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
648641
} else {
649642
if (!WithinLowerBound) {
650643
// ...and it cannot be valid (>= 0), so report an error.
651-
Messages Msgs =
652-
getPrecedesMsgs(Space, Reg, ByteOffset, ElemType, ElemSize);
644+
Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
645+
ByteOffset, /*Extent=*/std::nullopt, Location, BadOffsetKind::Negative);
653646
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
654647
return;
655648
}
@@ -691,9 +684,10 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
691684
return;
692685
}
693686

687+
BadOffsetKind Problem = AlsoMentionUnderflow ? BadOffsetKind::Indeterminate : BadOffsetKind::Overflowing;
694688
Messages Msgs =
695-
getExceedsMsgs(Space, Reg, ByteOffset, *KnownSize,
696-
AlsoMentionUnderflow, ElemType, ElemSize);
689+
getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
690+
*KnownSize, Location, Problem);
697691
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
698692
return;
699693
}

clang/test/Analysis/ArrayBound/assumption-reporting.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ int assumingUpper(int arg) {
7070
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
7171
int b = TenElements[arg - 10];
7272
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
73-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
73+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
7474
return a + b;
7575
}
7676

@@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) {
9999
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
100100
int b = TenElements[(int)arg - 10];
101101
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
102-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
102+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
103103
return a + b;
104104
}
105105

@@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) {
111111
int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds
112112
int b = TenElements[(int)arg - 10];
113113
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
114-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
114+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
115115
return a + b;
116116
}
117117

@@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) {
145145
// expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}}
146146
int c = TenElements[arg-2];
147147
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
148-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
148+
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
149149
return a + b + c;
150150
}
151151

clang/test/Analysis/ArrayBound/verbose-tests.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,24 @@ int TenElements[10];
1111
void arrayUnderflow(void) {
1212
TenElements[-3] = 5;
1313
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
14-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12 = -3 * sizeof(int)}}
14+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}}
1515
}
1616

1717
int underflowWithDeref(void) {
1818
int *p = TenElements;
1919
--p;
2020
return *p;
2121
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
22-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -4 = -sizeof(int)}}
22+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}}
2323
}
2424

2525
char underflowReportedAsChar(void) {
26-
// The "= -... * sizeof(type)" part uses the type of the accessed element
27-
// (here 'char'), not the type that appears in the declaration of the
28-
// original array (which would be 'int').
26+
// Underflow is reported with the type of the accessed element (here 'char'),
27+
// not the type that appears in the declaration of the original array (which
28+
// would be 'int').
2929
return ((char *)TenElements)[-1];
3030
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
31-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -1 = -sizeof(char)}}
31+
// expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}}
3232
}
3333

3434
struct TwoInts {
@@ -39,7 +39,7 @@ struct TwoInts underflowReportedAsStruct(void) {
3939
// Another case where the accessed type is used for reporting the offset.
4040
return *(struct TwoInts*)(TenElements - 4);
4141
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
42-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -16 = -2 * sizeof(struct TwoInts)}}
42+
// expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}}
4343
}
4444

4545
struct TwoInts underflowOnlyByteOffset(void) {
@@ -48,7 +48,7 @@ struct TwoInts underflowOnlyByteOffset(void) {
4848
// end of the message.
4949
return *(struct TwoInts*)(TenElements - 3);
5050
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
51-
// expected-note-re@-2 {{Access of 'TenElements' at negative byte offset -12{{$}}}}
51+
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
5252
}
5353

5454

@@ -70,10 +70,10 @@ void gh86959(void) {
7070
while (rng())
7171
TenElements[getIndex()] = 10;
7272
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
73-
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -688 = -172 * sizeof(int)}}
73+
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}}
7474
}
7575

76-
int scanf(const char *restrict fmt, ...);
76+
int scanf(const char *fmt, ...);
7777

7878
void taintedIndex(void) {
7979
int index;

0 commit comments

Comments
 (0)