Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 38 additions & 26 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ struct Messages {
std::string Short, Full;
};

enum class BadOffsetKind { Negative, Overflowing, Indeterminate };

constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
"a negative or overflowing"};
static StringRef asAdjective(BadOffsetKind Problem) {
return Adjectives[static_cast<int>(Problem)];
}

constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
"around"};
static StringRef asPreposition(BadOffsetKind Problem) {
return Prepositions[static_cast<int>(Problem)];
}

// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
// expression to `CheckerContext::getSVal()` which only works after the
Expand Down Expand Up @@ -388,18 +402,6 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
return SV ? getConcreteValue(*SV) : std::nullopt;
}

static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
const SubRegion *Region, NonLoc Offset) {
std::string RegName = getRegionName(Space, Region), OffsetStr = "";

if (auto ConcreteOffset = getConcreteValue(Offset))
OffsetStr = formatv(" {0}", ConcreteOffset);

return {
formatv("Out of bound access to memory preceding {0}", RegName),
formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
}

/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
Expand All @@ -419,10 +421,10 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
return true;
}

static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
const SubRegion *Region, NonLoc Offset,
NonLoc Extent, SVal Location,
bool AlsoMentionUnderflow) {
static Messages getNonTaintMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make ACtx const?

Copy link
Contributor Author

@NagyDonat NagyDonat Sep 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, done in 5b1646f.

By the way this non-const reference was already present in the codebase (since my older message improvement commit added it IIRC last year); here it only appears in the diff because the first approach for this PR deleted it and then the second approach reverted that change.

const SubRegion *Region, NonLoc Offset,
std::optional<NonLoc> Extent, SVal Location,
BadOffsetKind Problem) {
std::string RegName = getRegionName(Space, Region);
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
Expand All @@ -439,15 +441,21 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of ";
if (!ExtentN && !UseByteOffsets)
if (OffsetN && !ExtentN && !UseByteOffsets) {
// If the offset is reported as an index, then the report must mention the
// element type (because it is not always clear from the code). It's more
// natural to mention the element type later where the extent is described,
// but if the extent is unknown/irrelevant, then the element type can be
// inserted into the message at this point.
Out << "'" << ElemType.getAsString() << "' element in ";
}
Out << RegName << " at ";
if (AlsoMentionUnderflow) {
Out << "a negative or overflowing " << OffsetOrIndex;
} else if (OffsetN) {
if (OffsetN) {
if (Problem == BadOffsetKind::Negative)
Out << "negative ";
Out << OffsetOrIndex << " " << *OffsetN;
} else {
Out << "an overflowing " << OffsetOrIndex;
Out << asAdjective(Problem) << " " << OffsetOrIndex;
}
if (ExtentN) {
Out << ", while it holds only ";
Expand All @@ -465,8 +473,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
}

return {formatv("Out of bound access to memory {0} {1}",
AlsoMentionUnderflow ? "around" : "after the end of",
RegName),
asPreposition(Problem), RegName),
std::string(Buf)};
}

Expand Down Expand Up @@ -635,7 +642,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
} else {
if (!WithinLowerBound) {
// ...and it cannot be valid (>= 0), so report an error.
Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
ByteOffset, /*Extent=*/std::nullopt,
Location, BadOffsetKind::Negative);
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
return;
}
Expand Down Expand Up @@ -677,9 +686,12 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
return;
}

BadOffsetKind Problem = AlsoMentionUnderflow
? BadOffsetKind::Indeterminate
: BadOffsetKind::Overflowing;
Messages Msgs =
getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
*KnownSize, Location, AlsoMentionUnderflow);
getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
*KnownSize, Location, Problem);
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
return;
}
Expand Down
8 changes: 4 additions & 4 deletions clang/test/Analysis/ArrayBound/assumption-reporting.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ int assumingUpper(int arg) {
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
int b = TenElements[arg - 10];
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
return a + b;
}

Expand Down Expand Up @@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) {
// expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}}
int b = TenElements[(int)arg - 10];
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
return a + b;
}

Expand All @@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) {
int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds
int b = TenElements[(int)arg - 10];
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
return a + b;
}

Expand Down Expand Up @@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) {
// expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}}
int c = TenElements[arg-2];
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
// expected-note@-2 {{Access of 'TenElements' at a negative index}}
return a + b + c;
}

Expand Down
38 changes: 34 additions & 4 deletions clang/test/Analysis/ArrayBound/verbose-tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,47 @@ int TenElements[10];
void arrayUnderflow(void) {
TenElements[-3] = 5;
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}}
}

int underflowWithDeref(void) {
int *p = TenElements;
--p;
return *p;
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -1}}
}

char underflowReportedAsChar(void) {
// Underflow is reported with the type of the accessed element (here 'char'),
// not the type that appears in the declaration of the original array (which
// would be 'int').
return ((char *)TenElements)[-1];
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'char' element in 'TenElements' at negative index -1}}
}

struct TwoInts {
int a, b;
};

struct TwoInts underflowReportedAsStruct(void) {
// Another case where the accessed type is used for reporting the offset.
return *(struct TwoInts*)(TenElements - 4);
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at negative index -2}}
}

struct TwoInts underflowOnlyByteOffset(void) {
// In this case the negative byte offset is not a multiple of the size of the
// accessed element, so the part "= -... * sizeof(type)" is omitted at the
// end of the message.
return *(struct TwoInts*)(TenElements - 3);
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
}


int rng(void);
int getIndex(void) {
switch (rng()) {
Expand All @@ -40,10 +70,10 @@ void gh86959(void) {
while (rng())
TenElements[getIndex()] = 10;
// expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}}
// expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
// expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}}
}

int scanf(const char *restrict fmt, ...);
int scanf(const char *fmt, ...);

void taintedIndex(void) {
int index;
Expand Down