diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index a1f5bdfb9edf1..5f60b0ee517bb 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -915,9 +915,18 @@ def decodeValueOfObjCType : Checker<"decodeValueOfObjCType">, let ParentPackage = Security in { - def ArrayBoundChecker : Checker<"ArrayBound">, - HelpText<"Warn about out of bounds access to memory">, - Documentation; + def ArrayBoundChecker + : Checker<"ArrayBound">, + HelpText<"Warn about out of bounds access to memory">, + CheckerOptions<[ + CmdLineOption, + ]>, + Documentation; def FloatLoopCounter : Checker<"FloatLoopCounter">, diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index 6ad5acd4e76f2..4ea5273127577 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -103,27 +103,6 @@ class StateUpdateReporter { private: std::string getMessage(PathSensitiveBugReport &BR) const; - - /// Return true if information about the value of `Sym` can put constraints - /// on some symbol which is interesting within the bug report `BR`. - /// In particular, this returns true when `Sym` is interesting within `BR`; - /// but it also returns true if `Sym` is an expression that contains integer - /// constants and a single symbolic operand which is interesting (in `BR`). - /// We need to use this instead of plain `BR.isInteresting()` because if we - /// are analyzing code like - /// int array[10]; - /// int f(int arg) { - /// return array[arg] && array[arg + 10]; - /// } - /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not - /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough - /// to detect this out of bounds access). - static bool providesInformationAboutInteresting(SymbolRef Sym, - PathSensitiveBugReport &BR); - static bool providesInformationAboutInteresting(SVal SV, - PathSensitiveBugReport &BR) { - return providesInformationAboutInteresting(SV.getAsSymbol(), BR); - } }; struct Messages { @@ -157,12 +136,27 @@ class ArrayBoundChecker : public Checker, BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; + enum class ConstantArrayIndexResult { + Done, //< Could model the index access based on its type + Unknown //< Could not model the array access based on its type + }; + + // `ConstantArrayType`s have a constant size, so use it to check the access. + ConstantArrayIndexResult + performCheckArrayTypeIndex(const ArraySubscriptExpr *E, + CheckerContext &C) const; + void performCheck(const Expr *E, CheckerContext &C) const; void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs, NonLoc Offset, std::optional Extent, bool IsTaintBug = false) const; + void warnFlexibleArrayAccess(CheckerContext &C, ProgramStateRef State, + const ArraySubscriptExpr *E, StringRef Name, + NonLoc Index, nonloc::ConcreteInt ArraySize, + QualType ArrayType) const; + static void markPartsInteresting(PathSensitiveBugReport &BR, ProgramStateRef ErrorState, NonLoc Val, bool MarkTaint); @@ -177,8 +171,12 @@ class ArrayBoundChecker : public Checker, static bool isInAddressOf(const Stmt *S, ASTContext &AC); public: + bool EnableFakeFlexibleArrayWarn{false}; + void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { - performCheck(E, C); + if (performCheckArrayTypeIndex(E, C) == ConstantArrayIndexResult::Unknown) { + performCheck(E, C); + } } void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { if (E->getOpcode() == UO_Deref) @@ -478,10 +476,8 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx, std::string(Buf)}; } -static Messages getTaintMsgs(const MemSpaceRegion *Space, - const SubRegion *Region, const char *OffsetName, +static Messages getTaintMsgs(StringRef RegName, const char *OffsetName, bool AlsoMentionUnderflow) { - std::string RegName = getRegionName(Space, Region); return {formatv("Potential out of bound access to {0} with tainted {1}", RegName, OffsetName), formatv("Access of {0} with a tainted {1} that may be {2}too large", @@ -489,6 +485,43 @@ static Messages getTaintMsgs(const MemSpaceRegion *Space, AlsoMentionUnderflow ? "negative or " : "")}; } +/// Return true if information about the value of `Sym` can put constraints +/// on some symbol which is interesting within the bug report `BR`. +/// In particular, this returns true when `Sym` is interesting within `BR`; +/// but it also returns true if `Sym` is an expression that contains integer +/// constants and a single symbolic operand which is interesting (in `BR`). +/// We need to use this instead of plain `BR.isInteresting()` because if we +/// are analyzing code like +/// int array[10]; +/// int f(int arg) { +/// return array[arg] && array[arg + 10]; +/// } +/// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not +/// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough +/// to detect this out of bounds access). +static bool providesInformationAboutInteresting(SymbolRef Sym, + PathSensitiveBugReport &BR) { + if (!Sym) + return false; + for (SymbolRef PartSym : Sym->symbols()) { + // The interestingess mark may appear on any layer as we're stripping off + // the SymIntExpr, UnarySymExpr etc. layers... + if (BR.isInteresting(PartSym)) + return true; + // ...but if both sides of the expression are symbolic, then there is no + // practical algorithm to produce separate constraints for the two + // operands (from the single combined result). + if (isa(PartSym)) + return false; + } + return false; +} + +static bool providesInformationAboutInteresting(SVal SV, + PathSensitiveBugReport &BR) { + return providesInformationAboutInteresting(SV.getAsSymbol(), BR); +} + const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const { // Don't create a note tag if we didn't assume anything: if (!AssumedNonNegative && !AssumedUpperBound) @@ -555,22 +588,266 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const { return std::string(Out.str()); } -bool StateUpdateReporter::providesInformationAboutInteresting( - SymbolRef Sym, PathSensitiveBugReport &BR) { - if (!Sym) - return false; - for (SymbolRef PartSym : Sym->symbols()) { - // The interestingess mark may appear on any layer as we're stripping off - // the SymIntExpr, UnarySymExpr etc. layers... - if (BR.isInteresting(PartSym)) - return true; - // ...but if both sides of the expression are symbolic, then there is no - // practical algorithm to produce separate constraints for the two - // operands (from the single combined result). - if (isa(PartSym)) - return false; +// If the base expression of `expr` refers to a `ConstantArrayType`, +// return the element type and the array size. +// Note that "real" Flexible Array Members are `IncompleteArrayType`. +// For them, this function returns `std::nullopt`. +static std::optional> +getArrayTypeInfo(SValBuilder &SVB, ArraySubscriptExpr const *E) { + Expr const *ArrayBaseExpr = E->getBase()->IgnoreParenImpCasts(); + QualType const ArrayQualType = ArrayBaseExpr->getType().getCanonicalType(); + if (ArrayQualType.isNull() || !ArrayQualType->isConstantArrayType()) + return std::nullopt; + + auto *ConstArrayType = + dyn_cast(ArrayQualType->getAsArrayTypeUnsafe()); + if (!ConstArrayType) + return std::nullopt; + + return std::make_pair( + ConstArrayType->getElementType(), + // Note that an array size is technically unsigned, but + // `compareValueToThreshold` (via `getSimplifiedOffsets`) + // will do some arithmetics that could overflow and cause FN. For instance + // ``` + // int array[20]; + // array[unsigned_index + 21]; + // ``` + // `unsigned_index + 21 < 20` is turned into `unsigned_index < 20 - 21`, + // and if 20 is unsigned, that will overflow to the biggest possible array + // which is always trivially true. We obviously do not want that, so we + // need to treat the size as signed. + SVB.makeIntVal(llvm::APSInt{ConstArrayType->getSize(), false})); +} + +static Messages getNegativeIndexMessage(StringRef Name, + nonloc::ConcreteInt ArraySize, + NonLoc Index) { + uint64_t const ArraySizeVal = ArraySize.getValue()->getZExtValue(); + std::string const IndexStr = [&]() -> std::string { + if (std::optional ConcreteIndex = getConcreteValue(Index)) { + return formatv(" {0}", *ConcreteIndex); + } + return ""; + }(); + + return {formatv("Out of bound access to {0} at a negative index", Name), + formatv("Access of {0} containing {1} elements at negative index{2}", + Name, ArraySizeVal, IndexStr)}; +} + +template +static std::string truncateWithEllipsis(StringRef Str) { + static_assert(MaxLength > 4, "MaxLength must be long enough to hold a single " + "character plus the ellipsis"); + if (Str.size() <= MaxLength) + return Str.str(); + + return (Str.substr(0, MaxLength - 3) + "...").str(); +} + +static Messages getOOBIndexMessage(StringRef Name, NonLoc Index, + nonloc::ConcreteInt ArraySize, + QualType ElemType, + bool AlsoMentionUnderflow) { + std::optional IndexN = getConcreteValue(Index); + int64_t ExtentN = ArraySize.getValue()->getZExtValue(); + + SmallString<256> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Access of " << Name << " at "; + if (AlsoMentionUnderflow) { + Out << "a negative or overflowing index"; + } else if (IndexN.has_value()) { + Out << "index " << *IndexN; + } else { + Out << "an overflowing index"; } - return false; + + std::string const ElemTypeStr = truncateWithEllipsis(ElemType.getAsString()); + + Out << ", while it holds only "; + if (ExtentN != 1) + Out << ExtentN << " '" << ElemTypeStr << "' elements"; + else + Out << "a single " << "'" << ElemTypeStr << "' element"; + + return {formatv("Out of bound access to memory {0} {1}", + AlsoMentionUnderflow ? "around" : "after the end of", Name), + std::string(Buf)}; +} + +// Generate a representation of `Expr` suitable for diagnosis. +static std::string ExprReprForDiagnosis(Expr const *Base, CheckerContext &C) { + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + + switch (Base->getStmtClass()) { + case Stmt::MemberExprClass: + Out << "the field '"; + Out << dyn_cast(Base)->getMemberDecl()->getName().str(); + Out << '\''; + break; + case Stmt::ArraySubscriptExprClass: + Out << "the subarray '"; + Base->printPretty(Out, nullptr, {C.getLangOpts()}); + Out << '\''; + break; + default: + Out << '\''; + Base->printPretty(Out, nullptr, {C.getLangOpts()}); + Out << '\''; + } + + return std::string(Buf); +} + +class StateIndexUpdateReporter { + std::string Repr; + QualType ElementType; + NonLoc Index; + nonloc::ConcreteInt ArraySize; + bool AssumedNonNegative = false; + bool AssumedInBounds = false; + + std::string getMessage(PathSensitiveBugReport &BR) const { + SmallString<256> Buf; + if (providesInformationAboutInteresting(Index, BR)) { + llvm::raw_svector_ostream Out{Buf}; + Out << "Assuming index is"; + if (AssumedNonNegative) + Out << " non-negative"; + if (AssumedInBounds) { + if (AssumedNonNegative) + Out << " and"; + Out << " less than " << ArraySize.getValue()->getZExtValue() << ", "; + Out << "the number of '" << ElementType.getAsString() + << "' elements in "; + Out << Repr; + } + } + return std::string{Buf.str()}; + } + +public: + StateIndexUpdateReporter(StringRef Repr, QualType ElementType, NonLoc Index, + nonloc::ConcreteInt ArraySize) + : Repr(Repr), ElementType{ElementType}, Index{Index}, + ArraySize{ArraySize} {} + + void recordNonNegativeAssumption() { AssumedNonNegative = true; } + + void recordInBoundsAssumption() { AssumedInBounds = true; } + + const NoteTag *createNoteTag(CheckerContext &C) const { + // Don't create a note tag if we didn't assume anything: + if (!AssumedNonNegative && !AssumedInBounds) { + return nullptr; + } + + return C.getNoteTag( + [*this](PathSensitiveBugReport &BR) { return getMessage(BR); }); + } +}; + +// If the array is a `ConstantArrayType`, check the axis size. +// It returns `ConstantArrayIndexResult::Unknown` if it could not reason about +// the array access, deferring to the regular check based on the region. +auto ArrayBoundChecker::performCheckArrayTypeIndex(const ArraySubscriptExpr *E, + CheckerContext &C) const + -> ConstantArrayIndexResult { + auto State = C.getState(); + SValBuilder &SVB = C.getSValBuilder(); + + std::optional> const ArrayInfo = + getArrayTypeInfo(SVB, E); + std::optional const Index = + SVB.simplifySVal(State, C.getSVal(E->getIdx())).getAs(); + if (!ArrayInfo || !Index) + return ConstantArrayIndexResult::Unknown; + + auto const &[ArrayType, ArraySize] = *ArrayInfo; + + Expr const *BaseExpr = E->getBase()->IgnoreImpCasts(); + std::string const ExprAsStr = ExprReprForDiagnosis(BaseExpr, C); + bool const IsFakeFAM = BaseExpr->isFlexibleArrayMemberLike( + C.getASTContext(), C.getLangOpts().getStrictFlexArraysLevel(), + /*IgnoreTemplateOrMacroSubstitution=*/true); + + StateIndexUpdateReporter SUR(ExprAsStr, ArrayType, *Index, ArraySize); + + // Is the index negative? + auto [NegativeIndexState, NonNegativeIndexState] = + compareValueToThreshold(State, *Index, SVB.makeZeroArrayIndex(), SVB); + bool const AlsoMentionUnderflow = (NegativeIndexState != nullptr); + + // Negative is possible + if (NegativeIndexState) { + // But it can't be + if (E->getIdx()->getType()->isUnsignedIntegerOrEnumerationType()) { + // And positive isn't possible + if (!NonNegativeIndexState) { + // The state is broken + return ConstantArrayIndexResult::Done; + } + // As in `performCheck`, we add no assumptions about the index + } else if (!NonNegativeIndexState) { + // Positive is not possible, this is a bug + Messages Msgs = getNegativeIndexMessage(ExprAsStr, ArraySize, *Index); + reportOOB(C, NegativeIndexState, Msgs, *Index, ArraySize); + return ConstantArrayIndexResult::Done; + + } else { + // Both negative and positive are possible, assume positive + SUR.recordNonNegativeAssumption(); + } + } else if (!NonNegativeIndexState) { + // Broken state + return ConstantArrayIndexResult::Done; + } + + // The index is greater than 0, is it within bounds? + auto [WithinUpperBound, OutOfBounds] = + compareValueToThreshold(NonNegativeIndexState, *Index, ArraySize, SVB); + if (!WithinUpperBound && !OutOfBounds) { + // Invalid state + return ConstantArrayIndexResult::Done; + } + + if (!WithinUpperBound) { + if (isIdiomaticPastTheEndPtr(E, OutOfBounds, *Index, ArraySize, C)) { + C.addTransition(OutOfBounds, SUR.createNoteTag(C)); + return ConstantArrayIndexResult::Done; + } + // Silence for fake flexible arrays unless explicitly enabled + if (!IsFakeFAM) { + Messages Msgs = getOOBIndexMessage(ExprAsStr, *Index, ArraySize, + ArrayType, AlsoMentionUnderflow); + reportOOB(C, OutOfBounds, Msgs, *Index, ArraySize); + } else if (EnableFakeFlexibleArrayWarn) { + warnFlexibleArrayAccess(C, OutOfBounds, E, ExprAsStr, *Index, ArraySize, + ArrayType); + } + return ConstantArrayIndexResult::Done; + } + + // The access might be within range, but it may be tainted + if (OutOfBounds && isTainted(OutOfBounds, *Index)) { + Messages Msgs = getTaintMsgs(ExprAsStr, "index", AlsoMentionUnderflow); + reportOOB(C, OutOfBounds, Msgs, *Index, ArraySize, + /*IsTaintBug=*/true); + } + + // When "Flexible Array Members" are involved, assume only non-negative + // even if we want the warning for OOB FAM access. + if (!IsFakeFAM) { + if (WithinUpperBound != NonNegativeIndexState) + SUR.recordInBoundsAssumption(); + C.addTransition(WithinUpperBound, SUR.createNoteTag(C)); + } else + C.addTransition(NonNegativeIndexState, SUR.createNoteTag(C)); + + return ConstantArrayIndexResult::Done; } void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { @@ -708,8 +985,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { if (isTainted(State, ASE->getIdx(), C.getLocationContext())) OffsetName = "index"; - Messages Msgs = - getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow); + Messages Msgs = getTaintMsgs(getRegionName(Space, Reg), OffsetName, + AlsoMentionUnderflow); reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, /*IsTaintBug=*/true); return; @@ -785,6 +1062,43 @@ void ArrayBoundChecker::reportOOB(CheckerContext &C, ProgramStateRef ErrorState, C.emitReport(std::move(BR)); } +void ArrayBoundChecker::warnFlexibleArrayAccess(CheckerContext &C, + ProgramStateRef State, + const ArraySubscriptExpr *E, + StringRef Name, NonLoc Index, + nonloc::ConcreteInt ArraySize, + QualType ElemType) const { + ExplodedNode *WarnNode = C.generateNonFatalErrorNode(State); + if (WarnNode) { + int64_t ExtentN = ArraySize.getValue()->getZExtValue(); + + assert(ExtentN <= 1 && "Flexible arrays are expected to have size 0 or 1"); + + SmallString<256> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Access of " << Name << " containing "; + if (ExtentN != 1) { + Out << ExtentN << " '" << ElemType.getAsString() << "' elements"; + } else { + Out << "a single '" << ElemType.getAsString() << "' element"; + } + Out << " as a possible 'flexible array member'"; + + auto BR = std::make_unique( + BT, + formatv( + "Potential out of bound access to {0}, which may be a 'flexible " + "array member'", + Name) + .str(), + Buf, WarnNode); + + markPartsInteresting(*BR, State, Index, false); + markPartsInteresting(*BR, State, ArraySize, false); + C.emitReport(std::move(BR)); + } +} + bool ArrayBoundChecker::isFromCtypeMacro(const Expr *E, ASTContext &ACtx) { SourceLocation Loc = E->getBeginLoc(); if (!Loc.isMacroID()) @@ -837,7 +1151,10 @@ bool ArrayBoundChecker::isIdiomaticPastTheEndPtr(const Expr *E, } void ento::registerArrayBoundChecker(CheckerManager &mgr) { - mgr.registerChecker(); + auto *checker = mgr.registerChecker(); + checker->EnableFakeFlexibleArrayWarn = + mgr.getAnalyzerOptions().getCheckerBooleanOption( + checker->getName(), "EnableFakeFlexibleArrayWarn"); } bool ento::shouldRegisterArrayBoundChecker(const CheckerManager &mgr) { diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c b/clang/test/Analysis/ArrayBound/assumption-reporting.c index bffd5d9bc35b5..a6cb86c3f221c 100644 --- a/clang/test/Analysis/ArrayBound/assumption-reporting.c +++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c @@ -69,8 +69,8 @@ int assumingUpper(int arg) { int a = TenElements[arg]; // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[arg - 10]; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at a negative index}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index}} return a + b; } @@ -98,8 +98,8 @@ int assumingUpperUnsigned(unsigned arg) { int a = TenElements[arg]; // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'TenElements'}} int b = TenElements[(int)arg - 10]; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at a negative index}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index}} return a + b; } @@ -110,8 +110,8 @@ int assumingNothing(unsigned arg) { return 0; int a = TenElements[arg]; // no note here, we already know that 'arg' is in bounds int b = TenElements[(int)arg - 10]; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at a negative index}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index}} return a + b; } @@ -144,8 +144,8 @@ int assumingConvertedToIntP(struct foo f, int arg) { int b = ((int*)(f.b))[arg]; // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}} int c = TenElements[arg-2]; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'TenElements' at a negative index}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index}} return a + b + c; } @@ -213,3 +213,77 @@ int triggeredByAnyReport(int arg) { // expected-warning@-1 {{Right operand is negative in right shift}} // expected-note@-2 {{The result of right shift is undefined because the right operand is negative}} } + +extern void clang_analyzer_dump(int); + +int* md_array_assumptions(int x, int y, int z) { + int *mem = (int*)malloc(y); + + int array[80][90][100]; + int value = array[x][y][z]; + // expected-note@-1 {{Assuming index is non-negative and less than 90, the number of 'int[100]' elements in the subarray 'array[x]'}} + + int* ptr = &mem[100]; + // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} + // expected-note@-2 {{Access of 'int' element in the heap area at index 100}} + if (y) {} + return ptr; +} + +int* md_array_assumptions_2(int x, int y, int z) { + int *mem = (int*)malloc(x); + + int array[80][90][100]; + int value = array[x][y][z]; + // expected-note@-1 {{Assuming index is non-negative and less than 80, the number of 'int[90][100]' elements in 'array'}} + + int* ptr = &mem[100]; + // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} + // expected-note@-2 {{Access of 'int' element in the heap area at index 100}} + if (x) {} + return ptr; +} + +int* md_array_assumptions_3(int x, int y, int z) { + int *mem = (int*)malloc(z); + + int array[80][90][100]; + int value = array[x][y][z]; + // expected-note@-1 {{Assuming index is non-negative and less than 100, the number of 'int' elements in the subarray 'array[x][y]'}} + + int* ptr = &mem[100]; + // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} + // expected-note@-2 {{Access of 'int' element in the heap area at index 100}} + if (z) {} + return ptr; +} + + +struct Compound { + int arr[10][20]; +}; + +void compound() { + struct Compound c; + c.arr[3][28] = 1; + // expected-warning@-1 {{Out of bound access to memory after the end of the subarray 'c.arr[3]'}} + // expected-note@-2 {{Access of the subarray 'c.arr[3]' at index 28, while it holds only 20 'int' elements}} +} + +void compound_arg(struct Compound *c) { + c->arr[3][28] = 1; + // expected-warning@-1 {{Out of bound access to memory after the end of the subarray 'c->arr[3]'}} + // expected-note@-2 {{Access of the subarray 'c->arr[3]' at index 28, while it holds only 20 'int' elements}} +} + +int array_arg(int x[10][20]) { + return x[5][22]; + // expected-warning@-1 {{Out of bound access to memory after the end of the subarray 'x[5]'}} + // expected-note@-2 {{Access of the subarray 'x[5]' at index 22, while it holds only 20 'int' elements}} +} + +int ptr_to_array_arg(int (*x)[10][20]) { + return (*x)[5][22]; + // expected-warning@-1 {{Out of bound access to memory after the end of the subarray '(*x)[5]'}} + // expected-note@-2 {{Access of the subarray '(*x)[5]' at index 22, while it holds only 20 'int' elements}} +} diff --git a/clang/test/Analysis/ArrayBound/brief-tests.c b/clang/test/Analysis/ArrayBound/brief-tests.c index f4811efd8d8b6..e5a90b282bef1 100644 --- a/clang/test/Analysis/ArrayBound/brief-tests.c +++ b/clang/test/Analysis/ArrayBound/brief-tests.c @@ -6,6 +6,7 @@ // then it should be placed here. void clang_analyzer_value(int); +void clang_analyzer_eval(int); // Tests doing an out-of-bounds access after the end of an array using: // - constant integer index @@ -88,7 +89,7 @@ void test1_ptr_arith_ok2(int x) { // - constant integer size for buffer void test2(int x) { int buf[100]; - buf[-1] = 1; // expected-warning{{Out of bound access to memory}} + buf[-1] = 1; // expected-warning{{Out of bound access to 'buf' at a negative index}} } // Tests doing an out-of-bounds access before the start of an array using: @@ -118,7 +119,7 @@ void test2_ptr_arith(int x) { // - constant integer sizes for the array void test2_multi(int x) { int buf[100][100]; - buf[0][-1] = 1; // expected-warning{{Out of bound access to memory}} + buf[0][-1] = 1; // expected-warning{{Out of bound access to the subarray 'buf[0]' at a negative index}} } // Tests doing an out-of-bounds access before the start of a multi-dimensional @@ -127,7 +128,17 @@ void test2_multi(int x) { // - constant integer sizes for the array void test2_multi_b(int x) { int buf[100][100]; - buf[-1][0] = 1; // expected-warning{{Out of bound access to memory}} + buf[-1][0] = 1; // expected-warning{{Out of bound access to 'buf' at a negative index}} +} + +void test2_multi_c() { + int buf[100][100]; + buf[0][103] = 1; // expected-warning{{Out of bound access to memory after the end of the subarray 'buf[0]'}} +} + +void test2_multi_d() { + int buf[80][90][100]; + buf[0][91][0] = 1; // expected-warning {{Out of bound access to memory after the end of the subarray 'buf[0]'}} } void test2_multi_ok(int x) { @@ -138,7 +149,7 @@ void test2_multi_ok(int x) { void test3(int x) { int buf[100]; if (x < 0) - buf[x] = 1; // expected-warning{{Out of bound access to memory}} + buf[x] = 1; // expected-warning{{Out of bound access to 'buf' at a negative index}} } void test4(int x) { @@ -164,7 +175,7 @@ typedef struct { user_t *get_symbolic_user(void); char test_underflow_symbolic_2() { user_t *user = get_symbolic_user(); - return user->name[-1]; // expected-warning{{Out of bound access to memory}} + return user->name[-1]; // expected-warning{{Out of bound access to the field 'name' at a negative index}} } void test_incomplete_struct(void) { @@ -272,3 +283,67 @@ void sizeof_vla_3(int a) { y[5] = 5; // should be {{Out of bounds access}} } } + +void md_array_assumptions(int x, int y, int z) { + int array[80][90][100]; + int value = array[x][y][z]; + + clang_analyzer_eval(x < 80); // expected-warning {{TRUE}} + clang_analyzer_eval(y < 90); // expected-warning {{TRUE}} + clang_analyzer_eval(z < 100); // expected-warning {{TRUE}} + + clang_analyzer_eval(x >= 0); // expected-warning {{TRUE}} + clang_analyzer_eval(y >= 0); // expected-warning {{TRUE}} + clang_analyzer_eval(z >= 0); // expected-warning {{TRUE}} +} + +int unsigned_index_quirk_inner(unsigned char index) { + static const unsigned char array[] = {8, 8, 4, 4, 2, 2, 1}; + + if (index < 6) { + return 0; + } + + // Unsurprising + clang_analyzer_eval(index >= 6); // expected-warning {{TRUE}} + + index++; + + // Could overflow since 255 is a possible value, kind of surprising + clang_analyzer_eval(index > 6); // expected-warning {{TRUE}} expected-warning {{FALSE}} + + if (index >= 7) { + return 0; + } + + // Because of the if above, 7 is not possible. But because of the overflow, 0 + // is. + clang_analyzer_eval(index == 0); // expected-warning {{TRUE}} + + // Regression test: if we don't call `simplifySVal` so 255+1 is folded into 0, + // we get a false positive "out of bounds". + return array[index]; // no warning +} + +void custom_unreachable(); + +int get_index(char b) { + switch (b) { + case 'a': + return 0; + default: + custom_unreachable(); + } + // expected-warning@+1 {{non-void function does not return a value in all control paths}} +} + +int access_by_undefined(char b) { + char array[10] = {0}; + int i = get_index(b); + // CPP-7012, even though there is a path that does return a value, CSA keeps going + clang_analyzer_value(i); // expected-warning {{[-2147483648, 2147483647]}} expected-warning {{32s:0}} + int x = array[i]; + // `i` could be [0, 9], but `get_index` should have pruned [1, 9] out of the possible scenarios + clang_analyzer_value(i); // expected-warning {{[0, 9]}} expected-warning {{32s:0}} + return x; +} diff --git a/clang/test/Analysis/ArrayBound/cplusplus.cpp b/clang/test/Analysis/ArrayBound/cplusplus.cpp index 680fbf4817c30..8b9cb450a9e2a 100644 --- a/clang/test/Analysis/ArrayBound/cplusplus.cpp +++ b/clang/test/Analysis/ArrayBound/cplusplus.cpp @@ -1,4 +1,6 @@ -// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,security.ArrayBound -verify %s +// RUN: %clang_analyze_cc1 -std=c++11 -Wno-array-bounds -analyzer-checker=unix,core,security.ArrayBound,debug.ExprInspection -verify %s + +void clang_analyzer_value(int); // Test the interactions of `security.ArrayBound` with C++ features. @@ -96,7 +98,7 @@ void test2_ptr_arith(int x) { // of a multi-dimensional array void test2_multi(int x) { auto buf = new int[100][100]; - buf[0][-1] = 1; // expected-warning{{Out of bound access to memory}} + buf[0][-1] = 1; // expected-warning{{Out of bound access to the subarray 'buf[0]' at a negative index}} } // Tests under-indexing @@ -117,9 +119,20 @@ void test2_multi_c(int x) { // of a multi-dimensional array void test2_multi_2(int x) { auto buf = new int[100][100]; - buf[99][100] = 1; // expected-warning{{Out of bound access to memory}} + buf[99][100] = 1; // expected-warning{{Out of bound access to memory after the end of the subarray 'buf[99]'}} +} + +void test2_multi_3() { + auto buf = new int[100][100]; + buf[0][100] = 1; // expected-warning{{Out of bound access to memory after the end of the subarray 'buf[0]'}} } +void test2_multi_4() { + auto buf = new int[100][100][100]; + buf[0][101][0] = 1; // expected-warning{{Out of bound access to memory after the end of the subarray 'buf[0]'}} +} + + // Tests normal access of // a multi-dimensional array void test2_multi_ok(int x) { @@ -179,3 +192,28 @@ int test_reference_that_might_be_after_the_end(int idx) { return -1; return ref; } + +int test_type_alias() { + using Arr1 = int[10]; + using Arr2 = Arr1[20]; + Arr2 arr; + return arr[0][30]; // expected-warning {{Out of bound access to memory after the end of the subarray 'arr[0]'}} +} + +int* test_type_alias_2() { + using Arr1 = int[10]; + using Arr2 = Arr1[20]; + Arr2* arr = new Arr2[]{}; + return (*arr)[30]; // expected-warning {{Out of bound access to memory after the end of the heap area}} +} + +// Similar to `out-of-bounds.c` +// See Github ticket #39492. +int test_cast_to_unsigned(signed char x) { + int *table = new int[256]; + unsigned char y = x; + if (x >= 0) + return x; + clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }} + return table[y]; // no-warning +} diff --git a/clang/test/Analysis/ArrayBound/flexible-array-members.c b/clang/test/Analysis/ArrayBound/flexible-array-members.c new file mode 100644 index 0000000000000..06721006f8b0b --- /dev/null +++ b/clang/test/Analysis/ArrayBound/flexible-array-members.c @@ -0,0 +1,110 @@ +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound,debug.ExprInspection \ +// RUN: -fstrict-flex-arrays=0 -DSTRICT_FLEX=0 \ +// RUN: -verify %s +// +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound,debug.ExprInspection \ +// RUN: -fstrict-flex-arrays=1 -DSTRICT_FLEX=1 \ +// RUN: -verify %s +// +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound,debug.ExprInspection \ +// RUN: -fstrict-flex-arrays=2 -DSTRICT_FLEX=2 \ +// RUN: -verify %s +// +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound,debug.ExprInspection \ +// RUN: -fstrict-flex-arrays=2 -DSTRICT_FLEX=2 \ +// RUN: -DWARN_FLEXIBLE_ARRAY -analyzer-config security.ArrayBound:EnableFakeFlexibleArrayWarn=true \ +// RUN: -verify %s +// +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound,debug.ExprInspection \ +// RUN: -fstrict-flex-arrays=3 -DSTRICT_FLEX=3 \ +// RUN: -verify %s + +#include "../Inputs/system-header-simulator-for-malloc.h" + +void clang_analyzer_warnIfReached(); + +struct RealFAM { + int size; + int args[]; +}; + +int use_fam(struct RealFAM *ptr) { + int x = ptr->args[1]; + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + return x; +} + +struct FAM0 { + int size; + int args[0]; +}; + +int use_fam0(struct FAM0 *ptr) { + int x = ptr->args[1]; +#if STRICT_FLEX > 2 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'args'}} +#else +#ifdef WARN_FLEXIBLE_ARRAY + // expected-warning@-5 {{Potential out of bound access to the field 'args', which may be a 'flexible array member'}} +#endif + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +#endif + return x; +} + +struct FAM1 { + int size; + int args[1]; +}; + +int use_fam1(struct FAM1 *ptr) { + int x = ptr->args[2]; +#if STRICT_FLEX > 1 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'args'}} +#else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +#endif + return x; +} + +struct FAMN { + int size; + int args[64]; +}; + +int use_famn(struct FAMN *ptr) { + int x = ptr->args[128]; +#if STRICT_FLEX > 0 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'args'}} +#else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +#endif + return x; +} + +struct NotFAM { + int keys[1]; + int values[1]; +}; + +int use_not_fam(struct NotFAM *ptr) { + return ptr->keys[3]; // expected-warning {{Out of bound access to memory after the end of the field 'keys'}} +} + +// Pattern used in GCC +union FlexibleArrayUnion { + int args[1]; + struct { + int x, y, z; + }; +}; + +int use_union(union FlexibleArrayUnion *p) { + int x = p->args[2]; +#if STRICT_FLEX > 1 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'args'}} +#else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +#endif + return x; +} diff --git a/clang/test/Analysis/ArrayBound/verbose-tests.c b/clang/test/Analysis/ArrayBound/verbose-tests.c index e3416886d13e5..92b80483c637f 100644 --- a/clang/test/Analysis/ArrayBound/verbose-tests.c +++ b/clang/test/Analysis/ArrayBound/verbose-tests.c @@ -10,8 +10,8 @@ int TenElements[10]; void arrayUnderflow(void) { TenElements[-3] = 5; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -3}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index -3}} } int underflowWithDeref(void) { @@ -69,8 +69,8 @@ void gh86959(void) { // expected-note@+1 {{Entering loop body}} while (rng()) TenElements[getIndex()] = 10; - // expected-warning@-1 {{Out of bound access to memory preceding 'TenElements'}} - // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative index -172}} + // expected-warning@-1 {{Out of bound access to 'TenElements' at a negative index}} + // expected-note@-2 {{Access of 'TenElements' containing 10 elements at negative index -172}} } int scanf(const char *fmt, ...); @@ -240,14 +240,18 @@ struct vec { double arrayInStruct(void) { return v.elems[64]; - // expected-warning@-1 {{Out of bound access to memory after the end of 'v.elems'}} - // expected-note@-2 {{Access of 'v.elems' at index 64, while it holds only 64 'double' elements}} +#if STRICT_FLEX >= 1 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'elems'}} + // expected-note@-3 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}} +#endif } double arrayInStructPtr(struct vec *pv) { return pv->elems[64]; - // expected-warning@-1 {{Out of bound access to memory after the end of the field 'elems'}} - // expected-note@-2 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}} +#if STRICT_FLEX >= 1 + // expected-warning@-2 {{Out of bound access to memory after the end of the field 'elems'}} + // expected-note@-3 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}} +#endif } struct item { @@ -432,3 +436,37 @@ int *nothingIsCertain(int x, int y) { return mem; } + +// Should raise when the index for a given sub-array is out-of-bounds +extern int actionBufferList[200][100]; + +void recvToSubArrayIndex() { + int index = 0; + scanf("%d", &index); + // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to the 2nd argument}} + + // expected-note@+2 {{Assuming 'index' is <= 100}} + // expected-note@+1 {{Taking false branch}} + if (index > 100) { + return; + } + + actionBufferList[0][index] = '\0'; + // expected-warning@-1 {{Potential out of bound access to the subarray 'actionBufferList[0]' with tainted index}} + // expected-note@-2 {{Access of the subarray 'actionBufferList[0]' with a tainted index that may be negative or too large}} +} + + +#define LONGSTRUCT(x) struct VERYLONGPREFIXIFWEPRINTEVERYTHINGISUNREADABLE ## x + +LONGSTRUCT(Bar) { + int x; +}; + +int test_case_long_elem_name() { + LONGSTRUCT(Bar) table[10]; + return table[55].x; + // expected-warning@-1 {{Out of bound access to memory after the end of 'table'}} + // expected-note@-2 {{Access of 'table' at index 55, while it holds only 10 'struct VERYLONGPR...' elements}} +} diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 7936273415ad4..d4067ddf3bd6a 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -120,6 +120,7 @@ // CHECK-NEXT: region-store-small-array-limit = 5 // CHECK-NEXT: region-store-small-struct-limit = 2 // CHECK-NEXT: report-in-main-source-file = false +// CHECK-NEXT: security.ArrayBound:EnableFakeFlexibleArrayWarn = false // CHECK-NEXT: security.cert.env.InvalidPtr:InvalidatingGetEnv = false // CHECK-NEXT: serialize-stats = false // CHECK-NEXT: silence-checkers = ""