@@ -34,24 +34,37 @@ using namespace taint;
3434using llvm::formatv;
3535
3636namespace {
37- // / If `E` is a "clean" array subscript expression, return the type of the
38- // / accessed element. If the base of the subscript expression is modified by
39- // / pointer arithmetic (and not the beginning of a "full" memory region), this
40- // / always returns nullopt because that's the right (or the least bad) thing to
41- // / do for the diagnostic output that's relying on this .
42- static std::optional<QualType> determineElementType ( const Expr *E,
43- const CheckerContext &C) {
37+ // / If `E` is an array subscript expression with a base that is "clean" (= not
38+ // / modified by pointer arithmetic = the beginning of a memory region), return
39+ // / it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
40+ // / This helper function is used by two separate heuristics that are only valid
41+ // / in these "clean" cases .
42+ static const ArraySubscriptExpr *
43+ getAsCleanArraySubscriptExpr ( const Expr *E, const CheckerContext &C) {
4444 const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
4545 if (!ASE)
46- return std:: nullopt ;
46+ return nullptr ;
4747
4848 const MemRegion *SubscriptBaseReg = C.getSVal (ASE->getBase ()).getAsRegion ();
4949 if (!SubscriptBaseReg)
50- return std:: nullopt ;
50+ return nullptr ;
5151
5252 // The base of the subscript expression is affected by pointer arithmetics,
53- // so we want to report byte offsets instead of indices.
53+ // so we want to report byte offsets instead of indices and we don't want to
54+ // activate the "index is unsigned -> cannot be negative" shortcut.
5455 if (isa<ElementRegion>(SubscriptBaseReg->StripCasts ()))
56+ return nullptr ;
57+
58+ return ASE;
59+ }
60+
61+ // / If `E` is a "clean" array subscript expression, return the type of the
62+ // / accessed element; otherwise return std::nullopt because that's the best (or
63+ // / least bad) option for the diagnostic generation that relies on this.
64+ static std::optional<QualType> determineElementType (const Expr *E,
65+ const CheckerContext &C) {
66+ const auto *ASE = getAsCleanArraySubscriptExpr (E, C);
67+ if (!ASE)
5568 return std::nullopt ;
5669
5770 return ASE->getType ();
@@ -140,7 +153,9 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
140153 ProgramStateRef ErrorState, NonLoc Val,
141154 bool MarkTaint);
142155
143- static bool isFromCtypeMacro (const Stmt *S, ASTContext &AC);
156+ static bool isFromCtypeMacro (const Expr *E, ASTContext &AC);
157+
158+ static bool isOffsetObviouslyNonnegative (const Expr *E, CheckerContext &C);
144159
145160 static bool isIdiomaticPastTheEndPtr (const Expr *E, ProgramStateRef State,
146161 NonLoc Offset, NonLoc Limit,
@@ -587,20 +602,48 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
587602 State, ByteOffset, SVB.makeZeroArrayIndex (), SVB);
588603
589604 if (PrecedesLowerBound) {
590- // The offset may be invalid (negative)...
591- if (!WithinLowerBound) {
592- // ...and it cannot be valid (>= 0), so report an error.
593- Messages Msgs = getPrecedesMsgs (Reg, ByteOffset);
594- reportOOB (C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt );
595- return ;
605+ // The analyzer thinks that the offset may be invalid (negative)...
606+
607+ if (isOffsetObviouslyNonnegative (E, C)) {
608+ // ...but the offset is obviously non-negative (clear array subscript
609+ // with an unsigned index), so we're in a buggy situation.
610+
611+ // TODO: Currently the analyzer ignores many casts (e.g. signed ->
612+ // unsigned casts), so it can easily reach states where it will load a
613+ // signed (and negative) value from an unsigned variable. This sanity
614+ // check is a duct tape "solution" that silences most of the ugly false
615+ // positives that are caused by this buggy behavior. Note that this is
616+ // not a complete solution: this cannot silence reports where pointer
617+ // arithmetic complicates the picture and cannot ensure modeling of the
618+ // "unsigned index is positive with highest bit set" cases which are
619+ // "usurped" by the nonsense "unsigned index is negative" case.
620+ // For more information about this topic, see the umbrella ticket
621+ // https://github.com/llvm/llvm-project/issues/39492
622+ // TODO: Remove this hack once 'SymbolCast's are modeled properly.
623+
624+ if (!WithinLowerBound) {
625+ // The state is completely nonsense -- let's just sink it!
626+ C.addSink ();
627+ return ;
628+ }
629+ // Otherwise continue on the 'WithinLowerBound' branch where the
630+ // unsigned index _is_ non-negative. Don't mention this assumption as a
631+ // note tag, because it would just confuse the users!
632+ } else {
633+ if (!WithinLowerBound) {
634+ // ...and it cannot be valid (>= 0), so report an error.
635+ Messages Msgs = getPrecedesMsgs (Reg, ByteOffset);
636+ reportOOB (C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt );
637+ return ;
638+ }
639+ // ...but it can be valid as well, so the checker will (optimistically)
640+ // assume that it's valid and mention this in the note tag.
641+ SUR.recordNonNegativeAssumption ();
596642 }
597- // ...but it can be valid as well, so the checker will (optimistically)
598- // assume that it's valid and mention this in the note tag.
599- SUR.recordNonNegativeAssumption ();
600643 }
601644
602645 // Actually update the state. The "if" only fails in the extremely unlikely
603- // case when compareValueToThreshold returns {nullptr, nullptr} becasue
646+ // case when compareValueToThreshold returns {nullptr, nullptr} because
604647 // evalBinOpNN fails to evaluate the less-than operator.
605648 if (WithinLowerBound)
606649 State = WithinLowerBound;
@@ -660,7 +703,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
660703 }
661704
662705 // Actually update the state. The "if" only fails in the extremely unlikely
663- // case when compareValueToThreshold returns {nullptr, nullptr} becasue
706+ // case when compareValueToThreshold returns {nullptr, nullptr} because
664707 // evalBinOpNN fails to evaluate the less-than operator.
665708 if (WithinUpperBound)
666709 State = WithinUpperBound;
@@ -725,8 +768,8 @@ void ArrayBoundChecker::reportOOB(CheckerContext &C, ProgramStateRef ErrorState,
725768 C.emitReport (std::move (BR));
726769}
727770
728- bool ArrayBoundChecker::isFromCtypeMacro (const Stmt *S , ASTContext &ACtx) {
729- SourceLocation Loc = S ->getBeginLoc ();
771+ bool ArrayBoundChecker::isFromCtypeMacro (const Expr *E , ASTContext &ACtx) {
772+ SourceLocation Loc = E ->getBeginLoc ();
730773 if (!Loc.isMacroID ())
731774 return false ;
732775
@@ -744,6 +787,14 @@ bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
744787 (MacroName == " isupper" ) || (MacroName == " isxdigit" ));
745788}
746789
790+ bool ArrayBoundChecker::isOffsetObviouslyNonnegative (const Expr *E,
791+ CheckerContext &C) {
792+ const ArraySubscriptExpr *ASE = getAsCleanArraySubscriptExpr (E, C);
793+ if (!ASE)
794+ return false ;
795+ return ASE->getIdx ()->getType ()->isUnsignedIntegerOrEnumerationType ();
796+ }
797+
747798bool ArrayBoundChecker::isInAddressOf (const Stmt *S, ASTContext &ACtx) {
748799 ParentMapContext &ParentCtx = ACtx.getParentMapContext ();
749800 do {
0 commit comments