-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[clang][analyzer] Improve checker 'unix.cstring.NotNullTerminated' #149106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Check for non-presence of terminating zero in simple cases.
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-static-analyzer-1 Author: Balázs Kéri (balazske) ChangesCheck for non-presence of terminating zero in simple cases. Full diff: https://github.com/llvm/llvm-project/pull/149106.diff 4 Files Affected:
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 26c5028e04955..3d6e0e7d73756 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2105,10 +2105,11 @@ unix.cstring.NotNullTerminated (C)
Check for arguments which are not null-terminated strings;
applies to the ``strlen``, ``strcpy``, ``strcat``, ``strcmp`` family of functions.
-Only very fundamental cases are detected where the passed memory block is
-absolutely different from a null-terminated string. This checker does not
-find if a memory buffer is passed where the terminating zero character
-is missing.
+The checker can detect if the passed memory block is not a string object at all,
+like address of a label or function. Additionally it can detect simple cases
+when the terminating zero is missing, for example if the data was initialized
+as array without terminating zero, or the terminating zero was overwritten by
+an assignment (with a value that is provably not zero).
.. code-block:: c
@@ -2121,6 +2122,17 @@ is missing.
int l = strlen((char *)&&label); // warn
}
+ int test3() {
+ char buf[4] = {1, 2, 3, 4};
+ return strlen(buf); // warn
+ }
+
+ int test4() {
+ char buf[] = "abcd";
+ buf[4] = 'e';
+ return strlen(buf); // warn
+ }
+
.. _unix-cstring-NullArg:
unix.cstring.NullArg (C)
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 31cb150892a5d..4f14853931c54 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -78,12 +78,9 @@ static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
: Ctx.WideCharTy);
}
-class CStringChecker : public Checker< eval::Call,
- check::PreStmt<DeclStmt>,
- check::LiveSymbols,
- check::DeadSymbols,
- check::RegionChanges
- > {
+class CStringChecker
+ : public Checker<eval::Call, check::PreStmt<DeclStmt>, check::LiveSymbols,
+ check::DeadSymbols, check::RegionChanges> {
mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
@@ -324,11 +321,14 @@ class CStringChecker : public Checker< eval::Call,
SizeArgExpr Size, AnyArgExpr First,
AnyArgExpr Second,
CharKind CK = CharKind::Regular) const;
+ // Check for presence of terminating zero in a string argument.
+ ProgramStateRef checkNullTerminated(CheckerContext &C, ProgramStateRef State,
+ AnyArgExpr Arg, SVal ArgVal) const;
+
void emitOverlapBug(CheckerContext &C,
ProgramStateRef state,
const Stmt *First,
const Stmt *Second) const;
-
void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
StringRef WarningMsg) const;
void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
@@ -959,6 +959,66 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
return state;
}
+ProgramStateRef CStringChecker::checkNullTerminated(CheckerContext &C,
+ ProgramStateRef State,
+ AnyArgExpr Arg,
+ SVal ArgVal) const {
+ if (!State)
+ return nullptr;
+
+ if (!Filter.CheckCStringNotNullTerm)
+ return State;
+
+ SValBuilder &SVB = C.getSValBuilder();
+
+ auto TryGetTypedValueR = [](const MemRegion *R) -> const TypedValueRegion * {
+ if (!R)
+ return nullptr;
+ return R->StripCasts()->getAs<TypedValueRegion>();
+ };
+
+ const TypedValueRegion *StrReg = TryGetTypedValueR(ArgVal.getAsRegion());
+ if (!StrReg)
+ return State;
+ int Offset = 0;
+ if (const auto *ElemReg = StrReg->getAs<ElementRegion>()) {
+ RegionRawOffset ROffset = ElemReg->getAsArrayOffset();
+ StrReg = TryGetTypedValueR(ROffset.getRegion());
+ if (!StrReg)
+ return State;
+ Offset = ROffset.getOffset().getQuantity();
+ }
+
+ DefinedOrUnknownSVal Extent = getDynamicExtent(State, StrReg, SVB);
+ if (Extent.isUnknown())
+ return State;
+ const llvm::APSInt *KnownExtent = SVB.getKnownValue(State, Extent);
+ if (!KnownExtent)
+ return State;
+ MemRegionManager &RegionM = SVB.getRegionManager();
+ int RegionExtent = KnownExtent->getExtValue();
+ if (Offset >= RegionExtent)
+ return State;
+ for (int I = Offset; I < RegionExtent; ++I) {
+ const ElementRegion *ElemR = RegionM.getElementRegion(C.getASTContext().CharTy, SVB.makeArrayIndex(I), StrReg, C.getASTContext());
+ SVal ElemVal = State->getSValAsScalarOrLoc(ElemR);
+ if (!State->isNonNull(ElemVal).isConstrainedTrue())
+ // We have here a lower bound for the string length.
+ // Try to update the CStringLength value?
+ return State;
+ }
+
+ SmallString<80> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ assert(CurrentFunctionDescription);
+ OS << "Terminating zero missing from string passed as "
+ << (Arg.ArgumentIndex + 1) << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1)
+ << " argument to " << CurrentFunctionDescription;
+
+ emitNotCStringBug(C, State, Arg.Expression, OS.str());
+ return nullptr;
+}
+
ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
const MemRegion *MR,
SVal strLength) {
@@ -1718,6 +1778,9 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C,
SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
state = checkNonNull(C, state, Arg, ArgVal);
+ if (!IsStrnlen)
+ state = checkNullTerminated(C, state, Arg, ArgVal);
+
if (!state)
return;
@@ -1882,6 +1945,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
SVal DstVal = state->getSVal(Dst.Expression, LCtx);
state = checkNonNull(C, state, Dst, DstVal);
+ // Look for terminating zero.
+ if (!IsBounded || appendK != ConcatFnKind::none)
+ state = checkNullTerminated(C, state, Dst, DstVal);
if (!state)
return;
@@ -1889,6 +1955,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
state = checkNonNull(C, state, srcExpr, srcVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, srcExpr, srcVal);
if (!state)
return;
@@ -2340,6 +2409,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
AnyArgExpr Left = {Call.getArgExpr(0), 0};
SVal LeftVal = state->getSVal(Left.Expression, LCtx);
state = checkNonNull(C, state, Left, LeftVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, Left, LeftVal);
if (!state)
return;
@@ -2347,6 +2419,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
AnyArgExpr Right = {Call.getArgExpr(1), 1};
SVal RightVal = state->getSVal(Right.Expression, LCtx);
state = checkNonNull(C, state, Right, RightVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, Right, RightVal);
if (!state)
return;
@@ -2478,6 +2553,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
// a null string).
SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
+ // Look for terminating zero.
+ State = checkNullTerminated(C, State, SearchStrPtr, SearchStrVal);
if (!State)
return;
@@ -2485,6 +2562,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
State = checkNonNull(C, State, DelimStr, DelimStrVal);
+ // Look for terminating zero.
+ State = checkNullTerminated(C, State, DelimStr, DelimStrVal);
if (!State)
return;
@@ -2675,6 +2754,7 @@ void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
// This is an invalid call, let's just ignore it.
return;
}
+ // FIXME: Why not check for null pointer (and null-terminated string)?
const auto AllArguments =
llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h
index fadc09f65d536..3a0cf75cfb9ff 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -80,6 +80,8 @@ size_t strlen(const char *);
char *strcpy(char *restrict, const char *restrict);
char *strncpy(char *restrict dst, const char *restrict src, size_t n);
+char *strcat(char *restrict, const char *restrict);
+char *strncat(char *restrict, const char *restrict, size_t);
char *strsep(char **restrict stringp, const char *restrict delim);
void *memcpy(void *restrict dst, const void *restrict src, size_t n);
void *memset(void *s, int c, size_t n);
diff --git a/clang/test/Analysis/cstring-notnullterm.c b/clang/test/Analysis/cstring-notnullterm.c
new file mode 100644
index 0000000000000..8d586444fc835
--- /dev/null
+++ b/clang/test/Analysis/cstring-notnullterm.c
@@ -0,0 +1,158 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.NotNullTerminated -verify %s
+
+#include "Inputs/system-header-simulator.h"
+
+extern void *malloc(size_t);
+
+size_t test_addr_fn() {
+ return strlen((char *)&malloc); // expected-warning{{Argument to string length function is the address of the function 'malloc', which is not a null-terminated string}}
+}
+
+size_t test_addr_label() {
+lab:
+ return strlen((char *)&&lab); // expected-warning{{Argument to string length function is the address of the label 'lab', which is not a null-terminated string}}
+}
+
+size_t test_init_compound(int i) {
+ char src1[6] = {1,2,3,4,5,6};
+ char src2[6] = {1,2,3,0,5,6};
+ switch (i) {
+ case 1:
+ return strlen(src1); // expected-warning{{Terminating zero missing from string passed as 1st argument to string length function}}
+ case 2:
+ return strlen(src1 + 1); // expected-warning{{Terminating zero missing from string}}
+ case 3:
+ return strlen(src2);
+ case 4:
+ return strlen(src2 + 4); // expected-warning{{Terminating zero missing from string}}
+ case 5:
+ return strlen(src2 + 3);
+ }
+ src1[5] = 0;
+ return strlen(src1);
+}
+
+size_t test_init_literal(int i) {
+ char src1[] = "abcdef";
+ int l = strlen(src1);
+ src1[6] = '.';
+ src1[3] = 0;
+ switch (i) {
+ case 1:
+ return strlen(src1);
+ case 2:
+ return strlen(src1 + 4); // expected-warning{{Terminating zero missing from string}}
+ }
+ return l;
+}
+
+size_t test_init_assign(int i, char a) {
+ char src[6];
+ src[1] = '1';
+ src[2] = '2';
+ src[4] = '4';
+ src[5] = '5';
+
+ switch (i) {
+ case 0:
+ return strlen(src);
+ case 1:
+ return strlen(src + 1);
+ case 2:
+ return strlen(src + 2);
+ case 3:
+ return strlen(src + 3);
+ case 4:
+ return strlen(src + 4); // expected-warning{{Terminating zero missing from string}}
+ }
+ src[5] = a;
+ size_t l = strlen(src + 4);
+ src[5] = 0;
+ l += strlen(src + 4);
+ src[5] = '5';
+ return l + strlen(src + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_assign1() {
+ char str1[5] = {'0','1','2','3','4'};
+ char str2[5];
+ str2[0] = str1[0];
+ str2[1] = str1[1];
+ str2[4] = str1[4];
+ size_t l = strlen(str2);
+ return l + strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_assign2() {
+ char str1[5] = {1,2,3,4,5};
+ char str2[5];
+ str2[0] = str1[0];
+ str2[4] = str2[0];
+ return strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+void test_ignore(char *dst) {
+ char str1[5] = {1,2,3,4,5};
+ strncpy(dst, str1, 5);
+ strcpy(dst, str1); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_malloc() {
+ char *buf = (char *)malloc(4);
+ if (!buf)
+ return 0;
+ buf[3] = 'a';
+ return strlen(buf);
+}
+
+extern void f_ext(char *);
+char *g_buf = 0;
+
+size_t test_escape1() {
+ char buf[4] = {1,2,3,4};
+ f_ext(buf);
+ return strlen(buf);
+}
+
+size_t test_escape2(char *x) {
+ char buf[4] = {1,2,3,4};
+ g_buf = buf;
+ f_ext(x);
+ return strlen(buf);
+}
+
+size_t test_escape3() {
+ char buf[4] = {1,2,3,4};
+ f_ext(buf + 3);
+ return strlen(buf);
+}
+
+void test_str_fn(int i, char *dst) {
+ char buf[] = {1, 2, 3};
+ switch (i) {
+ case 1:
+ strcpy(buf, "aa"); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 2:
+ strcpy(dst, buf); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 3:
+ strncpy(buf, "aa", 3);
+ break;
+ case 4:
+ strncpy(dst, buf, 3);
+ break;
+ case 5:
+ strcat(buf, "aa"); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 6:
+ strcat(dst, buf); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 7:
+ strncat(buf, "aa", 3); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 8:
+ strncat(dst, buf, 3);
+ break;
+ }
+}
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions c,cpp,h -- clang/test/Analysis/cstring-notnullterm.c clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp clang/test/Analysis/Inputs/system-header-simulator.hView the diff from clang-format here.diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 2761c68e0..345eb4a06 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -325,10 +325,8 @@ public:
ProgramStateRef checkNullTerminated(CheckerContext &C, ProgramStateRef State,
AnyArgExpr Arg, SVal ArgVal) const;
- void emitOverlapBug(CheckerContext &C,
- ProgramStateRef state,
- const Stmt *First,
- const Stmt *Second) const;
+ void emitOverlapBug(CheckerContext &C, ProgramStateRef state,
+ const Stmt *First, const Stmt *Second) const;
void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
StringRef WarningMsg) const;
void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
|
|
I frequently see you and @NagyDonat fighting clang-format. Let me share what I use in my workflow. {
"emeraldwalk.runonsave": {
"commands": [
{
"match": "\\.(c|cpp|h|hpp)$",
"isAsync": true,
"cmd": "bash -c \"export PATH=\\\"/myclanginstall/clang-18/bin:$PATH\\\"; git clang-format --force\""
}
]
}
}After every time I hit "save", it reshuffles the code that I touch, but nothing else. Its awesome. Let me know if you found this useful. |
| for (int I = Offset; I < RegionExtent; ++I) { | ||
| const ElementRegion *ElemR = RegionM.getElementRegion( | ||
| C.getASTContext().CharTy, SVB.makeArrayIndex(I), StrReg, | ||
| C.getASTContext()); | ||
| SVal ElemVal = State->getSValAsScalarOrLoc(ElemR); | ||
| if (!State->isNonNull(ElemVal).isConstrainedTrue()) | ||
| // We have here a lower bound for the string length. | ||
| // Try to update the CStringLength value? | ||
| return State; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can see the code now for the approach discussed at #146664 (comment). Let's continue the discussion there.
Check for non-presence of terminating zero in simple cases.