Skip to content

Commit e9e9ba4

Browse files
authored
[clang][analyzer] Support strlen with offset to string literal in CStringChecker (llvm#159795)
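Handle cases like `strlen(string_constant + 3)` in `CStringChecker` by returning the original string length minus the offset, provided the offset does not exceed the literal's length (otherwise the result stays unknown). Embedded null characters in the literal are not handled yet.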
1 parent 22bf331 commit e9e9ba4

File tree

2 files changed

+86
-24
lines changed

2 files changed

+86
-24
lines changed

clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,8 @@ class CStringChecker
251251
const Expr *Ex,
252252
const MemRegion *MR,
253253
bool hypothetical);
254+
static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
255+
254256
SVal getCStringLength(CheckerContext &C,
255257
ProgramStateRef &state,
256258
const Expr *Ex,
@@ -983,6 +985,21 @@ SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
983985
return strLength;
984986
}
985987

988+
const StringLiteral *
989+
CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
990+
switch (MR->getKind()) {
991+
case MemRegion::StringRegionKind:
992+
return cast<StringRegion>(MR)->getStringLiteral();
993+
case MemRegion::NonParamVarRegionKind:
994+
if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
995+
Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
996+
return dyn_cast_or_null<StringLiteral>(Decl->getInit());
997+
return nullptr;
998+
default:
999+
return nullptr;
1000+
}
1001+
}
1002+
9861003
SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
9871004
const Expr *Ex, SVal Buf,
9881005
bool hypothetical) const {
@@ -1013,30 +1030,19 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
10131030
// its length. For anything we can't figure out, just return UnknownVal.
10141031
MR = MR->StripCasts();
10151032

1016-
switch (MR->getKind()) {
1017-
case MemRegion::StringRegionKind: {
1018-
// Modifying the contents of string regions is undefined [C99 6.4.5p6],
1019-
// so we can assume that the byte length is the correct C string length.
1020-
SValBuilder &svalBuilder = C.getSValBuilder();
1021-
QualType sizeTy = svalBuilder.getContext().getSizeType();
1022-
const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1023-
return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1024-
}
1025-
case MemRegion::NonParamVarRegionKind: {
1033+
if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
10261034
// If we have a global constant with a string literal initializer,
10271035
// compute the initializer's length.
1028-
const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1029-
if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1030-
if (const Expr *Init = Decl->getInit()) {
1031-
if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1032-
SValBuilder &SvalBuilder = C.getSValBuilder();
1033-
QualType SizeTy = SvalBuilder.getContext().getSizeType();
1034-
return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1035-
}
1036-
}
1037-
}
1038-
[[fallthrough]];
1036+
// Modifying the contents of string regions is undefined [C99 6.4.5p6],
1037+
// so we can assume that the byte length is the correct C string length.
1038+
// FIXME: Embedded null characters are not handled.
1039+
SValBuilder &SVB = C.getSValBuilder();
1040+
return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
10391041
}
1042+
1043+
switch (MR->getKind()) {
1044+
case MemRegion::StringRegionKind:
1045+
case MemRegion::NonParamVarRegionKind:
10401046
case MemRegion::SymbolicRegionKind:
10411047
case MemRegion::AllocaRegionKind:
10421048
case MemRegion::ParamVarRegionKind:
@@ -1046,10 +1052,28 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
10461052
case MemRegion::CompoundLiteralRegionKind:
10471053
// FIXME: Can we track this? Is it necessary?
10481054
return UnknownVal();
1049-
case MemRegion::ElementRegionKind:
1050-
// FIXME: How can we handle this? It's not good enough to subtract the
1051-
// offset from the base string length; consider "123\x00567" and &a[5].
1055+
case MemRegion::ElementRegionKind: {
1056+
// If an offset into the string literal is used, use the original length
1057+
// minus the offset.
1058+
// FIXME: Embedded null characters are not handled.
1059+
const ElementRegion *ER = cast<ElementRegion>(MR);
1060+
const SubRegion *SuperReg =
1061+
cast<SubRegion>(ER->getSuperRegion()->StripCasts());
1062+
const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
1063+
if (!StrLit)
1064+
return UnknownVal();
1065+
SValBuilder &SVB = C.getSValBuilder();
1066+
NonLoc Idx = ER->getIndex();
1067+
QualType SizeTy = SVB.getContext().getSizeType();
1068+
NonLoc LengthVal =
1069+
SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>();
1070+
if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
1071+
SVB.getConditionType())
1072+
.castAs<DefinedOrUnknownSVal>(),
1073+
true))
1074+
return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy);
10521075
return UnknownVal();
1076+
}
10531077
default:
10541078
// Other regions (mostly non-data) can't have a reliable C string length.
10551079
// In this case, an error is emitted and UndefinedVal is returned.
@@ -1074,6 +1098,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
10741098

10751099
const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
10761100
ProgramStateRef &state, const Expr *expr, SVal val) const {
1101+
// FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
10771102

10781103
// Get the memory region pointed to by the val.
10791104
const MemRegion *bufRegion = val.getAsRegion();

clang/test/Analysis/string.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,21 @@ size_t strlen(const char *s);
8282

8383
void strlen_constant0(void) {
8484
clang_analyzer_eval(strlen("123") == 3); // expected-warning{{TRUE}}
85+
clang_analyzer_eval(strlen(&("123"[1])) == 2); // expected-warning{{TRUE}}
8586
}
8687

8788
void strlen_constant1(void) {
8889
const char *a = "123";
8990
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
91+
clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{TRUE}}
92+
clang_analyzer_eval(strlen(a + 3) == 0); // expected-warning{{TRUE}}
93+
clang_analyzer_eval(strlen(a + 4)); // expected-warning{{UNKNOWN}}
9094
}
9195

9296
void strlen_constant2(char x) {
9397
char a[] = "123";
9498
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
99+
clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{UNKNOWN}}
95100

96101
a[0] = x;
97102
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{UNKNOWN}}
@@ -105,10 +110,12 @@ char global_non_const_arr[] = "op";
105110

106111
void strlen_global_constant_ptr(void) {
107112
clang_analyzer_eval(strlen(global_str_ptr) == 4); // expected-warning{{TRUE}}
113+
clang_analyzer_eval(strlen(global_str_ptr + 1) == 3); // expected-warning{{TRUE}}
108114
}
109115

110116
void strlen_global_constant_arr(void) {
111117
clang_analyzer_eval(strlen(global_str_arr) == 4); // expected-warning{{TRUE}}
118+
clang_analyzer_eval(strlen(global_str_arr + 1) == 3); // expected-warning{{TRUE}}
112119
}
113120

114121
void strlen_global_non_const_ptr(void) {
@@ -235,6 +242,17 @@ void testStrlenCallee(void) {
235242
clang_analyzer_eval(lenBefore == lenAfter); // expected-warning{{UNKNOWN}}
236243
}
237244

245+
void strlen_symbolic_offset(unsigned x) {
246+
const char *str = "abcd";
247+
if (x < 1 || x > 3)
248+
return;
249+
// FIXME: these should be TRUE
250+
clang_analyzer_eval(strlen(str + x) >= 1); // expected-warning{{UNKNOWN}}
251+
clang_analyzer_eval(strlen(str + x) <= 3); // expected-warning{{UNKNOWN}}
252+
if (x != 1)
253+
return;
254+
clang_analyzer_eval(strlen(str + x) == 3); // expected-warning{{TRUE}}
255+
}
238256

239257
//===----------------------------------------------------------------------===
240258
// strnlen()
@@ -244,32 +262,38 @@ size_t strnlen(const char *s, size_t maxlen);
244262

245263
void strnlen_constant0(void) {
246264
clang_analyzer_eval(strnlen("123", 10) == 3); // expected-warning{{TRUE}}
265+
clang_analyzer_eval(strnlen(&("123"[1]), 10) == 2); // expected-warning{{TRUE}}
247266
}
248267

249268
void strnlen_constant1(void) {
250269
const char *a = "123";
251270
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
271+
clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{TRUE}}
252272
}
253273

254274
void strnlen_constant2(char x) {
255275
char a[] = "123";
256276
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
277+
clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{UNKNOWN}}
257278
a[0] = x;
258279
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{UNKNOWN}}
259280
}
260281

261282
void strnlen_constant4(void) {
262283
clang_analyzer_eval(strnlen("123456", 3) == 3); // expected-warning{{TRUE}}
284+
clang_analyzer_eval(strnlen(&("123456"[1]), 3) == 3); // expected-warning{{TRUE}}
263285
}
264286

265287
void strnlen_constant5(void) {
266288
const char *a = "123456";
267289
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
290+
clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{TRUE}}
268291
}
269292

270293
void strnlen_constant6(char x) {
271294
char a[] = "123456";
272295
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
296+
clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{UNKNOWN}}
273297
a[0] = x;
274298
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{UNKNOWN}}
275299
}
@@ -326,6 +350,19 @@ void strnlen_at_actual(size_t limit) {
326350
}
327351
}
328352

353+
void strnlen_at_actual_1(size_t limit) {
354+
const char *str = "abc";
355+
size_t len = strnlen(str + 1, limit);
356+
clang_analyzer_eval(len <= 2); // expected-warning{{TRUE}}
357+
// This is due to eager assertion in strnlen.
358+
if (limit == 0) {
359+
clang_analyzer_eval(len == 0); // expected-warning{{TRUE}}
360+
} else {
361+
clang_analyzer_eval(len == 2); // expected-warning{{UNKNOWN}}
362+
clang_analyzer_eval(len < 2); // expected-warning{{UNKNOWN}}
363+
}
364+
}
365+
329366
//===----------------------------------------------------------------------===
330367
// strcpy()
331368
//===----------------------------------------------------------------------===

0 commit comments

Comments
 (0)