Skip to content

Commit 22357fe

Browse files
authored
[analyzer] Avoid unnecessary super region invalidation in CStringChecker (#146212)
Bounded string functions takes smallest of two values as it's copy size (`amountCopied` variable in `evalStrcpyCommon`), and it's used to decided whether this operation will cause out-of-bound access and invalidate it's super region if it does. for `strlcat`: `amountCopied = min (size - dstLen - 1 , srcLen)` for others: `amountCopied = min (srcLen, size)` Currently when one of two values is unknown or `SValBuilder` can't decide which one is smaller, `amountCopied` will remain `UnknownVal`, which will invalidate copy destination's super region unconditionally. This patch add check to see if one of these two values is definitely in-bound, if so `amountCopied` has to be in-bound too, because it‘s less than or equal to them, we can avoid the invalidation of super region and some related false positives in this situation. Note: This patch uses `size` as an approximation of `size - dstLen - 1` in `strlcat` case because currently analyzer doesn't handle complex expressions like this very well. Closes #143807.
1 parent 1113224 commit 22357fe

File tree

2 files changed

+177
-3
lines changed

2 files changed

+177
-3
lines changed

clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2223,16 +2223,58 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
22232223
Result = lastElement;
22242224
}
22252225

2226+
// For bounded method, amountCopied take the minimum of two values,
2227+
// for ConcatFnKind::strlcat:
2228+
// amountCopied = min (size - dstLen - 1 , srcLen)
2229+
// for others:
2230+
// amountCopied = min (srcLen, size)
2231+
// So even if we don't know about amountCopied, as long as one of them will
2232+
// not cause an out-of-bound access, the whole function's operation will not
2233+
// too, that will avoid invalidating the superRegion of data member in that
2234+
// situation.
2235+
bool CouldAccessOutOfBound = true;
2236+
if (IsBounded && amountCopied.isUnknown()) {
2237+
auto CouldAccessOutOfBoundForSVal =
2238+
[&](std::optional<NonLoc> Val) -> bool {
2239+
if (!Val)
2240+
return true;
2241+
return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2242+
Dst.Expression->getType(), *Val,
2243+
C.getASTContext().getSizeType());
2244+
};
2245+
2246+
CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2247+
2248+
if (CouldAccessOutOfBound) {
2249+
// Get the max number of characters to copy.
2250+
const Expr *LenExpr = Call.getArgExpr(2);
2251+
SVal LenVal = state->getSVal(LenExpr, LCtx);
2252+
2253+
// Protect against misdeclared strncpy().
2254+
LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2255+
2256+
// Because analyzer doesn't handle expressions like `size -
2257+
// dstLen - 1` very well, we roughly use `size` for
2258+
// ConcatFnKind::strlcat here, same with other concat kinds.
2259+
CouldAccessOutOfBound =
2260+
CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2261+
}
2262+
}
2263+
22262264
// Invalidate the destination (regular invalidation without pointer-escaping
22272265
// the address of the top-level region). This must happen before we set the
22282266
// C string length because invalidation will clear the length.
22292267
// FIXME: Even if we can't perfectly model the copy, we should see if we
22302268
// can use LazyCompoundVals to copy the source values into the destination.
22312269
// This would probably remove any existing bindings past the end of the
22322270
// string, but that's still an improvement over blank invalidation.
2233-
state = invalidateDestinationBufferBySize(
2234-
C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2235-
amountCopied, C.getASTContext().getSizeType());
2271+
if (CouldAccessOutOfBound)
2272+
state = invalidateDestinationBufferBySize(
2273+
C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2274+
amountCopied, C.getASTContext().getSizeType());
2275+
else
2276+
state = invalidateDestinationBufferNeverOverflows(
2277+
C, state, Call.getCFGElementRef(), *dstRegVal);
22362278

22372279
// Invalidate the source (const-invalidation without const-pointer-escaping
22382280
// the address of the top-level region).
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -verify %s
2+
3+
typedef decltype(sizeof(int)) size_t;
4+
void clang_analyzer_eval(bool);
5+
6+
char *strncpy(char *dest, const char *src, size_t x);
7+
8+
constexpr int initB = 100;
9+
struct Base {
10+
int b;
11+
Base(): b(initB) {}
12+
};
13+
14+
// issue 143807
15+
struct strncpyTestClass: public Base {
16+
int *m_ptr;
17+
char m_buff[1000];
18+
19+
void KnownLen(char *src) {
20+
m_ptr = new int;
21+
strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
22+
delete m_ptr; // no warning
23+
}
24+
25+
void KnownSrcLen(size_t n) {
26+
m_ptr = new int;
27+
strncpy(m_buff, "xyz", n); // known src size but unknown len
28+
delete m_ptr; // no warning
29+
}
30+
};
31+
32+
void strncpyTest(char *src, size_t n) {
33+
strncpyTestClass rep;
34+
rep.KnownLen(src);
35+
rep.KnownSrcLen(n);
36+
clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
37+
}
38+
39+
size_t strlcpy(char *dest, const char *src, size_t size);
40+
41+
struct strlcpyTestClass: public Base {
42+
int *m_ptr;
43+
char m_buff[1000];
44+
45+
void KnownLen(char *src) {
46+
m_ptr = new int;
47+
strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
48+
delete m_ptr; // no warning
49+
}
50+
51+
void KnownSrcLen(size_t n) {
52+
m_ptr = new int;
53+
strlcpy(m_buff, "xyz", n); // known src size but unknown len
54+
delete m_ptr; // no warning
55+
}
56+
};
57+
58+
void strlcpyTest(char *src, size_t n) {
59+
strlcpyTestClass rep;
60+
rep.KnownLen(src);
61+
rep.KnownSrcLen(n);
62+
clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
63+
}
64+
65+
char *strncat(char *s1, const char *s2, size_t n);
66+
67+
struct strncatTestClass: public Base {
68+
int *m_ptr;
69+
char m_buff[1000];
70+
71+
void KnownLen(char *src) {
72+
m_ptr = new int;
73+
strncat(m_buff, src, sizeof(m_buff) - 1); // known len but unknown src size
74+
delete m_ptr; // no warning
75+
}
76+
77+
void KnownSrcLen(size_t n) {
78+
m_ptr = new int;
79+
strncat(m_buff, "xyz", n); // known src size but unknown len
80+
delete m_ptr; // no warning
81+
}
82+
};
83+
84+
void strncatTest(char *src, size_t n) {
85+
strncatTestClass rep;
86+
rep.KnownLen(src);
87+
rep.KnownSrcLen(n);
88+
clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
89+
}
90+
91+
struct strncatReportOutOfBoundTestClass {
92+
int *m_ptr;
93+
char m_buff[1000];
94+
95+
void KnownLen(char *src) {
96+
m_ptr = new int;
97+
// expected-warning@+1{{String concatenation function overflows the destination buffer}}
98+
strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
99+
delete m_ptr; // no warning
100+
}
101+
};
102+
103+
void strncatReportOutOfBoundTest(char *src, size_t n) {
104+
strncatReportOutOfBoundTestClass rep;
105+
rep.KnownLen(src);
106+
}
107+
108+
size_t strlcat(char *dst, const char *src, size_t size);
109+
110+
struct strlcatTestClass: public Base {
111+
int *m_ptr;
112+
char m_buff[1000];
113+
114+
void KnownLen(char *src) {
115+
m_ptr = new int;
116+
strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
117+
delete m_ptr; // no warning
118+
}
119+
120+
void KnownSrcLen(size_t n) {
121+
m_ptr = new int;
122+
strlcat(m_buff, "xyz", n); // known src size but unknown len
123+
delete m_ptr; // no warning
124+
}
125+
};
126+
127+
void strlcatTest(char *src, size_t n) {
128+
strlcatTestClass rep;
129+
rep.KnownLen(src);
130+
rep.KnownSrcLen(n);
131+
clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
132+
}

0 commit comments

Comments
 (0)