Skip to content

Commit b642e8b

Browse files
authored
[analyzer] Improve messaging in security.VAList (#157846)
Previously the checker `security.VAList` only tracked the set of the inintialized `va_list` objects; this commit replaces this with a mapping that can distinguish the "uninitialized" `va_list` objects from the "already released" ones. Moreover, a new "unknown" state is introduced to replace the slightly hacky solutions that checked the `Symbolic` nature of the region. In addition to sligthly improving the messages, this commit also prepares the ground for a follow-up change that would introduce an "indeterminate" state (which needs `va_end` but cannot be otherwise used) to model the requirements of SEI CERT rule MSC39-C, which states: > The va_list may be passed as an argument to another function, but > calling va_arg() within that function causes the va_list to have an > indeterminate value in the calling function. As a result, attempting > to read variable arguments without reinitializing the va_list can have > unexpected behavior.
1 parent 3270d98 commit b642e8b

File tree

4 files changed

+177
-103
lines changed

4 files changed

+177
-103
lines changed

clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp

Lines changed: 136 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,36 @@
1818
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
1919
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
2020
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21+
#include "llvm/Support/FormatVariadic.h"
2122

2223
using namespace clang;
2324
using namespace ento;
25+
using llvm::formatv;
2426

25-
REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *)
27+
namespace {
28+
enum class VAListState {
29+
Uninitialized,
30+
Unknown,
31+
Initialized,
32+
Released,
33+
};
34+
35+
constexpr llvm::StringLiteral StateNames[] = {
36+
"uninitialized", "unknown", "initialized", "already released"};
37+
} // end anonymous namespace
38+
39+
static StringRef describeState(const VAListState S) {
40+
return StateNames[static_cast<int>(S)];
41+
}
42+
43+
REGISTER_MAP_WITH_PROGRAMSTATE(VAListStateMap, const MemRegion *, VAListState)
44+
45+
static VAListState getVAListState(ProgramStateRef State, const MemRegion *Reg) {
46+
if (const VAListState *Res = State->get<VAListStateMap>(Reg))
47+
return *Res;
48+
return Reg->getSymbolicBase() ? VAListState::Unknown
49+
: VAListState::Uninitialized;
50+
}
2651

2752
namespace {
2853
typedef SmallVector<const MemRegion *, 2> RegionVector;
@@ -48,7 +73,7 @@ class VAListChecker : public Checker<check::PreCall, check::PreStmt<VAArgExpr>,
4873

4974
private:
5075
const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr,
51-
bool &IsSymbolic, CheckerContext &C) const;
76+
CheckerContext &C) const;
5277
const ExplodedNode *getStartCallSite(const ExplodedNode *N,
5378
const MemRegion *Reg) const;
5479

@@ -57,8 +82,8 @@ class VAListChecker : public Checker<check::PreCall, check::PreStmt<VAArgExpr>,
5782
void reportLeaked(const RegionVector &Leaked, StringRef Msg1, StringRef Msg2,
5883
CheckerContext &C, ExplodedNode *N) const;
5984

60-
void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
61-
bool IsCopy) const;
85+
void checkVAListStartCall(const CallEvent &Call, CheckerContext &C) const;
86+
void checkVAListCopyCall(const CallEvent &Call, CheckerContext &C) const;
6287
void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const;
6388

6489
class VAListBugVisitor : public BugReporterVisitor {
@@ -118,41 +143,35 @@ const CallDescription VAListChecker::VaStart(CDM::CLibrary,
118143
void VAListChecker::checkPreCall(const CallEvent &Call,
119144
CheckerContext &C) const {
120145
if (VaStart.matches(Call))
121-
checkVAListStartCall(Call, C, false);
146+
checkVAListStartCall(Call, C);
122147
else if (VaCopy.matches(Call))
123-
checkVAListStartCall(Call, C, true);
148+
checkVAListCopyCall(Call, C);
124149
else if (VaEnd.matches(Call))
125150
checkVAListEndCall(Call, C);
126151
else {
127152
for (auto FuncInfo : VAListAccepters) {
128153
if (!FuncInfo.Func.matches(Call))
129154
continue;
130-
bool Symbolic;
131155
const MemRegion *VAList =
132156
getVAListAsRegion(Call.getArgSVal(FuncInfo.ParamIndex),
133-
Call.getArgExpr(FuncInfo.ParamIndex), Symbolic, C);
157+
Call.getArgExpr(FuncInfo.ParamIndex), C);
134158
if (!VAList)
135159
return;
160+
VAListState S = getVAListState(C.getState(), VAList);
136161

137-
if (C.getState()->contains<InitializedVALists>(VAList))
138-
return;
139-
140-
// We did not see va_start call, but the source of the region is unknown.
141-
// Be conservative and assume the best.
142-
if (Symbolic)
162+
if (S == VAListState::Initialized || S == VAListState::Unknown)
143163
return;
144164

145-
SmallString<80> Errmsg("Function '");
146-
Errmsg += FuncInfo.Func.getFunctionName();
147-
Errmsg += "' is called with an uninitialized va_list argument";
148-
reportUninitializedAccess(VAList, Errmsg.c_str(), C);
165+
std::string ErrMsg =
166+
formatv("Function '{0}' is called with an {1} va_list argument",
167+
FuncInfo.Func.getFunctionName(), describeState(S));
168+
reportUninitializedAccess(VAList, ErrMsg, C);
149169
break;
150170
}
151171
}
152172
}
153173

154174
const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E,
155-
bool &IsSymbolic,
156175
CheckerContext &C) const {
157176
const MemRegion *Reg = SV.getAsRegion();
158177
if (!Reg)
@@ -168,7 +187,6 @@ const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E,
168187
if (isa<ParmVarDecl>(DeclReg->getDecl()))
169188
Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion();
170189
}
171-
IsSymbolic = Reg && Reg->getBaseRegion()->getAs<SymbolicRegion>();
172190
// Some VarRegion based VA lists reach here as ElementRegions.
173191
const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg);
174192
return (EReg && VAListModelledAsArray) ? EReg->getSuperRegion() : Reg;
@@ -178,52 +196,53 @@ void VAListChecker::checkPreStmt(const VAArgExpr *VAA,
178196
CheckerContext &C) const {
179197
ProgramStateRef State = C.getState();
180198
const Expr *ArgExpr = VAA->getSubExpr();
181-
SVal VAListSVal = C.getSVal(ArgExpr);
182-
bool Symbolic;
183-
const MemRegion *VAList = getVAListAsRegion(VAListSVal, ArgExpr, Symbolic, C);
199+
const MemRegion *VAList = getVAListAsRegion(C.getSVal(ArgExpr), ArgExpr, C);
184200
if (!VAList)
185201
return;
186-
if (Symbolic)
202+
VAListState S = getVAListState(C.getState(), VAList);
203+
if (S == VAListState::Initialized || S == VAListState::Unknown)
187204
return;
188-
if (!State->contains<InitializedVALists>(VAList))
189-
reportUninitializedAccess(
190-
VAList, "va_arg() is called on an uninitialized va_list", C);
205+
206+
std::string ErrMsg =
207+
formatv("va_arg() is called on an {0} va_list", describeState(S));
208+
reportUninitializedAccess(VAList, ErrMsg, C);
191209
}
192210

193211
void VAListChecker::checkDeadSymbols(SymbolReaper &SR,
194212
CheckerContext &C) const {
195213
ProgramStateRef State = C.getState();
196-
InitializedVAListsTy Tracked = State->get<InitializedVALists>();
214+
VAListStateMapTy Tracked = State->get<VAListStateMap>();
197215
RegionVector Leaked;
198-
for (const MemRegion *Reg : Tracked) {
216+
for (const auto &[Reg, S] : Tracked) {
199217
if (SR.isLiveRegion(Reg))
200218
continue;
201-
Leaked.push_back(Reg);
202-
State = State->remove<InitializedVALists>(Reg);
219+
if (S == VAListState::Initialized)
220+
Leaked.push_back(Reg);
221+
State = State->remove<VAListStateMap>(Reg);
203222
}
204-
if (ExplodedNode *N = C.addTransition(State))
223+
if (ExplodedNode *N = C.addTransition(State)) {
205224
reportLeaked(Leaked, "Initialized va_list", " is leaked", C, N);
225+
}
206226
}
207227

208228
// This function traverses the exploded graph backwards and finds the node where
209-
// the va_list is initialized. That node is used for uniquing the bug paths.
210-
// It is not likely that there are several different va_lists that belongs to
211-
// different stack frames, so that case is not yet handled.
229+
// the va_list becomes initialized. That node is used for uniquing the bug
230+
// paths. It is not likely that there are several different va_lists that
231+
// belongs to different stack frames, so that case is not yet handled.
212232
const ExplodedNode *
213233
VAListChecker::getStartCallSite(const ExplodedNode *N,
214234
const MemRegion *Reg) const {
215235
const LocationContext *LeakContext = N->getLocationContext();
216236
const ExplodedNode *StartCallNode = N;
217237

218-
bool FoundInitializedState = false;
238+
bool SeenInitializedState = false;
219239

220240
while (N) {
221-
ProgramStateRef State = N->getState();
222-
if (!State->contains<InitializedVALists>(Reg)) {
223-
if (FoundInitializedState)
224-
break;
225-
} else {
226-
FoundInitializedState = true;
241+
VAListState S = getVAListState(N->getState(), Reg);
242+
if (S == VAListState::Initialized) {
243+
SeenInitializedState = true;
244+
} else if (SeenInitializedState) {
245+
break;
227246
}
228247
const LocationContext *NContext = N->getLocationContext();
229248
if (NContext == LeakContext || NContext->isParentOf(LeakContext))
@@ -274,71 +293,84 @@ void VAListChecker::reportLeaked(const RegionVector &Leaked, StringRef Msg1,
274293
}
275294

276295
void VAListChecker::checkVAListStartCall(const CallEvent &Call,
277-
CheckerContext &C, bool IsCopy) const {
278-
bool Symbolic;
279-
const MemRegion *VAList =
280-
getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
281-
if (!VAList)
296+
CheckerContext &C) const {
297+
const MemRegion *Arg =
298+
getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
299+
if (!Arg)
282300
return;
283301

284302
ProgramStateRef State = C.getState();
303+
VAListState ArgState = getVAListState(State, Arg);
285304

286-
if (IsCopy) {
287-
const MemRegion *Arg2 =
288-
getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C);
289-
if (Arg2) {
290-
if (VAList == Arg2) {
291-
RegionVector Leaked{VAList};
292-
if (ExplodedNode *N = C.addTransition(State))
293-
reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
294-
return;
295-
}
296-
if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) {
297-
if (State->contains<InitializedVALists>(VAList)) {
298-
State = State->remove<InitializedVALists>(VAList);
299-
RegionVector Leaked{VAList};
300-
if (ExplodedNode *N = C.addTransition(State))
301-
reportLeaked(Leaked, "Initialized va_list",
302-
" is overwritten by an uninitialized one", C, N);
303-
} else {
304-
reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", C);
305-
}
306-
return;
307-
}
308-
}
309-
}
310-
if (State->contains<InitializedVALists>(VAList)) {
311-
RegionVector Leaked{VAList};
305+
if (ArgState == VAListState::Initialized) {
306+
RegionVector Leaked{Arg};
312307
if (ExplodedNode *N = C.addTransition(State))
313308
reportLeaked(Leaked, "Initialized va_list", " is initialized again", C,
314309
N);
315310
return;
316311
}
317312

318-
State = State->add<InitializedVALists>(VAList);
313+
State = State->set<VAListStateMap>(Arg, VAListState::Initialized);
314+
C.addTransition(State);
315+
}
316+
317+
void VAListChecker::checkVAListCopyCall(const CallEvent &Call,
318+
CheckerContext &C) const {
319+
const MemRegion *Arg1 =
320+
getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
321+
const MemRegion *Arg2 =
322+
getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), C);
323+
if (!Arg1 || !Arg2)
324+
return;
325+
326+
ProgramStateRef State = C.getState();
327+
if (Arg1 == Arg2) {
328+
RegionVector Leaked{Arg1};
329+
if (ExplodedNode *N = C.addTransition(State))
330+
reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
331+
return;
332+
}
333+
VAListState State1 = getVAListState(State, Arg1);
334+
VAListState State2 = getVAListState(State, Arg2);
335+
// Update the ProgramState by copying the state of Arg2 to Arg1.
336+
State = State->set<VAListStateMap>(Arg1, State2);
337+
if (State1 == VAListState::Initialized) {
338+
RegionVector Leaked{Arg1};
339+
std::string Msg2 =
340+
formatv(" is overwritten by {0} {1} one",
341+
(State2 == VAListState::Initialized) ? "another" : "an",
342+
describeState(State2));
343+
if (ExplodedNode *N = C.addTransition(State))
344+
reportLeaked(Leaked, "Initialized va_list", Msg2, C, N);
345+
return;
346+
}
347+
if (State2 != VAListState::Initialized && State2 != VAListState::Unknown) {
348+
std::string Msg = formatv("{0} va_list is copied", describeState(State2));
349+
Msg[0] = toupper(Msg[0]);
350+
reportUninitializedAccess(Arg2, Msg, C);
351+
return;
352+
}
319353
C.addTransition(State);
320354
}
321355

322356
void VAListChecker::checkVAListEndCall(const CallEvent &Call,
323357
CheckerContext &C) const {
324-
bool Symbolic;
325-
const MemRegion *VAList =
326-
getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
327-
if (!VAList)
358+
const MemRegion *Arg =
359+
getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
360+
if (!Arg)
328361
return;
329362

330-
// We did not see va_start call, but the source of the region is unknown.
331-
// Be conservative and assume the best.
332-
if (Symbolic)
333-
return;
363+
ProgramStateRef State = C.getState();
364+
VAListState ArgState = getVAListState(State, Arg);
334365

335-
if (!C.getState()->contains<InitializedVALists>(VAList)) {
336-
reportUninitializedAccess(
337-
VAList, "va_end() is called on an uninitialized va_list", C);
366+
if (ArgState != VAListState::Unknown &&
367+
ArgState != VAListState::Initialized) {
368+
std::string Msg = formatv("va_end() is called on an {0} va_list",
369+
describeState(ArgState));
370+
reportUninitializedAccess(Arg, Msg, C);
338371
return;
339372
}
340-
ProgramStateRef State = C.getState();
341-
State = State->remove<InitializedVALists>(VAList);
373+
State = State->set<VAListStateMap>(Arg, VAListState::Released);
342374
C.addTransition(State);
343375
}
344376

@@ -351,13 +383,26 @@ PathDiagnosticPieceRef VAListChecker::VAListBugVisitor::VisitNode(
351383
if (!S)
352384
return nullptr;
353385

386+
VAListState After = getVAListState(State, Reg);
387+
VAListState Before = getVAListState(StatePrev, Reg);
388+
if (Before == After)
389+
return nullptr;
390+
354391
StringRef Msg;
355-
if (State->contains<InitializedVALists>(Reg) &&
356-
!StatePrev->contains<InitializedVALists>(Reg))
392+
switch (After) {
393+
case VAListState::Uninitialized:
394+
Msg = "Copied uninitialized contents into the va_list";
395+
break;
396+
case VAListState::Unknown:
397+
Msg = "Copied unknown contents into the va_list";
398+
break;
399+
case VAListState::Initialized:
357400
Msg = "Initialized va_list";
358-
else if (!State->contains<InitializedVALists>(Reg) &&
359-
StatePrev->contains<InitializedVALists>(Reg))
401+
break;
402+
case VAListState::Released:
360403
Msg = "Ended va_list";
404+
break;
405+
}
361406

362407
if (Msg.empty())
363408
return nullptr;

clang/test/Analysis/valist-uninitialized-no-undef.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ void call_vsprintf_bad(char *buffer, ...) {
3737
va_list va;
3838
va_start(va, buffer); // expected-note{{Initialized va_list}}
3939
va_end(va); // expected-note{{Ended va_list}}
40-
vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an uninitialized va_list argument}}
41-
// expected-note@-1{{Function 'vsprintf' is called with an uninitialized va_list argument}}
40+
vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 'vsprintf' is called with an already released va_list argument}}
41+
// expected-note@-1{{Function 'vsprintf' is called with an already released va_list argument}}
4242
}
4343

0 commit comments

Comments
 (0)