Skip to content

Commit cb9b5ef

Browse files
committed
[analyzer] Don't assume third iteration in loops
This commit ensures that if the loop condition is opaque (the analyzer cannot determine whether it's true or false) and there were at least two iterations, then the analyzer doesn't make the unjustified assumption that it can enter yet another iteration. Note that the presence of a loop suggests that the developer thought that two iterations can happen (otherwise an `if` would've been sufficient), but it does not imply that the developer expected three or four iterations -- and in fact there are many false positives where a loop iterates over a two-element (or three-element) data structure, but the analyzer cannot understand the loop condition and blindly assumes that there may be three or more iterations. (In particular, analyzing the FFMPEG project produces 100+ such false positives.) Moreover, this provides some performance improvements in the sense that the analyzer won't waste time on traversing the execution paths with 3 or 4 iterations in a loop (which are very similar to the paths with 2 iterations) and therefore will be able to traverse more branches elsewhere on the `ExplodedGraph`. This logic is disabled if the user enables the widen-loops analyzer option (which is disabled by default), because the "simulate one final iteration after the invalidation" execution path would be suppressed by the "exit the loop if the loop condition is opaque and there were at least two iterations" logic. If we want to support loop widening, we would need to create a follow-up commit which ensures that it "plays nicely" with this logic.
1 parent b343f3f commit cb9b5ef

File tree

6 files changed

+133
-41
lines changed

6 files changed

+133
-41
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ class CoreEngine {
126126
ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
127127
const ReturnStmt *RS);
128128

129+
/// Helper function called by `HandleBranch()`. If the currently handled
130+
/// branch corresponds to a loop, this returns the number of already
131+
/// completed iterations in that loop, otherwise the return value is
132+
/// `std::nullopt`. Note that this counts _all_ earlier iterations, including
133+
/// ones that were performed within an earlier iteration of an outer loop.
134+
std::optional<unsigned> getCompletedIterationCount(const CFGBlock *B,
135+
ExplodedNode *Pred) const;
136+
129137
public:
130138
/// Construct a CoreEngine object to analyze the provided CFG.
131139
CoreEngine(ExprEngine &exprengine,

clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,14 +321,14 @@ class ExprEngine {
321321
NodeBuilderWithSinks &nodeBuilder,
322322
ExplodedNode *Pred);
323323

324-
/// ProcessBranch - Called by CoreEngine. Used to generate successor
325-
/// nodes by processing the 'effects' of a branch condition.
326-
void processBranch(const Stmt *Condition,
327-
NodeBuilderContext& BuilderCtx,
328-
ExplodedNode *Pred,
329-
ExplodedNodeSet &Dst,
330-
const CFGBlock *DstT,
331-
const CFGBlock *DstF);
324+
/// ProcessBranch - Called by CoreEngine. Used to generate successor nodes by
325+
/// processing the 'effects' of a branch condition. If the branch condition
326+
/// is a loop condition, IterationsCompletedInLoop is the number of completed
327+
/// iterations (otherwise it's std::nullopt).
328+
void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
329+
ExplodedNode *Pred, ExplodedNodeSet &Dst,
330+
const CFGBlock *DstT, const CFGBlock *DstF,
331+
std::optional<unsigned> IterationsCompletedInLoop);
332332

333333
/// Called by CoreEngine.
334334
/// Used to generate successor nodes for temporary destructors depending
@@ -588,6 +588,8 @@ class ExprEngine {
588588
void evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
589589
const Expr *Ex);
590590

591+
bool didEagerlyAssumeBifurcateAt(ProgramStateRef State, const Expr *Ex) const;
592+
591593
static std::pair<const ProgramPointTag *, const ProgramPointTag *>
592594
getEagerlyAssumeBifurcationTags();
593595

clang/lib/StaticAnalyzer/Core/CoreEngine.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,8 @@ void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
444444
NodeBuilderContext Ctx(*this, B, Pred);
445445
ExplodedNodeSet Dst;
446446
ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
447-
*(B->succ_begin() + 1));
447+
*(B->succ_begin() + 1),
448+
getCompletedIterationCount(B, Pred));
448449
// Enqueue the new frontier onto the worklist.
449450
enqueue(Dst);
450451
}
@@ -591,6 +592,30 @@ ExplodedNode *CoreEngine::generateCallExitBeginNode(ExplodedNode *N,
591592
return isNew ? Node : nullptr;
592593
}
593594

595+
std::optional<unsigned>
596+
CoreEngine::getCompletedIterationCount(const CFGBlock *B,
597+
ExplodedNode *Pred) const {
598+
const LocationContext *LC = Pred->getLocationContext();
599+
BlockCounter Counter = WList->getBlockCounter();
600+
unsigned BlockCount =
601+
Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());
602+
603+
const Stmt *Term = B->getTerminatorStmt();
604+
if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
605+
assert(BlockCount >= 1 &&
606+
"Block count of currently analyzed block must be >= 1");
607+
return BlockCount - 1;
608+
}
609+
if (isa<DoStmt>(Term)) {
610+
// In a do-while loop one iteration happens before the first evaluation of
611+
// the loop condition, so we don't subtract one.
612+
return BlockCount;
613+
}
614+
// ObjCForCollectionStmt is skipped intentionally because the current
615+
// application of the iteration counts is not relevant for it.
616+
return std::nullopt;
617+
}
618+
594619
void CoreEngine::enqueue(ExplodedNodeSet &Set) {
595620
for (const auto I : Set)
596621
WList->enqueue(I);

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,12 +2753,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) {
27532753
return State->assume(V);
27542754
}
27552755

2756-
void ExprEngine::processBranch(const Stmt *Condition,
2757-
NodeBuilderContext& BldCtx,
2758-
ExplodedNode *Pred,
2759-
ExplodedNodeSet &Dst,
2760-
const CFGBlock *DstT,
2761-
const CFGBlock *DstF) {
2756+
void ExprEngine::processBranch(
2757+
const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
2758+
ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
2759+
std::optional<unsigned> IterationsCompletedInLoop) {
27622760
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
27632761
"CXXBindTemporaryExprs are handled by processBindTemporary.");
27642762
const LocationContext *LCtx = Pred->getLocationContext();
@@ -2801,8 +2799,35 @@ void ExprEngine::processBranch(const Stmt *Condition,
28012799
if (StTrue && StFalse)
28022800
assert(!isa<ObjCForCollectionStmt>(Condition));
28032801

2804-
if (StTrue)
2805-
Builder.generateNode(StTrue, true, PredN);
2802+
if (StTrue) {
2803+
// If we are processing a loop condition where two iterations have
2804+
// already been completed and the the false branch is also feasible, then
2805+
// don't assume a third iteration, because it is a redundant execution
2806+
// path (unlikely to be different from earlier loop exits) and can cause
2807+
// false positives if e.g. the loop iterates over a two-element structure
2808+
// with an opaque condition.
2809+
//
2810+
// The iteration count "2" is hardcoded because it's the natural limit:
2811+
// * the fact that the programmer wrote a loop (and not just an `if`)
2812+
// implies that they thought that the loop body may be executed twice;
2813+
// * however, there are situations where the programmer knows that there
2814+
// are at most two iterations, but writes a loop that appears to be
2815+
// generic, because there is no special syntax for "loop with at most
2816+
// two iterations". (This pattern is common in FFMPEG and appears in
2817+
// many other projects as well.)
2818+
bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
2819+
bool FalseAlsoFeasible =
2820+
StFalse ||
2821+
didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
2822+
bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;
2823+
2824+
// FIXME: This "don't assume third iteration" heuristic partially
2825+
// conflicts with the widen-loop analysis option (which is off by
2826+
// default). If we intend to support and stabilize the loop widening,
2827+
// we'll need to ensure that it 'plays nicely' with this logic.
2828+
if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops)
2829+
Builder.generateNode(StTrue, true, PredN);
2830+
}
28062831

28072832
if (StFalse)
28082833
Builder.generateNode(StFalse, false, PredN);
@@ -3724,6 +3749,10 @@ ExprEngine::getEagerlyAssumeBifurcationTags() {
37243749
return std::make_pair(&TrueTag, &FalseTag);
37253750
}
37263751

3752+
/// The last expression where EagerlyAssume produced two transitions (i.e. it
3753+
/// activated and the true and false case were both feasible).
3754+
REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeBifurcationAt, const Expr *)
3755+
37273756
void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37283757
ExplodedNodeSet &Src,
37293758
const Expr *Ex) {
@@ -3746,6 +3775,11 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37463775

37473776
auto [StateTrue, StateFalse] = State->assume(*SEV);
37483777

3778+
if (StateTrue && StateFalse) {
3779+
StateTrue = StateTrue->set<LastEagerlyAssumeBifurcationAt>(Ex);
3780+
StateFalse = StateFalse->set<LastEagerlyAssumeBifurcationAt>(Ex);
3781+
}
3782+
37493783
// First assume that the condition is true.
37503784
if (StateTrue) {
37513785
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
@@ -3763,6 +3797,11 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37633797
}
37643798
}
37653799

3800+
bool ExprEngine::didEagerlyAssumeBifurcateAt(ProgramStateRef State,
3801+
const Expr *Ex) const {
3802+
return Ex && State->get<LastEagerlyAssumeBifurcationAt>() == Ex;
3803+
}
3804+
37663805
void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
37673806
ExplodedNodeSet &Dst) {
37683807
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);

clang/test/Analysis/loop-unrolling.cpp

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ int simple_no_unroll1() {
6363
int a[9];
6464
int k = 42;
6565
for (int i = 0; i < 9; i++) {
66-
clang_analyzer_numTimesReached(); // expected-warning {{4}}
66+
clang_analyzer_numTimesReached(); // expected-warning {{2}}
6767
a[i] = 42;
6868
foo(i);
6969
}
@@ -76,7 +76,7 @@ int simple_no_unroll2() {
7676
int k = 42;
7777
int i;
7878
for (i = 0; i < 9; i++) {
79-
clang_analyzer_numTimesReached(); // expected-warning {{4}}
79+
clang_analyzer_numTimesReached(); // expected-warning {{2}}
8080
a[i] = 42;
8181
i += getNum();
8282
}
@@ -309,9 +309,9 @@ int nested_inner_unrolled() {
309309
int k = 42;
310310
int j = 0;
311311
for (int i = 0; i < getNum(); i++) {
312-
clang_analyzer_numTimesReached(); // expected-warning {{4}}
312+
clang_analyzer_numTimesReached(); // expected-warning {{2}}
313313
for (j = 0; j < 8; ++j) {
314-
clang_analyzer_numTimesReached(); // expected-warning {{32}}
314+
clang_analyzer_numTimesReached(); // expected-warning {{16}}
315315
a[j] = 22;
316316
}
317317
a[i] = 42;
@@ -346,11 +346,7 @@ int simple_known_bound_loop() {
346346

347347
int simple_unknown_bound_loop() {
348348
for (int i = 2; i < getNum(); i++) {
349-
#ifdef DFS
350-
clang_analyzer_numTimesReached(); // expected-warning {{16}}
351-
#else
352349
clang_analyzer_numTimesReached(); // expected-warning {{8}}
353-
#endif
354350
}
355351
return 0;
356352
}
@@ -368,11 +364,7 @@ int nested_inlined_unroll1() {
368364
int nested_inlined_no_unroll1() {
369365
int k;
370366
for (int i = 0; i < 9; i++) {
371-
#ifdef DFS
372-
clang_analyzer_numTimesReached(); // expected-warning {{18}}
373-
#else
374-
clang_analyzer_numTimesReached(); // expected-warning {{14}}
375-
#endif
367+
clang_analyzer_numTimesReached(); // expected-warning {{10}}
376368
k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well
377369
}
378370
int a = 22 / k; // no-warning
@@ -475,9 +467,13 @@ int num_steps_over_limit2() {
475467

476468
int num_steps_on_limit3() {
477469
for (int i = 0; i < getNum(); i++) {
478-
clang_analyzer_numTimesReached(); // expected-warning {{4}}
470+
clang_analyzer_numTimesReached(); // expected-warning {{2}}
479471
for (int j = 0; j < 32; j++) {
480-
clang_analyzer_numTimesReached(); // expected-warning {{128}}
472+
// Here the loop unrollig logic calculates with four potential iterations
473+
// in the outer loop where it cannot determine the iteration count in
474+
// advance; but after two loops the analyzer conservatively assumes that
475+
// the (still opaque) loop condition is false.
476+
clang_analyzer_numTimesReached(); // expected-warning {{64}}
481477
}
482478
}
483479
return 0;
@@ -493,6 +489,15 @@ int num_steps_over_limit3() {
493489
return 0;
494490
}
495491

492+
int num_steps_on_limit4() {
493+
for (int i = 0; i < 4; i++) {
494+
clang_analyzer_numTimesReached(); // expected-warning {{4}}
495+
for (int j = 0; j < 32; j++) {
496+
clang_analyzer_numTimesReached(); // expected-warning {{128}}
497+
}
498+
}
499+
return 0;
500+
}
496501

497502
void pr34943() {
498503
for (int i = 0; i < 6L; ++i) {

clang/test/Analysis/misc-ps-region-store.m

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -910,38 +910,51 @@ void pr6302(id x, Class y) {
910910

911911
//===----------------------------------------------------------------------===//
912912
// Specially handle global variables that are declared constant. In the
913-
// example below, this forces the loop to take exactly 2 iterations.
913+
// example below, this forces the loop to take exactly 1 iterations.
914914
//===----------------------------------------------------------------------===//
915915

916-
const int pr6288_L_N = 2;
916+
const int pr6288_L_N = 1;
917917
void pr6288_(void) {
918-
int x[2];
919-
int *px[2];
918+
int x[1];
919+
int *px[1];
920920
int i;
921921
for (i = 0; i < pr6288_L_N; i++)
922922
px[i] = &x[i];
923923
*(px[0]) = 0; // no-warning
924924
}
925925

926926
void pr6288_pos(int z) {
927-
int x[2];
928-
int *px[2];
927+
int x[1];
928+
int *px[1];
929929
int i;
930930
for (i = 0; i < z; i++)
931931
px[i] = &x[i]; // expected-warning{{Access out-of-bound array element (buffer overflow)}}
932932
*(px[0]) = 0; // expected-warning{{Dereference of undefined pointer value}}
933933
}
934934

935935
void pr6288_b(void) {
936-
const int L_N = 2;
937-
int x[2];
938-
int *px[2];
936+
const int L_N = 1;
937+
int x[1];
938+
int *px[1];
939939
int i;
940940
for (i = 0; i < L_N; i++)
941941
px[i] = &x[i];
942942
*(px[0]) = 0; // no-warning
943943
}
944944

945+
void pr6288_no_third_iter(int z) {
946+
int x[2];
947+
int *px[2];
948+
int i;
949+
// If the loop condition is opaque, we assume that there may be two
950+
// iterations (becasuse otherwise the loop could be replaced by an if); but
951+
// we do not assume that there may be a third iteration. Therefore,
952+
// unlike 'pr6288_pos', this testcase does not produce an out-of-bounds error.
953+
for (i = 0; i < z; i++)
954+
px[i] = &x[i];
955+
*(px[0]) = 0; // expected-warning{{Dereference of undefined pointer value}}
956+
}
957+
945958
// A bug in RemoveDeadBindings was causing instance variable bindings to get
946959
// prematurely pruned from the state.
947960
@interface Rdar7817800 {

0 commit comments

Comments
 (0)