Skip to content

Commit d938ccc

Browse files
committed
[analyzer][NFC] Add option assume-one-iteration
This commit adds the new analyzer option `assume-one-iteration`, which is `false` by default, but can be set to `true` to ensure that the analyzer always assumes (at least) one iteration in loops. In some situations this "loop is skipped" execution path is an important corner case that may evade the notice of the developer and hide significant bugs -- however, there are also many situations where it's guaranteed that at least one iteration will happen (e.g. some data structure is always nonempty), but the analyzer cannot realize this and will produce false positives when it assumes that the loop is skipped. This commit refactors some logic around the implementation of the new feature, but those changes are clearly NFC when the new analyzer option is in its default state.
1 parent b9207ae commit d938ccc

File tree

4 files changed

+106
-35
lines changed

4 files changed

+106
-35
lines changed

clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,16 @@ ANALYZER_OPTION(
294294
bool, ShouldUnrollLoops, "unroll-loops",
295295
"Whether the analysis should try to unroll loops with known bounds.", false)
296296

297+
ANALYZER_OPTION(
298+
bool, ShouldAssumeOneIteration, "assume-one-iteration",
299+
"Whether the analyzer should always assume at least one iteration in "
300+
"loops where the loop condition is opaque (i.e. the analyzer cannot "
301+
"determine if it's true or false). Setting this to true eliminates some "
302+
"false positives (where e.g. a structure is nonempty, but the analyzer "
303+
"does not notice this); but it also eliminates some true positives (e.g. "
304+
"cases where a structure can be empty and this causes buggy behavior).",
305+
false)
306+
297307
ANALYZER_OPTION(
298308
bool, ShouldDisplayNotesAsEvents, "notes-as-events",
299309
"Whether the bug reporter should transparently treat extra note diagnostic "

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2810,13 +2810,17 @@ void ExprEngine::processBranch(
28102810
if (StTrue && StFalse)
28112811
assert(!isa<ObjCForCollectionStmt>(Condition));
28122812

2813+
bool BothFeasible =
2814+
(StTrue && StFalse) ||
2815+
didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
2816+
28132817
if (StTrue) {
2814-
// If we are processing a loop condition where two iterations have
2815-
// already been completed and the false branch is also feasible, then
2816-
// don't assume a third iteration because it is a redundant execution
2817-
// path (unlikely to be different from earlier loop exits) and can cause
2818-
// false positives if e.g. the loop iterates over a two-element structure
2819-
// with an opaque condition.
2818+
// In a loop, if both branches are feasible (i.e. the analyzer doesn't
2819+
// understand the loop condition) and two iterations have already been
2820+
// completed, then don't assume a third iteration because it is a
2821+
// redundant execution path (unlikely to be different from earlier loop
2822+
// exits) and can cause false positives if e.g. the loop iterates over a
2823+
// two-element structure with an opaque condition.
28202824
//
28212825
// The iteration count "2" is hardcoded because it's the natural limit:
28222826
// * the fact that the programmer wrote a loop (and not just an `if`)
@@ -2827,10 +2831,7 @@ void ExprEngine::processBranch(
28272831
// two iterations". (This pattern is common in FFMPEG and appears in
28282832
// many other projects as well.)
28292833
bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
2830-
bool FalseAlsoFeasible =
2831-
StFalse ||
2832-
didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
2833-
bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;
2834+
bool SkipTrueBranch = BothFeasible && CompletedTwoIterations;
28342835

28352836
// FIXME: This "don't assume third iteration" heuristic partially
28362837
// conflicts with the widen-loop analysis option (which is off by
@@ -2840,8 +2841,25 @@ void ExprEngine::processBranch(
28402841
Builder.generateNode(StTrue, true, PredN);
28412842
}
28422843

2843-
if (StFalse)
2844-
Builder.generateNode(StFalse, false, PredN);
2844+
if (StFalse) {
2845+
// In a loop, if both branches are feasible (i.e. the analyzer doesn't
2846+
// understand the loop condition), we are before the first iteration and
2847+
// the analyzer option `assume-one-iteration` is set to `true`, then avoid
2848+
// creating the execution path where the analyzer skips the loop.
2849+
//
2850+
// In some situations this "loop is skipped" execution path is an
2851+
// important corner case that may evade the notice of the developer and
2852+
// hide significant bugs -- however, there are also many situations where
2853+
// it's guaranteed that at least one iteration will happen (e.g. some
2854+
// data structure is always nonempty), but the analyzer cannot realize
2855+
// this and will produce false positives when it assumes that the loop is
2856+
// skipped.
2857+
bool BeforeFirstIteration = IterationsCompletedInLoop == std::optional{0};
2858+
bool SkipFalseBranch = BothFeasible && BeforeFirstIteration &&
2859+
AMgr.options.ShouldAssumeOneIteration;
2860+
if (!SkipFalseBranch)
2861+
Builder.generateNode(StFalse, false, PredN);
2862+
}
28452863
}
28462864
currBldrCtx = nullptr;
28472865
}

clang/test/Analysis/analyzer-config.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
// CHECK-NEXT: alpha.osx.cocoa.DirectIvarAssignment:AnnotatedFunctions = false
1212
// CHECK-NEXT: apply-fixits = false
1313
// CHECK-NEXT: assume-controlled-environment = false
14+
// CHECK-NEXT: assume-one-iteration = false
1415
// CHECK-NEXT: avoid-suppressing-null-argument-paths = false
1516
// CHECK-NEXT: c++-allocator-inlining = true
1617
// CHECK-NEXT: c++-container-inlining = false

clang/test/Analysis/loop-assumptions.c

Lines changed: 65 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,48 @@
11
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
2-
// RUN: -verify=expected,eagerlyassume %s
2+
// RUN: -verify=expected,noassumeone,eagerlyassume,combo %s
33
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
44
// RUN: -analyzer-config eagerly-assume=false \
5+
// RUN: -verify=expected,noassumeone,noeagerlyassume,combo %s
6+
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
7+
// RUN: -analyzer-config assume-one-iteration=true \
8+
// RUN: -verify=expected,eagerlyassume,combo %s
9+
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
10+
// RUN: -analyzer-config assume-one-iteration=true,eagerly-assume=false \
511
// RUN: -verify=expected,noeagerlyassume %s
612

13+
// The verify tag "combo" is used for one unique warning which is produced in three
14+
// of the four RUN combinations.
15+
716
// These tests validate the logic within `ExprEngine::processBranch` which
817
// ensures that in loops with opaque conditions we don't assume execution paths
918
// if the code does not imply that they are possible.
19+
// In particular, if two (or more) iterations are already completed in a loop,
20+
// we don't assume that there can be another iteration. Moreover, if the
21+
// analyzer option `assume-one-iteration` is enabled, then we don't assume that
22+
// a loop can be skipped completely.
1023

1124
void clang_analyzer_numTimesReached(void);
12-
void clang_analyzer_warnIfReached(void);
1325
void clang_analyzer_dump(int);
1426

15-
void clearCondition(void) {
16-
// If the analyzer can definitely determine the value of the loop condition,
27+
void clearTrueCondition(void) {
28+
// If the analyzer can definitely determine that the loop condition is true,
1729
// then this corrective logic doesn't activate and the engine executes
1830
// `-analyzer-max-loop` iterations (by default, 4).
19-
for (int i = 0; i < 10; i++)
31+
int i;
32+
for (i = 0; i < 10; i++)
2033
clang_analyzer_numTimesReached(); // expected-warning {{4}}
2134

22-
clang_analyzer_warnIfReached(); // unreachable
35+
clang_analyzer_dump(i); // Unreachable, no reports.
36+
}
37+
38+
void clearFalseCondition(void) {
39+
// If the analyzer can definitely determine that the loop condition is false,
40+
// then the loop is (obviously) skipped, even in `assume-one-iteration` mode.
41+
int i;
42+
for (i = 0; i > 10; i++)
43+
clang_analyzer_numTimesReached(); // Unreachable, no report.
44+
45+
clang_analyzer_dump(i); // expected-warning {{0}}
2346
}
2447

2548
void opaqueCondition(int arg) {
@@ -28,10 +51,13 @@ void opaqueCondition(int arg) {
2851
// that more than two iterations are possible. (It _does_ imply that two
2952
// iterations may be possible at least in some cases, because otherwise an
3053
// `if` would've been enough.)
31-
for (int i = 0; i < arg; i++)
54+
// Moreover, if `assume-one-iteration` is enabled, then assume at least one
55+
// iteration.
56+
int i;
57+
for (i = 0; i < arg; i++)
3258
clang_analyzer_numTimesReached(); // expected-warning {{2}}
3359

34-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
60+
clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
3561
}
3662

3763
int check(void);
@@ -42,22 +68,26 @@ void opaqueConditionCall(int arg) {
4268
// insert an assertion to guide the analyzer and rule out more than two
4369
// iterations (so the analyzer needs to proactively avoid those unjustified
4470
// branches).
45-
while (check())
71+
int i = 0; // Helper to distinguish the the branches after the loop.
72+
while (check()) {
4673
clang_analyzer_numTimesReached(); // expected-warning {{2}}
74+
i++;
75+
}
4776

48-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
77+
clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
4978
}
5079

5180
void opaqueConditionDoWhile(int arg) {
5281
// Same situation as `opaqueCondition()` but with a `do {} while ()` loop.
5382
// This is tested separately because this loop type is a special case in the
5483
// iteration count calculation.
84+
// Obviously, this loop guarantees that at least one iteration will happen.
5585
int i = 0;
5686
do {
5787
clang_analyzer_numTimesReached(); // expected-warning {{2}}
5888
} while (i++ < arg);
5989

60-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
90+
clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}}
6191
}
6292

6393
void dontRememberOldBifurcation(int arg) {
@@ -69,7 +99,7 @@ void dontRememberOldBifurcation(int arg) {
6999
// by default), because the code remembered that there was a bifurcation on
70100
// the first iteration of the loop and didn't realize that this is obsolete.
71101

72-
// NOTE: The variable `i` is introduced to ensure that the iterations of the
102+
// NOTE: The variable `i` is significant to ensure that the iterations of the
73103
// loop change the state -- otherwise the analyzer stops iterating because it
74104
// returns to the same `ExplodedNode`.
75105
int i = 0;
@@ -78,21 +108,23 @@ void dontRememberOldBifurcation(int arg) {
78108
i++;
79109
}
80110

81-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
111+
clang_analyzer_dump(i); // noassumeone-warning {{0}}
82112
}
83113

84114
void dontAssumeFourthIterartion(int arg) {
115+
int i;
116+
85117
if (arg == 2)
86118
return;
87119

88120
// In this function the analyzer cannot leave the loop after exactly two
89121
// iterations (because it knows that `arg != 2` at that point), so it
90122
// performs a third iteration, but it does not assume that a fourth iteration
91123
// is also possible.
92-
for (int i = 0; i < arg; i++)
124+
for (i = 0; i < arg; i++)
93125
clang_analyzer_numTimesReached(); // expected-warning {{3}}
94126

95-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
127+
clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{3}}
96128
}
97129

98130
#define TRUE 1
@@ -108,20 +140,24 @@ void shortCircuitInLoopCondition(int arg) {
108140
// false positive on the ffmpeg codebase. Eventually we should properly
109141
// recognize the full syntactical loop condition expression as "the loop
110142
// condition", but this will be complicated to implement.
111-
for (int i = 0; i < arg && TRUE; i++) {
143+
int i;
144+
for (i = 0; i < arg && TRUE; i++) {
112145
clang_analyzer_numTimesReached(); // expected-warning {{4}}
113146
}
114-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
147+
148+
clang_analyzer_dump(i); // expected-warning {{0}} expected-warning {{1}} expected-warning {{2}} expected-warning {{3}}
115149
}
116150

117151
void shortCircuitInLoopConditionRHS(int arg) {
118152
// Unlike `shortCircuitInLoopCondition()`, this case is handled properly
119153
// because the analyzer thinks that the right hand side of the `&&` is the
120154
// loop condition.
121-
for (int i = 0; TRUE && i < arg; i++) {
155+
int i;
156+
for (i = 0; TRUE && i < arg; i++) {
122157
clang_analyzer_numTimesReached(); // expected-warning {{2}}
123158
}
124-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
159+
160+
clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
125161
}
126162

127163
void eagerlyAssumeInSubexpression(int arg) {
@@ -134,16 +170,22 @@ void eagerlyAssumeInSubexpression(int arg) {
134170
// sub-expression of the loop condition (as in this contrived test case).
135171
// FIXME: I don't know a real-world example for this inconsistency, but it
136172
// would be good to eliminate it eventually.
137-
for (int i = 0; (i >= arg) - 1; i++) {
173+
int i;
174+
for (i = 0; (i >= arg) - 1; i++) {
138175
clang_analyzer_numTimesReached(); // eagerlyassume-warning {{4}} noeagerlyassume-warning {{2}}
139176
}
140-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
177+
178+
// The 'combo' warning intentionally appears when `assume-one-iteration` is
179+
// disabled, but also appears as a bug (or at least inaccuracy) when
180+
// `assume-one-iteration` is true but `EagerlyAssume` is also enabled.
181+
clang_analyzer_dump(i); // combo-warning {{0}} expected-warning {{1}} expected-warning {{2}} eagerlyassume-warning {{3}}
141182
}
142183

143184
void calledTwice(int arg, int isFirstCall) {
144185
// This function is called twice (with two different unknown 'arg' values) to
145186
// check the iteration count handling in this situation.
146-
for (int i = 0; i < arg; i++) {
187+
int i;
188+
for (i = 0; i < arg; i++) {
147189
if (isFirstCall) {
148190
clang_analyzer_numTimesReached(); // expected-warning {{2}}
149191
} else {
@@ -215,5 +257,5 @@ void onlyLoopConditions(int arg) {
215257
break;
216258
}
217259

218-
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
260+
clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}} expected-warning {{3}} expected-warning {{4}}
219261
}

0 commit comments

Comments
 (0)