diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index f9f22a9ced650..fab19c76a22fe 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -414,6 +414,19 @@ ANALYZER_OPTION( "any target.", true) +ANALYZER_OPTION( + bool, InlineFunctionsWithAmbiguousLoops, "inline-functions-with-ambiguous-loops", + "If disabled (the default), the analyzer puts functions on a \"do not " + "inline this\" list if it finds an execution path within that function " + "that may potentially perform 'analyzer-max-loop' (= 4 by default) " + "iterations in a loop. (Note that functions that _definitely_ reach the " + "loop limit on some execution path are currently marked as \"do not " + "inline\" even if this option is enabled.) Enabling this option " + "eliminates this (somewhat arbitrary) restriction from the analysis " + "scope, which increases the analysis runtime (on average by ~10%, but " + "a few translation units may see much larger slowdowns).", + false) + //===----------------------------------------------------------------------===// // Unsigned analyzer options. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 3ee0d229cfc29..761395260a0cf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -81,10 +81,6 @@ class FunctionSummariesTy { I->second.MayInline = 0; } - void markReachedMaxBlockCount(const Decl *D) { - markShouldNotInline(D); - } - std::optional mayInline(const Decl *D) { MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 86e2e8f634bfd..868e5366b7927 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2523,6 +2523,20 @@ bool ExprEngine::replayWithoutInlining(ExplodedNode *N, return true; } +/// Return the innermost location context which is inlined at `Node`, unless +/// it's the top-level (entry point) location context. +static const LocationContext *getInlinedLocationContext(ExplodedNode *Node, + ExplodedGraph &G) { + const LocationContext *CalleeLC = Node->getLocation().getLocationContext(); + const LocationContext *RootLC = + (*G.roots_begin())->getLocation().getLocationContext(); + + if (CalleeLC->getStackFrame() == RootLC->getStackFrame()) + return nullptr; + + return CalleeLC; +} + /// Block entrance. (Update counters). void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, NodeBuilderWithSinks &nodeBuilder, @@ -2570,21 +2584,24 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, const ExplodedNode *Sink = nodeBuilder.generateSink(Pred->getState(), Pred, &tag); - // Check if we stopped at the top level function or not. - // Root node should have the location context of the top most function. - const LocationContext *CalleeLC = Pred->getLocation().getLocationContext(); - const LocationContext *CalleeSF = CalleeLC->getStackFrame(); - const LocationContext *RootLC = - (*G.roots_begin())->getLocation().getLocationContext(); - if (RootLC->getStackFrame() != CalleeSF) { - Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl()); + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + // FIXME: This will unconditionally prevent inlining this function (even + // from other entry points), which is not a reasonable heuristic: even if + // we reached max block count on this particular execution path, there + // may be other execution paths (especially with other parametrizations) + // where the analyzer can reach the end of the function (so there is no + // natural reason to avoid inlining it). However, disabling this would + // significantly increase the analysis time (because more entry points + // would exhaust their allocated budget), so it must be compensated by a + // different (more reasonable) reduction of analysis scope. + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); // Re-run the call evaluation without inlining it, by storing the // no-inlining policy in the state and enqueuing the new work item on // the list. Replay should almost never fail. Use the stats to catch it // if it does. - if ((!AMgr.options.NoRetryExhausted && - replayWithoutInlining(Pred, CalleeLC))) + if ((!AMgr.options.NoRetryExhausted && replayWithoutInlining(Pred, LC))) return; NumMaxBlockCountReachedInInlined++; } else @@ -2856,8 +2873,29 @@ void ExprEngine::processBranch( // conflicts with the widen-loop analysis option (which is off by // default). If we intend to support and stabilize the loop widening, // we must ensure that it 'plays nicely' with this logic. - if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) + if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) { Builder.generateNode(StTrue, true, PredN); + } else if (!AMgr.options.InlineFunctionsWithAmbiguousLoops) { + // FIXME: There is an ancient and arbitrary heuristic in + // `ExprEngine::processCFGBlockEntrance` which prevents all further + // inlining of a function if it finds an execution path within that + // function which reaches the `MaxBlockVisitOnPath` limit (a/k/a + // `analyzer-max-loop`, by default four iterations in a loop). Adding + // this "don't assume third iteration" logic significantly increased + // the analysis runtime on some inputs because less functions were + // arbitrarily excluded from being inlined, so more entry points used + // up their full allocated budget. As a hacky compensation for this, + // here we apply the "should not inline" mark in cases when the loop + // could potentially reach the `MaxBlockVisitOnPath` limit without the + // "don't assume third iteration" logic. This slightly overcompensates + // (activates if the third iteration can be entered, and will not + // recognize cases where the fourth iteration would't be completed), but + // should be good enough for practical purposes. + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); + } + } } if (StFalse) { diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 80cad54b039f4..7936273415ad4 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -89,6 +89,7 @@ // CHECK-NEXT: graph-trim-interval = 1000 // CHECK-NEXT: ignore-bison-generated-files = true // CHECK-NEXT: ignore-flex-generated-files = true +// CHECK-NEXT: inline-functions-with-ambiguous-loops = false // CHECK-NEXT: inline-lambdas = true // CHECK-NEXT: ipa = dynamic-bifurcate // CHECK-NEXT: ipa-always-inline-size = 3 diff --git a/clang/test/Analysis/loop-based-inlining-prevention.c b/clang/test/Analysis/loop-based-inlining-prevention.c new file mode 100644 index 0000000000000..73627112e2d32 --- /dev/null +++ b/clang/test/Analysis/loop-based-inlining-prevention.c @@ -0,0 +1,200 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify=expected,default %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-functions-with-ambiguous-loops=true -verify=expected,enabled %s + +// This file tests some heuristics in the engine that put functions on a +// "do not inline" list if their analyisis reaches the `analyzer-max-loop` +// limit (by default 4 iterations) in a loop. This was almost surely intended +// as memoization optimization for the "retry without inlining" fallback (if we +// had to retry once, next time don't even try inlining), but aggressively +// oversteps the "natural" scope: reaching 4 iterations on _one particular_ +// execution path does not imply that each path would need "retry without +// inlining" especially if a different call receives different arguments. +// +// This heuristic significantly affects the scope/depth of the analysis (and +// therefore the execution time) because without this limitation on the +// inlining significantly more entry points would be able to exhaust their +// `max-nodes` quota. (Trivial thin wrappers around big complex functions are +// common in many projects.) +// +// Unfortunately, this arbitrary heuristic strongly relies on the current loop +// handling model and its many limitations, so improvements in loop handling +// can cause surprising slowdowns by reducing the "do not inline" blacklist. +// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy) +// analyzer behavior which cannot be fixed without also improving the +// heuristics for (not) inlining large functions. + + int getNum(void); // Get an unknown symbolic number. + +void clang_analyzer_dump(int arg); + +//----------------------------------------------------------------------------- +// Simple case: inlined function never reaches `analyzer-max-loop`, so it is +// always inlined. + +int inner_simple(int callIdx) { + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + // expected-warning@-1 {{2 S32}} + return 42; +} + +int outer_simple(void) { + int x = inner_simple(1); + int y = inner_simple(2); + return 53 / (x - y); // expected-warning {{Division by zero}} +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`, which stops the +// analysis on that path and puts the function on the "do not inline" list. + +int inner_fixed_loop_1(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_fixed_loop_1(void) { + int x = inner_fixed_loop_1(1); + int y = inner_fixed_loop_1(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`; inlining is prevented +// even for different entry points. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_fixed_loop_2` is analyzed first and it will be the one which is +// able to inline the inner function. + +int inner_fixed_loop_2(int callIdx) { + // Identical copy of inner_fixed_loop_1. + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_1_fixed_loop_2(void) { + return inner_fixed_loop_2(1); +} + +int outer_2_fixed_loop_2(void) { + return inner_fixed_loop_2(2); +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` only in its second call. The +// function is inlined twice but the second call doesn't finish and ends up +// being conservatively evaluated. + +int inner_parametrized_loop_1(int count) { + int i; + clang_analyzer_dump(count); // expected-warning {{2 S32}} + // expected-warning@-1 {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // expected-warning {{2 S32}} + return 42; +} + +int outer_parametrized_loop_1(void) { + int x = inner_parametrized_loop_1(2); + int y = inner_parametrized_loop_1(10); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` on its first call, so the +// second call isn't inlined (although it could be fully evaluated). + +int inner_parametrized_loop_2(int count) { + // Identical copy of inner_parametrized_loop_1. + int i; + clang_analyzer_dump(count); // expected-warning {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // no-warning + return 42; +} + +int outer_parametrized_loop_2(void) { + int y = inner_parametrized_loop_2(10); + int x = inner_parametrized_loop_2(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function may or may not reach `analyzer-max-loop` depending on an +// ambiguous check before the loop. This is very similar to the "fixed loop" +// cases: the function is placed on the "don't inline" list when any execution +// path reaches `analyzer-max-loop` (even if other execution paths reach the +// end of the function). +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_conditional_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_conditional_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + if (getNum() == 777) { + for (i = 0; i < 10; i++); + } + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + return 42; +} + +int outer_1_conditional_loop(void) { + return inner_conditional_loop(1); +} + +int outer_2_conditional_loop(void) { + return inner_conditional_loop(2); +} + +//----------------------------------------------------------------------------- +// Inlined function executes an ambiguous loop that may or may not reach +// `analyzer-max-loop`. Historically, before the "don't assume third iteration" +// commit (bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849) this worked like the +// `conditional_loop` cases: the analyzer was able to find a path reaching +// `analyzer-max-loop` so inlining was disabled. After that commit the analyzer +// does not _assume_ a third (or later) iteration (i.e. does not enter those +// iterations if the loop condition is an unknown value), so e.g. this test +// function does not reach `analyzer-max-loop` iterations and the inlining is +// not disabled. +// Unfortunately this change significantly increased the workload and +// runtime of the analyzer (more entry points used up their budget), so the +// option `inline-functions-with-ambiguous-loops` was introduced and disabled +// by default to suppress the inlining in situations where the "don't assume +// third iteration" logic activates. +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_ambiguous_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_ambiguous_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // default-warning {{2 S32}} + // enabled-warning@-1 {{1 S32}} + // enabled-warning@-2 {{2 S32}} + for (i = 0; i < getNum(); i++); + return i; +} + +int outer_1_ambiguous_loop(void) { + return inner_ambiguous_loop(1); +} +int outer_2_ambiguous_loop(void) { + return inner_ambiguous_loop(2); +} diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp index bf05a7739ce48..ebae81e000c7a 100644 --- a/clang/test/Analysis/loop-unrolling.cpp +++ b/clang/test/Analysis/loop-unrolling.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify -std=c++14 -DDFS=1 %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify=expected,default -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify=expected,dfs -std=c++14 %s void clang_analyzer_numTimesReached(); void clang_analyzer_warnIfReached(); @@ -337,6 +337,7 @@ int nested_both_unrolled() { } int simple_known_bound_loop() { + // Iteration count visible: can be unrolled and fully executed. for (int i = 2; i < 12; i++) { // This function is inlined in nested_inlined_unroll1() clang_analyzer_numTimesReached(); // expected-warning {{90}} @@ -345,27 +346,42 @@ int simple_known_bound_loop() { } int simple_unknown_bound_loop() { + // Iteration count unknown: unrolling won't happen and the execution will be + // split two times: + // (1) split between skipped loop (immediate exit) and entering the loop + // (2) split between exit after 1 iteration and entering the second iteration + // After these there is no third state split because the "don't assume third + // iteration" logic in `ExprEngine::processBranch` prevents it; but the + // `legacy-inlining-prevention` logic will put this function onto the list of + // functions that may not be inlined in the future. + // The exploration strategy apparently influences the number of times this + // function can be inlined before it's placed on the "don't inline" list. for (int i = 2; i < getNum(); i++) { - clang_analyzer_numTimesReached(); // expected-warning {{8}} + clang_analyzer_numTimesReached(); // default-warning {{4}} dfs-warning {{8}} } return 0; } int nested_inlined_unroll1() { + // Here the analyzer can unroll and fully execute both the outer loop and the + // inner loop within simple_known_bound_loop(). int k; for (int i = 0; i < 9; i++) { clang_analyzer_numTimesReached(); // expected-warning {{9}} - k = simple_known_bound_loop(); // no reevaluation without inlining + k = simple_known_bound_loop(); } int a = 22 / k; // expected-warning {{Division by zero}} return 0; } int nested_inlined_no_unroll1() { + // Here no unrolling happens and we only run `analyzer-max-loop` (= 4) + // iterations of the loop within this function, but some state splits happen + // in `simple_unknown_bound_loop()` calls. int k; - for (int i = 0; i < 9; i++) { - clang_analyzer_numTimesReached(); // expected-warning {{10}} - k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well + for (int i = 0; i < 40; i++) { + clang_analyzer_numTimesReached(); // default-warning {{9}} dfs-warning {{12}} + k = simple_unknown_bound_loop(); } int a = 22 / k; // no-warning return 0;