Skip to content

Commit 895f548

Browse files
committed
Python: Added recursion guard
to ensure that the call graph seen by type tracking does not include summary calls resolved by type tracking. (I tried inserting a similar test into the Ruby codebase, and it still compiled) To get this to compile, I had to move the resolution of summary calls out of the data flow nodes and into the `viableCallable` predicate. This means that we now have a potential summary call for each cfg call node. (I tried using the base class, `DataFlowCall`, for this but calls to `map` got identified as class calls and would no longer be associated with a summary.) It is possible that the "NonLIbrary"-layers the were inserted into the hierarchy can be removed again.
1 parent 1649ec7 commit 895f548

File tree

3 files changed

+50
-8
lines changed

3 files changed

+50
-8
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -636,16 +636,22 @@ class SpecialCall extends DataFlowSourceCall, TSpecialCall {
636636
}
637637
}
638638

639-
/** A call to a summarized callable. */
639+
/**
640+
* A call to a summarized callable, a `LibraryCallable`.
641+
*
642+
* This is a potential call, really. It has an empty charpred, so any `CallNode` is allowed.
643+
* It is here to stake out territory, as otherwise a call to `map` would be considered a `ClassCall`
644+
* and not be available for a summary.
645+
*/
640646
class LibraryCall extends NormalCall {
641-
LibraryCallable callable;
642-
643-
LibraryCall() { call = callable.getACall() }
644-
645647
// TODO: Implement Python calling convention?
646648
override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
647649

648-
override DataFlowCallable getCallable() { result.asLibraryCallable() = callable }
650+
// We cannot refer to a `LibraryCallable` here,
651+
// as that could in turn refer to type tracking.
652+
// This call will be tied to a `LibraryCallable` via
653+
// `getViableCallabe` when the global data flow is assembled.
654+
override DataFlowCallable getCallable() { none() }
649655
}
650656

651657
/**
@@ -747,7 +753,18 @@ private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
747753
}
748754

749755
/** Gets a viable run-time target for the call `call`. */
750-
DataFlowCallable viableCallable(DataFlowSourceCall call) { result = call.getCallable() }
756+
DataFlowCallable viableCallable(DataFlowSourceCall call) {
757+
result = call.getCallable()
758+
or
759+
// A call to a library callable with a flow summary
760+
// In this situation we can not resolve the callable from the call,
761+
// as that would make data flow depend on type tracking.
762+
// Instead we reolve the call from the summary.
763+
exists(LibraryCallable callable |
764+
result = TLibraryCallable(callable) and
765+
call.getNode() = callable.getACall()
766+
)
767+
}
751768

752769
private newtype TReturnKind = TNormalReturnKind()
753770

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
4343
)
4444
}
4545

46-
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
46+
/**
47+
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
48+
*
49+
* Flow into summarized library methods is not included, as that will lead to negative
50+
* recursion (or, at best, terrible performance), since identifying calls to library
51+
* methods is done using API graphs (which uses type tracking).
52+
*/
4753
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
4854
// TODO: Support special methods?
4955
exists(DataFlowPrivate::DataFlowCallable callable, int i |

python/ql/test/experimental/dataflow/summaries/TestSummaries.qll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,25 @@ private import python
22
private import semmle.python.dataflow.new.FlowSummary
33
private import semmle.python.ApiGraphs
44

5+
/** This module ensures that the `callStep` predicate in
6+
* our type tracker implelemtation does not refer to the
7+
* `getACall` predicate on `SummarizedCallable`.
8+
*/
9+
module RecursionGuard {
10+
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT
11+
12+
private class RecursionGuard extends SummarizedCallable {
13+
RecursionGuard() { this = "RecursionGuard" }
14+
15+
override CallNode getACall() {
16+
result.getFunction().(NameNode).getId() = this and
17+
(TT::callStep(_, _) implies any())
18+
}
19+
20+
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
21+
}
22+
}
23+
524
private class SummarizedCallableIdentity extends SummarizedCallable {
625
SummarizedCallableIdentity() { this = "identity" }
726

0 commit comments

Comments
 (0)