Skip to content

Commit 9798e60

Browse files
authored
Merge pull request github#5203 from tausbn/python-add-typebacktrackers
Python: Add `TypeBackTracker`
2 parents 9721182 + 880451f commit 9798e60

File tree

3 files changed

+156
-0
lines changed

3 files changed

+156
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added support for type backtracking. Dual to the `TypeTracker` class, the `TypeBackTracker` class allows the propagation of user-defined type information backwards from a set of data flow nodes.

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,3 +324,144 @@ module TypeTracker {
324324
*/
325325
TypeTracker end() { result.end() }
326326
}
327+
328+
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalAttributeName attr)
329+
330+
/**
331+
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
332+
*
333+
* This can for example be used to track callbacks that are passed to a certain API,
334+
* so we can model specific parameters of that callback as having a certain type.
335+
*
336+
* Note that type back-tracking does not provide a source/sink relation, that is,
337+
* it may determine that a node will be used in an API call somewhere, but it won't
338+
* determine exactly where that use was, or the path that led to the use.
339+
*
340+
* It is recommended that all uses of this type are written in the following form,
341+
* for back-tracking some callback type `myCallback`:
342+
*
343+
* ```
344+
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
345+
* t.start() and
346+
* result = (< some API call >).getArgument(< n >).getALocalSource()
347+
* or
348+
* exists (DataFlow::TypeBackTracker t2 |
349+
* result = myCallback(t2).backtrack(t2, t)
350+
* )
351+
* }
352+
*
353+
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
354+
* ```
355+
*
356+
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
357+
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
358+
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
359+
*/
360+
class TypeBackTracker extends TTypeBackTracker {
361+
Boolean hasReturn;
362+
string attr;
363+
364+
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, attr) }
365+
366+
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
367+
TypeBackTracker prepend(StepSummary step) {
368+
step = LevelStep() and result = this
369+
or
370+
step = CallStep() and hasReturn = false and result = this
371+
or
372+
step = ReturnStep() and result = MkTypeBackTracker(true, attr)
373+
or
374+
exists(string p | step = LoadStep(p) and attr = "" and result = MkTypeBackTracker(hasReturn, p))
375+
or
376+
step = StoreStep(attr) and result = MkTypeBackTracker(hasReturn, "")
377+
}
378+
379+
/** Gets a textual representation of this summary. */
380+
string toString() {
381+
exists(string withReturn, string withAttr |
382+
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
383+
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
384+
result = "type back-tracker " + withReturn + " return steps" + withAttr
385+
)
386+
}
387+
388+
/**
389+
* Holds if this is the starting point of type tracking.
390+
*/
391+
predicate start() { hasReturn = false and attr = "" }
392+
393+
/**
394+
* Holds if this is the end point of type tracking.
395+
*/
396+
predicate end() { attr = "" }
397+
398+
/**
399+
* INTERNAL. DO NOT USE.
400+
*
401+
* Holds if this type has been back-tracked into a call through return edge.
402+
*/
403+
boolean hasReturn() { result = hasReturn }
404+
405+
/**
406+
* Gets a type tracker that starts where this one has left off to allow continued
407+
* tracking.
408+
*
409+
* This predicate is only defined if the type has not been tracked into an attribute.
410+
*/
411+
TypeBackTracker continue() { attr = "" and result = this }
412+
413+
/**
414+
* Gets the summary that corresponds to having taken a backwards
415+
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
416+
*/
417+
pragma[inline]
418+
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
419+
exists(StepSummary summary |
420+
StepSummary::step(nodeFrom, nodeTo, summary) and
421+
this = result.prepend(summary)
422+
)
423+
}
424+
425+
/**
426+
* Gets the summary that corresponds to having taken a backwards
427+
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
428+
*
429+
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
430+
* in the flowgraph, and not just the edges between
431+
* `LocalSourceNode`s. It may therefore be less performant.
432+
*
433+
* Type tracking predicates using small steps typically take the following form:
434+
* ```ql
435+
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
436+
* t.start() and
437+
* result = < some API call >.getArgument(< n >)
438+
* or
439+
* exists (DataFlow::TypeBackTracker t2 |
440+
* t = t2.smallstep(result, myType(t2))
441+
* )
442+
* }
443+
*
444+
* DataFlow::Node myType() {
445+
* result = myType(DataFlow::TypeBackTracker::end())
446+
* }
447+
* ```
448+
*/
449+
pragma[inline]
450+
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
451+
exists(StepSummary summary |
452+
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
453+
this = result.prepend(summary)
454+
)
455+
or
456+
typePreservingStep(nodeFrom, nodeTo) and
457+
this = result
458+
}
459+
}
460+
461+
/** Provides predicates for implementing custom `TypeBackTracker`s. */
462+
module TypeBackTracker {
463+
/**
464+
* Gets a valid end point of type back-tracking.
465+
*/
466+
TypeBackTracker end() { result.end() }
467+
}

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,19 @@ class Node extends TNode {
125125
*/
126126
pragma[inline]
127127
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
128+
129+
/**
130+
* Gets a node that may flow into this one using one heap and/or interprocedural step.
131+
*
132+
* See `TypeBackTracker` for more details about how to use this.
133+
*/
134+
pragma[inline]
135+
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
136+
137+
/**
138+
* Gets a local source node from which data may flow to this node in zero or more local steps.
139+
*/
140+
LocalSourceNode getALocalSource() { result.flowsTo(this) }
128141
}
129142

130143
/** A data-flow node corresponding to an SSA variable. */

0 commit comments

Comments
 (0)