Skip to content

Commit 9b80563

Browse files
committed
Python: Make the type tracking implementation shareable
1 parent b1d0b9a commit 9b80563

File tree

2 files changed

+488
-0
lines changed

2 files changed

+488
-0
lines changed
Lines changed: 393 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,393 @@
1+
/** Step Summaries and Type Tracking */
2+
3+
private import TypeTrackerPrivate
4+
5+
/**
 * A string that can occur as the name of a piece of content, that is,
 * any value produced by `getPossibleContentName()`.
 */
class ContentName extends string {
  ContentName() { getPossibleContentName() = this }
}
9+
10+
/**
 * A content name or the empty string, where the empty string stands for
 * "no content".
 */
class OptionalContentName extends string {
  OptionalContentName() {
    this = ""
    or
    this instanceof ContentName
  }
}
14+
15+
/**
 * The kinds of step that may occur on an inter-procedural data flow path.
 *
 * Store and load steps are parameterised by a content name; the remaining
 * kinds carry no data.
 */
private newtype TStepSummary =
  // Steps without an associated content name.
  LevelStep() or
  CallStep() or
  ReturnStep() or
  // Steps that are tied to a named piece of content.
  StoreStep(ContentName name) or
  LoadStep(ContentName name)
24+
25+
/**
 * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
 *
 * The summary of a single step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /**
   * Gets a human-readable description of this step summary: the kind of
   * step, followed by the content name for store and load steps.
   */
  string toString() {
    this = LevelStep() and result = "level"
    or
    this = CallStep() and result = "call"
    or
    this = ReturnStep() and result = "return"
    or
    exists(string name | this = StoreStep(name) and result = "store " + name)
    or
    exists(string name | this = LoadStep(name) and result = "load " + name)
  }
}
44+
45+
/** Provides predicates that relate pairs of nodes to the `StepSummary` of the step between them. */
module StepSummary {
  /**
   * Holds if `summary` describes a forwards heap and/or inter-procedural step
   * from `nodeFrom` to `nodeTo`.
   *
   * Concretely, `nodeFrom` must flow (via `flowsTo`) to some intermediate node
   * from which a single `smallstep` with summary `summary` reaches `nodeTo`.
   */
  cached
  predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
    exists(Node mid |
      smallstep(mid, nodeTo, summary) and
      nodeFrom.flowsTo(mid)
    )
  }

  /**
   * Holds if `summary` describes a single forwards local, heap and/or
   * inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * In contrast to `StepSummary::step`, type-preserving steps are not
   * compressed here: each of them is reported as its own level step.
   */
  predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
    summary = LevelStep() and typePreservingStep(nodeFrom, nodeTo)
    or
    summary = CallStep() and callStep(nodeFrom, nodeTo)
    or
    summary = ReturnStep() and returnStep(nodeFrom, nodeTo)
    or
    exists(string name | summary = StoreStep(name) and basicStoreStep(nodeFrom, nodeTo, name))
    or
    exists(string name | summary = LoadStep(name) and basicLoadStep(nodeFrom, nodeTo, name))
  }
}
80+
81+
/**
 * Holds if the data flow step from `nodeFrom` to `nodeTo` can reasonably be
 * expected to preserve types, that is, if it is either a jump step or a
 * simple local flow step.
 */
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
  jumpStep(nodeFrom, nodeTo)
  or
  simpleLocalFlowStep(nodeFrom, nodeTo)
}
86+
87+
/**
 * The values `true` and `false`.
 *
 * This utility class is equivalent to `boolean`, but using it does not
 * require type joining.
 */
private class Boolean extends boolean {
  Boolean() {
    this = false
    or
    this = true
  }
}
93+
94+
/**
 * The underlying representation of a `TypeTracker`: a flag recording whether
 * a call step has been taken, paired with an optional content name.
 */
private newtype TTypeTracker =
  MkTypeTracker(Boolean viaCall, OptionalContentName contentName)
95+
96+
/**
 * Summary of the steps needed to track a value to a given dataflow node.
 *
 * This can be used to track objects that implement a certain API in order to
 * recognize calls to that API. Note that type-tracking does not by itself provide a
 * source/sink relation, that is, it may determine that a node has a given type,
 * but it won't determine where that type came from.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```
 * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
 *   exists (DataFlow::TypeTracker t2 |
 *     result = myType(t2).track(t2, t)
 *   )
 * }
 *
 * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
 * ```
 *
 * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
 * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
 * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
 */
class TypeTracker extends TTypeTracker {
  /** Whether a call step has been taken while tracking. */
  Boolean hasCall;
  /** The name of the content the value is currently stored in, or `""` if there is none. */
  OptionalContentName content;

  TypeTracker() { this = MkTypeTracker(hasCall, content) }

  /** Gets the summary resulting from appending `step` to this type-tracking summary. */
  cached
  TypeTracker append(StepSummary step) {
    // A level step leaves the summary unchanged.
    step = LevelStep() and result = this
    or
    // A call step sets the call flag and keeps the content.
    step = CallStep() and result = MkTypeTracker(true, content)
    or
    // A return step is only allowed while no call step has been taken.
    step = ReturnStep() and hasCall = false and result = this
    or
    // Loading the content that is currently tracked clears the content.
    step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
    or
    // A store is only allowed while no content is being tracked.
    exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withCall, string withContent |
      (if hasCall = true then withCall = "with" else withCall = "without") and
      (if content != "" then withContent = " with content " + content else withContent = "") and
      result = "type tracker " + withCall + " call steps" + withContent
    )
  }

  /**
   * Holds if this is the starting point of type tracking.
   */
  predicate start() { hasCall = false and content = "" }

  /**
   * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
   * The type tracking only ends after the content has been loaded.
   */
  predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }

  /**
   * Holds if this is the starting point of type tracking
   * when tracking a parameter into a call, but not out of it.
   */
  predicate call() { hasCall = true and content = "" }

  /**
   * Holds if this is the end point of type tracking, that is, if the tracked
   * value is not stored in any content.
   */
  predicate end() { content = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Holds if this type has been tracked into a call.
   */
  boolean hasCall() { result = hasCall }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets the content associated with this type tracker.
   */
  string getContent() { result = content }

  /**
   * Gets a type tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type is not associated to a piece of content.
   */
  TypeTracker continue() { content = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  pragma[inline]
  TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
  }

  /**
   * Gets the summary that corresponds to having taken a forwards
   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   *
   * Unlike `TypeTracker::step`, this predicate exposes all edges
   * in the flow graph, and not just the edges between `LocalSourceNode`s.
   * It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeTracker t) {
   *   t.start() and
   *   result = < source of myType >
   *   or
   *   exists (DataFlow::TypeTracker t2 |
   *     t = t2.smallstep(myType(t2), result)
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
    // Type-preserving steps need no separate case: `StepSummary::smallstep`
    // reports them as `LevelStep()`, and appending a level step yields `this`.
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      result = this.append(summary)
    )
  }
}
243+
244+
/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
  /**
   * Gets a type tracker for which `end()` holds, that is, a valid end point
   * of type tracking.
   */
  TypeTracker end() { exists(TypeTracker t | t.end() | result = t) }
}
251+
252+
/**
 * The underlying representation of a `TypeBackTracker`: a flag recording
 * whether a return step has been taken, paired with an optional content name.
 */
private newtype TTypeBackTracker =
  MkTypeBackTracker(Boolean viaReturn, OptionalContentName contentName)
253+
254+
/**
 * Summary of the steps needed to back-track a use of a value to a given dataflow node.
 *
 * This can for example be used to track callbacks that are passed to a certain API,
 * so we can model specific parameters of that callback as having a certain type.
 *
 * Note that type back-tracking does not provide a source/sink relation, that is,
 * it may determine that a node will be used in an API call somewhere, but it won't
 * determine exactly where that use was, or the path that led to the use.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for back-tracking some callback type `myCallback`:
 *
 * ```
 * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
 *   t.start() and
 *   result = (< some API call >).getArgument(< n >).getALocalSource()
 *   or
 *   exists (DataFlow::TypeBackTracker t2 |
 *     result = myCallback(t2).backtrack(t2, t)
 *   )
 * }
 *
 * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
 * ```
 *
 * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
 * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
 * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
 */
class TypeBackTracker extends TTypeBackTracker {
  /** Whether a return step has been taken while back-tracking. */
  Boolean hasReturn;
  /**
   * The name of the content associated with this back-tracker, or `""` if there is none.
   *
   * Declared as `OptionalContentName` to match `MkTypeBackTracker` and `TypeTracker`.
   */
  OptionalContentName content;

  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }

  /** Gets the summary resulting from prepending `step` to this type-tracking summary. */
  TypeBackTracker prepend(StepSummary step) {
    // A level step leaves the summary unchanged.
    step = LevelStep() and result = this
    or
    // A call step is only allowed while no return step has been taken.
    step = CallStep() and hasReturn = false and result = this
    or
    // A return step sets the return flag and keeps the content.
    step = ReturnStep() and result = MkTypeBackTracker(true, content)
    or
    // A load is only allowed while no content is associated with this summary.
    exists(string p |
      step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
    )
    or
    // A store into the content associated with this summary clears the content.
    step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withReturn, string withContent |
      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
      (if content != "" then withContent = " with content " + content else withContent = "") and
      result = "type back-tracker " + withReturn + " return steps" + withContent
    )
  }

  /**
   * Holds if this is the starting point of type back-tracking.
   */
  predicate start() { hasReturn = false and content = "" }

  /**
   * Holds if this is the end point of type back-tracking, that is, if no
   * content is associated with this summary.
   */
  predicate end() { content = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Holds if this type has been back-tracked into a call through return edge.
   */
  boolean hasReturn() { result = hasReturn }

  /**
   * Gets a type tracker that starts where this one has left off to allow continued
   * tracking.
   *
   * This predicate is only defined if the type has not been tracked into a piece of content.
   */
  TypeBackTracker continue() { content = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   */
  pragma[inline]
  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(nodeFrom, nodeTo, summary) and
      this = result.prepend(summary)
    )
  }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   *
   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
   * in the flowgraph, and not just the edges between
   * `LocalSourceNode`s. It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
   *   t.start() and
   *   result = < some API call >.getArgument(< n >)
   *   or
   *   exists (DataFlow::TypeBackTracker t2 |
   *     t2 = t.smallstep(result, myType(t2))
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeBackTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
    // Type-preserving steps need no separate case: `StepSummary::smallstep`
    // reports them as `LevelStep()`, and prepending a level step to `result`
    // yields `result` itself, so `this = result` follows.
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      this = result.prepend(summary)
    )
  }
}
386+
387+
/** Provides predicates for implementing custom `TypeBackTracker`s. */
module TypeBackTracker {
  /**
   * Gets a type back-tracker for which `end()` holds, that is, a valid end
   * point of type back-tracking.
   */
  TypeBackTracker end() { exists(TypeBackTracker t | t.end() | result = t) }
}

0 commit comments

Comments
 (0)