Skip to content

Commit 3149517

Browse files
committed
PS: Add shared files for API graphs.
1 parent 5852fe4 commit 3149517

File tree

2 files changed

+397
-0
lines changed

2 files changed

+397
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/**
2+
* Defines extensible predicates for contributing library models from data extensions.
3+
*/
4+
5+
/**
6+
* Holds if the value at `(type, path)` should be seen as a flow
7+
* source of the given `kind`.
8+
*
9+
* The kind `remote` represents a general remote flow source.
10+
*/
11+
extensible predicate sourceModel(
12+
string type, string path, string kind, QlBuiltins::ExtensionId madId
13+
);
14+
15+
/**
16+
* Holds if the value at `(type, path)` should be seen as a sink
17+
* of the given `kind`.
18+
*/
19+
extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId);
20+
21+
/**
22+
* Holds if in calls to `(type, path)`, the value referred to by `input`
23+
* can flow to the value referred to by `output`.
24+
*
25+
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
26+
* respectively.
27+
*/
28+
extensible predicate summaryModel(
29+
string type, string path, string input, string output, string kind, QlBuiltins::ExtensionId madId
30+
);
31+
32+
/**
33+
* Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`.
34+
* If `kind` is `summary`, the call does not propagate data flow. If `kind` is `source`, the call is not a source.
35+
* If `kind` is `sink`, the call is not a sink.
36+
*/
37+
extensible predicate neutralModel(string type, string path, string kind);
38+
39+
/**
40+
* Holds if `(type2, path)` should be seen as an instance of `type1`.
41+
*/
42+
extensible predicate typeModel(string type1, string type2, string path);
43+
44+
/**
45+
* Holds if `path` can be substituted for a token `TypeVar[name]`.
46+
*/
47+
extensible predicate typeVariableModel(string name, string path);
48+
49+
/**
 * Holds if `model` is the pretty-printed form of the extension tuple identified by `madId`.
 *
 * Only tuples of `sourceModel`, `sinkModel` and `summaryModel` are rendered; the
 * columns are joined with `"; "` and prefixed with the kind of model.
 *
 * This predicate should only be used in tests.
 */
predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) {
  // Sources and sinks share the `(type, path, kind)` shape and differ only in their label.
  exists(string label, string type, string path, string kind |
    sourceModel(type, path, kind, madId) and label = "Source: "
    or
    sinkModel(type, path, kind, madId) and label = "Sink: "
  |
    model = label + type + "; " + path + "; " + kind
  )
  or
  // Summaries additionally carry the `input` and `output` columns.
  exists(string type, string path, string input, string output, string kind |
    summaryModel(type, path, input, output, kind, madId) and
    model = "Summary: " + type + "; " + path + "; " + input + "; " + output + "; " + kind
  )
}
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
/**
2+
* Parts of API graphs that can be shared with other dynamic languages.
3+
*
4+
* Depends on the type-tracking implementation (`TypeTrackingImpl`) for the corresponding language.
5+
*/
6+
7+
private import codeql.util.Location
8+
private import semmle.code.powershell.dataflow.DataFlow
9+
private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl
10+
11+
/**
12+
* The signature to use when instantiating `ApiGraphShared`.
13+
*
14+
* The implementor should define a newtype with at least three branches as follows:
15+
* ```ql
16+
* newtype TApiNode =
17+
* MkForwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
18+
* MkBackwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
19+
* MkSinkNode(Node node) { ... } or
20+
* ...
21+
* ```
22+
*
23+
* The three branches should be exposed through `getForwardNode`, `getBackwardNode`, and `getSinkNode`, respectively.
24+
*/
25+
signature module ApiGraphSharedSig {
26+
/** A node in the API graph. */
27+
class ApiNode {
28+
/** Gets a string representation of this API node. */
29+
string toString();
30+
31+
/** Gets the location associated with this API node, if any. */
32+
Location getLocation();
33+
}
34+
35+
/**
36+
* Gets the forward node with the given type-tracking state.
37+
*
38+
* This node will have outgoing epsilon edges to its type-tracking successors.
39+
*/
40+
ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t);
41+
42+
/**
43+
* Gets the backward node with the given type-tracking state.
44+
*
45+
* This node will have outgoing epsilon edges to its type-tracking predecessors.
46+
*/
47+
ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t);
48+
49+
/**
50+
* Gets the sink node corresponding to `node`.
51+
*
52+
* Since sinks are not generally `LocalSourceNode`s, such nodes are materialised separately in order for
53+
* the API graph to include representatives for sinks. Note that there is no corresponding case for "source"
54+
* nodes as these are represented as forward nodes with initial-state type-trackers.
55+
*
56+
* Sink nodes have outgoing epsilon edges to the backward nodes corresponding to their local sources.
57+
*/
58+
ApiNode getSinkNode(DataFlow::Node node);
59+
60+
/**
61+
* Holds if a language-specific epsilon edge `pred -> succ` should be generated.
62+
*/
63+
predicate specificEpsilonEdge(ApiNode pred, ApiNode succ);
64+
}
65+
66+
/**
67+
* Parts of API graphs that can be shared between language implementations.
68+
*/
69+
module ApiGraphShared<ApiGraphSharedSig S> {
70+
private import S
71+
72+
/** Gets a local source of `node`. */
73+
bindingset[node]
74+
pragma[inline_late]
75+
DataFlow::LocalSourceNode getALocalSourceStrict(DataFlow::Node node) {
76+
result = node.getALocalSource()
77+
}
78+
79+
cached
80+
private module Cached {
81+
/**
82+
* Holds if there is an epsilon edge `pred -> succ`.
83+
*
84+
* This relation is reflexive, so `fastTC` produces the equivalent of a reflexive, transitive closure.
*
* Edges come from four places: type-tracking steps (in both the forward and the
* backward direction), sink nodes to the backward start nodes of their local sources,
* the language-specific `specificEpsilonEdge`, and the identity on API nodes.
85+
*/
86+
pragma[noopt]
87+
cached
88+
predicate epsilonEdge(ApiNode pred, ApiNode succ) {
89+
// Case 1: a type-tracking step `predNode -> succNode` described by `summary`.
exists(
90+
StepSummary summary, DataFlow::LocalSourceNode predNode, TypeTracker predState,
91+
DataFlow::LocalSourceNode succNode, TypeTracker succState
92+
|
93+
step(predNode, succNode, summary)
94+
|
95+
// Forward direction: the edge follows the step, and the state is extended with `summary`.
pred = getForwardNode(predNode, predState) and
96+
succState = append(predState, summary) and
97+
succ = getForwardNode(succNode, succState)
98+
or
99+
// Backward direction: same step and state computation, but the API-graph edge is reversed.
succ = getBackwardNode(predNode, predState) and // swap order for backward flow
100+
succState = append(predState, summary) and
101+
pred = getBackwardNode(succNode, succState) // swap order for backward flow
102+
)
103+
or
104+
// Case 2: a sink node steps to the backward start node of each of its local sources.
exists(DataFlow::Node sink, DataFlow::LocalSourceNode localSource |
105+
pred = getSinkNode(sink) and
106+
localSource = getALocalSourceStrict(sink) and
107+
succ = getBackwardStartNode(localSource)
108+
)
109+
or
110+
// Case 3: edges contributed by the language-specific instantiation.
specificEpsilonEdge(pred, succ)
111+
or
112+
// Case 4: reflexive edge on every API node.
// NOTE(review): the explicit `instanceof` presumably binds `succ` because `pragma[noopt]`
// is in effect - confirm before simplifying.
succ instanceof ApiNode and
113+
succ = pred
114+
}
115+
116+
/**
117+
* Holds if `pred` can reach `succ` by zero or more epsilon edges.
118+
*/
119+
cached
120+
predicate epsilonStar(ApiNode pred, ApiNode succ) = fastTC(epsilonEdge/2)(pred, succ)
121+
122+
/** Gets the API node to use when starting forward flow from `source` */
123+
cached
124+
ApiNode forwardStartNode(DataFlow::LocalSourceNode source) {
125+
result = getForwardNode(source, noContentTypeTracker(false))
126+
}
127+
128+
/** Gets the API node to use when starting backward flow from `sink` */
129+
cached
130+
ApiNode backwardStartNode(DataFlow::LocalSourceNode sink) {
131+
// There is backward flow A->B iff there is forward flow B->A.
132+
// The starting point of backward flow corresponds to the end of a forward flow, and vice versa.
133+
result = getBackwardNode(sink, noContentTypeTracker(_))
134+
}
135+
136+
/** Gets `node` as a data flow source. */
137+
cached
138+
DataFlow::LocalSourceNode asSourceCached(ApiNode node) { node = forwardEndNode(result) }
139+
140+
/** Gets `node` as a data flow sink. */
141+
cached
142+
DataFlow::Node asSinkCached(ApiNode node) { node = getSinkNode(result) }
143+
}
144+
145+
private import Cached
146+
147+
/** Gets an API node corresponding to the end of forward-tracking to `localSource`. */
148+
pragma[nomagic]
149+
private ApiNode forwardEndNode(DataFlow::LocalSourceNode localSource) {
150+
result = getForwardNode(localSource, noContentTypeTracker(_))
151+
}
152+
153+
/** Gets an API node corresponding to the end of backtracking to `localSource`. */
154+
pragma[nomagic]
155+
private ApiNode backwardEndNode(DataFlow::LocalSourceNode localSource) {
156+
result = getBackwardNode(localSource, noContentTypeTracker(false))
157+
}
158+
159+
/** Gets a node reachable from `node` by zero or more epsilon edges, including `node` itself. */
160+
bindingset[node]
161+
pragma[inline_late]
162+
ApiNode getAnEpsilonSuccessorInline(ApiNode node) { epsilonStar(node, result) }
163+
164+
/** Gets `node` as a data flow sink. */
165+
bindingset[node]
166+
pragma[inline_late]
167+
DataFlow::Node asSinkInline(ApiNode node) { result = asSinkCached(node) }
168+
169+
/** Gets `node` as a data flow source. */
170+
bindingset[node]
171+
pragma[inline_late]
172+
DataFlow::LocalSourceNode asSourceInline(ApiNode node) { result = asSourceCached(node) }
173+
174+
/**
 * Gets a data flow node that a value originating at `source` can flow to.
 *
 * This follows zero or more epsilon edges from `source`, interprets the API node
 * reached as a data flow source, and then follows local flow from it.
 */
bindingset[source]
pragma[inline_late]
DataFlow::Node getAValueReachableFromSourceInline(ApiNode source) {
  exists(ApiNode reached, DataFlow::LocalSourceNode origin |
    reached = getAnEpsilonSuccessorInline(source) and
    origin = asSourceInline(reached) and
    origin.flowsTo(pragma[only_bind_into](result))
  )
}
183+
184+
/** Gets a value that can reach `sink`. */
185+
bindingset[sink]
186+
pragma[inline_late]
187+
DataFlow::Node getAValueReachingSinkInline(ApiNode sink) {
188+
backwardStartNode(result) = getAnEpsilonSuccessorInline(sink)
189+
}
190+
191+
/**
192+
* Gets the starting point for forward-tracking at `node`.
193+
*
194+
* Should be used to obtain the successor of an edge when constructing labelled edges.
195+
*/
196+
bindingset[node]
197+
pragma[inline_late]
198+
ApiNode getForwardStartNode(DataFlow::Node node) { result = forwardStartNode(node) }
199+
200+
/**
201+
* Gets the starting point of backtracking from `node`.
202+
*
203+
* Should be used to obtain the successor of an edge when constructing labelled edges.
204+
*/
205+
bindingset[node]
206+
pragma[inline_late]
207+
ApiNode getBackwardStartNode(DataFlow::Node node) { result = backwardStartNode(node) }
208+
209+
/**
210+
* Gets a possible ending point of forward-tracking at `node`.
211+
*
212+
* Should be used to obtain the predecessor of an edge when constructing labelled edges.
213+
*
214+
* This is not backed by a `cached` predicate, and should only be used for materialising `cached`
215+
* predicates in the API graph implementation - it should not be called in later stages.
216+
*/
217+
bindingset[node]
218+
pragma[inline_late]
219+
ApiNode getForwardEndNode(DataFlow::Node node) { result = forwardEndNode(node) }
220+
221+
/**
222+
* Gets a possible ending point of backtracking to `node`.
223+
*
224+
* Should be used to obtain the predecessor of an edge when constructing labelled edges.
225+
*
226+
* This is not backed by a `cached` predicate, and should only be used for materialising `cached`
227+
* predicates in the API graph implementation - it should not be called in later stages.
228+
*/
229+
bindingset[node]
230+
pragma[inline_late]
231+
ApiNode getBackwardEndNode(DataFlow::Node node) { result = backwardEndNode(node) }
232+
233+
/**
234+
* Gets a possible ending point of forward or backward tracking at `node`.
235+
*
236+
* Should be used to obtain the predecessor of an edge generated from store or load edges.
237+
*/
238+
bindingset[node]
239+
pragma[inline_late]
240+
ApiNode getForwardOrBackwardEndNode(DataFlow::Node node) {
241+
result = getForwardEndNode(node) or result = getBackwardEndNode(node)
242+
}
243+
244+
/** Gets an API node for tracking forward starting at `node`. This is the implementation of `DataFlow::LocalSourceNode.track()` */
245+
bindingset[node]
246+
pragma[inline_late]
247+
ApiNode getNodeForForwardTracking(DataFlow::Node node) { result = forwardStartNode(node) }
248+
249+
/** Gets an API node for backtracking starting at `node`. The implementation of `DataFlow::Node.backtrack()`. */
250+
bindingset[node]
251+
pragma[inline_late]
252+
ApiNode getNodeForBacktracking(DataFlow::Node node) {
253+
result = getBackwardStartNode(getALocalSourceStrict(node))
254+
}
255+
256+
/** Parts of the shared module to be re-exported by the user-facing `API` module. */
257+
module Public {
258+
/**
259+
* The signature to use when instantiating the `ExplainFlow` module.
260+
*/
261+
signature module ExplainFlowSig {
262+
/** Holds if `node` should be a source. */
263+
predicate isSource(ApiNode node);
264+
265+
/** Holds if `node` should be a sink. */
266+
default predicate isSink(ApiNode node) { any() }
267+
268+
/** Holds if `node` should be skipped in the generated paths. */
269+
default predicate isHidden(ApiNode node) { none() }
270+
}
271+
272+
/**
 * Module to help debug and visualize the data flows underlying API graphs.
 *
 * It exports the query predicates (`nodes`, `edges`, `problems`) of a path-problem
 * query, and should be imported into the top-level of such a query.
 *
 * The module argument specifies the source and sink API nodes. The resulting query
 * shows paths of epsilon edges leading from a source to a sink; only epsilon edges
 * are visualized.
 *
 * To condense the output a bit, paths in which the source and sink are the same node are omitted.
 */
module ExplainFlow<ExplainFlowSig T> {
  private import T

  /**
   * Gets a node that is relevant for the visualization: either a sink that is reachable
   * from some source by zero or more epsilon edges, or a node with an epsilon edge to
   * a node that is itself relevant.
   */
  private ApiNode relevantNode() {
    exists(ApiNode source | isSource(source) |
      result = getAnEpsilonSuccessorInline(source)
    ) and
    isSink(result)
    or
    exists(ApiNode next | next = relevantNode() | epsilonEdge(result, next))
  }

  /** Holds if `node` is part of the graph to visualize, that is, it is relevant and not hidden. */
  query predicate nodes(ApiNode node) { not isHidden(node) and node = relevantNode() }

  /** Holds if `pred -> succ` is an epsilon edge between relevant nodes that ends in a hidden node. */
  private predicate edgeToHiddenNode(ApiNode pred, ApiNode succ) {
    pred = relevantNode() and
    succ = relevantNode() and
    isHidden(succ) and
    epsilonEdge(pred, succ)
  }

  /**
   * Holds if `pred -> succ` is an edge in the graph to visualize.
   *
   * Chains of edges through hidden nodes are collapsed: `succ` is reached from `pred` by
   * zero or more edges into hidden nodes followed by a single epsilon edge.
   */
  query predicate edges(ApiNode pred, ApiNode succ) {
    exists(ApiNode via |
      edgeToHiddenNode*(pred, via) and
      epsilonEdge(via, succ)
    ) and
    nodes(pred) and
    nodes(succ)
  }

  /** Holds for each source/sink pair to visualize in the graph; the alert is placed at the sink. */
  query predicate problems(
    ApiNode location, ApiNode sourceNode, ApiNode sinkNode, string message
  ) {
    isSource(sourceNode) and
    nodes(sourceNode) and
    isSink(sinkNode) and
    nodes(sinkNode) and
    // Trivial paths from a node to itself are omitted.
    sourceNode != sinkNode and
    sinkNode = getAnEpsilonSuccessorInline(sourceNode) and
    message = "Node flows here" and
    location = sinkNode
  }
}
327+
}
328+
}

0 commit comments

Comments
 (0)