Skip to content

Commit f5b2eb9

Browse files
authored
Merge pull request github#10783 from yoff/python/subscript-nodes
Python: API graph improvements for subscripts
2 parents 0281bfe + 2a56fb5 commit f5b2eb9

File tree

11 files changed

+187
-103
lines changed

11 files changed

+187
-103
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
5+
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
6+
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restricted to subscripts where the index matches the `index` parameter.
7+
* Added convenience predicate `getSubscript("key")` to obtain a subscript at a specific index, when the index happens to be a statically known string.

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 104 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,60 @@ module API {
249249
*/
250250
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
251251

252+
/**
253+
* Gets a node representing an index of a subscript of this node.
254+
* For example, in `obj[x]`, `x` is an index of `obj`.
255+
*/
256+
Node getIndex() { result = this.getASuccessor(Label::index()) }
257+
258+
/**
259+
* Gets a node representing a subscript of this node at (string) index `key`.
260+
* This requires that the index can be statically determined.
261+
*
262+
* For example, the subscripts of `a` and `b` below would be found using
263+
* the index `foo`:
264+
* ```py
265+
* a["foo"]
266+
* x = "foo" if cond else "bar"
267+
* b[x]
268+
* ```
269+
*/
270+
Node getSubscript(string key) {
271+
exists(API::Node index | result = this.getSubscriptAt(index) |
272+
key = index.getAValueReachingSink().asExpr().(PY::StrConst).getText()
273+
)
274+
}
275+
276+
/**
277+
* Gets a node representing a subscript of this node at index `index`.
278+
*/
279+
Node getSubscriptAt(API::Node index) {
280+
result = this.getASubscript() and
281+
index = this.getIndex() and
282+
(
283+
// subscripting
284+
exists(PY::SubscriptNode subscript |
285+
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
286+
subscript.getIndex() = index.asSink().asCfgNode()
287+
|
288+
// reading
289+
subscript = result.asSource().asCfgNode()
290+
or
291+
// writing
292+
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
293+
)
294+
or
295+
// dictionary literals
296+
exists(PY::Dict dict, PY::KeyValuePair item |
297+
dict = this.getAValueReachingSink().asExpr() and
298+
dict.getItem(_) = item and
299+
item.getKey() = index.asSink().asExpr()
300+
|
301+
item.getValue() = result.asSink().asExpr()
302+
)
303+
)
304+
}
305+
252306
/**
253307
* Gets a string representation of the lexicographically least among all shortest access paths
254308
* from the root to this node.
@@ -405,7 +459,7 @@ module API {
405459
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
406460

407461
/**
408-
* An `CallCfgNode` that is connected to the API graph.
462+
* A `CallCfgNode` that is connected to the API graph.
409463
*
410464
* Can be used to reason about calls to an external API in which the correlation between
411465
* parameters and/or return values must be retained.
@@ -694,12 +748,31 @@ module API {
694748
rhs = aw.getValue()
695749
)
696750
or
697-
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
751+
// dictionary literals
698752
exists(PY::Dict dict, PY::KeyValuePair item |
699753
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
700-
dict.getItem(_) = item and
701-
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
702-
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
754+
dict.getItem(_) = item
755+
|
756+
// from `x` to `{ "key": x }`
757+
// TODO: once convenient, this should be done at a higher level than the AST,
758+
// at least at the CFG layer, to take splitting into account.
759+
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue() and
760+
lbl = Label::subscript()
761+
or
762+
// from `"key"` to `{ "key": x }`
763+
// TODO: once convenient, this should be done at a higher level than the AST,
764+
// at least at the CFG layer, to take splitting into account.
765+
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getKey() and
766+
lbl = Label::index()
767+
)
768+
or
769+
// list literals, from `x` to `[x]`
770+
// TODO: once convenient, this should be done at a higher level than the AST,
771+
// at least at the CFG layer, to take splitting into account.
772+
// Also consider `SequenceNode for generality.
773+
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
774+
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
775+
lbl = Label::subscript()
703776
)
704777
or
705778
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
@@ -720,6 +793,20 @@ module API {
720793
lbl = Label::memberFromRef(aw)
721794
)
722795
or
796+
// subscripting
797+
exists(DataFlow::LocalSourceNode src, DataFlow::Node subscript, DataFlow::Node index |
798+
use(base, src) and
799+
subscript = trackUseNode(src).getSubscript(index)
800+
|
801+
// from `x` to a definition of `x[...]`
802+
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
803+
lbl = Label::subscript()
804+
or
805+
// from `x` to `"key"` in `x["key"]`
806+
rhs = index and
807+
lbl = Label::index()
808+
)
809+
or
723810
exists(EntryPoint entry |
724811
base = root() and
725812
lbl = Label::entryPoint(entry) and
@@ -757,7 +844,8 @@ module API {
757844
or
758845
// Subscripting a node that is a use of `base`
759846
lbl = Label::subscript() and
760-
ref = pred.getASubscript()
847+
ref = pred.getSubscript(_) and
848+
ref.asCfgNode().isLoad()
761849
or
762850
// Subclassing a node
763851
lbl = Label::subclass() and
@@ -973,8 +1061,7 @@ module API {
9731061
member = any(DataFlow::AttrRef pr).getAttributeName() or
9741062
exists(Builtins::likelyBuiltin(member)) or
9751063
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
976-
Impl::prefix_member(_, member, _) or
977-
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
1064+
Impl::prefix_member(_, member, _)
9781065
} or
9791066
MkLabelUnknownMember() or
9801067
MkLabelParameter(int i) {
@@ -992,6 +1079,7 @@ module API {
9921079
MkLabelSubclass() or
9931080
MkLabelAwait() or
9941081
MkLabelSubscript() or
1082+
MkLabelIndex() or
9951083
MkLabelEntryPoint(EntryPoint ep)
9961084

9971085
/** A label for a module. */
@@ -1072,6 +1160,11 @@ module API {
10721160
override string toString() { result = "getASubscript()" }
10731161
}
10741162

1163+
/** A label that gets the index of a subscript. */
1164+
class LabelIndex extends ApiLabel, MkLabelIndex {
1165+
override string toString() { result = "getIndex()" }
1166+
}
1167+
10751168
/** A label for entry points. */
10761169
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
10771170
private EntryPoint entry;
@@ -1120,6 +1213,9 @@ module API {
11201213
/** Gets the `subscript` edge label. */
11211214
LabelSubscript subscript() { any() }
11221215

1216+
/** Gets the `subscript` edge label. */
1217+
LabelIndex index() { any() }
1218+
11231219
/** Gets the label going from the root node to the nodes associated with the given entry point. */
11241220
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
11251221
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class LocalSourceNode extends Node {
104104
/**
105105
* Gets a subscript of this node.
106106
*/
107-
Node getASubscript() { Cached::subscript(this, result) }
107+
Node getSubscript(Node index) { Cached::subscript(this, result, index) }
108108

109109
/**
110110
* Gets a call to the method `methodName` on this node.
@@ -249,13 +249,14 @@ private module Cached {
249249
}
250250

251251
/**
252-
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
252+
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript with index/key `index`.
253253
*/
254254
cached
255-
predicate subscript(LocalSourceNode node, CfgNode subscript) {
255+
predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) {
256256
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
257257
node.flowsTo(seq) and
258-
seq.getNode() = subscriptNode.getObject()
258+
seq.getNode() = subscriptNode.getObject() and
259+
index.getNode() = subscriptNode.getIndex()
259260
)
260261
}
261262
}

python/ql/lib/semmle/python/frameworks/Aiohttp.qll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -621,15 +621,12 @@ module AiohttpWebModel {
621621
DataFlow::Node value;
622622

623623
AiohttpResponseCookieSubscriptWrite() {
624-
exists(SubscriptNode subscript |
624+
exists(API::Node i |
625+
value = aiohttpResponseInstance().getMember("cookies").getSubscriptAt(i).asSink() and
626+
index = i.asSink() and
625627
// To give `this` a value, we need to choose between either LHS or RHS,
626-
// and just go with the LHS
627-
this.asCfgNode() = subscript
628-
|
629-
subscript.getObject() =
630-
aiohttpResponseInstance().getMember("cookies").getAValueReachableFromSource().asCfgNode() and
631-
value.asCfgNode() = subscript.(DefinitionNode).getValue() and
632-
index.asCfgNode() = subscript.getIndex()
628+
// and just go with the RHS as it is readily available
629+
this = value
633630
)
634631
}
635632

python/ql/src/experimental/semmle/python/frameworks/Django.qll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,10 @@ private module ExperimentalPrivateDjango {
9191
result = baseClassRef().getReturn().getAMember()
9292
}
9393

94-
/** Gets a reference to a header instance call with `__setitem__`. */
95-
API::Node headerSetItem() {
96-
result = headerInstance() and
97-
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
98-
}
99-
10094
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
101-
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
95+
DjangoResponseSetItemCall() {
96+
this = baseClassRef().getReturn().getMember("__setitem__").getACall()
97+
}
10298

10399
override DataFlow::Node getNameArg() { result = this.getArg(0) }
104100

@@ -109,8 +105,7 @@ private module ExperimentalPrivateDjango {
109105
DataFlow::Node headerInput;
110106

111107
DjangoResponseDefinition() {
112-
this.asCfgNode().(DefinitionNode) =
113-
headerInstance().getAValueReachableFromSource().asCfgNode() and
108+
headerInput = headerInstance().asSink() and
114109
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
115110
}
116111

python/ql/src/experimental/semmle/python/frameworks/Sendgrid.qll

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ private module Sendgrid {
2626
}
2727

2828
/** Gets a reference to a `SendGridAPIClient` instance call with `send` or `post`. */
29-
private DataFlow::CallCfgNode sendgridApiSendCall() {
29+
private API::CallNode sendgridApiSendCall() {
3030
result = sendgridApiClient().getMember("send").getACall()
3131
or
3232
result =
@@ -62,7 +62,7 @@ private module Sendgrid {
6262
* * `getFrom()`'s result would be `"[email protected]"`.
6363
* * `getSubject()`'s result would be `"Sending with SendGrid is Fun"`.
6464
*/
65-
private class SendGridMail extends DataFlow::CallCfgNode, EmailSender::Range {
65+
private class SendGridMail extends API::CallNode, EmailSender::Range {
6666
SendGridMail() { this = sendgridApiSendCall() }
6767

6868
private DataFlow::CallCfgNode getMailCall() {
@@ -118,40 +118,28 @@ private module Sendgrid {
118118
or
119119
result = this.sendgridWrite("html_content")
120120
or
121-
exists(KeyValuePair content, Dict generalDict, KeyValuePair typePair, KeyValuePair valuePair |
122-
content.getKey().(StrConst).getText() = "content" and
123-
content.getValue().(List).getAnElt() = generalDict and
124-
// declare KeyValuePairs keys and values
125-
typePair.getKey().(StrConst).getText() = "type" and
126-
typePair.getValue().(StrConst).getText() = ["text/html", "text/x-amp-html"] and
127-
valuePair.getKey().(StrConst).getText() = "value" and
128-
result.asExpr() = valuePair.getValue() and
129-
// correlate generalDict with previously set KeyValuePairs
130-
generalDict.getAnItem() in [typePair, valuePair] and
131-
[this.getArg(0), this.getArgByName("request_body")].getALocalSource().asExpr() =
132-
any(Dict d | d.getAnItem() = content)
121+
exists(API::Node contentElement |
122+
contentElement =
123+
this.getKeywordParameter("request_body").getSubscript("content").getASubscript()
124+
|
125+
contentElement.getSubscript("type").getAValueReachingSink().asExpr().(StrConst).getText() =
126+
["text/html", "text/x-amp-html"] and
127+
result = contentElement.getSubscript("value").getAValueReachingSink()
133128
)
134129
or
135-
exists(KeyValuePair footer, Dict generalDict, KeyValuePair enablePair, KeyValuePair htmlPair |
136-
footer.getKey().(StrConst).getText() = ["footer", "subscription_tracking"] and
137-
footer.getValue() = generalDict and
138-
// check footer is enabled
139-
enablePair.getKey().(StrConst).getText() = "enable" and
140-
exists(enablePair.getValue().(True)) and
141-
// get html content
142-
htmlPair.getKey().(StrConst).getText() = "html" and
143-
result.asExpr() = htmlPair.getValue() and
144-
// correlate generalDict with previously set KeyValuePairs
145-
generalDict.getAnItem() in [enablePair, htmlPair] and
146-
exists(KeyValuePair k |
147-
k.getKey() =
148-
[this.getArg(0), this.getArgByName("request_body")]
149-
.getALocalSource()
150-
.asExpr()
151-
.(Dict)
152-
.getAKey() and
153-
k.getValue() = any(Dict d | d.getAKey() = footer.getKey())
154-
)
130+
exists(API::Node html |
131+
html =
132+
this.getKeywordParameter("request_body")
133+
.getSubscript("tracking_settings")
134+
.getSubscript("subscription_tracking")
135+
or
136+
html =
137+
this.getKeywordParameter("request_body")
138+
.getSubscript("mail_settings")
139+
.getSubscript("footer")
140+
|
141+
html.getSubscript("enable").getAValueReachingSink().asExpr() instanceof True and
142+
result = html.getSubscript("html").getAValueReachingSink()
155143
)
156144
}
157145

0 commit comments

Comments
 (0)