Skip to content

Commit 0b8e908

Browse files
committed
Python: fix def nodes for subscript
We were using `getMember` for dictionaries, these are now getIndex Also add convenience predicate for string keys
1 parent 99b9101 commit 0b8e908

File tree

8 files changed

+139
-29
lines changed

8 files changed

+139
-29
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
5+
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
6+
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restircted to subscripts where the index matches the `index` parameter.
7+
* Added convenience predicate `getSubscript("key")` to obtain a subscript at a specific index, when the index happens to be a statically known string.

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,60 @@ module API {
249249
*/
250250
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
251251

252+
/**
253+
* Gets a node representing an index of a subscript of this node.
254+
* For example, in `obj[x]`, `x` is an index of `obj`.
255+
*/
256+
Node getIndex() { result = this.getASuccessor(Label::index()) }
257+
258+
/**
259+
* Gets a node representing a subscript of this node at (string) index `key`.
260+
* This requires that the index can be statically determined.
261+
*
262+
* For example, the subscripts of `a` and `b` below would be found using
263+
* the index `foo`:
264+
* ```py
265+
* a["foo"]
266+
* x = "foo" if cond else "bar"
267+
* b[x]
268+
* ```
269+
*/
270+
Node getSubscript(string key) {
271+
exists(API::Node index | result = this.getSubscriptAt(index) |
272+
key = index.getAValueReachingSink().asExpr().(PY::StrConst).getText()
273+
)
274+
}
275+
276+
/**
277+
* Gets a node representing a subscript of this node at index `index`.
278+
*/
279+
Node getSubscriptAt(API::Node index) {
280+
result = this.getASubscript() and
281+
index = this.getIndex() and
282+
(
283+
// subscripting
284+
exists(PY::SubscriptNode subscript |
285+
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
286+
subscript.getIndex() = index.asSink().asCfgNode()
287+
|
288+
// reading
289+
subscript = result.asSource().asCfgNode()
290+
or
291+
// writing
292+
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
293+
)
294+
or
295+
// dictionary literals
296+
exists(PY::Dict dict, PY::KeyValuePair item |
297+
dict = this.getAValueReachingSink().asExpr() and
298+
dict.getItem(_) = item and
299+
item.getKey() = index.asSink().asExpr()
300+
|
301+
item.getValue() = result.asSink().asExpr()
302+
)
303+
)
304+
}
305+
252306
/**
253307
* Gets a string representation of the lexicographically least among all shortest access paths
254308
* from the root to this node.
@@ -405,7 +459,7 @@ module API {
405459
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
406460

407461
/**
408-
* An `CallCfgNode` that is connected to the API graph.
462+
* A `CallCfgNode` that is connected to the API graph.
409463
*
410464
* Can be used to reason about calls to an external API in which the correlation between
411465
* parameters and/or return values must be retained.
@@ -694,12 +748,24 @@ module API {
694748
rhs = aw.getValue()
695749
)
696750
or
697-
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
751+
// dictionary literals
698752
exists(PY::Dict dict, PY::KeyValuePair item |
699753
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
700-
dict.getItem(_) = item and
701-
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
702-
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
754+
dict.getItem(_) = item
755+
|
756+
// from `x` to `{ "key": x }`
757+
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue() and
758+
lbl = Label::subscript()
759+
or
760+
// from `"key"` to `{ "key": x }`
761+
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getKey() and
762+
lbl = Label::index()
763+
)
764+
or
765+
// list literals, from `x` to `[x]`
766+
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
767+
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
768+
lbl = Label::subscript()
703769
)
704770
or
705771
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
@@ -720,6 +786,20 @@ module API {
720786
lbl = Label::memberFromRef(aw)
721787
)
722788
or
789+
// subscripting
790+
exists(DataFlow::LocalSourceNode src, DataFlow::Node subscript, DataFlow::Node index |
791+
use(base, src) and
792+
subscript = trackUseNode(src).getSubscript(index)
793+
|
794+
// from `x` to a definition of `x[...]`
795+
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
796+
lbl = Label::subscript()
797+
or
798+
// from `x` to `"key"` in `x["key"]`
799+
rhs = index and
800+
lbl = Label::index()
801+
)
802+
or
723803
exists(EntryPoint entry |
724804
base = root() and
725805
lbl = Label::entryPoint(entry) and
@@ -757,7 +837,8 @@ module API {
757837
or
758838
// Subscripting a node that is a use of `base`
759839
lbl = Label::subscript() and
760-
ref = pred.getASubscript()
840+
ref = pred.getSubscript(_) and
841+
ref.asCfgNode().isLoad()
761842
or
762843
// Subclassing a node
763844
lbl = Label::subclass() and
@@ -973,8 +1054,7 @@ module API {
9731054
member = any(DataFlow::AttrRef pr).getAttributeName() or
9741055
exists(Builtins::likelyBuiltin(member)) or
9751056
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
976-
Impl::prefix_member(_, member, _) or
977-
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
1057+
Impl::prefix_member(_, member, _)
9781058
} or
9791059
MkLabelUnknownMember() or
9801060
MkLabelParameter(int i) {
@@ -992,6 +1072,7 @@ module API {
9921072
MkLabelSubclass() or
9931073
MkLabelAwait() or
9941074
MkLabelSubscript() or
1075+
MkLabelIndex() or
9951076
MkLabelEntryPoint(EntryPoint ep)
9961077

9971078
/** A label for a module. */
@@ -1072,6 +1153,11 @@ module API {
10721153
override string toString() { result = "getASubscript()" }
10731154
}
10741155

1156+
/** A label that gets the index of a subscript. */
1157+
class LabelIndex extends ApiLabel, MkLabelIndex {
1158+
override string toString() { result = "getIndex()" }
1159+
}
1160+
10751161
/** A label for entry points. */
10761162
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
10771163
private EntryPoint entry;
@@ -1120,6 +1206,9 @@ module API {
11201206
/** Gets the `subscript` edge label. */
11211207
LabelSubscript subscript() { any() }
11221208

1209+
/** Gets the `subscript` edge label. */
1210+
LabelIndex index() { any() }
1211+
11231212
/** Gets the label going from the root node to the nodes associated with the given entry point. */
11241213
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
11251214
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class LocalSourceNode extends Node {
104104
/**
105105
* Gets a subscript of this node.
106106
*/
107-
Node getASubscript() { Cached::subscript(this, result) }
107+
Node getSubscript(Node index) { Cached::subscript(this, result, index) }
108108

109109
/**
110110
* Gets a call to the method `methodName` on this node.
@@ -249,13 +249,14 @@ private module Cached {
249249
}
250250

251251
/**
252-
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
252+
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript with index/key `index`.
253253
*/
254254
cached
255-
predicate subscript(LocalSourceNode node, CfgNode subscript) {
255+
predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) {
256256
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
257257
node.flowsTo(seq) and
258-
seq.getNode() = subscriptNode.getObject()
258+
seq.getNode() = subscriptNode.getObject() and
259+
index.getNode() = subscriptNode.getIndex()
259260
)
260261
}
261262
}

python/ql/src/experimental/semmle/python/frameworks/Django.qll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,10 @@ private module ExperimentalPrivateDjango {
9191
result = baseClassRef().getReturn().getAMember()
9292
}
9393

94-
/** Gets a reference to a header instance call with `__setitem__`. */
95-
API::Node headerSetItem() {
96-
result = headerInstance() and
97-
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
98-
}
99-
10094
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
101-
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
95+
DjangoResponseSetItemCall() {
96+
this = baseClassRef().getReturn().getMember("__setitem__").getACall()
97+
}
10298

10399
override DataFlow::Node getNameArg() { result = this.getArg(0) }
104100

@@ -109,8 +105,7 @@ private module ExperimentalPrivateDjango {
109105
DataFlow::Node headerInput;
110106

111107
DjangoResponseDefinition() {
112-
this.asCfgNode().(DefinitionNode) =
113-
headerInstance().getAValueReachableFromSource().asCfgNode() and
108+
headerInput = headerInstance().asSink() and
114109
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
115110
}
116111

python/ql/test/library-tests/ApiGraphs/py3/deftest1.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ def callback(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").
55

66
foo.bar(callback) #$ def=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("bar").getReturn()
77

8-
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0)
9-
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0).getMember("baz2").getReturn()
8+
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0)
9+
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0).getMember("baz2").getReturn()
1010

1111
mydict = {
12-
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c")
13-
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("other")
12+
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
13+
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
1414
}
1515

1616
foo.baz(mydict) #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("baz").getReturn()
@@ -34,11 +34,11 @@ def callback4(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("quack
3434

3535
foo.quack(otherDict.fourth) #$ def=moduleImport("mypkg").getMember("foo").getMember("quack").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("quack").getReturn()
3636

37-
def namedCallback(myName, otherName):
38-
# Using named parameters:
37+
def namedCallback(myName, otherName):
38+
# Using named parameters:
3939
myName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getKeywordParameter("myName").getReturn()
4040
otherName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getKeywordParameter("otherName").getReturn()
41-
# Using numbered parameters:
41+
# Using numbered parameters:
4242
myName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getParameter(0).getReturn()
4343
otherName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getParameter(1).getReturn()
4444

@@ -58,4 +58,4 @@ def recusisionCallback(x):
5858
recursiveDict.rec1 = recursiveDict;
5959
recursiveDict.rec2 = recursiveDict;
6060

61-
foo.rec(recursiveDict); #$ def=moduleImport("mypkg").getMember("foo").getMember("rec").getParameter(0)
61+
foo.rec(recursiveDict); #$ def=moduleImport("mypkg").getMember("foo").getMember("rec").getParameter(0)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
| test_subscript.py:4:11:4:28 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
2+
| test_subscript.py:5:26:5:27 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
3+
| test_subscript.py:6:5:6:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
4+
| test_subscript.py:6:5:6:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
5+
| test_subscript.py:7:5:7:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
6+
| test_subscript.py:7:5:7:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import python
2+
import semmle.python.ApiGraphs
3+
4+
select API::moduleImport("mypkg").getMember("foo").getReturn().getSubscript(["bar", "baz", "qux"])
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import mypkg
2+
3+
def test_subscript():
4+
bar = mypkg.foo()["bar"] #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
5+
mypkg.foo()["baz"] = 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
6+
mypkg.foo()["qux"] += 42 #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
7+
mypkg.foo()["qux"] += 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
8+
mypkg.foo()[mypkg.index] = mypkg.value #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()

0 commit comments

Comments
 (0)