Skip to content

Commit 358c741

Browse files
authored
Merge pull request #16490 from yoff/python/rich-type-column-MaD
Python: Rich `type` column in MaD
2 parents bddc69e + 0ecefd6 commit 358c741

File tree

5 files changed

+75
-12
lines changed

5 files changed

+75
-12
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: feature
3+
---
4+
* A Python MaD (Models as Data) row may now contain a dotted path in the `type` column. Like in Ruby, a path to a class will refer to instances of that class. This means that the summary `["foo", "Member[MyClass].Instance.Member[instance_method]", "Argument[0]", "ReturnValue", "value"]` can now be written `["foo.MyClass", "Member[instance_method]", "Argument[0]", "ReturnValue", "value"]`. To refer to an actual class, one may add a `!` at the end of the path.

python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@ import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
2929
/**
3030
* Holds if models describing `type` may be relevant for the analysis of this database.
3131
*/
32-
predicate isTypeUsed(string type) { API::moduleImportExists(type) }
32+
bindingset[type]
33+
predicate isTypeUsed(string type) {
34+
// If `type` is a path, then it is the first component that should be imported.
35+
API::moduleImportExists(type.splitAt(".", 0))
36+
}
3337

3438
/**
3539
* Holds if `type` can be obtained from an instance of `otherType` due to
@@ -41,8 +45,59 @@ predicate hasImplicitTypeModel(string type, string otherType) { none() }
4145
bindingset[type, path]
4246
API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { none() }
4347

48+
/**
49+
* Holds if `type` = `typePath`+`suffix` and `suffix` is either empty or "!".
50+
*/
51+
bindingset[type]
52+
private predicate parseType(string type, string typePath, string suffix) {
53+
exists(string regexp |
54+
regexp = "([^!]+)(!|)" and
55+
typePath = type.regexpCapture(regexp, 1) and
56+
suffix = type.regexpCapture(regexp, 2)
57+
)
58+
}
59+
60+
private predicate parseRelevantType(string type, string typePath, string suffix) {
61+
isRelevantType(type) and
62+
parseType(type, typePath, suffix)
63+
}
64+
65+
pragma[nomagic]
66+
private string getTypePathComponent(string typePath, int n) {
67+
parseRelevantType(_, typePath, _) and
68+
result = typePath.splitAt(".", n)
69+
}
70+
71+
private int getNumTypePathComponents(string typePath) {
72+
result = strictcount(int n | exists(getTypePathComponent(typePath, n)))
73+
}
74+
75+
private API::Node getNodeFromTypePath(string typePath, int n) {
76+
n = 1 and
77+
result = API::moduleImport(getTypePathComponent(typePath, 0))
78+
or
79+
result = getNodeFromTypePath(typePath, n - 1).getMember(getTypePathComponent(typePath, n - 1))
80+
}
81+
82+
private API::Node getNodeFromTypePath(string typePath) {
83+
result = getNodeFromTypePath(typePath, getNumTypePathComponents(typePath))
84+
}
85+
4486
/** Gets a Python-specific interpretation of the given `type`. */
45-
API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) }
87+
API::Node getExtraNodeFromType(string type) {
88+
result = API::moduleImport(type)
89+
or
90+
exists(string typePath, string suffix, API::Node node |
91+
parseRelevantType(type, typePath, suffix) and
92+
node = getNodeFromTypePath(typePath)
93+
|
94+
suffix = "!" and
95+
result = node
96+
or
97+
suffix = "" and
98+
result = node.getAnInstance()
99+
)
100+
}
46101

47102
/**
48103
* Gets a Python-specific API graph successor of `node` reachable by resolving `token`.

python/ql/test/library-tests/dataflow/model-summaries/InlineTaintTest.ext.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ extensions:
1818
- ["foo", "Member[MS_spread]", "Argument[0]", "ReturnValue.TupleElement[0]", "value"]
1919
- ["foo", "Member[MS_spread]", "Argument[1]", "ReturnValue.TupleElement[1]", "value"]
2020
- ["foo", "Member[MS_spread_all]", "Argument[0]", "ReturnValue.TupleElement[0,1]", "value"]
21-
- ["foo", "Member[MS_Class].Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
22-
- ["foo", "Member[MS_Class_transitive].Subclass.Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
23-
- ["foo", "Member[MS_Class].Instance.Member[instance_method]", "Argument[self]", "ReturnValue.TupleElement[0]", "value"]
24-
- ["foo", "Member[MS_Class].Instance.Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
25-
- ["foo", "Member[MS_Class].Instance.Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
21+
- ["foo.MS_Class!", "Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
22+
- ["foo.MS_Class_transitive!", "Subclass.Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
23+
- ["foo.MS_Class_transitive", "Member[instance_method]", "Argument[0]", "ReturnValue", "value"]
24+
- ["foo.MS_Class", "Member[instance_method]", "Argument[self]", "ReturnValue.TupleElement[0]", "value"]
25+
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
26+
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
2627
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]

python/ql/test/library-tests/dataflow/model-summaries/NormalDataflowTest.ext.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ extensions:
1818
- ["foo", "Member[MS_spread]", "Argument[0]", "ReturnValue.TupleElement[0]", "value"]
1919
- ["foo", "Member[MS_spread]", "Argument[1]", "ReturnValue.TupleElement[1]", "value"]
2020
- ["foo", "Member[MS_spread_all]", "Argument[0]", "ReturnValue.TupleElement[0,1]", "value"]
21-
- ["foo", "Member[MS_Class].Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
22-
- ["foo", "Member[MS_Class_transitive].Subclass.Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
23-
- ["foo", "Member[MS_Class].Instance.Member[instance_method]", "Argument[self]", "ReturnValue.TupleElement[0]", "value"]
24-
- ["foo", "Member[MS_Class].Instance.Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
25-
- ["foo", "Member[MS_Class].Instance.Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
21+
- ["foo.MS_Class!", "Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
22+
- ["foo.MS_Class_transitive!", "Subclass.Call", "Argument[0, x:]", "ReturnValue.Attribute[config]", "value"]
23+
- ["foo.MS_Class_transitive", "Member[instance_method]", "Argument[0]", "ReturnValue", "value"]
24+
- ["foo.MS_Class", "Member[instance_method]", "Argument[self]", "ReturnValue.TupleElement[0]", "value"]
25+
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
26+
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
2627
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]

python/ql/test/library-tests/dataflow/model-summaries/model_summaries.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ class C(MS_Class_transitive):
140140
subclass_via_kw = C(x = SOURCE)
141141
SINK(subclass_via_kw.config) # $ flow="SOURCE, l:-1 -> subclass_via_kw.config"
142142

143+
SINK(subclass_via_kw.instance_method(SOURCE)) # $ flow="SOURCE -> subclass_via_kw.instance_method(..)"
144+
143145
class D(MS_Class_transitive):
144146
def __init__(x, y):
145147
# special handling of y

0 commit comments

Comments
 (0)