Skip to content

Commit 9c5aff3

Browse files
authored
Merge pull request #12581 from yoff/python/enable-summaries-from-models
python: enable summaries from model
2 parents 65dee80 + 257f991 commit 9c5aff3

File tree

11 files changed

+221
-36
lines changed

11 files changed

+221
-36
lines changed

javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,15 @@ module ModelOutput {
643643
baseNode = getInvocationFromPath(type, path)
644644
}
645645

646+
/**
647+
* Holds if `baseNode` is a callable identified by the `(type, path)` part of a summary row.
648+
*/
649+
cached
650+
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
651+
summaryModel(type, path, _, _, _) and
652+
baseNode = getNodeFromPath(type, path)
653+
}
654+
646655
/**
647656
* Holds if `node` is seen as an instance of `type` due to a type definition
648657
* contributed by a CSV model.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: feature
3+
---
4+
* It is now possible to specify flow summaries as model rows of the form `type;path;input;output;kind`, for example "MyPkg;Member[list_map];Argument[1].ListElement;Argument[0].Parameter[0];value".

python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -90,39 +90,32 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
9090
}
9191

9292
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
93-
// // This gives access to getNodeFromPath, which is not constrained to `CallNode`s
94-
// // as `resolvedSummaryBase` is.
95-
// private import semmle.python.frameworks.data.internal.ApiGraphModels as AGM
96-
//
97-
// private class SummarizedCallableFromModel extends SummarizedCallable {
98-
// string package;
99-
// string type;
100-
// string path;
101-
// SummarizedCallableFromModel() {
102-
// ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and
103-
// this = package + ";" + type + ";" + path
104-
// }
105-
// override CallCfgNode getACall() {
106-
// exists(API::CallNode base |
107-
// ModelOutput::resolvedSummaryBase(package, type, path, base) and
108-
// result = base.getACall()
109-
// )
110-
// }
111-
// override ArgumentNode getACallback() {
112-
// exists(API::Node base |
113-
// base = AGM::getNodeFromPath(package, type, path) and
114-
// result = base.getAValueReachableFromSource()
115-
// )
116-
// }
117-
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
118-
// exists(string kind |
119-
// ModelOutput::relevantSummaryModel(package, type, path, input, output, kind)
120-
// |
121-
// kind = "value" and
122-
// preservesValue = true
123-
// or
124-
// kind = "taint" and
125-
// preservesValue = false
126-
// )
127-
// }
128-
// }
93+
94+
private class SummarizedCallableFromModel extends SummarizedCallable {
95+
string type;
96+
string path;
97+
98+
SummarizedCallableFromModel() {
99+
ModelOutput::relevantSummaryModel(type, path, _, _, _) and
100+
this = type + ";" + path
101+
}
102+
103+
override CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) }
104+
105+
override ArgumentNode getACallback() {
106+
exists(API::Node base |
107+
ModelOutput::resolvedSummaryRefBase(type, path, base) and
108+
result = base.getAValueReachableFromSource()
109+
)
110+
}
111+
112+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
113+
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
114+
kind = "value" and
115+
preservesValue = true
116+
or
117+
kind = "taint" and
118+
preservesValue = false
119+
)
120+
}
121+
}

python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,15 @@ module ModelOutput {
643643
baseNode = getInvocationFromPath(type, path)
644644
}
645645

646+
/**
647+
* Holds if `baseNode` is a callable identified by the `(type, path)` part of a summary row.
648+
*/
649+
cached
650+
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
651+
summaryModel(type, path, _, _, _) and
652+
baseNode = getNodeFromPath(type, path)
653+
}
654+
646655
/**
647656
* Holds if `node` is seen as an instance of `type` due to a type definition
648657
* contributed by a CSV model.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
failures
2+
argumentToEnsureNotTaintedNotMarkedAsSpurious
3+
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
4+
testFailures
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import python
2+
private import TestSummaries
3+
import experimental.meta.InlineTaintTest
4+
import MakeInlineTaintTest<TestTaintTrackingConfig>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
missingAnnotationOnSink
2+
failures
3+
testFailures
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import python
2+
private import TestSummaries
3+
import experimental.dataflow.TestUtil.NormalDataflowTest
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
private import python
2+
private import semmle.python.dataflow.new.FlowSummary
3+
private import semmle.python.frameworks.data.ModelsAsData
4+
private import semmle.python.ApiGraphs
5+
6+
private class StepsFromModel extends ModelInput::SummaryModelCsv {
7+
override predicate row(string row) {
8+
row =
9+
[
10+
"foo;Member[MS_identity];Argument[0];ReturnValue;value",
11+
"foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value",
12+
"foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value",
13+
"foo;Member[MS_reversed];Argument[0].ListElement;ReturnValue.ListElement;value",
14+
"foo;Member[MS_reversed];Argument[0];ReturnValue;taint",
15+
"foo;Member[MS_list_map];Argument[1].ListElement;Argument[0].Parameter[0];value",
16+
"foo;Member[MS_list_map];Argument[0].ReturnValue;ReturnValue.ListElement;value",
17+
"foo;Member[MS_list_map];Argument[1];ReturnValue;taint",
18+
"foo;Member[MS_append_to_list];Argument[0].ListElement;ReturnValue.ListElement;value",
19+
"foo;Member[MS_append_to_list];Argument[1];ReturnValue.ListElement;value",
20+
"foo;Member[MS_append_to_list];Argument[0];ReturnValue;taint",
21+
"foo;Member[MS_append_to_list];Argument[1];ReturnValue;taint",
22+
"json;Member[MS_loads];Argument[0];ReturnValue;taint"
23+
]
24+
}
25+
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
2+
import sys
3+
import os
4+
5+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
6+
from testlib import expects
7+
8+
# These are defined so that we can evaluate the test code.
9+
NONSOURCE = "not a source"
10+
SOURCE = "source"
11+
12+
13+
def is_source(x):
14+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
15+
16+
17+
def SINK(x):
18+
if is_source(x):
19+
print("OK")
20+
else:
21+
print("Unexpected flow", x)
22+
23+
24+
def SINK_F(x):
25+
if is_source(x):
26+
print("Unexpected flow", x)
27+
else:
28+
print("OK")
29+
30+
ensure_tainted = ensure_not_tainted = print
31+
TAINTED_STRING = "TAINTED_STRING"
32+
33+
from foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list
34+
35+
# Simple summary
36+
via_identity = MS_identity(SOURCE)
37+
SINK(via_identity) # $ flow="SOURCE, l:-1 -> via_identity"
38+
39+
# Lambda summary
40+
via_lambda = MS_apply_lambda(lambda x: [x], SOURCE)
41+
SINK(via_lambda[0]) # $ flow="SOURCE, l:-1 -> via_lambda[0]"
42+
43+
# A lambda that breaks the flow
44+
not_via_lambda = MS_apply_lambda(lambda x: 1, SOURCE)
45+
SINK_F(not_via_lambda)
46+
47+
48+
# Collection summaries
49+
via_reversed = MS_reversed([SOURCE])
50+
SINK(via_reversed[0]) # $ flow="SOURCE, l:-1 -> via_reversed[0]"
51+
52+
tainted_list = MS_reversed(TAINTED_LIST)
53+
ensure_tainted(
54+
tainted_list, # $ tainted
55+
tainted_list[0], # $ tainted
56+
)
57+
58+
# Complex summaries
59+
def box(x):
60+
return [x]
61+
62+
via_map = MS_list_map(box, [SOURCE])
63+
SINK(via_map[0][0]) # $ flow="SOURCE, l:-1 -> via_map[0][0]"
64+
65+
tainted_mapped = MS_list_map(box, TAINTED_LIST)
66+
ensure_tainted(
67+
tainted_mapped, # $ tainted
68+
tainted_mapped[0][0], # $ tainted
69+
)
70+
71+
def explicit_identity(x):
72+
return x
73+
74+
via_map_explicit = MS_list_map(explicit_identity, [SOURCE])
75+
SINK(via_map_explicit[0]) # $ flow="SOURCE, l:-1 -> via_map_explicit[0]"
76+
77+
tainted_mapped_explicit = MS_list_map(explicit_identity, TAINTED_LIST)
78+
ensure_tainted(
79+
tainted_mapped_explicit, # $ tainted
80+
tainted_mapped_explicit[0], # $ tainted
81+
)
82+
83+
via_map_summary = MS_list_map(MS_identity, [SOURCE])
84+
SINK(via_map_summary[0]) # $ flow="SOURCE, l:-1 -> via_map_summary[0]"
85+
86+
tainted_mapped_summary = MS_list_map(MS_identity, TAINTED_LIST)
87+
ensure_tainted(
88+
tainted_mapped_summary, # $ tainted
89+
tainted_mapped_summary[0], # $ tainted
90+
)
91+
92+
via_append_el = MS_append_to_list([], SOURCE)
93+
SINK(via_append_el[0]) # $ flow="SOURCE, l:-1 -> via_append_el[0]"
94+
95+
tainted_list_el = MS_append_to_list([], TAINTED_STRING)
96+
ensure_tainted(
97+
tainted_list_el, # $ tainted
98+
tainted_list_el[0], # $ tainted
99+
)
100+
101+
via_append = MS_append_to_list([SOURCE], NONSOURCE)
102+
SINK(via_append[0]) # $ flow="SOURCE, l:-1 -> via_append[0]"
103+
104+
tainted_list_implicit = MS_append_to_list(TAINTED_LIST, NONSOURCE)
105+
ensure_tainted(
106+
tainted_list, # $ tainted
107+
tainted_list[0], # $ tainted
108+
)
109+
110+
# Modeled flow-summary is not value preserving
111+
from json import MS_loads as json_loads
112+
113+
# so no data-flow
114+
SINK_F(json_loads(SOURCE))
115+
SINK_F(json_loads(SOURCE)[0])
116+
117+
# but has taint-flow
118+
tainted_resultlist = json_loads(TAINTED_STRING)
119+
ensure_tainted(
120+
tainted_resultlist, # $ tainted
121+
tainted_resultlist[0], # $ tainted
122+
)

0 commit comments

Comments
 (0)