Skip to content

Commit c2a6b81

Browse files
committed
Python: Add modeling of ujson PyPI package
The problem with `tainted_filelike` not having taint is that in the call `ujson.dump(tainted_obj, tainted_filelike)` there is no PostUpdateNode for `tainted_filelike` :( The reason is that points-to is not able to resolve the call, so none of the clauses in `argumentPreUpdateNode` match. See https://github.com/github/codeql/blob/08731fc6cf4ba6951cd4e8f239eac1f3388d3957/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll#L101-L111 Let's deal with that issue in another PR, though.
1 parent 72d08f4 commit c2a6b81

File tree

9 files changed

+130
-0
lines changed

9 files changed

+130
-0
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ Python built-in support
157157
PyYAML, Serialization
158158
dill, Serialization
159159
simplejson, Serialization
160+
ujson, Serialization
160161
fabric, Utility library
161162
invoke, Utility library
162163
idna, Utility library
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the PyPI package `ujson`.

python/ql/src/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ private import semmle.python.frameworks.PyMySQL
1919
private import semmle.python.frameworks.Simplejson
2020
private import semmle.python.frameworks.Stdlib
2121
private import semmle.python.frameworks.Tornado
22+
private import semmle.python.frameworks.Ujson
2223
private import semmle.python.frameworks.Yaml
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `ujson` PyPI package.
3+
* See https://pypi.org/project/ujson/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `ujson` PyPI package.
14+
* See https://pypi.org/project/ujson/.
15+
*/
16+
private module UjsonModel {
17+
/**
18+
* A call to `ujson.dumps` or `ujson.encode`.
*
* The value being encoded is the first positional argument or the keyword argument `obj`.
* The encoded output is the result of the call itself, and the reported format is "JSON".
19+
*/
20+
private class UjsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
21+
UjsonDumpsCall() { this = API::moduleImport("ujson").getMember(["dumps", "encode"]).getACall() }
22+
23+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
24+
25+
override DataFlow::Node getOutput() { result = this }
26+
27+
override string getFormat() { result = "JSON" }
28+
}
29+
30+
/**
31+
* A call to `ujson.dump`.
*
* The value being encoded is the first positional argument. The encoded output is the
* post-update node whose pre-update node is the second positional argument (the object
* the encoded data is written to). Only positional arguments are modeled.
*
* NOTE(review): the accompanying test marks taint on the second argument as `MISSING`,
* so `getOutput` may have no result when no post-update node exists for that argument;
* confirm before relying on it.
32+
*/
33+
private class UjsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
34+
UjsonDumpCall() { this = API::moduleImport("ujson").getMember("dump").getACall() }
35+
36+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
37+
38+
override DataFlow::Node getOutput() {
39+
result.(DataFlow::PostUpdateNode).getPreUpdateNode() = this.getArg(1)
40+
}
41+
42+
override string getFormat() { result = "JSON" }
43+
}
44+
45+
/**
46+
* A call to `ujson.loads` or `ujson.decode`.
*
* The data being decoded is the first positional argument or the keyword argument `obj`.
* The decoded output is the result of the call itself, and the reported format is "JSON".
47+
*/
48+
private class UjsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
49+
UjsonLoadsCall() { this = API::moduleImport("ujson").getMember(["loads", "decode"]).getACall() }
50+
51+
// Note: Most other JSON libraries allow the keyword argument `s`, but as of version
52+
// 4.0.2 `ujson` uses `obj` instead.
53+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
54+
55+
override DataFlow::Node getOutput() { result = this }
56+
57+
override string getFormat() { result = "JSON" }
58+
// `none()` never holds, so this decoding is modeled as not executing its input.
59+
override predicate mayExecuteInput() { none() }
60+
}
61+
62+
/**
63+
* A call to `ujson.load`.
*
* The data being decoded is the first positional argument (a file-like object in the
* accompanying test). The decoded output is the result of the call itself, and the
* reported format is "JSON". Only the positional argument is modeled.
64+
*/
65+
private class UjsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
66+
UjsonLoadCall() { this = API::moduleImport("ujson").getMember("load").getACall() }
67+
68+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
69+
70+
override DataFlow::Node getOutput() { result = this }
71+
72+
override string getFormat() { result = "JSON" }
73+
// `none()` never holds, so this decoding is modeled as not executing its input.
74+
override predicate mayExecuteInput() { none() }
75+
}
76+
}

python/ql/test/library-tests/frameworks/ujson/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
argumentToEnsureNotTaintedNotMarkedAsSpurious
2+
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
3+
failures
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import experimental.meta.InlineTaintTest
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import ujson
2+
from io import StringIO
3+
4+
def test():
# Exercises the `ujson` encode/decode entry points on a value built from TAINTED_STRING.
# The trailing `$`-prefixed comments look like inline expectations for the accompanying
# ConceptsTest / InlineTaintTest queries (verify against those queries); entries after
# `MISSING:` are results that are wanted but not produced yet.
5+
ts = TAINTED_STRING
6+
tainted_obj = {"foo": ts}
7+
8+
encoded = ujson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
9+
10+
ensure_tainted(
11+
encoded, # $ tainted
12+
ujson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
13+
ujson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
14+
ujson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
15+
ujson.loads(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
16+
17+
ujson.encode(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
18+
ujson.encode(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
19+
ujson.decode(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
20+
ujson.decode(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
21+
)
22+
23+
# load/dump with a file-like object
24+
tainted_filelike = StringIO()
# The dump call below carries no encodeOutput expectation, and taint on `tainted_filelike`
# is listed as MISSING further down; presumably this is the limitation described in the
# commit message (no post-update node for the argument) - confirm.
25+
ujson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
26+
27+
# Rewind so that the following `ujson.load` reads from the start of the buffer.
tainted_filelike.seek(0)
28+
ensure_tainted(
29+
tainted_filelike, # $ MISSING: tainted
30+
ujson.load(tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted
31+
)
32+
33+
# load/dump with file-like using keyword-args does not work in `ujson`
34+
35+
36+
# To make things runnable
# The definitions below are stand-ins so this file can also be executed as a plain
# Python script.
37+
38+
TAINTED_STRING = "TAINTED_STRING"
39+
def ensure_tainted(*args):
# Prints a header line, then each positional argument with its index and repr.
40+
print("- ensure_tainted")
41+
for i, arg in enumerate(args):
42+
print("arg {}: {!r}".format(i, arg))
43+
44+
test()

0 commit comments

Comments
 (0)