Skip to content

Commit 23e8092

Browse files
authored
Merge pull request github#5864 from RasmusWL/some-framework-modeling
Approved by tausbn
2 parents e66b555 + 1b0d505 commit 23e8092

25 files changed

+390
-32
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,11 @@ Python built-in support
157157
Tornado, Web framework
158158
PyYAML, Serialization
159159
dill, Serialization
160+
simplejson, Serialization
161+
ujson, Serialization
160162
fabric, Utility library
161163
invoke, Utility library
164+
idna, Utility library
162165
mysql-connector-python, Database
163166
MySQLdb, Database
164167
psycopg2, Database
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the PyPI package `simplejson`.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the PyPI package `ujson`.

python/ql/src/semmle/python/Frameworks.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,22 @@
22
* Helper file that imports all framework modeling.
33
*/
44

5+
// If you add modeling of a new framework/library, remember to add it to the docs in
6+
// `docs/codeql/support/reusables/frameworks.rst`
57
private import semmle.python.frameworks.Cryptodome
68
private import semmle.python.frameworks.Cryptography
79
private import semmle.python.frameworks.Dill
810
private import semmle.python.frameworks.Django
911
private import semmle.python.frameworks.Fabric
1012
private import semmle.python.frameworks.Flask
13+
private import semmle.python.frameworks.Idna
1114
private import semmle.python.frameworks.Invoke
1215
private import semmle.python.frameworks.MysqlConnectorPython
1316
private import semmle.python.frameworks.MySQLdb
1417
private import semmle.python.frameworks.Psycopg2
1518
private import semmle.python.frameworks.PyMySQL
19+
private import semmle.python.frameworks.Simplejson
1620
private import semmle.python.frameworks.Stdlib
1721
private import semmle.python.frameworks.Tornado
22+
private import semmle.python.frameworks.Ujson
1823
private import semmle.python.frameworks.Yaml
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `idna` PyPI package.
3+
* See https://pypi.org/project/idna/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `idna` PyPI package.
14+
* See https://pypi.org/project/idna/.
15+
*/
16+
private module IdnaModel {
17+
/** A call to `idna.encode`. */
18+
private class IdnaEncodeCall extends Encoding::Range, DataFlow::CallCfgNode {
19+
IdnaEncodeCall() { this = API::moduleImport("idna").getMember("encode").getACall() }
20+
21+
override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("s")] }
22+
23+
override DataFlow::Node getOutput() { result = this }
24+
25+
override string getFormat() { result = "IDNA" }
26+
}
27+
28+
/** A call to `idna.decode`. */
29+
private class IdnaDecodeCall extends Decoding::Range, DataFlow::CallCfgNode {
30+
IdnaDecodeCall() { this = API::moduleImport("idna").getMember("decode").getACall() }
31+
32+
override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("s")] }
33+
34+
override DataFlow::Node getOutput() { result = this }
35+
36+
override string getFormat() { result = "IDNA" }
37+
38+
override predicate mayExecuteInput() { none() }
39+
}
40+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `simplejson` PyPI package.
3+
* See https://simplejson.readthedocs.io/en/latest/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `simplejson` PyPI package.
14+
* See https://simplejson.readthedocs.io/en/latest/.
15+
*/
16+
private module SimplejsonModel {
17+
/**
18+
* A call to `simplejson.dumps`.
19+
*
20+
* See https://simplejson.readthedocs.io/en/latest/#simplejson.dumps
21+
*/
22+
private class SimplejsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
23+
SimplejsonDumpsCall() { this = API::moduleImport("simplejson").getMember("dumps").getACall() }
24+
25+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
26+
27+
override DataFlow::Node getOutput() { result = this }
28+
29+
override string getFormat() { result = "JSON" }
30+
}
31+
32+
/**
33+
* A call to `simplejson.dump`.
34+
*
35+
* See https://simplejson.readthedocs.io/en/latest/#simplejson.dump
36+
*/
37+
private class SimplejsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
38+
SimplejsonDumpCall() { this = API::moduleImport("simplejson").getMember("dump").getACall() }
39+
40+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
41+
42+
override DataFlow::Node getOutput() {
43+
result.(DataFlow::PostUpdateNode).getPreUpdateNode() in [
44+
this.getArg(1), this.getArgByName("fp")
45+
]
46+
}
47+
48+
override string getFormat() { result = "JSON" }
49+
}
50+
51+
/**
52+
* A call to `simplejson.loads`.
53+
*
54+
* See https://simplejson.readthedocs.io/en/latest/#simplejson.loads
55+
*/
56+
private class SimplejsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
57+
SimplejsonLoadsCall() { this = API::moduleImport("simplejson").getMember("loads").getACall() }
58+
59+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("s")] }
60+
61+
override DataFlow::Node getOutput() { result = this }
62+
63+
override string getFormat() { result = "JSON" }
64+
65+
override predicate mayExecuteInput() { none() }
66+
}
67+
68+
/**
69+
* A call to `simplejson.load`.
70+
*
71+
* See https://simplejson.readthedocs.io/en/latest/#simplejson.load
72+
*/
73+
private class SimplejsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
74+
SimplejsonLoadCall() { this = API::moduleImport("simplejson").getMember("load").getACall() }
75+
76+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("fp")] }
77+
78+
override DataFlow::Node getOutput() { result = this }
79+
80+
override string getFormat() { result = "JSON" }
81+
82+
override predicate mayExecuteInput() { none() }
83+
}
84+
}

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,23 @@ private module Stdlib {
511511

512512
override predicate mayExecuteInput() { none() }
513513

514-
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
514+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("s")] }
515+
516+
override DataFlow::Node getOutput() { result = this }
517+
518+
override string getFormat() { result = "JSON" }
519+
}
520+
521+
/**
522+
* A call to `json.load`
523+
* See https://docs.python.org/3/library/json.html#json.load
524+
*/
525+
private class JsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
526+
JsonLoadCall() { this = json().getMember("load").getACall() }
527+
528+
override predicate mayExecuteInput() { none() }
529+
530+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("fp")] }
515531

516532
override DataFlow::Node getOutput() { result = this }
517533

@@ -525,13 +541,31 @@ private module Stdlib {
525541
private class JsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
526542
JsonDumpsCall() { this = json().getMember("dumps").getACall() }
527543

528-
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
544+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
529545

530546
override DataFlow::Node getOutput() { result = this }
531547

532548
override string getFormat() { result = "JSON" }
533549
}
534550

551+
/**
552+
* A call to `json.dump`
553+
* See https://docs.python.org/3/library/json.html#json.dump
554+
*/
555+
private class JsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
556+
JsonDumpCall() { this = json().getMember("dump").getACall() }
557+
558+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
559+
560+
override DataFlow::Node getOutput() {
561+
result.(DataFlow::PostUpdateNode).getPreUpdateNode() in [
562+
this.getArg(1), this.getArgByName("fp")
563+
]
564+
}
565+
566+
override string getFormat() { result = "JSON" }
567+
}
568+
535569
// ---------------------------------------------------------------------------
536570
// cgi
537571
// ---------------------------------------------------------------------------
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `ujson` PyPI package.
3+
* See https://pypi.org/project/ujson/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `ujson` PyPI package.
14+
* See https://pypi.org/project/ujson/.
15+
*/
16+
private module UjsonModel {
17+
/**
18+
* A call to `ujson.dumps` or `ujson.encode`.
19+
*/
20+
private class UjsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
21+
UjsonDumpsCall() { this = API::moduleImport("ujson").getMember(["dumps", "encode"]).getACall() }
22+
23+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
24+
25+
override DataFlow::Node getOutput() { result = this }
26+
27+
override string getFormat() { result = "JSON" }
28+
}
29+
30+
/**
31+
* A call to `ujson.dump`.
32+
*/
33+
private class UjsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
34+
UjsonDumpCall() { this = API::moduleImport("ujson").getMember("dump").getACall() }
35+
36+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
37+
38+
override DataFlow::Node getOutput() {
39+
result.(DataFlow::PostUpdateNode).getPreUpdateNode() = this.getArg(1)
40+
}
41+
42+
override string getFormat() { result = "JSON" }
43+
}
44+
45+
/**
46+
* A call to `ujson.loads` or `ujson.decode`.
47+
*/
48+
private class UjsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
49+
UjsonLoadsCall() { this = API::moduleImport("ujson").getMember(["loads", "decode"]).getACall() }
50+
51+
// Note: Most other JSON libraries allow the keyword argument `s`, but as of version
52+
// 4.0.2 `ujson` uses `obj` instead.
53+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
54+
55+
override DataFlow::Node getOutput() { result = this }
56+
57+
override string getFormat() { result = "JSON" }
58+
59+
override predicate mayExecuteInput() { none() }
60+
}
61+
62+
/**
63+
* A call to `ujson.load`.
64+
*/
65+
private class UjsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
66+
UjsonLoadCall() { this = API::moduleImport("ujson").getMember("load").getACall() }
67+
68+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
69+
70+
override DataFlow::Node getOutput() { result = this }
71+
72+
override string getFormat() { result = "JSON" }
73+
74+
override predicate mayExecuteInput() { none() }
75+
}
76+
}

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,54 +11,43 @@
1111
# Actual tests
1212

1313
from io import StringIO
14-
15-
# Workaround for Python3 not having unicode
16-
import sys
17-
if sys.version_info[0] == 3:
18-
unicode = str
14+
import json
1915

2016
def test():
2117
print("\n# test")
2218
ts = TAINTED_STRING
23-
import json
19+
20+
encoded = json.dumps(ts)
2421

2522
ensure_tainted(
23+
encoded, # $ tainted
2624
json.dumps(ts), # $ tainted
27-
json.loads(json.dumps(ts)), # $ tainted
25+
json.dumps(obj=ts), # $ tainted
26+
json.loads(encoded), # $ tainted
27+
json.loads(s=encoded), # $ tainted
2828
)
2929

30-
# For Python2, need to convert to unicode for StringIO to work
31-
tainted_filelike = StringIO(unicode(json.dumps(ts)))
30+
# load/dump with file-like
31+
tainted_filelike = StringIO()
32+
json.dump(ts, tainted_filelike)
3233

34+
tainted_filelike.seek(0)
3335
ensure_tainted(
34-
tainted_filelike, # $ MISSING: tainted
35-
json.load(tainted_filelike), # $ MISSING: tainted
36+
tainted_filelike, # $ tainted
37+
json.load(tainted_filelike), # $ tainted
3638
)
3739

38-
def non_syntacical():
39-
print("\n# non_syntacical")
40-
ts = TAINTED_STRING
41-
42-
# a less syntactical approach
43-
from json import load, loads, dumps
44-
45-
dumps_alias = dumps
40+
# load/dump with file-like using keyword-args
41+
tainted_filelike = StringIO()
42+
json.dump(obj=ts, fp=tainted_filelike)
4643

44+
tainted_filelike.seek(0)
4745
ensure_tainted(
48-
dumps(ts), # $ tainted
49-
dumps_alias(ts), # $ tainted
50-
loads(dumps(ts)), # $ tainted
46+
tainted_filelike, # $ tainted
47+
json.load(fp=tainted_filelike), # $ tainted
5148
)
5249

53-
# For Python2, need to convert to unicode for StringIO to work
54-
tainted_filelike = StringIO(unicode(dumps(ts)))
55-
56-
ensure_tainted(
57-
tainted_filelike, # $ MISSING: tainted
58-
load(tainted_filelike), # $ MISSING: tainted
59-
)
6050

6151
# Make tests runnable
6252

6353
test()
64-
non_syntacical()

0 commit comments

Comments
 (0)