Skip to content

Commit a178218

Browse files
committed
Python: Add unsafe deserialization sinks (CWE-502)
1 parent 9b0d7f3 commit a178218

File tree

15 files changed

+161
-0
lines changed

15 files changed

+161
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
category: minorAnalysis
2+
---
3+
* Improved modeling of decoding through pickle-related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load` (when called with `allow_pickle=True`), and `joblib.load`.

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,6 @@ private import semmle.python.frameworks.Urllib3
6262
private import semmle.python.frameworks.Xmltodict
6363
private import semmle.python.frameworks.Yaml
6464
private import semmle.python.frameworks.Yarl
65+
private import semmle.python.frameworks.Pandas
66+
private import semmle.python.frameworks.Numpy
67+
private import semmle.python.frameworks.Joblib
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `joblib` PyPI package.
3+
* See https://pypi.org/project/joblib/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.RemoteFlowSources
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `joblib` PyPI package.
14+
* See https://pypi.org/project/joblib/.
15+
*/
16+
private module Joblib {
  /**
   * A call to `joblib.load`.
   * See https://pypi.org/project/joblib/
   *
   * Claiming there is decoding of the input to `joblib.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the filename
   * argument as being deserialized...
   */
  private class JoblibLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    JoblibLoadCall() { this = API::moduleImport("joblib").getMember("load").getACall() }

    /** Holds unconditionally: every modeled `joblib.load` call is treated as able to execute its input. */
    override predicate mayExecuteInput() { any() }

    /** Gets the file argument, passed either positionally or as `filename=`. */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("filename")]
    }

    /** Gets the decoded value, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the format name reported for this decoding, `"joblib"`. */
    override string getFormat() { result = "joblib" }
  }
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `numpy` PyPI package.
3+
* See https://pypi.org/project/numpy/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.RemoteFlowSources
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `numpy` PyPI package.
14+
* See https://pypi.org/project/numpy/.
15+
*/
16+
private module Numpy {
  /**
   * A call to `numpy.load` that passes a literal `True` for `allow_pickle`.
   * See https://pypi.org/project/numpy/
   *
   * Claiming there is decoding of the input to `numpy.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the file
   * argument as being deserialized...
   */
  private class NumpyLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    NumpyLoadCall() {
      this = API::moduleImport("numpy").getMember("load").getACall() and
      // Only calls that opt in to pickle support are modeled. `allow_pickle` is the third
      // parameter of `numpy.load`, so accept it positionally as well as by keyword.
      [this.getArg(2), this.getArgByName("allow_pickle")].asExpr() instanceof True
    }

    /** Holds unconditionally: every call matched by this class has pickle loading enabled. */
    override predicate mayExecuteInput() { any() }

    /**
     * Gets the file argument, passed either positionally or by keyword.
     *
     * `file` is the keyword name documented by numpy; `filename` is retained so that
     * anything matched before this name was corrected is still matched.
     */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("file"), this.getArgByName("filename")]
    }

    /** Gets the decoded value, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the format name reported for this decoding, `"numpy"`. */
    override string getFormat() { result = "numpy" }
  }
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `pandas` PyPI package.
3+
* See https://pypi.org/project/pandas/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.RemoteFlowSources
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
12+
/**
13+
* Provides models for the `pandas` PyPI package.
14+
* See https://pypi.org/project/pandas/.
15+
*/
16+
private module Pandas {
  /**
   * Models calls to `pandas.read_pickle` as a `Decoding` of their file argument.
   *
   * Reference: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html
   * (reached via https://pypi.org/project/pandas/).
   */
  private class PandasReadPickleCall extends Decoding::Range, DataFlow::CallCfgNode {
    PandasReadPickleCall() {
      exists(API::Node readPickle |
        readPickle = API::moduleImport("pandas").getMember("read_pickle") and
        this = readPickle.getACall()
      )
    }

    /** Holds unconditionally for every modeled `pandas.read_pickle` call. */
    override predicate mayExecuteInput() { any() }

    /** Gets the first positional argument or the `filepath_or_buffer` keyword argument. */
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("filepath_or_buffer")
    }

    /** Gets the call node itself, which carries the decoded value. */
    override DataFlow::Node getOutput() { this = result }

    /** Gets the format name reported for this decoding, `"pandas"`. */
    override string getFormat() { result = "pandas" }
  }
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
failures
2+
testFailures
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Test input for the `joblib.load` model: one call passing the file positionally and one
# passing it as `filename=`. `file_` is not defined in this snippet.
# NOTE(review): the trailing `# $ ...` annotations are presumably the expected results
# checked by the accompanying ConceptsTest query -- keep each on the same line as its call.
import joblib
2+
3+
joblib.load(file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
4+
joblib.load(filename=file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
failures
2+
testFailures
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest

0 commit comments

Comments
 (0)