Skip to content

Commit 9c9c5c0

Browse files
authored
Merge pull request #6837 from RasmusWL/more-unsafe-deserialization-sinks
Python: More unsafe deserialization sinks
2 parents f6122c8 + fd0c386 commit 9c9c5c0

File tree

5 files changed

+147
-22
lines changed

5 files changed

+147
-22
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.
Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Provides classes modeling security-relevant aspects of the 'dill' package.
2+
* Provides classes modeling security-relevant aspects of the `dill` PyPI package.
33
* See https://pypi.org/project/dill/.
44
*/
55

@@ -10,18 +10,41 @@ private import semmle.python.Concepts
1010
private import semmle.python.ApiGraphs
1111

1212
/**
13-
* A call to `dill.loads`
14-
* See https://pypi.org/project/dill/ (which currently refers you
15-
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
13+
* Provides models for the `dill` PyPI package.
14+
* See https://pypi.org/project/dill/.
1615
*/
17-
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
18-
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
16+
private module Dill {
17+
/**
18+
* A call to `dill.load`
19+
* See https://pypi.org/project/dill/ (which currently refers you
20+
* to https://docs.python.org/3/library/pickle.html#pickle.load)
21+
*/
22+
private class DillLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
23+
DillLoadCall() { this = API::moduleImport("dill").getMember("load").getACall() }
24+
25+
override predicate mayExecuteInput() { any() }
26+
27+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
28+
29+
override DataFlow::Node getOutput() { result = this }
30+
31+
override string getFormat() { result = "dill" }
32+
}
33+
34+
/**
35+
* A call to `dill.loads`
36+
* See https://pypi.org/project/dill/ (which currently refers you
37+
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
38+
*/
39+
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
40+
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
1941

20-
override predicate mayExecuteInput() { any() }
42+
override predicate mayExecuteInput() { any() }
2143

22-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
44+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("str")] }
2345

24-
override DataFlow::Node getOutput() { result = this }
46+
override DataFlow::Node getOutput() { result = this }
2547

26-
override string getFormat() { result = "dill" }
48+
override string getFormat() { result = "dill" }
49+
}
2750
}

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,22 @@ private module StdlibPrivate {
428428
// ---------------------------------------------------------------------------
429429
// marshal
430430
// ---------------------------------------------------------------------------
431+
/**
432+
* A call to `marshal.load`
433+
* See https://docs.python.org/3/library/marshal.html#marshal.load
434+
*/
435+
private class MarshalLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
436+
MarshalLoadCall() { this = API::moduleImport("marshal").getMember("load").getACall() }
437+
438+
override predicate mayExecuteInput() { any() }
439+
440+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
441+
442+
override DataFlow::Node getOutput() { result = this }
443+
444+
override string getFormat() { result = "marshal" }
445+
}
446+
431447
/**
432448
* A call to `marshal.loads`
433449
* See https://docs.python.org/3/library/marshal.html#marshal.loads
@@ -447,27 +463,87 @@ private module StdlibPrivate {
447463
// ---------------------------------------------------------------------------
448464
// pickle
449465
// ---------------------------------------------------------------------------
450-
/** Gets a reference to the `pickle` module. */
451-
DataFlow::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getAUse() }
452-
453-
/** Provides models for the `pickle` module. */
454-
module pickle {
455-
/** Gets a reference to the `pickle.loads` function. */
456-
DataFlow::Node loads() {
457-
result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getMember("loads").getAUse()
458-
}
466+
/** Gets a reference to any of the `pickle` modules. */
467+
API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) }
468+
469+
/**
470+
* A call to `pickle.load`
471+
* See https://docs.python.org/3/library/pickle.html#pickle.load
472+
*/
473+
private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
474+
PickleLoadCall() { this = pickle().getMember("load").getACall() }
475+
476+
override predicate mayExecuteInput() { any() }
477+
478+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
479+
480+
override DataFlow::Node getOutput() { result = this }
481+
482+
override string getFormat() { result = "pickle" }
459483
}
460484

461485
/**
462486
* A call to `pickle.loads`
463487
* See https://docs.python.org/3/library/pickle.html#pickle.loads
464488
*/
465489
private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
466-
PickleLoadsCall() { this.getFunction() = pickle::loads() }
490+
PickleLoadsCall() { this = pickle().getMember("loads").getACall() }
467491

468492
override predicate mayExecuteInput() { any() }
469493

470-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
494+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
495+
496+
override DataFlow::Node getOutput() { result = this }
497+
498+
override string getFormat() { result = "pickle" }
499+
}
500+
501+
/**
502+
* A construction of a `pickle.Unpickler`
503+
* See https://docs.python.org/3/library/pickle.html#pickle.Unpickler
504+
*/
505+
private class PickleUnpicklerCall extends Decoding::Range, DataFlow::CallCfgNode {
506+
PickleUnpicklerCall() { this = pickle().getMember("Unpickler").getACall() }
507+
508+
override predicate mayExecuteInput() { any() }
509+
510+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
511+
512+
override DataFlow::Node getOutput() { result = this.getAMethodCall("load") }
513+
514+
override string getFormat() { result = "pickle" }
515+
}
516+
517+
// ---------------------------------------------------------------------------
518+
// shelve
519+
// ---------------------------------------------------------------------------
520+
/**
521+
* A call to `shelve.open`
522+
* See https://docs.python.org/3/library/shelve.html#shelve.open
523+
*
524+
* Claiming there is decoding of the input to `shelve.open` is a bit questionable, since
525+
* it's not the filename, but the contents of the file that is decoded.
526+
*
527+
* However, we definitely want to be able to alert if a user is able to control what
528+
* file is used, since that can lead to code execution (even if that file is free of
529+
* path injection).
530+
*
531+
* So right now the best way we have of modeling this seems to be to treat the filename
532+
* argument as being deserialized...
533+
*/
534+
private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
535+
DataFlow::CallCfgNode {
536+
ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
537+
538+
override predicate mayExecuteInput() { any() }
539+
540+
override DataFlow::Node getAnInput() {
541+
result in [this.getArg(0), this.getArgByName("filename")]
542+
}
543+
544+
override DataFlow::Node getAPathArgument() {
545+
result in [this.getArg(0), this.getArgByName("filename")]
546+
}
471547

472548
override DataFlow::Node getOutput() { result = this }
473549

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
import dill
22

3-
dill.loads(payload) # $decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput
3+
dill.load(file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput
4+
dill.load(file=file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput
5+
dill.loads(payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput
6+
dill.loads(str=payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput

python/ql/test/library-tests/frameworks/stdlib/Decoding.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
import pickle
22
import marshal
3+
import shelve
34
import base64
45

6+
pickle.load(file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput
7+
pickle.load(file=file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput
58
pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput
9+
# using this keyword argument is disallowed from Python 3.9
10+
pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput
11+
12+
# We don't really have a good way to model a decode happening over multiple statements
13+
# like this. Since the important bit for `py/unsafe-deserialization` is the input, that
14+
# is the main focus. We do a best effort to model the output though (but that will only
15+
# work in local scope).
16+
unpickler = pickle.Unpickler(file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput
17+
unpickler.load() # $ decodeOutput=unpickler.load()
18+
unpickler = pickle.Unpickler(file=file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput
19+
20+
marshal.load(file_) # $ decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput
621
marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput
722

23+
24+
# if the file opened has been controlled by an attacker, this can lead to code
25+
# execution. (underlying file format is pickle)
26+
shelve.open(filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
27+
shelve.open(filename=filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
28+
829
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
930
base64.b64decode(payload) # $ decodeInput=payload decodeOutput=base64.b64decode(..) decodeFormat=Base64
1031
base64.standard_b64decode(payload) # $ decodeInput=payload decodeOutput=base64.standard_b64decode(..) decodeFormat=Base64

0 commit comments

Comments
 (0)