Skip to content

Commit 37036b5

Browse files
authored
Merge pull request github#5437 from RasmusWL/small-pyyaml-improvements
Python: Small PyYAML improvements
2 parents fc7f19f + 7543f10 commit 37036b5

File tree

3 files changed

+80
-104
lines changed

3 files changed

+80
-104
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Improved modeling of the `PyYAML` PyPI package, so we now correctly treat `CSafeLoader` and `CBaseLoader` as being safe loaders that can not lead to code execution.
Lines changed: 50 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,119 +1,72 @@
11
/**
2-
* Provides classes modeling security-relevant aspects of the PyYAML package
3-
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
2+
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
3+
* via `import yaml`)
4+
*
5+
* See
6+
* - https://pyyaml.org/wiki/PyYAMLDocumentation
7+
* - https://pyyaml.docsforge.com/master/documentation/
48
*/
59

610
private import python
711
private import semmle.python.dataflow.new.DataFlow
812
private import semmle.python.dataflow.new.RemoteFlowSources
913
private import semmle.python.Concepts
14+
private import semmle.python.ApiGraphs
1015

16+
/**
17+
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
18+
* via `import yaml`)
19+
*
20+
* See
21+
* - https://pyyaml.org/wiki/PyYAMLDocumentation
22+
* - https://pyyaml.docsforge.com/master/documentation/
23+
*/
1124
private module Yaml {
12-
/** Gets a reference to the `yaml` module. */
13-
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
14-
t.start() and
15-
result = DataFlow::importNode("yaml")
16-
or
17-
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
18-
}
19-
20-
/** Gets a reference to the `yaml` module. */
21-
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
25+
/**
26+
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
27+
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
28+
*
29+
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
30+
*/
31+
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
32+
override CallNode node;
33+
string func_name;
2234

23-
/** Provides models for the `yaml` module. */
24-
module yaml {
25-
/**
26-
* Gets a reference to the attribute `attr_name` of the `yaml` module.
27-
* WARNING: Only holds for a few predefined attributes.
28-
*
29-
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
30-
*/
31-
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
32-
attr_name in [
33-
// functions
35+
YamlLoadCall() {
36+
func_name in [
3437
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
35-
"safe_load", "safe_load_all",
36-
// Classes
37-
"SafeLoader", "BaseLoader"
38+
"safe_load", "safe_load_all"
3839
] and
39-
(
40-
t.start() and
41-
result = DataFlow::importNode("yaml." + attr_name)
42-
or
43-
t.startInAttr(attr_name) and
44-
result = yaml()
45-
)
46-
or
47-
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
48-
// we have inlined that code and forced a join
49-
exists(DataFlow::TypeTracker t2 |
50-
exists(DataFlow::StepSummary summary |
51-
yaml_attr_first_join(t2, attr_name, result, summary) and
52-
t = t2.append(summary)
53-
)
54-
)
55-
}
56-
57-
pragma[nomagic]
58-
private predicate yaml_attr_first_join(
59-
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
60-
) {
61-
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
40+
this = API::moduleImport("yaml").getMember(func_name).getACall()
6241
}
6342

6443
/**
65-
* Gets a reference to the attribute `attr_name` of the `yaml` module.
66-
* WARNING: Only holds for a few predefined attributes.
67-
*
68-
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
44+
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
45+
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
46+
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
47+
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
48+
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
6949
*/
70-
DataFlow::Node yaml_attr(string attr_name) {
71-
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
50+
override predicate mayExecuteInput() {
51+
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
52+
or
53+
func_name in ["load", "load_all"] and
54+
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
55+
// then the default loader will be used, which is not safe.
56+
not exists(DataFlow::Node loader_arg |
57+
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
58+
|
59+
loader_arg =
60+
API::moduleImport("yaml")
61+
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
62+
.getAUse()
63+
)
7264
}
73-
}
74-
}
7565

76-
/**
77-
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
78-
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
79-
*
80-
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
81-
*/
82-
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
83-
override CallNode node;
84-
string func_name;
66+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
8567

86-
YamlLoadCall() {
87-
func_name in [
88-
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
89-
"safe_load", "safe_load_all"
90-
] and
91-
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
92-
}
68+
override DataFlow::Node getOutput() { result = this }
9369

94-
/**
95-
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
96-
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
97-
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
98-
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
99-
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
100-
*/
101-
override predicate mayExecuteInput() {
102-
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
103-
or
104-
func_name in ["load", "load_all"] and
105-
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
106-
// then the default loader will be used, which is not safe.
107-
not exists(DataFlow::Node loader_arg |
108-
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
109-
|
110-
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
111-
)
70+
override string getFormat() { result = "YAML" }
11271
}
113-
114-
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
115-
116-
override DataFlow::Node getOutput() { result = this }
117-
118-
override string getFormat() { result = "YAML" }
11972
}
Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
11
import yaml
2-
from yaml import SafeLoader
32

3+
# Unsafe:
44
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
5-
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
6-
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
7-
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
8-
9-
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
5+
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
106
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
117
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
128

9+
# Safe:
10+
yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
11+
yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
12+
yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
13+
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
14+
15+
################################################################################
16+
# load_all variants
17+
################################################################################
18+
19+
# Unsafe:
1320
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
14-
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
1521
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
1622
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
23+
24+
# Safe:
25+
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
26+
27+
################################################################################
28+
# C-based loaders with `libyaml`
29+
################################################################################
30+
31+
# Unsafe:
32+
yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
33+
yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
34+
35+
# Safe:
36+
yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
37+
yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML

0 commit comments

Comments
 (0)