Skip to content

Commit 9eed17f

Browse files
authored
Merge pull request github#5152 from RasmusWL/improve-pyyaml-support
Python: Improve pyyaml support
2 parents c5ae8d2 + 6e2445c commit 9eed17f

File tree

4 files changed

+41
-5
lines changed

4 files changed

+41
-5
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Improved modeling of the `PyYAML` PyPI package (imported as `yaml`) now includes `safe_load`, `unsafe_load`, and `full_load` (as well as the `..._load_all` functions). In the current version of PyYAML (5.4.1), only `safe_load` and `safe_load_all` are known to be safe from code execution exploits. Consequently, calls to the other functions are modeled as sinks of the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query.

python/ql/src/Security/CWE-502/UnsafeDeserialization.qhelp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ Avoid deserialization of untrusted data if at all possible. If the
2424
architecture permits it then use other formats instead of serialized objects,
2525
for example JSON.
2626
</p>
27+
<p>
28+
If you need to use YAML, use the <code>yaml.safe_load</code> function.
29+
</p>
2730
</recommendation>
2831

2932
<example>

python/ql/src/semmle/python/frameworks/Yaml.qll

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ private module Yaml {
2929
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
3030
*/
3131
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
32-
attr_name in ["load", "SafeLoader", "BaseLoader"] and
32+
attr_name in [
33+
// functions
34+
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
35+
"safe_load", "safe_load_all",
36+
// Classes
37+
"SafeLoader", "BaseLoader"
38+
] and
3339
(
3440
t.start() and
3541
result = DataFlow::importNode("yaml." + attr_name)
@@ -68,13 +74,22 @@ private module Yaml {
6874
}
6975

7076
/**
71-
* A call to `yaml.load`
77+
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
78+
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
79+
*
7280
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
7381
*/
7482
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
7583
override CallNode node;
84+
string func_name;
7685

77-
YamlLoadCall() { node.getFunction() = Yaml::yaml::yaml_attr("load").asCfgNode() }
86+
YamlLoadCall() {
87+
func_name in [
88+
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
89+
"safe_load", "safe_load_all"
90+
] and
91+
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
92+
}
7893

7994
/**
8095
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
@@ -84,10 +99,16 @@ private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
8499
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
85100
*/
86101
override predicate mayExecuteInput() {
102+
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
103+
or
104+
func_name in ["load", "load_all"] and
87105
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
88106
// then the default loader will be used, which is not safe.
89-
not node.getArgByName("Loader") =
90-
Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]).asCfgNode()
107+
not exists(DataFlow::Node loader_arg |
108+
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
109+
|
110+
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
111+
)
91112
}
92113

93114
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }

python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,15 @@
22
from yaml import SafeLoader
33

44
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
5+
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
56
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
67
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
8+
9+
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
10+
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
11+
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
12+
13+
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
14+
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
15+
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
16+
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput

0 commit comments

Comments
 (0)