Skip to content

Commit 9478faf

Browse files
authored
Merge pull request #6967 from RasmusWL/ruamel.yaml
Python: Model `ruamel.yaml` PyPI package
2 parents cee80f7 + 89e713a commit 9478faf

File tree

11 files changed

+232
-8
lines changed

11 files changed

+232
-8
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,10 @@ Python built-in support
158158
Flask, Web framework
159159
Tornado, Web framework
160160
Twisted, Web framework
161-
PyYAML, Serialization
161+
starlette, Asynchronous Server Gateway Interface (ASGI)
162162
dill, Serialization
163+
PyYAML, Serialization
164+
ruamel.yaml, Serialization
163165
simplejson, Serialization
164166
ujson, Serialization
165167
fabric, Utility library
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Added modeling of the `ruamel.yaml` PyPI package, resulting in additional sinks for the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query (since `ruamel.yaml.load` can lead to code execution).

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ private import semmle.python.frameworks.Peewee
2525
private import semmle.python.frameworks.Psycopg2
2626
private import semmle.python.frameworks.PyMySQL
2727
private import semmle.python.frameworks.Rsa
28+
private import semmle.python.frameworks.RuamelYaml
2829
private import semmle.python.frameworks.Simplejson
2930
private import semmle.python.frameworks.SqlAlchemy
3031
private import semmle.python.frameworks.Stdlib
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `ruamel.yaml` PyPI package
3+
*
4+
* See
5+
* - https://pypi.org/project/ruamel.yaml/
6+
*/
7+
8+
private import python
9+
private import semmle.python.dataflow.new.DataFlow
10+
private import semmle.python.Concepts
11+
private import semmle.python.ApiGraphs
12+
13+
/**
14+
* Provides models for the `ruamel.yaml` PyPI package.
15+
*
16+
* See
17+
* - https://pypi.org/project/ruamel.yaml/
18+
*/
19+
private module RuamelYaml {
20+
// Note: `ruamel.yaml` is a fork of the `PyYAML` PyPI package, so that's why the
21+
// interface is so similar.
22+
/**
23+
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `safe_load`, `safe_load_all`)
24+
*
25+
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
26+
*/
27+
private class RuamelYamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
28+
string func_name;
29+
30+
RuamelYamlLoadCall() {
31+
func_name in ["load", "load_all", "safe_load", "safe_load_all"] and
32+
this = API::moduleImport("ruamel").getMember("yaml").getMember(func_name).getACall()
33+
}
34+
35+
override predicate mayExecuteInput() {
36+
func_name in ["load", "load_all"] and
37+
// If the `Loader` argument is not set, the default loader will be used, which is
38+
// not safe. The only safe loaders are `SafeLoader` or `BaseLoader` (and their
39+
// variants with C implementation).
40+
not exists(DataFlow::Node loader_arg |
41+
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
42+
|
43+
loader_arg =
44+
API::moduleImport("ruamel")
45+
.getMember("yaml")
46+
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
47+
.getAUse()
48+
)
49+
}
50+
51+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }
52+
53+
override DataFlow::Node getOutput() { result = this }
54+
55+
override string getFormat() { result = "YAML" }
56+
}
57+
}

python/ql/lib/semmle/python/frameworks/Yaml.qll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
private import python
1111
private import semmle.python.dataflow.new.DataFlow
12-
private import semmle.python.dataflow.new.RemoteFlowSources
1312
private import semmle.python.Concepts
1413
private import semmle.python.ApiGraphs
1514

@@ -41,11 +40,17 @@ private module Yaml {
4140
}
4241

4342
/**
44-
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
45-
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
46-
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
47-
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
48-
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
43+
* This function was thought safe from the 5.1 release in 2017, when the default
44+
* loader was changed to `FullLoader` (see
45+
* https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation).
46+
*
47+
* In 2020 new exploits were found, meaning it's not safe. With the 6.0 release (see
48+
* https://github.com/yaml/pyyaml/commit/8cdff2c80573b8be8e8ad28929264a913a63aa33),
49+
* when using `load` and `load_all` you are now required to specify a Loader. But
50+
* from what I (@RasmusWL) can gather, `FullLoader` is not to be considered safe,
51+
* although known exploits have been mitigated (is at least my impression). Also see
52+
* https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389 for more
53+
* details.
4954
*/
5055
override predicate mayExecuteInput() {
5156
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
@@ -63,7 +68,7 @@ private module Yaml {
6368
)
6469
}
6570

66-
override DataFlow::Node getAnInput() { result = this.getArg(0) }
71+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }
6772

6873
override DataFlow::Node getOutput() { result = this }
6974

python/ql/test/library-tests/frameworks/ruamel.yaml/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import ruamel.yaml
2+
3+
# Unsafe:
4+
ruamel.yaml.load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
5+
ruamel.yaml.load(stream=payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
6+
ruamel.yaml.load(payload, ruamel.yaml.Loader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
7+
8+
# Safe:
9+
ruamel.yaml.load(payload, ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
10+
ruamel.yaml.load(payload, Loader=ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
11+
ruamel.yaml.load(payload, ruamel.yaml.BaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
12+
ruamel.yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load(..) decodeFormat=YAML
13+
14+
################################################################################
15+
# load_all variants
16+
################################################################################
17+
18+
# Unsafe:
19+
ruamel.yaml.load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load_all(..) decodeFormat=YAML decodeMayExecuteInput
20+
21+
# Safe:
22+
ruamel.yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load_all(..) decodeFormat=YAML
23+
24+
################################################################################
25+
# C-based loaders with `libyaml`
26+
################################################################################
27+
28+
# Unsafe:
29+
ruamel.yaml.load(payload, ruamel.yaml.CLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
30+
31+
# Safe:
32+
ruamel.yaml.load(payload, ruamel.yaml.CSafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
33+
ruamel.yaml.load(payload, ruamel.yaml.CBaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
3+
# this file doesn't have a .py extension so the extractor doesn't pick it up, so it
4+
# doesn't have to be annotated
5+
6+
# This file is just a Proof of Concept for how code execution can be triggered.
7+
8+
import os
9+
import ruamel.yaml
10+
11+
class Exploit(object):
12+
def __reduce__(self):
13+
return (os.system, ('ls',))
14+
15+
data = Exploit()
16+
serialized_data = ruamel.yaml.dump(data)
17+
18+
# All these will execute `ls`
19+
print("!!! ruamel.yaml.load")
20+
ruamel.yaml.load(serialized_data)
21+
22+
print("!!! ruamel.yaml.load kwarg")
23+
ruamel.yaml.load(stream=serialized_data)
24+
25+
print("!!! ruamel.yaml.load with Loader=ruamel.yaml.Loader")
26+
ruamel.yaml.load(serialized_data, ruamel.yaml.Loader)
27+
28+
print("!!! ruamel.yaml.load with Loader=ruamel.yaml.UnsafeLoader")
29+
ruamel.yaml.load(serialized_data, ruamel.yaml.UnsafeLoader)
30+
31+
print("!!! ruamel.yaml.load with Loader=ruamel.yaml.CLoader")
32+
ruamel.yaml.load(serialized_data, ruamel.yaml.CLoader)
33+
34+
# you need to iterate through the result for it to execute... but it still works
35+
print("!!! ruamel.yaml.load_all")
36+
for _ in ruamel.yaml.load_all(serialized_data):
37+
pass
38+
39+
# check that the safe version is actually safe
40+
print("\n" + "-"*80)
41+
print("safe versions")
42+
print("-" * 80)
43+
44+
print("!!! ruamel.yaml.safe_load")
45+
try:
46+
ruamel.yaml.safe_load(serialized_data)
47+
raise Exception("should not happen")
48+
except ruamel.yaml.constructor.ConstructorError:
49+
pass
50+
51+
print("!!! ruamel.yaml.load with Loader=ruamel.yaml.SafeLoader")
52+
try:
53+
ruamel.yaml.load(serialized_data, ruamel.yaml.SafeLoader)
54+
raise Exception("should not happen")
55+
except ruamel.yaml.constructor.ConstructorError:
56+
pass
57+
58+
print("!!! ruamel.yaml.load with Loader=ruamel.yaml.CSafeLoader")
59+
try:
60+
ruamel.yaml.load(serialized_data, ruamel.yaml.CSafeLoader)
61+
raise Exception("should not happen")
62+
except ruamel.yaml.constructor.ConstructorError:
63+
pass

python/ql/test/library-tests/frameworks/yaml/Decoding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
# Unsafe:
44
yaml.load(payload) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
5+
yaml.load(stream=payload) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
56
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
67
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load(..) decodeFormat=YAML decodeMayExecuteInput
78
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=yaml.full_load(..) decodeFormat=YAML decodeMayExecuteInput

0 commit comments

Comments
 (0)