|
1 | 1 | /**
|
2 |
| - * Provides classes modeling security-relevant aspects of the PyYAML package |
3 |
| - * https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`). |
| 2 | + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained |
| 3 | + * via `import yaml`). |
| 4 | + * |
| 5 | + * See |
| 6 | + * - https://pyyaml.org/wiki/PyYAMLDocumentation |
| 7 | + * - https://pyyaml.docsforge.com/master/documentation/ |
4 | 8 | */
|
5 | 9 |
|
6 | 10 | private import python
|
7 | 11 | private import semmle.python.dataflow.new.DataFlow
|
8 | 12 | private import semmle.python.dataflow.new.RemoteFlowSources
|
9 | 13 | private import semmle.python.Concepts
|
| 14 | +private import semmle.python.ApiGraphs |
10 | 15 |
|
| 16 | +/** |
| 17 | + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained |
| 18 | + * via `import yaml`). |
| 19 | + * |
| 20 | + * See |
| 21 | + * - https://pyyaml.org/wiki/PyYAMLDocumentation |
| 22 | + * - https://pyyaml.docsforge.com/master/documentation/ |
| 23 | + */ |
11 | 24 | private module Yaml {
|
12 |
| - /** Gets a reference to the `yaml` module. */ |
13 |
| - private DataFlow::Node yaml(DataFlow::TypeTracker t) { |
14 |
| - t.start() and |
15 |
| - result = DataFlow::importNode("yaml") |
16 |
| - or |
17 |
| - exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t)) |
18 |
| - } |
19 |
| - |
20 |
| - /** Gets a reference to the `yaml` module. */ |
21 |
| - DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) } |
| 25 | + /** |
| 26 | + * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, |
| 27 | + * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`) |
| 28 | + * |
| 29 | + * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). |
| 30 | + */ |
| 31 | + private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { |
| 32 | + override CallNode node; |
| 33 | + string func_name; |
22 | 34 |
|
23 |
| - /** Provides models for the `yaml` module. */ |
24 |
| - module yaml { |
25 |
| - /** |
26 |
| - * Gets a reference to the attribute `attr_name` of the `yaml` module. |
27 |
| - * WARNING: Only holds for a few predefined attributes. |
28 |
| - * |
29 |
| - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. |
30 |
| - */ |
31 |
| - private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) { |
32 |
| - attr_name in [ |
33 |
| - // functions |
| 35 | + YamlLoadCall() { |
| 36 | + func_name in [ |
34 | 37 | "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
35 |
| - "safe_load", "safe_load_all", |
36 |
| - // Classes |
37 |
| - "SafeLoader", "BaseLoader" |
| 38 | + "safe_load", "safe_load_all" |
38 | 39 | ] and
|
39 |
| - ( |
40 |
| - t.start() and |
41 |
| - result = DataFlow::importNode("yaml." + attr_name) |
42 |
| - or |
43 |
| - t.startInAttr(attr_name) and |
44 |
| - result = yaml() |
45 |
| - ) |
46 |
| - or |
47 |
| - // Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)` |
48 |
| - // we have inlined that code and forced a join |
49 |
| - exists(DataFlow::TypeTracker t2 | |
50 |
| - exists(DataFlow::StepSummary summary | |
51 |
| - yaml_attr_first_join(t2, attr_name, result, summary) and |
52 |
| - t = t2.append(summary) |
53 |
| - ) |
54 |
| - ) |
55 |
| - } |
56 |
| - |
57 |
| - pragma[nomagic] |
58 |
| - private predicate yaml_attr_first_join( |
59 |
| - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary |
60 |
| - ) { |
61 |
| - DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary) |
| 40 | + this = API::moduleImport("yaml").getMember(func_name).getACall() |
62 | 41 | }
|
63 | 42 |
|
64 | 43 | /**
|
65 |
| - * Gets a reference to the attribute `attr_name` of the `yaml` module. |
66 |
| - * WARNING: Only holds for a few predefined attributes. |
67 |
| - * |
68 |
| - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. |
| 44 | + * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. |
| 45 | + * In 2020 new exploits were found, meaning it's not safe. The current plan is to change the default to `SafeLoader` in release 6.0 |
| 46 | + * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). |
| 47 | + * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. |
| 48 | + * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. |
69 | 49 | */
|
70 |
| - DataFlow::Node yaml_attr(string attr_name) { |
71 |
| - result = yaml_attr(DataFlow::TypeTracker::end(), attr_name) |
| 50 | + override predicate mayExecuteInput() { |
| 51 | + func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] |
| 52 | + or |
| 53 | + func_name in ["load", "load_all"] and |
| 54 | + // If the `Loader` argument is not one of the safe loaders listed below, or is not set at all |
| 55 | + // (in which case the default loader is used), the call is not safe. |
| 56 | + not exists(DataFlow::Node loader_arg | |
| 57 | + loader_arg in [this.getArg(1), this.getArgByName("Loader")] |
| 58 | + | |
| 59 | + loader_arg = |
| 60 | + API::moduleImport("yaml") |
| 61 | + .getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"]) |
| 62 | + .getAUse() |
| 63 | + ) |
72 | 64 | }
|
73 |
| - } |
74 |
| -} |
75 | 65 |
|
76 |
| -/** |
77 |
| - * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, |
78 |
| - * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`) |
79 |
| - * |
80 |
| - * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). |
81 |
| - */ |
82 |
| -private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode { |
83 |
| - override CallNode node; |
84 |
| - string func_name; |
| 66 | + override DataFlow::Node getAnInput() { result = this.getArg(0) } |
85 | 67 |
|
86 |
| - YamlLoadCall() { |
87 |
| - func_name in [ |
88 |
| - "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", |
89 |
| - "safe_load", "safe_load_all" |
90 |
| - ] and |
91 |
| - node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode() |
92 |
| - } |
| 68 | + override DataFlow::Node getOutput() { result = this } |
93 | 69 |
|
94 |
| - /** |
95 |
| - * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. |
96 |
| - * In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0 |
97 |
| - * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). |
98 |
| - * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. |
99 |
| - * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. |
100 |
| - */ |
101 |
| - override predicate mayExecuteInput() { |
102 |
| - func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] |
103 |
| - or |
104 |
| - func_name in ["load", "load_all"] and |
105 |
| - // If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all, |
106 |
| - // then the default loader will be used, which is not safe. |
107 |
| - not exists(DataFlow::Node loader_arg | |
108 |
| - loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")] |
109 |
| - | |
110 |
| - loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]) |
111 |
| - ) |
| 70 | + override string getFormat() { result = "YAML" } |
112 | 71 | }
|
113 |
| - |
114 |
| - override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) } |
115 |
| - |
116 |
| - override DataFlow::Node getOutput() { result = this } |
117 |
| - |
118 |
| - override string getFormat() { result = "YAML" } |
119 | 72 | }
|
0 commit comments