|
| 1 | +/** |
| 2 | + * Provides a taint-tracking configuration for detecting "UnsafeUnpacking" vulnerabilities. |
| 3 | + */ |
| 4 | + |
| 5 | +import python |
| 6 | +import semmle.python.Concepts |
| 7 | +import semmle.python.dataflow.new.internal.DataFlowPublic |
| 8 | +import semmle.python.ApiGraphs |
| 9 | +import semmle.python.dataflow.new.TaintTracking |
| 10 | +import semmle.python.frameworks.Stdlib |
| 11 | +import semmle.python.dataflow.new.RemoteFlowSources |
| 12 | + |
| 13 | +/** |
| 14 | + * Handle those three cases of Tarfile opens: |
| 15 | + * - `tarfile.open()` |
| 16 | + * - `tarfile.TarFile()` |
| 17 | + * - `MKtarfile.Tarfile.open()` |
| 18 | + */ |
| 19 | +API::Node tarfileOpen() { |
| 20 | + result in [ |
| 21 | + API::moduleImport("tarfile").getMember(["open", "TarFile"]), |
| 22 | + API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open") |
| 23 | + ] |
| 24 | +} |
| 25 | + |
| 26 | +/** |
| 27 | + * A class for handling the previous three cases, plus the use of `closing` in with the previous cases |
| 28 | + */ |
| 29 | +class AllTarfileOpens extends API::CallNode { |
| 30 | + AllTarfileOpens() { |
| 31 | + this = tarfileOpen().getACall() |
| 32 | + or |
| 33 | + exists(API::Node closing, Node arg | |
| 34 | + closing = API::moduleImport("contextlib").getMember("closing") and |
| 35 | + this = closing.getACall() and |
| 36 | + arg = this.getArg(0) and |
| 37 | + arg = tarfileOpen().getACall() |
| 38 | + ) |
| 39 | + } |
| 40 | +} |
| 41 | + |
| 42 | +class UnsafeUnpackingConfig extends TaintTracking::Configuration { |
| 43 | + UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" } |
| 44 | + |
| 45 | + override predicate isSource(DataFlow::Node source) { |
| 46 | + // A source coming from a remote location |
| 47 | + source instanceof RemoteFlowSource |
| 48 | + or |
| 49 | + // A source coming from a CLI argparse module |
| 50 | + // see argparse: https://docs.python.org/3/library/argparse.html |
| 51 | + exists(MethodCallNode args | |
| 52 | + args = source.(AttrRead).getObject().getALocalSource() and |
| 53 | + args = |
| 54 | + API::moduleImport("argparse") |
| 55 | + .getMember("ArgumentParser") |
| 56 | + .getReturn() |
| 57 | + .getMember("parse_args") |
| 58 | + .getACall() |
| 59 | + ) |
| 60 | + or |
| 61 | + // A source catching an S3 file download |
| 62 | + // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file |
| 63 | + source = |
| 64 | + API::moduleImport("boto3") |
| 65 | + .getMember("client") |
| 66 | + .getReturn() |
| 67 | + .getMember(["download_file", "download_fileobj"]) |
| 68 | + .getACall() |
| 69 | + .getArg(2) |
| 70 | + or |
| 71 | + // A source catching an S3 file download |
| 72 | + // see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html |
| 73 | + source = |
| 74 | + API::moduleImport("boto3") |
| 75 | + .getMember("Session") |
| 76 | + .getReturn() |
| 77 | + .getMember("client") |
| 78 | + .getReturn() |
| 79 | + .getMember(["download_file", "download_fileobj"]) |
| 80 | + .getACall() |
| 81 | + .getArg(2) |
| 82 | + or |
| 83 | + // A source download a file using wget |
| 84 | + // see wget: https://pypi.org/project/wget/ |
| 85 | + exists(API::CallNode mcn | |
| 86 | + mcn = API::moduleImport("wget").getMember("download").getACall() and |
| 87 | + if exists(mcn.getArg(1)) then source = mcn.getArg(1) else source = mcn.getReturn().asSource() |
| 88 | + ) |
| 89 | + or |
| 90 | + // catch the Django uploaded files as a source |
| 91 | + // see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES |
| 92 | + source.(AttrRead).getAttributeName() = "FILES" |
| 93 | + } |
| 94 | + |
| 95 | + override predicate isSink(DataFlow::Node sink) { |
| 96 | + ( |
| 97 | + // A sink capturing method calls to `unpack_archive`. |
| 98 | + sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0) |
| 99 | + or |
| 100 | + // A sink capturing method calls to `extractall` without `members` argument. |
| 101 | + // For a call to `file.extractall` without `members` argument, `file` is considered a sink. |
| 102 | + exists(MethodCallNode call, AllTarfileOpens atfo | |
| 103 | + call = atfo.getReturn().getMember("extractall").getACall() and |
| 104 | + not exists(call.getArgByName("members")) and |
| 105 | + sink = call.getObject() |
| 106 | + ) |
| 107 | + or |
| 108 | + // A sink capturing method calls to `extractall` with `members` argument. |
| 109 | + // For a call to `file.extractall` with `members` argument, `file` is considered a sink if not |
| 110 | + // a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`. |
| 111 | + // Otherwise, the argument of `members` is considered a sink. |
| 112 | + exists(MethodCallNode call, Node arg, AllTarfileOpens atfo | |
| 113 | + call = atfo.getReturn().getMember("extractall").getACall() and |
| 114 | + arg = call.getArgByName("members") and |
| 115 | + if |
| 116 | + arg.asCfgNode() instanceof NameConstantNode or |
| 117 | + arg.asCfgNode() instanceof ListNode |
| 118 | + then sink = call.getObject() |
| 119 | + else |
| 120 | + if arg.(MethodCallNode).getMethodName() = "getmembers" |
| 121 | + then sink = arg.(MethodCallNode).getObject() |
| 122 | + else sink = call.getArgByName("members") |
| 123 | + ) |
| 124 | + or |
| 125 | + // An argument to `extract` is considered a sink. |
| 126 | + exists(AllTarfileOpens atfo | |
| 127 | + sink = atfo.getReturn().getMember("extract").getACall().getArg(0) |
| 128 | + ) |
| 129 | + or |
| 130 | + //An argument to `_extract_member` is considered a sink. |
| 131 | + exists(MethodCallNode call, AllTarfileOpens atfo | |
| 132 | + call = atfo.getReturn().getMember("_extract_member").getACall() and |
| 133 | + call.getArg(1).(AttrRead).accesses(sink, "name") |
| 134 | + ) |
| 135 | + ) and |
| 136 | + not sink.getScope().getLocation().getFile().inStdlib() |
| 137 | + } |
| 138 | + |
| 139 | + override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { |
| 140 | + // Reading the response |
| 141 | + nodeTo.(MethodCallNode).calls(nodeFrom, "read") |
| 142 | + or |
| 143 | + // Open a file for access |
| 144 | + exists(MethodCallNode cn | |
| 145 | + cn.calls(nodeTo, "open") and |
| 146 | + cn.flowsTo(nodeFrom) |
| 147 | + ) |
| 148 | + or |
| 149 | + // Open a file for access using builtin |
| 150 | + exists(API::CallNode cn | |
| 151 | + cn = API::builtin("open").getACall() and |
| 152 | + nodeTo = cn.getArg(0) and |
| 153 | + cn.flowsTo(nodeFrom) |
| 154 | + ) |
| 155 | + or |
| 156 | + // Write access |
| 157 | + exists(MethodCallNode cn | |
| 158 | + cn.calls(nodeTo, "write") and |
| 159 | + nodeFrom = cn.getArg(0) |
| 160 | + ) |
| 161 | + or |
| 162 | + // Retrieve Django uploaded files |
| 163 | + // see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist |
| 164 | + // see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks |
| 165 | + nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"]) |
| 166 | + or |
| 167 | + // Considering the use of "fs" |
| 168 | + // see fs: https://docs.djangoproject.com/en/4.1/ref/files/storage/#the-filesystemstorage-class |
| 169 | + nodeTo = |
| 170 | + API::moduleImport("django") |
| 171 | + .getMember("core") |
| 172 | + .getMember("files") |
| 173 | + .getMember("storage") |
| 174 | + .getMember("FileSystemStorage") |
| 175 | + .getReturn() |
| 176 | + .getMember(["save", "path"]) |
| 177 | + .getACall() and |
| 178 | + nodeFrom = nodeTo.(MethodCallNode).getArg(0) |
| 179 | + or |
| 180 | + // Accessing the name or raw content |
| 181 | + nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"]) |
| 182 | + or |
| 183 | + // Join the base_dir to the filename |
| 184 | + nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and |
| 185 | + nodeFrom = nodeTo.(API::CallNode).getArg(1) |
| 186 | + or |
| 187 | + // Go through an Open for a Tarfile |
| 188 | + nodeTo = tarfileOpen().getACall() and nodeFrom = nodeTo.(MethodCallNode).getArg(0) |
| 189 | + or |
| 190 | + // Handle the case where the getmembers is used. |
| 191 | + nodeTo.(MethodCallNode).calls(nodeFrom, "getmembers") and |
| 192 | + nodeFrom instanceof AllTarfileOpens |
| 193 | + or |
| 194 | + // To handle the case of `with closing(tarfile.open()) as file:` |
| 195 | + // we add a step from the first argument of `closing` to the call to `closing`, |
| 196 | + // whenever that first argument is a return of `tarfile.open()`. |
| 197 | + nodeTo = API::moduleImport("contextlib").getMember("closing").getACall() and |
| 198 | + nodeFrom = nodeTo.(API::CallNode).getArg(0) and |
| 199 | + nodeFrom = tarfileOpen().getReturn().getAValueReachableFromSource() |
| 200 | + or |
| 201 | + // see Path : https://docs.python.org/3/library/pathlib.html#pathlib.Path |
| 202 | + nodeTo = API::moduleImport("pathlib").getMember("Path").getACall() and |
| 203 | + nodeFrom = nodeTo.(API::CallNode).getArg(0) |
| 204 | + or |
| 205 | + // Use of absolutepath |
| 206 | + // see absolute : https://docs.python.org/3/library/pathlib.html#pathlib.Path.absolute |
| 207 | + exists(API::CallNode mcn | |
| 208 | + mcn = API::moduleImport("pathlib").getMember("Path").getACall() and |
| 209 | + nodeTo = mcn.getAMethodCall("absolute") and |
| 210 | + nodeFrom = mcn.getArg(0) |
| 211 | + ) |
| 212 | + } |
| 213 | +} |
0 commit comments