diff --git a/python/ql/lib/change-notes/2025-11-26-model-ParseResult.md b/python/ql/lib/change-notes/2025-11-26-model-ParseResult.md new file mode 100644 index 000000000000..9d4616436133 --- /dev/null +++ b/python/ql/lib/change-notes/2025-11-26-model-ParseResult.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added taint flow model and type model for `urllib.parseurl`. \ No newline at end of file diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index cdbb451b673b..a01bf1b40ba6 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -142,6 +142,8 @@ extensions: - ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs - ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse + - ["urllib", "Member[parse].Member[urlparse]", "Argument[0,urlstring:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote - ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus @@ -181,7 +183,9 @@ extensions: - addsTo: pack: codeql/python-all extensible: typeModel - data: [] + data: + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse + - ["urllib.parse.ParseResult~Subclass", 'urllib', 'Member[parse].Member[urlparse]'] - addsTo: pack: codeql/python-all diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index c6b671e8b781..143a575c6e9c 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -245,6 +245,67 @@ module Stdlib { } } + /** + * Provides models for the `urllib.parse.ParseResult` class + * + * See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.ParseResult. + */ + module ParseResult { + /** Gets a reference to the `urllib.parse.ParseResult` class. */ + API::Node classRef() { + result = API::moduleImport("urllib").getMember("parse").getMember("ParseResult") + or + result = ModelOutput::getATypeNode("urllib.parse.ParseResult~Subclass").getASubclass*() + } + + /** + * A source of instances of `urllib.parse.ParseResult`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `ParseResult::instance()` to get references to instances of `urllib.parse.ParseResult`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `urllib.parse.ParseResult`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + } + + /** Gets a reference to an instance of `urllib.parse.ParseResult`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `urllib.parse.ParseResult`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Taint propagation for `urllib.parse.ParseResult`. + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "urllib.parse.ParseResult" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + "netloc", "path", "params", "query", "fragment", "username", "password", "hostname", + "port" + ] + } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } + } + } + // --------------------------------------------------------------------------- // logging // --------------------------------------------------------------------------- diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 12d1b8282c73..87b07df086fa 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -1,23 +1,23 @@ edges | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:85 | +| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:86 | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | | | test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:85 | +| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:86 | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | | | test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:85 | +| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:86 | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | | | test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:85 | +| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:86 | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | | | test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:85 | +| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:86 | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | | | test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config | diff --git a/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/HeaderInjection.expected b/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/HeaderInjection.expected index b5f4ff549c41..cea505fe39db 100644 --- a/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/HeaderInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/HeaderInjection.expected @@ -11,6 +11,16 @@ edges | flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | provenance | | | flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | provenance | | | flask_tests.py:31:18:31:24 | ControlFlowNode for request | flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | provenance | AdditionalTaintStep | +| http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:6:45:6:53 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | http_test.py:7:40:7:56 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | provenance | | +| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | provenance | MaD:77 | +| http_test.py:7:9:7:14 | ControlFlowNode for params | http_test.py:8:23:8:28 | ControlFlowNode for params | provenance | | +| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | http_test.py:7:9:7:14 | ControlFlowNode for params | provenance | | +| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | provenance | MaD:76 | +| http_test.py:8:9:8:19 | ControlFlowNode for input_value | http_test.py:12:40:12:50 | ControlFlowNode for input_value | provenance | | +| http_test.py:8:23:8:28 | ControlFlowNode for params | http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | provenance | dict.get | +| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | http_test.py:8:9:8:19 | ControlFlowNode for input_value | provenance | | | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | provenance | | | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | provenance | | | wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | provenance | | @@ -28,6 +38,17 @@ nodes | flask_tests.py:31:18:31:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header | | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header | +| http_test.py:5:16:5:19 | ControlFlowNode for self | semmle.label | ControlFlowNode for self | +| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | semmle.label | ControlFlowNode for parsed_path | +| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| http_test.py:7:9:7:14 | ControlFlowNode for params | semmle.label | ControlFlowNode for params | +| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| http_test.py:8:9:8:19 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value | +| http_test.py:8:23:8:28 | ControlFlowNode for params | semmle.label | ControlFlowNode for params | +| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| http_test.py:12:40:12:50 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value | | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | semmle.label | ControlFlowNode for environ | | wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | semmle.label | ControlFlowNode for h_name | | wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | semmle.label | ControlFlowNode for h_val | @@ -39,5 +60,6 @@ subpaths | flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value | | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value | | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value | +| http_test.py:12:40:12:50 | ControlFlowNode for input_value | http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:12:40:12:50 | ControlFlowNode for input_value | This HTTP header is constructed from a $@. | http_test.py:5:16:5:19 | ControlFlowNode for self | user-provided value | | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value | | wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/http_test.py b/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/http_test.py new file mode 100644 index 000000000000..cf46219fc59c --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/http_test.py @@ -0,0 +1,22 @@ +from http.server import HTTPServer, BaseHTTPRequestHandler +import urllib.parse + +class VulnerableHandler(BaseHTTPRequestHandler): + def do_GET(self): + parsed_path = urllib.parse.urlparse(self.path) + params = urllib.parse.parse_qs(parsed_path.query) + input_value = params.get("input", [""])[0] + # Unsafe: Directly including user input in headers + self.send_response(200) + try: + self.send_header("X-Info", input_value) # BAD + except Exception as e: + print(f"[!] Header injection failed: {e}") + self.end_headers() + self.wfile.write(b"Hello world!") + + +# if __name__ == "__main__": +# print("Serving vulnerable app on http://127.0.0.1:8080") +# httpd = HTTPServer(("127.0.0.1", 8080), VulnerableHandler) +# httpd.serve_forever()