Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2025-11-26-model-ParseResult.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added taint flow model and type model for `urllib.parseurl`.
6 changes: 5 additions & 1 deletion python/ql/lib/semmle/python/frameworks/Stdlib.model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ extensions:
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs
- ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib", "Member[parse].Member[urlparse]", "Argument[0,urlstring:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
Expand Down Expand Up @@ -181,7 +183,9 @@ extensions:
- addsTo:
pack: codeql/python-all
extensible: typeModel
data: []
data:
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
- ["urllib.parse.ParseResult~Subclass", 'urllib', 'Member[parse].Member[urlparse]']

- addsTo:
pack: codeql/python-all
Expand Down
61 changes: 61 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,67 @@ module Stdlib {
}
}

/**
* Provides models for the `urllib.parse.ParseResult` class
*
* See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.ParseResult.
*/
module ParseResult {
/** Gets a reference to the `urllib.parse.ParseResult` class. */
API::Node classRef() {
result = API::moduleImport("urllib").getMember("parse").getMember("ParseResult")
or
result = ModelOutput::getATypeNode("urllib.parse.ParseResult~Subclass").getASubclass*()
}

/**
* A source of instances of `urllib.parse.ParseResult`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `ParseResult::instance()` to get references to instances of `urllib.parse.ParseResult`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }

/** A direct instantiation of `urllib.parse.ParseResult`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }
}

/** Gets a reference to an instance of `urllib.parse.ParseResult`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}

/** Gets a reference to an instance of `urllib.parse.ParseResult`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

/**
* Taint propagation for `urllib.parse.ParseResult`.
*/
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "urllib.parse.ParseResult" }

override DataFlow::Node getInstance() { result = instance() }

override string getAttributeName() {
result in [
"netloc", "path", "params", "query", "fragment", "username", "password", "hostname",
"port"
]
}

override string getMethodName() { none() }

override string getAsyncMethodName() { none() }
}
}

// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
edges
| test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | |
| test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | |
| test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | |
| test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | |
| test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | |
| test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:85 |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:86 |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | |
| test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ edges
| flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | provenance | |
| flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | provenance | |
| flask_tests.py:31:18:31:24 | ControlFlowNode for request | flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | provenance | AdditionalTaintStep |
| http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:6:45:6:53 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | http_test.py:7:40:7:56 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | provenance | |
| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | provenance | MaD:77 |
| http_test.py:7:9:7:14 | ControlFlowNode for params | http_test.py:8:23:8:28 | ControlFlowNode for params | provenance | |
| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | http_test.py:7:9:7:14 | ControlFlowNode for params | provenance | |
| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | provenance | MaD:76 |
| http_test.py:8:9:8:19 | ControlFlowNode for input_value | http_test.py:12:40:12:50 | ControlFlowNode for input_value | provenance | |
| http_test.py:8:23:8:28 | ControlFlowNode for params | http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | provenance | dict.get |
| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | http_test.py:8:9:8:19 | ControlFlowNode for input_value | provenance | |
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | provenance | |
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | provenance | |
| wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | provenance | |
Expand All @@ -28,6 +38,17 @@ nodes
| flask_tests.py:31:18:31:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header |
| flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header |
| http_test.py:5:16:5:19 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | semmle.label | ControlFlowNode for parsed_path |
| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| http_test.py:7:9:7:14 | ControlFlowNode for params | semmle.label | ControlFlowNode for params |
| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| http_test.py:8:9:8:19 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value |
| http_test.py:8:23:8:28 | ControlFlowNode for params | semmle.label | ControlFlowNode for params |
| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| http_test.py:12:40:12:50 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value |
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | semmle.label | ControlFlowNode for environ |
| wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | semmle.label | ControlFlowNode for h_name |
| wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | semmle.label | ControlFlowNode for h_val |
Expand All @@ -39,5 +60,6 @@ subpaths
| flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
| flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
| flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
| http_test.py:12:40:12:50 | ControlFlowNode for input_value | http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:12:40:12:50 | ControlFlowNode for input_value | This HTTP header is constructed from a $@. | http_test.py:5:16:5:19 | ControlFlowNode for self | user-provided value |
| wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value |
| wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value |
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from http.server import HTTPServer, BaseHTTPRequestHandler
Copy link

Copilot AI Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import of 'HTTPServer' is not used.

Suggested change
from http.server import HTTPServer, BaseHTTPRequestHandler
from http.server import BaseHTTPRequestHandler

Copilot uses AI. Check for mistakes.
import urllib.parse

class VulnerableHandler(BaseHTTPRequestHandler):
def do_GET(self):
parsed_path = urllib.parse.urlparse(self.path)
params = urllib.parse.parse_qs(parsed_path.query)
input_value = params.get("input", [""])[0]
# Unsafe: Directly including user input in headers
self.send_response(200)
try:
self.send_header("X-Info", input_value) # BAD
except Exception as e:
print(f"[!] Header injection failed: {e}")
self.end_headers()
self.wfile.write(b"Hello world!")


# if __name__ == "__main__":
# print("Serving vulnerable app on http://127.0.0.1:8080")
# httpd = HTTPServer(("127.0.0.1", 8080), VulnerableHandler)
# httpd.serve_forever()
Comment on lines +19 to +22
Copy link

Copilot AI Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment appears to contain commented-out code.

Suggested change
# if __name__ == "__main__":
# print("Serving vulnerable app on http://127.0.0.1:8080")
# httpd = HTTPServer(("127.0.0.1", 8080), VulnerableHandler)
# httpd.serve_forever()
# To run this server manually for testing, use:
# python -m http.server 8080 --bind 127.0.0.1
# Or implement the main block as shown in documentation.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We normally leave such a code block there, so people can easily test for actual vulnerabilities.