From 4ab9283a797973467ad84f1b855854787c63ef52 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 4 Aug 2025 11:51:23 +0200 Subject: [PATCH 01/11] Python: Updated PathInjection tests to use inline test expectations --- .../DataflowQueryTest.expected | 2 - .../DataflowQueryTest.ql | 4 -- .../PathInjection.expected | 42 +++++++++---------- .../CWE-022-PathInjection/PathInjection.qlref | 3 +- .../flask_path_injection.py | 4 +- .../CWE-022-PathInjection/path_injection.py | 32 +++++++------- .../CWE-022-PathInjection/pathlib_use.py | 6 +-- .../Security/CWE-022-PathInjection/test.py | 10 ++--- 8 files changed, 49 insertions(+), 54 deletions(-) delete mode 100644 python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.expected delete mode 100644 python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.ql diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.expected deleted file mode 100644 index 2fad7bb9a843..000000000000 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.expected +++ /dev/null @@ -1,2 +0,0 @@ -missingAnnotationOnSink -testFailures diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.ql b/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.ql deleted file mode 100644 index e4720596a37c..000000000000 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/DataflowQueryTest.ql +++ /dev/null @@ -1,4 +0,0 @@ -import python -import utils.test.dataflow.DataflowQueryTest -import semmle.python.security.dataflow.PathInjectionQuery -import FromTaintTrackingStateConfig diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected index 37e4dd927d89..368400d505d9 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected @@ -1,3 +1,24 @@ +#select +| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:31:14:31:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:31:14:31:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:48:14:48:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:48:14:48:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:65:14:65:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:65:14:65:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:87:18:87:37 | ControlFlowNode for possibly_unsafe_path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:87:18:87:37 | ControlFlowNode for possibly_unsafe_path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:94:14:94:17 | ControlFlowNode for path | path_injection.py:91:20:91:25 | ControlFlowNode for foo_id | path_injection.py:94:14:94:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:91:20:91:25 | ControlFlowNode for foo_id | user-provided value | +| path_injection.py:102:14:102:17 | ControlFlowNode for path | path_injection.py:98:20:98:22 | ControlFlowNode for foo | path_injection.py:102:14:102:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:98:20:98:22 | ControlFlowNode for foo | user-provided value | +| path_injection.py:113:14:113:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:113:14:113:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:124:14:124:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:124:14:124:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:142:14:142:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:142:14:142:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| path_injection.py:152:18:152:21 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:152:18:152:21 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:14:5:14:5 | ControlFlowNode for p | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:19:10:19:10 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:19:10:19:10 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:26:10:26:10 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:26:10:26:10 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:33:14:33:14 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:33:14:33:14 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:49:14:49:14 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:49:14:49:14 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | edges | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | provenance | | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request | provenance | | @@ -252,24 +273,3 @@ nodes subpaths | test.py:25:19:25:19 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:25:9:25:20 | ControlFlowNode for normalize() | | test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:48:13:48:24 | ControlFlowNode for normalize() | -#select -| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:31:14:31:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:31:14:31:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:48:14:48:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:48:14:48:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:65:14:65:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:65:14:65:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:87:18:87:37 | ControlFlowNode for possibly_unsafe_path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:87:18:87:37 | ControlFlowNode for possibly_unsafe_path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:94:14:94:17 | ControlFlowNode for path | path_injection.py:91:20:91:25 | ControlFlowNode for foo_id | path_injection.py:94:14:94:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:91:20:91:25 | ControlFlowNode for foo_id | user-provided value | -| path_injection.py:102:14:102:17 | ControlFlowNode for path | path_injection.py:98:20:98:22 | ControlFlowNode for foo | path_injection.py:102:14:102:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:98:20:98:22 | ControlFlowNode for foo | user-provided value | -| path_injection.py:113:14:113:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:113:14:113:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:124:14:124:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:124:14:124:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:132:14:132:22 | ControlFlowNode for sanitized | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:142:14:142:17 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:142:14:142:17 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| path_injection.py:152:18:152:21 | ControlFlowNode for path | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:152:18:152:21 | ControlFlowNode for path | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:14:5:14:5 | ControlFlowNode for p | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| test.py:19:10:19:10 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:19:10:19:10 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| test.py:26:10:26:10 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:26:10:26:10 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| test.py:33:14:33:14 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:33:14:33:14 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | -| test.py:49:14:49:14 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:49:14:49:14 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.qlref b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.qlref index d43482cc509e..6a680f6d5ff5 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.qlref +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.qlref @@ -1 +1,2 @@ -Security/CWE-022/PathInjection.ql +query: Security/CWE-022/PathInjection.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/flask_path_injection.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/flask_path_injection.py index f64d76a3b4da..f289bcc94ec9 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/flask_path_injection.py +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/flask_path_injection.py @@ -1,4 +1,4 @@ -from flask import Flask, request, send_from_directory +from flask import Flask, request, send_from_directory # $Source app = Flask(__name__) @@ -18,4 +18,4 @@ def download_file(): def download_file(): dirname = request.args.get('dirname', '') filename = request.args.get('filename', '') - return send_from_directory(dirname, filename) # $result=BAD result=OK(filename) + return send_from_directory(dirname, filename) # $ Alert diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/path_injection.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/path_injection.py index 39c170374802..37ff20b7de13 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/path_injection.py +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/path_injection.py @@ -1,6 +1,6 @@ import os -from flask import Flask, request +from flask import Flask, request # $ Source app = Flask(__name__) @@ -10,7 +10,7 @@ @app.route("/path1") def path_injection(): filename = request.args.get('filename', '') - f = open(os.path.join(STATIC_DIR, filename)) # $result=BAD + f = open(os.path.join(STATIC_DIR, filename)) # $ Alert @app.route("/path2") @@ -18,7 +18,7 @@ def path_injection(): # Normalized, but not checked filename = request.args.get('filename', '') npath = os.path.normpath(os.path.join(STATIC_DIR, filename)) - f = open(npath) # $result=BAD + f = open(npath) # $ Alert @app.route("/path3") @@ -28,7 +28,7 @@ def unsafe_path_normpath(): npath = os.path.normpath(os.path.join(STATIC_DIR, filename)) if npath.startswith(STATIC_DIR): pass - f = open(npath) # $result=BAD + f = open(npath) # $ Alert @app.route("/path4") @@ -45,7 +45,7 @@ def unsafe_path_realpath(): # Normalized (by `realpath` that also follows symlinks), but not checked properly filename = request.args.get('filename', '') npath = os.path.realpath(os.path.join(STATIC_DIR, filename)) - f = open(npath) # $result=BAD + f = open(npath) # $ Alert @app.route("/path6") @@ -62,7 +62,7 @@ def unsafe_path_abspath(): # Normalized (by `abspath`), but not checked properly filename = request.args.get('filename', '') npath = os.path.abspath(os.path.join(STATIC_DIR, filename)) - f = open(npath) # $result=BAD + f = open(npath) # $ Alert @app.route("/path7") @@ -84,22 +84,22 @@ def safe_path_abspath_tricky(): filename = request.args.get('filename', '') possibly_unsafe_path = os.path.join(STATIC_DIR, filename) if os.path.abspath(possibly_unsafe_path).startswith(STATIC_DIR): - f = open(possibly_unsafe_path) # $SPURIOUS: result=BAD + f = open(possibly_unsafe_path) # $ SPURIOUS: Alert @app.route("/int-only/") -def flask_int_only(foo_id): +def flask_int_only(foo_id): # $ SPURIOUS: Source # This is OK, since the flask routing ensures that `foo_id` MUST be an integer. path = os.path.join(STATIC_DIR, foo_id) - f = open(path) # $spurious: result=BAD + f = open(path) # $ SPURIOUS: Alert @app.route("/not-path/") -def flask_not_path(foo): +def flask_not_path(foo): # $ Source # On UNIX systems, this is OK, since without being marked as ``, flask # routing ensures that `foo` cannot contain forward slashes (not by using %2F either). path = os.path.join(STATIC_DIR, foo) - f = open(path) # $result=BAD // OK if only running on UNIX systems, NOT OK if could be running on windows + f = open(path) # $ Alert // OK if only running on UNIX systems, NOT OK if could be running on windows @app.route("/no-dot-dot") @@ -110,7 +110,7 @@ def no_dot_dot(): # handle if `filename` is an absolute path if '../' in path: return "not this time" - f = open(path) # $result=BAD + f = open(path) # $ Alert @app.route("/no-dot-dot-with-prefix") @@ -121,7 +121,7 @@ def no_dot_dot_with_prefix(): # Therefore, for UNIX-only programs, the `../` check is enough to stop path injections. if '../' in path: return "not this time" - f = open(path) # $result=BAD // OK if only running on UNIX systems, NOT OK if could be running on windows + f = open(path) # $ Alert // OK if only running on UNIX systems, NOT OK if could be running on windows @app.route("/replace-slash") @@ -129,7 +129,7 @@ def replace_slash(): filename = request.args.get('filename', '') path = os.path.join(STATIC_DIR, filename) sanitized = path.replace("/", "_") - f = open(sanitized) # $result=BAD // OK if only running on UNIX systems, NOT OK if could be running on windows + f = open(sanitized) # $ Alert // OK if only running on UNIX systems, NOT OK if could be running on windows @app.route("/stackoverflow-solution") @@ -139,7 +139,7 @@ def stackoverflow_solution(): path = os.path.join(STATIC_DIR, filename) if os.path.commonprefix((os.path.realpath(path), STATIC_DIR)) != STATIC_DIR: return "not this time" - f = open(path) # $SPURIOUS: result=BAD + f = open(path) # $ SPURIOUS: Alert SAFE_FILES = ['foo', 'bar', 'baz'] @@ -149,4 +149,4 @@ def safe_set_of_files(): filename = request.args.get('filename', '') if filename in SAFE_FILES: path = os.path.join(STATIC_DIR, filename) - f = open(path) # $SPURIOUS: result=BAD + f = open(path) # $ SPURIOUS: Alert diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py index 6f703f903dc5..76ce1d1cc99d 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/pathlib_use.py @@ -1,6 +1,6 @@ import pathlib -from flask import Flask, request +from flask import Flask, request # $ Source app = Flask(__name__) @@ -11,7 +11,7 @@ def path_injection(): filename = request.args.get('filename', '') p = STATIC_DIR / filename - p.open() # $ result=BAD + p.open() # $ Alert p2 = pathlib.Path(STATIC_DIR, filename) - p2.open() # $ result=BAD + p2.open() # $ Alert diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/test.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/test.py index 7200cd78f454..c10c257dae53 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/test.py +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/test.py @@ -1,6 +1,6 @@ import os.path -from flask import Flask, request +from flask import Flask, request # $ Source app = Flask(__name__) @@ -16,21 +16,21 @@ def normalize(x): @app.route("/path") def simple(): x = source() - open(x) # $result=BAD + open(x) # $ Alert @app.route("/path") def normalization(): x = source() y = normalize(x) - open(y) # $result=BAD + open(y) # $ Alert @app.route("/path") def check(): x = source() if x.startswith("subfolder/"): - open(x) # $result=BAD + open(x) # $ Alert @app.route("/path") @@ -46,4 +46,4 @@ def check_then_normalize(): x = source() if x.startswith("subfolder/"): y = normalize(x) - open(y) # $result=BAD + open(y) # $ Alert From 104e20e405a5d889651ddaba0948b92ab55cc32a Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 4 Aug 2025 11:59:09 +0200 Subject: [PATCH 02/11] Python: Added extra test cases for path injection with FastAPI --- .../PathInjection.expected | 18 +++++++ .../fastapi_path_injection.py | 49 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected index 368400d505d9..46b094e024fe 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected @@ -1,4 +1,7 @@ #select +| fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | user-provided value | +| fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | user-provided value | +| fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | user-provided value | | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | | path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | @@ -20,6 +23,13 @@ | test.py:33:14:33:14 | ControlFlowNode for x | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:33:14:33:14 | ControlFlowNode for x | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | | test.py:49:14:49:14 | ControlFlowNode for y | test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:49:14:49:14 | ControlFlowNode for y | This path depends on a $@. | test.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | edges +| fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | provenance | | +| fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | fastapi_path_injection.py:20:34:20:37 | ControlFlowNode for path | provenance | | +| fastapi_path_injection.py:20:34:20:37 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | +| fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | fastapi_path_injection.py:27:34:27:37 | ControlFlowNode for path | provenance | | +| fastapi_path_injection.py:27:34:27:37 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | +| fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | provenance | | +| fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | provenance | | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request | provenance | | | flask_path_injection.py:19:5:19:11 | ControlFlowNode for dirname | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | provenance | | @@ -143,6 +153,14 @@ edges | test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | provenance | | | test.py:48:23:48:23 | ControlFlowNode for x | test.py:48:13:48:24 | ControlFlowNode for normalize() | provenance | Config | nodes +| fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath | +| fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath | +| fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:20:34:20:37 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:27:34:27:37 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | flask_path_injection.py:19:5:19:11 | ControlFlowNode for dirname | semmle.label | ControlFlowNode for dirname | diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py new file mode 100644 index 000000000000..8e66bf3ed4c9 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py @@ -0,0 +1,49 @@ +from fastapi import FastAPI, Depends + +app = FastAPI() + +class FileHandler: + def get_data(self, filepath: str): + with open(filepath, "r") as f: # $ Alert + return f.readline() + +file_handler = None + +def init_file_handler(): + global file_handler + file_handler = FileHandler() + +@app.get("/file/") +async def read_item(path: str): # $ Source + if file_handler is None: + init_file_handler() + return file_handler.get_data(path) + +def init_file_handler(): + return FileHandler() + +@app.get("/file2/", dependencies=[Depends(init_file_handler)]) +async def read_item(path: str, file_handler: FileHandler = Depends()): # $ Source + return file_handler.get_data(path) + + +@app.get("/file3/", dependencies=[Depends(init_file_handler)]) +async def read_item(path: str): # $ Source + return file_handler.get_data(path) + + +@app.on_event("startup") +def init_file_handler(): + app.state.file_handler1 = FileHandler() + app.state.file_handler2 = FileHandler() + +def get_data_source(): + return app.state.file_handler1 + +@app.get("/file4/") +async def read_item(path: str, data_source=Depends(get_data_source)): # $ MISSING: Source + return data_source.get_data(path) + +@app.get("/file5/", dependencies=[Depends(init_file_handler)]) +async def read_item(path: str): # $ MISSING: Source + return app.state.file_handler2.get_data(path) From a1b102117667997d1aaf0ace4e3815d92378c1d7 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 4 Aug 2025 14:58:19 +0200 Subject: [PATCH 03/11] Python: Added extra test for global variable nested attribute reads/writes. --- .../dataflow/fieldflow/test_global.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/ql/test/library-tests/dataflow/fieldflow/test_global.py b/python/ql/test/library-tests/dataflow/fieldflow/test_global.py index 0e96b37dc33c..57fcd274a33b 100644 --- a/python/ql/test/library-tests/dataflow/fieldflow/test_global.py +++ b/python/ql/test/library-tests/dataflow/fieldflow/test_global.py @@ -146,6 +146,7 @@ def fields_with_local_flow(x): class NestedObj(object): def __init__(self): self.obj = MyObj("OK") + self.obj.foo = "default" def getObj(self): return self.obj @@ -163,6 +164,20 @@ def getObj(self): a2 = NestedObj() a2.getObj().foo = x2 SINK(a2.obj.foo) # $ flow="SOURCE, l:-3 -> a2.obj.foo" + +# Global variable +app = NestedObj() + +def init_global(): + app.obj.foo = SOURCE + +def read_global(): + return app.obj.foo + +def test_global_nested_attributes(): + init_global() + result = read_global() + SINK(result) # $ MISSING: flow="SOURCE, l:-8 -> result" # ------------------------------------------------------------------------------ # Global scope interaction From 1c4a48e79282181b0aabf3d0e629c79449bfe0af Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 4 Aug 2025 15:06:53 +0200 Subject: [PATCH 04/11] Python: Add global variable nested field jump steps --- .../dataflow/new/internal/DataFlowPrivate.qll | 55 +++++++++++++++++++ .../dataflow/fieldflow/test_global.py | 6 +- .../PathInjection.expected | 5 ++ .../fastapi_path_injection.py | 2 +- 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index b29be706c4fc..0d59cd188fee 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -556,6 +556,61 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) { nodeFrom.asCfgNode() = param.getDefault() and nodeTo.asCfgNode() = param.getDefiningNode() ) + or + // Enhanced global variable field access tracking + globalVariableNestedFieldJumpStep(nodeFrom, nodeTo) +} + +/** + * Holds if there is a jump step from `nodeFrom` to `nodeTo` through global variable field access. + * This supports tracking nested object field access through global variables like `app.obj.foo`. + */ +predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { + exists(GlobalVariable globalVar, AttrWrite write, AttrRead read | + // Match writes and reads on the same global variable attribute path + globalVariableAttrPath(globalVar, write.getObject()) and + globalVariableAttrPath(globalVar, read.getObject()) and + write.getAttributeName() = read.getAttributeName() and + nodeFrom = write.getValue() and + nodeTo = read and + write.getEnclosingCallable() != read.getEnclosingCallable() + ) +} + +/** + * Maximum depth for global variable nested attribute access. + * Depth 0 = globalVar.foo, depth 1 = globalVar.foo.bar, depth 2 = globalVar.foo.bar.baz, etc. + */ +private int getMaxGlobalVariableDepth() { result = 1 } + +/** + * Holds if `node` is an attribute access path starting from global variable `globalVar`. + * Supports configurable nesting depth via getMaxGlobalVariableDepth(). + */ +predicate globalVariableAttrPath(GlobalVariable globalVar, Node node) { + globalVariableAttrPathAtDepth(globalVar, node, _) +} + +/** + * Holds if `node` is an attribute access path starting from global variable `globalVar` at specific `depth`. + */ +predicate globalVariableAttrPathAtDepth(GlobalVariable globalVar, Node node, int depth) { + // Base case: Direct global variable access (depth 0) + depth = 0 and + exists(NameNode name | + name.getId() = globalVar.getId() and + node.asCfgNode() = name and + name.getNode().(Name).getVariable() instanceof GlobalVariable and + not exists(ClassExpr cls | cls.getName() = globalVar.getId()) + ) + or + // Recursive case: Nested attribute access (depth > 0) + exists(AttrRead attr, int parentDepth | + globalVariableAttrPathAtDepth(globalVar, attr.getObject(), parentDepth) and + node = attr and + depth = parentDepth + 1 and + depth <= getMaxGlobalVariableDepth() + ) } //-------- diff --git a/python/ql/test/library-tests/dataflow/fieldflow/test_global.py b/python/ql/test/library-tests/dataflow/fieldflow/test_global.py index 57fcd274a33b..9970c47c962f 100644 --- a/python/ql/test/library-tests/dataflow/fieldflow/test_global.py +++ b/python/ql/test/library-tests/dataflow/fieldflow/test_global.py @@ -177,18 +177,18 @@ def read_global(): def test_global_nested_attributes(): init_global() result = read_global() - SINK(result) # $ MISSING: flow="SOURCE, l:-8 -> result" + SINK(result) # $ flow="SOURCE, l:-8 -> result" # ------------------------------------------------------------------------------ # Global scope interaction # ------------------------------------------------------------------------------ def func_defined_before(): - SINK(global_obj.foo) # $ MISSING: flow="SOURCE, l:+3 -> global_obj.foo" + SINK(global_obj.foo) # $ flow="SOURCE, l:+3 -> global_obj.foo" global_obj = MyObj(NONSOURCE) global_obj.foo = SOURCE SINK(global_obj.foo) # $ flow="SOURCE, l:-1 -> global_obj.foo" def func_defined_after(): - SINK(global_obj.foo) # $ MISSING: flow="SOURCE, l:-4 -> global_obj.foo" + SINK(global_obj.foo) # $ flow="SOURCE, l:-4 -> global_obj.foo" diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected index 46b094e024fe..79b36070c017 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected @@ -2,6 +2,7 @@ | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:17:21:17:24 | ControlFlowNode for path | user-provided value | | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:26:21:26:24 | ControlFlowNode for path | user-provided value | | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | user-provided value | +| fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | fastapi_path_injection.py:48:21:48:24 | ControlFlowNode for path | fastapi_path_injection.py:7:19:7:26 | ControlFlowNode for filepath | This path depends on a $@. | fastapi_path_injection.py:48:21:48:24 | ControlFlowNode for path | user-provided value | | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | | path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value | @@ -30,6 +31,8 @@ edges | fastapi_path_injection.py:27:34:27:37 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | provenance | | | fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | +| fastapi_path_injection.py:48:21:48:24 | ControlFlowNode for path | fastapi_path_injection.py:49:45:49:48 | ControlFlowNode for path | provenance | | +| fastapi_path_injection.py:49:45:49:48 | ControlFlowNode for path | fastapi_path_injection.py:6:24:6:31 | ControlFlowNode for filepath | provenance | | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | provenance | | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request | provenance | | | flask_path_injection.py:19:5:19:11 | ControlFlowNode for dirname | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | provenance | | @@ -161,6 +164,8 @@ nodes | fastapi_path_injection.py:27:34:27:37 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | fastapi_path_injection.py:31:21:31:24 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | fastapi_path_injection.py:32:34:32:37 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:48:21:48:24 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | +| fastapi_path_injection.py:49:45:49:48 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | | flask_path_injection.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | flask_path_injection.py:19:5:19:11 | ControlFlowNode for dirname | semmle.label | ControlFlowNode for dirname | diff --git a/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py b/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py index 8e66bf3ed4c9..e5d7cae762cb 100644 --- a/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py +++ b/python/ql/test/query-tests/Security/CWE-022-PathInjection/fastapi_path_injection.py @@ -45,5 +45,5 @@ async def read_item(path: str, data_source=Depends(get_data_source)): # $ MISSIN return data_source.get_data(path) @app.get("/file5/", dependencies=[Depends(init_file_handler)]) -async def read_item(path: str): # $ MISSING: Source +async def read_item(path: str): # $ Source return app.state.file_handler2.get_data(path) From 8617ba2a82f7c7a65523e318c765fc20b05143d9 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 6 Aug 2025 14:36:47 +0000 Subject: [PATCH 05/11] Python: Keep track of access path --- .../dataflow/new/internal/DataFlowPrivate.qll | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 0d59cd188fee..5dceda1cbfcc 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -566,10 +566,12 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) { * This supports tracking nested object field access through global variables like `app.obj.foo`. */ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { - exists(GlobalVariable globalVar, AttrWrite write, AttrRead read | + exists(ModuleVariableNode globalVar, AttrWrite write, AttrRead read | // Match writes and reads on the same global variable attribute path - globalVariableAttrPath(globalVar, write.getObject()) and - globalVariableAttrPath(globalVar, read.getObject()) and + exists(string accessPath | + globalVariableAttrPath(globalVar, accessPath, write.getObject()) and + globalVariableAttrPath(globalVar, accessPath, read.getObject()) + ) and write.getAttributeName() = read.getAttributeName() and nodeFrom = write.getValue() and nodeTo = read and @@ -587,29 +589,32 @@ private int getMaxGlobalVariableDepth() { result = 1 } * Holds if `node` is an attribute access path starting from global variable `globalVar`. * Supports configurable nesting depth via getMaxGlobalVariableDepth(). */ -predicate globalVariableAttrPath(GlobalVariable globalVar, Node node) { - globalVariableAttrPathAtDepth(globalVar, node, _) +predicate globalVariableAttrPath(ModuleVariableNode globalVar, string accessPath, Node node) { + exists(int depth | + globalVariableAttrPathAtDepth(globalVar, accessPath, node, depth) and + depth > 0 + ) } /** * Holds if `node` is an attribute access path starting from global variable `globalVar` at specific `depth`. */ -predicate globalVariableAttrPathAtDepth(GlobalVariable globalVar, Node node, int depth) { +predicate globalVariableAttrPathAtDepth( + ModuleVariableNode globalVar, string accessPath, Node node, int depth +) { // Base case: Direct global variable access (depth 0) depth = 0 and - exists(NameNode name | - name.getId() = globalVar.getId() and - node.asCfgNode() = name and - name.getNode().(Name).getVariable() instanceof GlobalVariable and - not exists(ClassExpr cls | cls.getName() = globalVar.getId()) - ) + node in [globalVar.getARead(), globalVar.getAWrite()] and + accessPath = "" or // Recursive case: Nested attribute access (depth > 0) - exists(AttrRead attr, int parentDepth | - globalVariableAttrPathAtDepth(globalVar, attr.getObject(), parentDepth) and + exists(AttrRef attr, Node n, string attrName, int parentDepth, string parentAccessPath | + attr.accesses(n, attrName) and + globalVariableAttrPathAtDepth(globalVar, parentAccessPath, n, parentDepth) and node = attr and depth = parentDepth + 1 and - depth <= getMaxGlobalVariableDepth() + depth <= getMaxGlobalVariableDepth() and + accessPath = parentAccessPath + "." + attrName ) } From 11185c304c4e792d5490cbf3e9970e4ed6ea6e2e Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 7 Aug 2025 13:17:33 +0000 Subject: [PATCH 06/11] Python: Rewrite access path computation --- .../dataflow/new/internal/DataFlowPrivate.qll | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 5dceda1cbfcc..f1d0e905d7af 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -574,8 +574,8 @@ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { ) and write.getAttributeName() = read.getAttributeName() and nodeFrom = write.getValue() and - nodeTo = read and - write.getEnclosingCallable() != read.getEnclosingCallable() + nodeTo = read //and + //write.getEnclosingCallable() != read.getEnclosingCallable() ) } @@ -583,7 +583,7 @@ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { * Maximum depth for global variable nested attribute access. * Depth 0 = globalVar.foo, depth 1 = globalVar.foo.bar, depth 2 = globalVar.foo.bar.baz, etc. */ -private int getMaxGlobalVariableDepth() { result = 1 } +private int getMaxGlobalVariableDepth() { result = 10 } /** * Holds if `node` is an attribute access path starting from global variable `globalVar`. @@ -592,7 +592,7 @@ private int getMaxGlobalVariableDepth() { result = 1 } predicate globalVariableAttrPath(ModuleVariableNode globalVar, string accessPath, Node node) { exists(int depth | globalVariableAttrPathAtDepth(globalVar, accessPath, node, depth) and - depth > 0 + depth >= 0 ) } @@ -607,14 +607,15 @@ predicate globalVariableAttrPathAtDepth( node in [globalVar.getARead(), globalVar.getAWrite()] and accessPath = "" or - // Recursive case: Nested attribute access (depth > 0) - exists(AttrRef attr, Node n, string attrName, int parentDepth, string parentAccessPath | - attr.accesses(n, attrName) and - globalVariableAttrPathAtDepth(globalVar, parentAccessPath, n, parentDepth) and - node = attr and + exists(Node obj, string attrName, string parentAccessPath, int parentDepth | + node.(AttrRead).accesses(obj, attrName) + or + exists(AttrWrite aw | aw.accesses(obj, attrName) and aw.getValue() = node) + | + globalVariableAttrPathAtDepth(globalVar, parentAccessPath, obj, parentDepth) and + accessPath = parentAccessPath + "." + attrName and depth = parentDepth + 1 and - depth <= getMaxGlobalVariableDepth() and - accessPath = parentAccessPath + "." + attrName + depth <= getMaxGlobalVariableDepth() ) } From 7fa224c8836656242bf476f2ae0204ed7429ae7c Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Thu, 7 Aug 2025 14:55:50 +0000 Subject: [PATCH 07/11] Python: Update `globalVariableAttrPathAtDepth` base case --- .../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 2 +- python/ql/test/library-tests/dataflow/fieldflow/test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index f1d0e905d7af..3f4c31bfeb19 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -604,7 +604,7 @@ predicate globalVariableAttrPathAtDepth( ) { // Base case: Direct global variable access (depth 0) depth = 0 and - node in [globalVar.getARead(), globalVar.getAWrite()] and + node in [globalVar.getARead(), globalVar.getAWrite(), globalVar] and accessPath = "" or exists(Node obj, string attrName, string parentAccessPath, int parentDepth | diff --git a/python/ql/test/library-tests/dataflow/fieldflow/test.py b/python/ql/test/library-tests/dataflow/fieldflow/test.py index 1ce049917f01..f0152ad38a84 100644 --- a/python/ql/test/library-tests/dataflow/fieldflow/test.py +++ b/python/ql/test/library-tests/dataflow/fieldflow/test.py @@ -301,12 +301,12 @@ def set_to_source(): @expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..) def test_global_flow_to_class_attribute(): inst = WithTuple2() - SINK_F(WithTuple2.my_tuple[0]) + SINK_F(WithTuple2.my_tuple[0]) # $ SPURIOUS: flow="SOURCE, l:-5 -> WithTuple2.my_tuple[0]" SINK_F(inst.my_tuple[0]) set_to_source() - SINK(WithTuple2.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-10 -> WithTuple2.my_tuple[0]" + SINK(WithTuple2.my_tuple[0]) # $ flow="SOURCE, l:-10 -> WithTuple2.my_tuple[0]" SINK(inst.my_tuple[0]) # $ MISSING: flow="SOURCE, l:-11 -> inst.my_tuple[0]" From 2a0431f56edfbfdb05b7a29376b464910bbe2f58 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 8 Aug 2025 12:56:28 +0000 Subject: [PATCH 08/11] Python: Add `AttrWrite.writes` and `AttrRead.reads` The latter of these is identical to `AttrRef.accesses`, but makes the API a bit more intuitive. --- .../dataflow/new/internal/Attributes.qll | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll index 51dccc29312c..58646448ed18 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll @@ -64,6 +64,15 @@ abstract class AttrRef extends Node { abstract class AttrWrite extends AttrRef { /** Gets the data flow node corresponding to the value that is written to the attribute. */ abstract Node getValue(); + + /** + * Holds if this attribute write writes the attribute named `attrName` on object `object` with + * value `value`. + */ + predicate writes(Node object, string attrName, Node value) { + this.accesses(object, attrName) and + this.getValue() = value + } } /** @@ -225,7 +234,14 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode { * - Dynamic attribute reads using `getattr`: `getattr(object, attr)` * - Qualified imports: `from module import attr as name` */ -abstract class AttrRead extends AttrRef, Node, LocalSourceNode { } +abstract class AttrRead extends AttrRef, Node, LocalSourceNode { + + /** Holds if this attribute read reads the attribute named `attrName` on the object `object`. */ + predicate reads(Node object, string attrName) { + this.accesses(object, attrName) + } + + } /** A simple attribute read, e.g. `object.attr` */ private class AttributeReadAsAttrRead extends AttrRead, CfgNode { From 485f39715b69a822a9730a571c7b1c91f1d78cc8 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 8 Aug 2025 12:57:16 +0000 Subject: [PATCH 09/11] Python: Use `AttrWrite.writes` Also applies @napalys' fix to the base case. --- .../python/dataflow/new/internal/Attributes.qll | 8 ++------ .../dataflow/new/internal/DataFlowPrivate.qll | 13 +++++++------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll index 58646448ed18..42c0c862e89b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll @@ -235,13 +235,9 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode { * - Qualified imports: `from module import attr as name` */ abstract class AttrRead extends AttrRef, Node, LocalSourceNode { - /** Holds if this attribute read reads the attribute named `attrName` on the object `object`. */ - predicate reads(Node object, string attrName) { - this.accesses(object, attrName) - } - - } + predicate reads(Node object, string attrName) { this.accesses(object, attrName) } +} /** A simple attribute read, e.g. `object.attr` */ private class AttributeReadAsAttrRead extends AttrRead, CfgNode { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 3f4c31bfeb19..1fd54b74d7c8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -574,8 +574,7 @@ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { ) and write.getAttributeName() = read.getAttributeName() and nodeFrom = write.getValue() and - nodeTo = read //and - //write.getEnclosingCallable() != read.getEnclosingCallable() + nodeTo = read ) } @@ -583,7 +582,7 @@ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { * Maximum depth for global variable nested attribute access. * Depth 0 = globalVar.foo, depth 1 = globalVar.foo.bar, depth 2 = globalVar.foo.bar.baz, etc. */ -private int getMaxGlobalVariableDepth() { result = 10 } +private int getMaxGlobalVariableDepth() { result = 2 } /** * Holds if `node` is an attribute access path starting from global variable `globalVar`. @@ -604,13 +603,15 @@ predicate globalVariableAttrPathAtDepth( ) { // Base case: Direct global variable access (depth 0) depth = 0 and - node in [globalVar.getARead(), globalVar.getAWrite(), globalVar] and + // We use `globalVar` instead of `globalVar.getAWrite()` due to some weirdness with how + // attribute writes are handled in the global scope (see `GlobalAttributeAssignmentAsAttrWrite`). + node in [globalVar.getARead(), globalVar] and accessPath = "" or exists(Node obj, string attrName, string parentAccessPath, int parentDepth | - node.(AttrRead).accesses(obj, attrName) + node.(AttrRead).reads(obj, attrName) or - exists(AttrWrite aw | aw.accesses(obj, attrName) and aw.getValue() = node) + any(AttrWrite aw).writes(obj, attrName, node) | globalVariableAttrPathAtDepth(globalVar, parentAccessPath, obj, parentDepth) and accessPath = parentAccessPath + "." + attrName and From c9f495ea06a107249df75722da8b89e7ed86f40b Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 11 Aug 2025 06:45:50 +0000 Subject: [PATCH 10/11] Python: Updated doc string and removed redundant predicate. --- .../dataflow/new/internal/DataFlowPrivate.qll | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 1fd54b74d7c8..96e51cb31063 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -565,12 +565,12 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) { * Holds if there is a jump step from `nodeFrom` to `nodeTo` through global variable field access. * This supports tracking nested object field access through global variables like `app.obj.foo`. */ -predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { +private predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { exists(ModuleVariableNode globalVar, AttrWrite write, AttrRead read | // Match writes and reads on the same global variable attribute path exists(string accessPath | - globalVariableAttrPath(globalVar, accessPath, write.getObject()) and - globalVariableAttrPath(globalVar, accessPath, read.getObject()) + globalVariableAttrPathAtDepth(globalVar, accessPath, write.getObject(), _) and + globalVariableAttrPathAtDepth(globalVar, accessPath, read.getObject(), _) ) and write.getAttributeName() = read.getAttributeName() and nodeFrom = write.getValue() and @@ -580,25 +580,14 @@ predicate globalVariableNestedFieldJumpStep(Node nodeFrom, Node nodeTo) { /** * Maximum depth for global variable nested attribute access. - * Depth 0 = globalVar.foo, depth 1 = globalVar.foo.bar, depth 2 = globalVar.foo.bar.baz, etc. + * Depth 1 = globalVar.foo, depth 2 = globalVar.foo.bar, depth 3 = globalVar.foo.bar.baz, etc. */ private int getMaxGlobalVariableDepth() { result = 2 } -/** - * Holds if `node` is an attribute access path starting from global variable `globalVar`. - * Supports configurable nesting depth via getMaxGlobalVariableDepth(). - */ -predicate globalVariableAttrPath(ModuleVariableNode globalVar, string accessPath, Node node) { - exists(int depth | - globalVariableAttrPathAtDepth(globalVar, accessPath, node, depth) and - depth >= 0 - ) -} - /** * Holds if `node` is an attribute access path starting from global variable `globalVar` at specific `depth`. */ -predicate globalVariableAttrPathAtDepth( +private predicate globalVariableAttrPathAtDepth( ModuleVariableNode globalVar, string accessPath, Node node, int depth ) { // Base case: Direct global variable access (depth 0) From a2edf108fb81aa791f493751a817579f1e806d45 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Mon, 11 Aug 2025 09:42:44 +0000 Subject: [PATCH 11/11] Python: Add change note --- .../ql/lib/change-notes/2025-08-11-jump-step-global-nested.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-08-11-jump-step-global-nested.md diff --git a/python/ql/lib/change-notes/2025-08-11-jump-step-global-nested.md b/python/ql/lib/change-notes/2025-08-11-jump-step-global-nested.md new file mode 100644 index 000000000000..4109bb788259 --- /dev/null +++ b/python/ql/lib/change-notes/2025-08-11-jump-step-global-nested.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Data flow tracking through global variables now supports nested field access patterns such as `global_var.obj.field`. This improves the precision of taint tracking analysis when data flows through complex global variable structures. \ No newline at end of file