From 21bc2c42eddd425c0fe54338e868eb9f341b7344 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 07:15:46 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?= =?UTF-8?q?=20Fix=20SQL=20injection=20in=20metadata=5Fgenerator.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fetch and use table schema columns as allowlist for filtering dictionary keys before dynamic SQL insert construction to prevent SQL injection. - Added journaling to .jules/sentinel.md detailing the vulnerability, learning, and prevention mechanisms. Co-authored-by: thebearwithabite <216692431+thebearwithabite@users.noreply.github.com> --- .jules/sentinel.md | 10 ++++++++++ metadata_generator.py | 14 ++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 4bd30c3..fbe8b61 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -19,3 +19,13 @@ **Prevention:** 1. Always convert `pathlib.Path` objects to absolute strings using `str(path.absolute())` before passing them as arguments to `subprocess.run`. 2. Absolute paths always begin with a directory separator (`/` on Unix) or a drive letter (`C:\` on Windows), guaranteeing the command-line tool parses them as file paths rather than flags or options. + +## 2024-05-30 - SQL Injection via Dictionary Keys in Dynamic Inserts + +**Vulnerability:** The `save_file_metadata` method in `metadata_generator.py` constructed an `INSERT OR REPLACE INTO` query by directly formatting the keys of the user-provided `metadata` dictionary into the column names string `({column_names})`. This allowed an attacker to inject arbitrary SQL statements if they could control the keys of the `metadata` dictionary. + +**Learning:** When dynamically generating SQL queries (such as `INSERT` or `UPDATE` with dynamically generated column names), standard parameterized queries (using `?`) only protect the values. The column names themselves must be strictly validated. Relying on the structure of a dictionary is insufficient if the dictionary can be externally manipulated. + +**Prevention:** +1. Always validate dictionary keys against a strict allowlist of allowed columns before dynamically constructing SQL queries based on those keys. +2. The schema definitions retrieved dynamically via `PRAGMA table_info(table_name)` can serve as an effective allowlist. diff --git a/metadata_generator.py b/metadata_generator.py index ffa9d33..17f1fb8 100644 --- a/metadata_generator.py +++ b/metadata_generator.py @@ -467,9 +467,19 @@ def save_file_metadata(self, metadata: Dict[str, Any]) -> bool: try: with sqlite3.connect(self.db_path) as conn: + # Fetch valid columns to prevent SQL injection + cursor = conn.execute("PRAGMA table_info(file_metadata)") + valid_columns = {row[1] for row in cursor.fetchall()} + + # Filter metadata to only include valid columns + filtered_metadata = {k: v for k, v in metadata.items() if k in valid_columns} + + if not filtered_metadata: + return False + # Convert to database format - columns = list(metadata.keys()) - values = list(metadata.values()) + columns = list(filtered_metadata.keys()) + values = list(filtered_metadata.values()) placeholders = ', '.join(['?' for _ in values]) column_names = ', '.join(columns)