From 74ae4ec9d660e9c0a5bf7ace65248fedfe3d97ef Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Tue, 11 Nov 2025 12:52:35 +0100 Subject: [PATCH 1/4] fix: Escaping in SQL templates for `qmark` parameter style --- deepnote_toolkit/sql/jinjasql_utils.py | 23 ++++++++++++++--------- tests/unit/test_jinjasql_utils.py | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/deepnote_toolkit/sql/jinjasql_utils.py b/deepnote_toolkit/sql/jinjasql_utils.py index df34521..a0a9552 100644 --- a/deepnote_toolkit/sql/jinjasql_utils.py +++ b/deepnote_toolkit/sql/jinjasql_utils.py @@ -20,13 +20,13 @@ def render_jinja_sql_template(template, param_style=None): str: The rendered SQL query. """ - escaped_template = _escape_jinja_template(template) - # Default to pyformat for backwards compatibility # Note: Some databases like Trino require "qmark" or "format" style - jinja_sql = JinjaSql( - param_style=param_style if param_style is not None else "pyformat" - ) + effective_param_style = param_style if param_style is not None else "pyformat" + + escaped_template = _escape_jinja_template(template, effective_param_style) + + jinja_sql = JinjaSql(param_style=effective_param_style) parsed_content = jinja_sql.env.parse(escaped_template) required_variables = meta.find_undeclared_variables(parsed_content) jinja_sql_data = { @@ -40,9 +40,14 @@ def _get_variable_value(variable_name): return getattr(__main__, variable_name) -def _escape_jinja_template(template): +def _escape_jinja_template(template, param_style: str = "pyformat"): # see https://github.com/sripathikrishnan/jinjasql/issues/28 and https://stackoverflow.com/q/8657508/2761695 # we have to replace % by %% in the SQL query due to how SQL alchemy interprets % - # but only if the { is not preceded by { or followed by }, because those are jinja blocks - # we use lookbehind ?<= and lookahead ?= regex matchers to capture the { and } symbols - return re.sub(r"(?<=[^{])%(?=[^}])", "%%", template) + # but ONLY for param styles that use % (format and pyformat) + # For other param styles (qmark), % has no special meaning + # and should not be escaped (e.g., in date format strings like '%m-%d-%Y') + if param_style in ("format", "pyformat"): + # Only escape % if it's not part of a jinja block (not preceded by { or followed by }) + # we use lookbehind ?<= and lookahead ?= regex matchers to capture the { and } symbols + return re.sub(r"(?<=[^{])%(?=[^}])", "%%", template) + return template diff --git a/tests/unit/test_jinjasql_utils.py b/tests/unit/test_jinjasql_utils.py index b9efec1..2f0317b 100644 --- a/tests/unit/test_jinjasql_utils.py +++ b/tests/unit/test_jinjasql_utils.py @@ -89,6 +89,32 @@ def test_qmark_format(self): self.assertEqual(query.strip(), "SELECT * FROM users WHERE id = ?") self.assertEqual(bind_params, ["test"]) + def test_qmark_escaping(self): + template = "SELECT date_format(TIMESTAMP '2022-10-20 05:10:00', '%m-%d-%Y %H')" + + query, bind_params = render_jinja_sql_template(template, param_style="qmark") + + self.assertEqual(query, template) + self.assertEqual(bind_params, []) + + def test_pyformat_escaping(self): + query, bind_params = render_jinja_sql_template( + "SELECT '% character'", + param_style="pyformat", + ) + + self.assertEqual(query, "SELECT '%% character'") + self.assertEqual(bind_params, {}) + + def test_format_escaping(self): + query, bind_params = render_jinja_sql_template( + "SELECT '% character'", + param_style="format", + ) + + self.assertEqual(query, "SELECT '%% character'") + self.assertEqual(bind_params, []) + if __name__ == "__main__": unittest.main() From d75c32f838c861424a899b0ea044a5b45be62d29 Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Tue, 11 Nov 2025 14:29:21 +0100 Subject: [PATCH 2/4] fix: Align DuckDB with `qmark` fixes --- deepnote_toolkit/sql/sql_execution.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepnote_toolkit/sql/sql_execution.py b/deepnote_toolkit/sql/sql_execution.py index 940fded..dc78281 100644 --- a/deepnote_toolkit/sql/sql_execution.py +++ b/deepnote_toolkit/sql/sql_execution.py @@ -160,6 +160,10 @@ class ExecuteSqlError(Exception): } param_style = dialect_param_styles.get(url_obj.drivername) + if requires_duckdb and param_style is None: + # DuckDB uses the DB-API qmark style (`?` placeholders) + param_style = "qmark" + skip_template_render = re.search( "^snowflake.*host=.*.proxy.cloud.getdbt.com", sql_alchemy_dict["url"] ) @@ -294,10 +298,7 @@ def _execute_sql_with_caching( ): # duckdb SQL is not cached, so we can skip the logic below for duckdb if requires_duckdb: - # duckdb requires % to be unescaped, but other dialects require it to be escaped as %% - # https://docs.sqlalchemy.org/en/14/faq/sqlexpressions.html#why-are-percent-signs-being-doubled-up-when-stringifying-sql-statements - query_unescaped = query % () if query else query - dataframe = execute_duckdb_sql(query_unescaped, bind_params) + dataframe = execute_duckdb_sql(query, bind_params) # for Chained SQL we return the dataframe with the SQL source attached as DeepnoteQueryPreview object if return_variable_type == "query_preview": return _convert_dataframe_to_query_preview(dataframe, query_preview_source) From ca88d1ed7a36b0be50a40a564d21023937cc5f8f Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Tue, 11 Nov 2025 15:15:32 +0100 Subject: [PATCH 3/4] chore: Clean up parameter style handling for DuckDB dialect --- deepnote_toolkit/sql/sql_execution.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/deepnote_toolkit/sql/sql_execution.py b/deepnote_toolkit/sql/sql_execution.py index dc78281..02892dc 100644 --- a/deepnote_toolkit/sql/sql_execution.py +++ b/deepnote_toolkit/sql/sql_execution.py @@ -156,14 +156,11 @@ class ExecuteSqlError(Exception): url_obj = make_url(sql_alchemy_dict["url"]) # Mapping of SQLAlchemy dialect names to their required param_style dialect_param_styles = { - "trino": "qmark", # Trino requires ? placeholders with list/tuple params + "trino": "qmark", # Trino only supports qmark style + "deepnote+duckdb": "qmark", # DuckDB officially recommends qmark style (doesn't support pyformat) } param_style = dialect_param_styles.get(url_obj.drivername) - if requires_duckdb and param_style is None: - # DuckDB uses the DB-API qmark style (`?` placeholders) - param_style = "qmark" - skip_template_render = re.search( "^snowflake.*host=.*.proxy.cloud.getdbt.com", sql_alchemy_dict["url"] ) From 1217282f211df8564f2121ed5a94b6d1a0b014b5 Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Tue, 11 Nov 2025 15:23:09 +0100 Subject: [PATCH 4/4] chore: Cover default DuckDB parameter style handling with test --- tests/unit/test_sql_execution.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/test_sql_execution.py b/tests/unit/test_sql_execution.py index f17543e..a8e7664 100644 --- a/tests/unit/test_sql_execution.py +++ b/tests/unit/test_sql_execution.py @@ -60,6 +60,19 @@ def test_duckdb_concat_with_percentage_sign(self): self.assertEqual(result.iloc[0]["percentage_string"], "25.5%") self.assertNotEqual(result.iloc[0]["percentage_string"], "25.5%%") + def test_duckdb_defaults_to_qmark_param_style(self): + os.environ["SQL_DEEPNOTE_DATAFRAME_SQL"] = ( + '{"url":"deepnote+duckdb:///:memory:","params":{},"param_style":null}' + ) + + result = execute_sql( + "SELECT '%' as value", + "SQL_DEEPNOTE_DATAFRAME_SQL", + ) + + assert result is not None + self.assertEqual(result.iloc[0]["value"], "%") + @mock.patch("deepnote_toolkit.sql.sql_execution._execute_sql_on_engine") @mock.patch("sqlalchemy.engine.create_engine") def test_delete_sql_that_doesnt_produce_a_dataframe(