Skip to content

Commit c7b7a88

Browse files
Add support for funky column names with polars lazy=True (duckdb#33)
Fixes: duckdb/duckdb#18875
2 parents cb2c9b4 + d1b01d8 commit c7b7a88

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

duckdb/polars_io.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ def _pl_operation_to_sql(op: str) -> str:
5858
raise NotImplementedError(op)
5959

6060

61+
def _escape_sql_identifier(identifier: str) -> str:
62+
"""
63+
Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
64+
65+
Example:
66+
>>> _escape_sql_identifier('column"name')
67+
'"column""name"'
68+
"""
69+
escaped = identifier.replace('"', '""')
70+
return f'"{escaped}"'
71+
72+
6173
def _pl_tree_to_sql(tree: dict) -> str:
6274
"""
6375
Recursively convert a Polars expression tree (as JSON) to a SQL string.
@@ -95,7 +107,8 @@ def _pl_tree_to_sql(tree: dict) -> str:
95107
)
96108
if node_type == "Column":
97109
# A reference to a column name
98-
return subtree
110+
# Wrap in quotes to handle special characters
111+
return _escape_sql_identifier(subtree)
99112

100113
if node_type in ("Literal", "Dyn"):
101114
# Recursively process dynamic or literal values
@@ -196,7 +209,7 @@ def source_generator(
196209
duck_predicate = None
197210
relation_final = relation
198211
if with_columns is not None:
199-
cols = ",".join(with_columns)
212+
cols = ",".join(map(_escape_sql_identifier, with_columns))
200213
relation_final = relation_final.project(cols)
201214
if n_rows is not None:
202215
relation_final = relation_final.limit(n_rows)
@@ -213,7 +226,6 @@ def source_generator(
213226
while True:
214227
try:
215228
record_batch = results.read_next_batch()
216-
df = pl.from_arrow(record_batch)
217229
if predicate is not None and duck_predicate is None:
218230
# We have a predicate, but did not manage to push it down, we fallback here
219231
yield pl.from_arrow(record_batch).filter(predicate)

tests/fast/arrow/test_polars.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,36 @@ def test_polars_lazy(self, duckdb_cursor):
131131
]
132132
assert lazy_df.filter(pl.col("b") < 32).select('a').collect().to_dicts() == [{'a': 'Mark'}, {'a': 'Thijs'}]
133133

134+
def test_polars_column_with_tricky_name(self, duckdb_cursor):
    """Check that lazy polars frames work with non-standard column names.

    Four column names are exercised: one containing a colon, one containing
    a space, one containing a dot, and one wrapped in literal double quotes.
    For each, the lazy frame is collected once with every column selected and
    once more with an equality filter on the tricky column.
    """
    # NOTE: each SQL string below refers to its DataFrame by the local
    # variable's name, so the variable names and the queries must stay in sync.

    # Column name containing a colon
    df_colon = pl.DataFrame({"x:y": [1, 2]})
    lazy_colon = duckdb_cursor.sql("from df_colon").pl(lazy=True)
    everything = lazy_colon.select(pl.all()).collect()
    assert everything.to_dicts() == [{"x:y": 1}, {"x:y": 2}]
    only_first = lazy_colon.select(pl.all()).filter(pl.col("x:y") == 1).collect()
    assert only_first.to_dicts() == [{"x:y": 1}]

    # Column name containing a space
    df_space = pl.DataFrame({"x y": [1, 2]})
    lazy_space = duckdb_cursor.sql("from df_space").pl(lazy=True)
    everything = lazy_space.select(pl.all()).collect()
    assert everything.to_dicts() == [{"x y": 1}, {"x y": 2}]
    only_first = lazy_space.select(pl.all()).filter(pl.col("x y") == 1).collect()
    assert only_first.to_dicts() == [{"x y": 1}]

    # Column name containing a dot
    df_dot = pl.DataFrame({"x.y": [1, 2]})
    lazy_dot = duckdb_cursor.sql("from df_dot").pl(lazy=True)
    everything = lazy_dot.select(pl.all()).collect()
    assert everything.to_dicts() == [{"x.y": 1}, {"x.y": 2}]
    only_first = lazy_dot.select(pl.all()).filter(pl.col("x.y") == 1).collect()
    assert only_first.to_dicts() == [{"x.y": 1}]

    # Column name that itself contains double quotes
    df_quote = pl.DataFrame({'"xy"': [1, 2]})
    lazy_quote = duckdb_cursor.sql("from df_quote").pl(lazy=True)
    everything = lazy_quote.select(pl.all()).collect()
    assert everything.to_dicts() == [{'"xy"': 1}, {'"xy"': 2}]
    only_first = lazy_quote.select(pl.all()).filter(pl.col('"xy"') == 1).collect()
    assert only_first.to_dicts() == [{'"xy"': 1}]
163+
134164
@pytest.mark.parametrize(
135165
'data_type',
136166
[

0 commit comments

Comments
 (0)