Skip to content

Commit a251639

Browse files
escape unescape sharp, single quote, double quote
1 parent 9b375be commit a251639

File tree

2 files changed

+90
-9
lines changed

2 files changed

+90
-9
lines changed

pandas/core/frame.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
cast,
3535
overload,
3636
)
37+
import urllib.parse
3738
import warnings
3839

3940
import numpy as np
@@ -4554,14 +4555,8 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
45544555
For other characters that fall outside the ASCII range (U+0001..U+007F)
45554556
and those that are not further specified in PEP 3131,
45564557
the query parser will raise an error.
4557-
This excludes whitespace different than the space character,
4558-
but also the hashtag (as it is used for comments) and the backtick
4559-
itself (backtick can also not be escaped).
4560-
4561-
In a special case, quotes that make a pair around a backtick can
4562-
confuse the parser.
4563-
For example, ```it's` > `that's``` will raise an error,
4564-
as it forms a quoted string (``'s > `that'``) with a backtick inside.
4558+
This excludes whitespace other than the space character
4559+
and the backtick itself (backtick cannot be escaped).
45654560
45664561
See also the `Python documentation about lexical analysis
45674562
<https://docs.python.org/3/reference/lexical_analysis.html>`__
@@ -4615,7 +4610,35 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
46154610
raise ValueError(msg)
46164611
kwargs["level"] = kwargs.pop("level", 0) + 1
46174612
kwargs["target"] = None
4618-
res = self.eval(expr, **kwargs)
4613+
4614+
# GH 59285
4615+
if any(("#" in col) or ("'" in col) or ('"' in col) for col in self.columns):
4616+
# Create a copy of `self` with column names escaped
4617+
escaped_self = self.copy()
4618+
escaped_self.columns = [
4619+
urllib.parse.quote(col) for col in escaped_self.columns
4620+
]
4621+
4622+
# In expr, escape column names between backticks
4623+
column_name_to_escaped_name = {
4624+
col: urllib.parse.quote(col) for col in self.columns
4625+
}
4626+
escaped_expr = "`".join(
4627+
(column_name_to_escaped_name.get(token, token) if (i % 2) else token)
4628+
for i, token in enumerate(expr.split("`"))
4629+
)
4630+
4631+
# Evaluate the escaped expression against the copy with escaped column names
4632+
escaped_res = escaped_self.eval(escaped_expr, **kwargs)
4633+
4634+
# If `res` is a Series or DataFrame, unescape names
4635+
res = escaped_res.copy()
4636+
if isinstance(res, Series) and res.name:
4637+
res.name = urllib.parse.unquote(res.name)
4638+
elif isinstance(res, DataFrame):
4639+
res.columns = [urllib.parse.unquote(col) for col in res.columns]
4640+
else:
4641+
res = self.eval(expr, **kwargs)
46194642

46204643
try:
46214644
result = self.loc[res]

pandas/tests/computation/test_eval.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,64 @@ def test_eval_no_support_column_name(request, column):
19781978
tm.assert_frame_equal(result, expected)
19791979

19801980

1981+
def test_query_on_column_name_with_hashtag_character():
1982+
# GH 59285
1983+
df = DataFrame((1, 2, 3), columns=["a#"])
1984+
result = df.query("`a#` < 2")
1985+
expected = df[df["a#"] < 2]
1986+
tm.assert_frame_equal(result, expected)
1987+
1988+
1989+
def test_query_on_expr_with_comment():
1990+
# GH 59285
1991+
df = DataFrame((1, 2, 3), columns=["a#"])
1992+
result = df.query("`a#` < 2 # This is a comment")
1993+
expected = df[df["a#"] < 2]
1994+
tm.assert_frame_equal(result, expected)
1995+
1996+
1997+
def test_query_on_column_names_with_single_quote_character():
1998+
df = DataFrame(
1999+
[
2000+
{"it's": 1, "that's": 2},
2001+
{"it's": 3, "that's": 4},
2002+
{"it's": -1, "that's": -2},
2003+
{"it's": -3, "that's": -4},
2004+
]
2005+
)
2006+
result = df.query("`it's` < `that's`")
2007+
expected = df[df["it's"] < df["that's"]]
2008+
tm.assert_frame_equal(result, expected)
2009+
2010+
2011+
def test_query_on_column_names_with_double_quote_character():
2012+
df = DataFrame(
2013+
[
2014+
{'it"s': 1, 'that"s': 2},
2015+
{'it"s': 3, 'that"s': 4},
2016+
{'it"s': -1, 'that"s': -2},
2017+
{'it"s': -3, 'that"s': -4},
2018+
]
2019+
)
2020+
result = df.query('`it"s` < `that"s`')
2021+
expected = df[df['it"s'] < df['that"s']]
2022+
tm.assert_frame_equal(result, expected)
2023+
2024+
2025+
def test_query_on_column_names_with_single_quote_and_double_quote_character():
2026+
df = DataFrame(
2027+
[
2028+
{"it's": 1, 'that\'s "nice"': 2},
2029+
{"it's": 3, 'that\'s "nice"': 4},
2030+
{"it's": -1, 'that\'s "nice"': -2},
2031+
{"it's": -3, 'that\'s "nice"': -4},
2032+
]
2033+
)
2034+
result = df.query("`it's` < `that's \"nice\"`")
2035+
expected = df[df["it's"] < df['that\'s "nice"']]
2036+
tm.assert_frame_equal(result, expected)
2037+
2038+
19812039
def test_set_inplace():
19822040
# https://github.com/pandas-dev/pandas/issues/47449
19832041
# Ensure we don't only update the DataFrame inplace, but also the actual

0 commit comments

Comments
 (0)