diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index 181c29db4..357971275 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -409,13 +409,33 @@ def select(self, *exprs: Expr | str) -> DataFrame:
     def drop(self, *columns: str) -> DataFrame:
         """Drop arbitrary amount of columns.
 
+        Column names are case-sensitive and do not require double quotes like
+        other operations such as `select`. A name enclosed in a single pair of
+        double quotes is also accepted; only that outer pair is removed, so
+        quotes that belong to the name itself are preserved.
+
         Args:
-            columns: Column names to drop from the dataframe.
+            columns: Column names to drop from the dataframe. Both ``column_name``
+                and ``"column_name"`` are accepted.
 
         Returns:
             DataFrame with those columns removed in the projection.
+
+        Example Usage::
+
+            df.drop('ID_For_Students')  # Works
+            df.drop('"ID_For_Students"')  # Also works (quotes stripped)
         """
-        return DataFrame(self.df.drop(*columns))
+        # Remove exactly one enclosing pair of double quotes. ``str.strip`` would
+        # also eat quotes that are part of the name and turn a lone ``"`` into "".
+        normalized_columns = [
+            col[1:-1]
+            if len(col) >= 2 and col.startswith('"') and col.endswith('"')
+            else col
+            for col in columns
+        ]
+
+        return DataFrame(self.df.drop(*normalized_columns))
 
     def filter(self, *predicates: Expr) -> DataFrame:
         """Return a DataFrame for which ``predicate`` evaluates to ``True``.
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 343d32a92..ba799f55e 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -217,6 +217,16 @@ def test_select(df):
     assert result.column(1) == pa.array([1, 2, 3])
 
 
+def test_drop_quoted_columns():
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"])
+    df = ctx.create_dataframe([[batch]])
+
+    # Both should work
+    assert df.drop('"ID_For_Students"').schema().names == []
+    assert df.drop("ID_For_Students").schema().names == []
+
+
 def test_select_mixed_expr_string(df):
     df = df.select(column("b"), "a")
 