diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index c5ac0bb89..a9e4d4d10 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -129,6 +129,17 @@ def select(self, *exprs: Expr | str) -> DataFrame:
         ]
         return DataFrame(self.df.select(*exprs_internal))
 
+    def drop(self, *columns: str) -> DataFrame:
+        """Drop an arbitrary number of columns.
+
+        Args:
+            columns: Column names to drop from the dataframe.
+
+        Returns:
+            DataFrame with those columns removed in the projection.
+        """
+        return DataFrame(self.df.drop(*columns))
+
     def filter(self, *predicates: Expr) -> DataFrame:
         """Return a DataFrame for which ``predicate`` evaluates to ``True``.
 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index e89c57159..88c642a7d 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -169,6 +169,17 @@ def test_sort(df):
     assert table.to_pydict() == expected
 
 
+def test_drop(df):
+    df = df.drop("c")
+
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
+
+    assert df.schema().names == ["a", "b"]
+    assert result.column(0) == pa.array([1, 2, 3])
+    assert result.column(1) == pa.array([4, 5, 6])
+
+
 def test_limit(df):
     df = df.limit(1)
 
diff --git a/src/dataframe.rs b/src/dataframe.rs
index e77ca8425..db243704a 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -170,6 +170,13 @@ impl PyDataFrame {
         Ok(Self::new(df))
     }
 
+    #[pyo3(signature = (*args))]
+    fn drop(&self, args: Vec<String>) -> PyResult<Self> {
+        let cols = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
+        let df = self.df.as_ref().clone().drop_columns(&cols)?;
+        Ok(Self::new(df))
+    }
+
     fn filter(&self, predicate: PyExpr) -> PyResult<Self> {
         let df = self.df.as_ref().clone().filter(predicate.into())?;
         Ok(Self::new(df))