From fab7e2eac5c581acd578f55c4196550b4b1e39ec Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 12 Oct 2024 12:07:20 +0200 Subject: [PATCH 1/3] refactor: rename to rename --- python/datafusion/dataframe.py | 21 ++++++++++++++++++++- python/tests/test_dataframe.py | 4 ++-- src/dataframe.rs | 14 +++++++------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index c5ac0bb89..b8d4f5e16 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -160,6 +160,9 @@ def with_column(self, name: str, expr: Expr) -> DataFrame: """ return DataFrame(self.df.with_column(name, expr.expr)) + @deprecated( + "with_column_renamed() is deprecated. Use :py:meth:`~DataFrame.rename` instead" + ) def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: r"""Rename one column by applying a new projection. @@ -175,7 +178,23 @@ def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: Returns: DataFrame with the column renamed. """ - return DataFrame(self.df.with_column_renamed(old_name, new_name)) + return DataFrame(self.df.rename({old_name: new_name})) + + def rename(self, mapping: dict[str, str]) -> DataFrame: + r"""Rename one or multiple columns by applying a new projection. + + This is a no-op if the column to be renamed does not exist. + + The method supports case sensitive rename with wrapping column name + into one the following symbols (" or ' or \`). + + Args: + mapping (dict[str, str]): mapping of old (key) to new (value) names + + Returns: + DataFrame with one or multiple columns renamed. 
+ """ + return DataFrame(self.df.rename(mapping)) def aggregate( self, group_by: list[Expr] | Expr, aggs: list[Expr] | Expr diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e89c57159..461a81fc6 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -205,8 +205,8 @@ def test_with_column(df): assert result.column(2) == pa.array([5, 7, 9]) -def test_with_column_renamed(df): - df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum") +def test_rename(df): + df = df.with_column("c", column("a") + column("b")).rename({"c":"sum"}) result = df.collect()[0] diff --git a/src/dataframe.rs b/src/dataframe.rs index e77ca8425..c5a76e6db 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use std::ffi::CString; use std::sync::Arc; @@ -180,14 +181,13 @@ impl PyDataFrame { Ok(Self::new(df)) } - /// Rename one column by applying a new projection. This is a no-op if the column to be + /// Rename single or multiple columns by applying a new projection. This is a no-op if the column to be /// renamed does not exist. - fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult<Self> { - let df = self - .df - .as_ref() - .clone() - .with_column_renamed(old_name, new_name)?; + fn rename(&self, mapping: HashMap<String, String>) -> PyResult<Self> { + let mut df = self.df.as_ref().clone(); + for (old_name, new_name) in mapping.iter() { + df = df.with_column_renamed(old_name, new_name)? 
+ } Ok(Self::new(df)) } From 644129931464bb310064725ba4d41589f9e5cd49 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sun, 13 Oct 2024 09:54:54 +0200 Subject: [PATCH 2/3] chore: fmt --- python/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 461a81fc6..71d8764bc 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -206,7 +206,7 @@ def test_with_column(df): def test_rename(df): - df = df.with_column("c", column("a") + column("b")).rename({"c":"sum"}) + df = df.with_column("c", column("a") + column("b")).rename({"c": "sum"}) result = df.collect()[0] From eb3d68fb47b6c3609d05211b16490e84ccb78c78 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:33:36 +0200 Subject: [PATCH 3/3] chore: pr feedback --- python/datafusion/dataframe.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index b8d4f5e16..aa01240fc 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -160,9 +160,6 @@ def with_column(self, name: str, expr: Expr) -> DataFrame: """ return DataFrame(self.df.with_column(name, expr.expr)) - @deprecated( - "with_column_renamed() is deprecated. Use :py:meth:`~DataFrame.rename` instead" - ) def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: r"""Rename one column by applying a new projection. @@ -189,7 +186,7 @@ def rename(self, mapping: dict[str, str]) -> DataFrame: into one the following symbols (" or ' or \`). Args: - mapping (dict[str, str]): mapping of old (key) to new (value) names + mapping: mapping of old (key) to new (value) names Returns: DataFrame with one or multiple columns renamed.