Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ def with_column(self, name: str, expr: Expr) -> DataFrame:
"""
return DataFrame(self.df.with_column(name, expr.expr))

@deprecated(
"with_column_renamed() is deprecated. Use :py:meth:`~DataFrame.rename` instead"
)
def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame:
r"""Rename one column by applying a new projection.

Expand All @@ -175,7 +178,23 @@ def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame:
Returns:
DataFrame with the column renamed.
"""
return DataFrame(self.df.with_column_renamed(old_name, new_name))
return DataFrame(self.df.rename({old_name: new_name}))

def rename(self, mapping: dict[str, str]) -> DataFrame:
r"""Rename one or multiple columns by applying a new projection.
Comment on lines +180 to +181
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about with_columns_renamed to be more clear what this is doing? I could imagine users confusing this with changing a table's name, even though the dataframe doesn't have a name associated with it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would argue rename is quite clear if you are acquainted with pandas and pola-rs API.

With columns renamed is quite verbose ^^

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to hear other people's opinions. Also I don't recommend we stray too far from what the upstream DataFusion uses for naming conventions.


This is a no-op if the column to be renamed does not exist.

The method supports case-sensitive renaming by wrapping the column name
in one of the following symbols (" or ' or \`).

Args:
mapping (dict[str, str]): mapping of old (key) to new (value) names

Returns:
DataFrame with one or multiple columns renamed.
"""
return DataFrame(self.df.rename(mapping))

def aggregate(
self, group_by: list[Expr] | Expr, aggs: list[Expr] | Expr
Expand Down
4 changes: 2 additions & 2 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ def test_with_column(df):
assert result.column(2) == pa.array([5, 7, 9])


def test_with_column_renamed(df):
df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum")
def test_rename(df):
df = df.with_column("c", column("a") + column("b")).rename({"c": "sum"})

result = df.collect()[0]

Expand Down
14 changes: 7 additions & 7 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use std::collections::HashMap;
use std::ffi::CString;
use std::sync::Arc;

Expand Down Expand Up @@ -180,14 +181,13 @@ impl PyDataFrame {
Ok(Self::new(df))
}

/// Rename one column by applying a new projection. This is a no-op if the column to be
/// Rename one or more columns by applying a new projection. This is a no-op if the column to be
/// renamed does not exist.
fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult<Self> {
let df = self
.df
.as_ref()
.clone()
.with_column_renamed(old_name, new_name)?;
fn rename(&self, mapping: HashMap<String, String>) -> PyResult<Self> {
let mut df = self.df.as_ref().clone();
for (old_name, new_name) in mapping.iter() {
df = df.with_column_renamed(old_name, new_name)?
}
Ok(Self::new(df))
}

Expand Down