Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,15 @@
DataFrame only containing the specified columns.
"""
return self.select(*args)

Check failure on line 407 in python/datafusion/dataframe.py

View workflow job for this annotation

GitHub Actions / build

Ruff (W293)

python/datafusion/dataframe.py:407:1: W293 Blank line contains whitespace
def select_exprs(self, *args: str) -> DataFrame:
"""Project arbitrary list of expression strings into a new DataFrame.

The method will parse string expressions into logical plan expressions.

Check failure on line 409 in python/datafusion/dataframe.py

View workflow job for this annotation

GitHub Actions / build

Ruff (E501)

python/datafusion/dataframe.py:409:89: E501 Line too long (145 > 88)

Check failure on line 409 in python/datafusion/dataframe.py

View workflow job for this annotation

GitHub Actions / build

Ruff (W505)

python/datafusion/dataframe.py:409:89: W505 Doc line too long (145 > 88)
The output DataFrame has one column for each element in exprs.
Returns:
DataFrame only containing the specified columns.
"""

Check failure on line 414 in python/datafusion/dataframe.py

View workflow job for this annotation

GitHub Actions / build

Ruff (D205)

python/datafusion/dataframe.py:409:9: D205 1 blank line required between summary line and description
return self.df.select_exprs(*args)
Comment on lines 408 to 417
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will fail the ruff linter

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed, will follow up later if it fails


def select(self, *exprs: Expr | str) -> DataFrame:
"""Project arbitrary expressions into a new :py:class:`DataFrame`.
Expand Down
31 changes: 31 additions & 0 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,37 @@ def test_select(df):
assert result.column(1) == pa.array([1, 2, 3])


def test_select_exprs(df):
    """Check that ``select_exprs`` turns expression strings into columns."""
    # Arithmetic over two columns: one output column per expression string.
    arithmetic = df.select_exprs("a + b", "a - b")
    batch = arithmetic.collect()[0]  # first (and only) record batch

    assert batch.column(0) == pa.array([5, 7, 9])
    assert batch.column(1) == pa.array([-3, -3, -3])

    # Plain column references: "b" first, then "a".
    columns_only = df.select_exprs("b", "a")
    batch = columns_only.collect()[0]  # first (and only) record batch

    assert batch.column(0) == pa.array([4, 5, 6])
    assert batch.column(1) == pa.array([1, 2, 3])

    # Function calls wrapped around the arithmetic expressions.
    absolute = df.select_exprs("abs(a + b)", "abs(a - b)")
    batch = absolute.collect()[0]  # first (and only) record batch

    assert batch.column(0) == pa.array([5, 7, 9])
    assert batch.column(1) == pa.array([3, 3, 3])

def test_drop_quoted_columns():
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"])
Expand Down
7 changes: 7 additions & 0 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,13 @@ impl PyDataFrame {
Ok(Self::new(df))
}

/// Project a list of expression strings into a new DataFrame.
///
/// Each positional argument is borrowed as a `&str` and the resulting slice
/// is handed to the wrapped DataFrame's `select_exprs`; any error it reports
/// is propagated to the caller.
#[pyo3(signature = (*args))]
fn select_exprs(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
    // Borrow the Python-backed strings; `args` keeps them alive for the call.
    let exprs: Vec<&str> = args.iter().map(|expr| expr.as_ref()).collect();
    // The wrapped DataFrame is cloned because `select_exprs` consumes it.
    let projected = self.df.as_ref().clone().select_exprs(&exprs)?;
    Ok(Self::new(projected))
}

#[pyo3(signature = (*args))]
fn select(&self, args: Vec<PyExpr>) -> PyDataFusionResult<Self> {
let expr: Vec<Expr> = args.into_iter().map(|e| e.into()).collect();
Expand Down
Loading