Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# The dictionary keys represent column names and the dictionary values
# represent column values
df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
assert type(df) == datafusion.DataFrame
assert type(df) is datafusion.DataFrame
# Dataframe:
# +---+---+
# | a | b |
Expand All @@ -40,19 +40,19 @@

# Create a datafusion DataFrame from a Python list of rows
df = ctx.from_pylist([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}])
assert type(df) == datafusion.DataFrame
assert type(df) is datafusion.DataFrame

# Convert pandas DataFrame to datafusion DataFrame
pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df = ctx.from_pandas(pandas_df)
assert type(df) == datafusion.DataFrame
assert type(df) is datafusion.DataFrame

# Convert polars DataFrame to datafusion DataFrame
polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df = ctx.from_polars(polars_df)
assert type(df) == datafusion.DataFrame
assert type(df) is datafusion.DataFrame

# Convert Arrow Table to datafusion DataFrame
arrow_table = pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
df = ctx.from_arrow(arrow_table)
assert type(df) == datafusion.DataFrame
assert type(df) is datafusion.DataFrame
3 changes: 3 additions & 0 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ def schema(self) -> pa.Schema:
"""
return self.df.schema()

@deprecated(
"select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead"
)
def select_columns(self, *args: str) -> DataFrame:
"""Filter the DataFrame by columns.

Expand Down
14 changes: 6 additions & 8 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,30 +103,28 @@ def partitioned_df():


def test_select(df):
df = df.select(
df_1 = df.select(
column("a") + column("b"),
column("a") - column("b"),
)

# execute and collect the first (and only) batch
result = df.collect()[0]
result = df_1.collect()[0]

assert result.column(0) == pa.array([5, 7, 9])
assert result.column(1) == pa.array([-3, -3, -3])


def test_select_mixed_expr_string(df):
df = df.select_columns(column("b"), "a")
df_2 = df.select("b", "a")

# execute and collect the first (and only) batch
result = df.collect()[0]
result = df_2.collect()[0]

assert result.column(0) == pa.array([4, 5, 6])
assert result.column(1) == pa.array([1, 2, 3])


def test_select_columns(df):
df = df.select_columns("b", "a")
def test_select_mixed_expr_string(df):
df = df.select(column("b"), "a")

# execute and collect the first (and only) batch
result = df.collect()[0]
Expand Down
Loading