apache · timsaucer · Oct 22, 2024 · Oct 13, 2024 · Oct 13, 2024 · Oct 21, 2024
diff --git a/examples/import.py b/examples/import.py
@@ -28,7 +28,7 @@
 # The dictionary keys represent column names and the dictionary values
 # represent column values
 df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 # Dataframe:
 # +---+---+
 # | a | b |
@@ -40,19 +40,19 @@
 
 # Create a datafusion DataFrame from a Python list of rows
 df = ctx.from_pylist([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}])
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert pandas DataFrame to datafusion DataFrame
 pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_pandas(pandas_df)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert polars DataFrame to datafusion DataFrame
 polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_polars(polars_df)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
 
 # Convert Arrow Table to datafusion DataFrame
 arrow_table = pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
 df = ctx.from_arrow(arrow_table)
-assert type(df) == datafusion.DataFrame
+assert type(df) is datafusion.DataFrame
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -97,6 +97,9 @@ def schema(self) -> pa.Schema:
         """
         return self.df.schema()
 
+    @deprecated(
+        "select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead"
+    )
     def select_columns(self, *args: str) -> DataFrame:
         """Filter the DataFrame by columns.
 

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
@@ -103,30 +103,28 @@ def partitioned_df():
 
 
 def test_select(df):
-    df = df.select(
+    df_1 = df.select(
         column("a") + column("b"),
         column("a") - column("b"),
     )
 
     # execute and collect the first (and only) batch
-    result = df.collect()[0]
+    result = df_1.collect()[0]
 
     assert result.column(0) == pa.array([5, 7, 9])
     assert result.column(1) == pa.array([-3, -3, -3])
 
-
-def test_select_mixed_expr_string(df):
-    df = df.select_columns(column("b"), "a")
+    df_2 = df.select("b", "a")
 
     # execute and collect the first (and only) batch
-    result = df.collect()[0]
+    result = df_2.collect()[0]
 
     assert result.column(0) == pa.array([4, 5, 6])
     assert result.column(1) == pa.array([1, 2, 3])
 
 
-def test_select_columns(df):
-    df = df.select_columns("b", "a")
+def test_select_mixed_expr_string(df):
+    df = df.select(column("b"), "a")
 
     # execute and collect the first (and only) batch
     result = df.collect()[0]