|
21 | 21 |
|
22 | 22 | from __future__ import annotations |
23 | 23 |
|
24 | | -from typing import Any, Iterable, List, TYPE_CHECKING |
| 24 | + |
| 25 | +from typing import Any, Iterable, List, Literal, TYPE_CHECKING |
25 | 26 | from datafusion.record_batch import RecordBatchStream |
26 | 27 | from typing_extensions import deprecated |
27 | 28 | from datafusion.plan import LogicalPlan, ExecutionPlan |
@@ -129,6 +130,17 @@ def select(self, *exprs: Expr | str) -> DataFrame: |
129 | 130 | ] |
130 | 131 | return DataFrame(self.df.select(*exprs_internal)) |
131 | 132 |
|
| 133 | + def drop(self, *columns: str) -> DataFrame: |
| 134 | + """Drop an arbitrary number of columns. |
| 135 | +
|
| 136 | + Args: |
| 137 | + columns: Column names to drop from the dataframe. |
| 138 | +
|
| 139 | + Returns: |
| 140 | + DataFrame with those columns removed in the projection. |
| 141 | + """ |
| 142 | + return DataFrame(self.df.drop(*columns)) |
| 143 | + |
132 | 144 | def filter(self, *predicates: Expr) -> DataFrame: |
133 | 145 | """Return a DataFrame for which ``predicate`` evaluates to ``True``. |
134 | 146 |
|
@@ -163,14 +175,25 @@ def with_column(self, name: str, expr: Expr) -> DataFrame: |
163 | 175 | def with_columns( |
164 | 176 | self, *exprs: Expr | Iterable[Expr], **named_exprs: Expr |
165 | 177 | ) -> DataFrame: |
166 | | - """Add an additional column to the DataFrame. |
| 178 | + """Add columns to the DataFrame. |
| 179 | +
|
| 180 | + Accepts expressions, iterables of expressions, or named expressions. To |
| 181 | + pass named expressions use the form name=Expr. |
| 182 | +
|
| 183 | + Example usage: The following will add 4 columns labeled a, b, c, and d:: |
| 184 | +
|
| 185 | + df = df.with_columns( |
| 186 | + lit(0).alias('a'), |
| 187 | + [lit(1).alias('b'), lit(2).alias('c')], |
| 188 | + d=lit(3) |
| 189 | + ) |
167 | 190 |
|
168 | 191 | Args: |
169 | | - *exprs: Name of the column to add. |
170 | | - **named_exprs: Expression to compute the column. |
| 192 | + exprs: Either a single expression or an iterable of expressions to add. |
| 193 | + named_exprs: Named expressions in the form of ``name=expr`` |
171 | 194 |
|
172 | 195 | Returns: |
173 | | - DataFrame with the new column. |
| 196 | + DataFrame with the new columns added. |
174 | 197 | """ |
175 | 198 |
|
176 | 199 | def _simplify_expression( |
@@ -339,6 +362,29 @@ def join( |
339 | 362 | """ |
340 | 363 | return DataFrame(self.df.join(right.df, join_keys, how)) |
341 | 364 |
|
| 365 | + def join_on( |
| 366 | + self, |
| 367 | + right: DataFrame, |
| 368 | + *on_exprs: Expr, |
| 369 | + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", |
| 370 | + ) -> DataFrame: |
| 371 | + """Join two :py:class:`DataFrame` using the specified expressions. |
| 372 | +
|
| 373 | + On expressions are used to support inequality predicates. Equality |
| 374 | + predicates are correctly optimized. |
| 375 | +
|
| 376 | + Args: |
| 377 | + right: Other DataFrame to join with. |
| 378 | + on_exprs: single or multiple (in)-equality predicates. |
| 379 | + how: Type of join to perform. Supported types are "inner", "left", |
| 380 | + "right", "full", "semi", "anti". |
| 381 | +
|
| 382 | + Returns: |
| 383 | + DataFrame after join. |
| 384 | + """ |
| 385 | + exprs = [expr.expr for expr in on_exprs] |
| 386 | + return DataFrame(self.df.join_on(right.df, exprs, how)) |
| 387 | + |
342 | 388 | def explain(self, verbose: bool = False, analyze: bool = False) -> DataFrame: |
343 | 389 | """Return a DataFrame with the explanation of its plan so far. |
344 | 390 |
|
|
0 commit comments