2020"""
2121
2222from __future__ import annotations
23-
24- from typing import Any , List , TYPE_CHECKING
23+ import warnings
24+ from typing import Any , List , TYPE_CHECKING , Literal , overload
2525from datafusion .record_batch import RecordBatchStream
2626from typing_extensions import deprecated
2727from datafusion .plan import LogicalPlan , ExecutionPlan
3131 import pandas as pd
3232 import polars as pl
3333 import pathlib
34- from typing import Callable
34+ from typing import Callable , Sequence
3535
3636from datafusion ._internal import DataFrame as DataFrameInternal
3737from datafusion .expr import Expr , SortExpr , sort_or_default
@@ -271,11 +271,51 @@ def distinct(self) -> DataFrame:
271271 """
272272 return DataFrame (self .df .distinct ())
273273
274+ @overload
275+ def join (
276+ self ,
277+ right : DataFrame ,
278+ on : str | Sequence [str ],
279+ how : Literal ["inner" , "left" , "right" , "full" , "semi" , "anti" ] = "inner" ,
280+ * ,
281+ left_on : None = None ,
282+ right_on : None = None ,
283+ join_keys : None = None ,
284+ ) -> DataFrame : ...
285+
286+ @overload
274287 def join (
275288 self ,
276289 right : DataFrame ,
290+ on : None = None ,
291+ how : Literal ["inner" , "left" , "right" , "full" , "semi" , "anti" ] = "inner" ,
292+ * ,
293+ left_on : str | Sequence [str ],
294+ right_on : str | Sequence [str ],
295+ join_keys : tuple [list [str ], list [str ]] | None = None ,
296+ ) -> DataFrame : ...
297+
298+ @overload
299+ def join (
300+ self ,
301+ right : DataFrame ,
302+ on : None = None ,
303+ how : Literal ["inner" , "left" , "right" , "full" , "semi" , "anti" ] = "inner" ,
304+ * ,
277305 join_keys : tuple [list [str ], list [str ]],
278- how : str ,
306+ left_on : None = None ,
307+ right_on : None = None ,
308+ ) -> DataFrame : ...
309+
310+ def join (
311+ self ,
312+ right : DataFrame ,
313+ on : str | Sequence [str ] | None = None ,
314+ how : Literal ["inner" , "left" , "right" , "full" , "semi" , "anti" ] = "inner" ,
315+ * ,
316+ left_on : str | Sequence [str ] | None = None ,
317+ right_on : str | Sequence [str ] | None = None ,
318+ join_keys : tuple [list [str ], list [str ]] | None = None ,
279319 ) -> DataFrame :
280320 """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`.
281321
@@ -284,14 +324,41 @@ def join(
284324
285325 Args:
286326 right: Other DataFrame to join with.
287- join_keys: Tuple of two lists of column names to join on.
327+ on: Column names to join on in both dataframes .
288328 how: Type of join to perform. Supported types are "inner", "left",
289329 "right", "full", "semi", "anti".
330+ left_on: Join column of the left dataframe.
331+ right_on: Join column of the right dataframe.
332+ join_keys: Tuple of two lists of column names to join on. [Deprecated]
290333
291334 Returns:
292335 DataFrame after join.
293336 """
294- return DataFrame (self .df .join (right .df , join_keys , how ))
337+ if join_keys is not None :
338+ warnings .warn (
339+ "`join_keys` is deprecated, use `on` or `left_on` with `right_on`" ,
340+ category = DeprecationWarning ,
341+ stacklevel = 2 ,
342+ )
343+ left_on = join_keys [0 ]
344+ right_on = join_keys [1 ]
345+
346+ if on :
347+ if left_on or right_on :
348+ raise ValueError (
349+ "`left_on` or `right_on` should not provided with `on`"
350+ )
351+ left_on = on
352+ right_on = on
353+ elif left_on or right_on :
354+ if left_on is None or right_on is None :
355+ raise ValueError ("`left_on` and `right_on` should both be provided." )
356+ else :
357+ raise ValueError (
358+ "either `on` or `left_on` and `right_on` should be provided."
359+ )
360+
361+ return DataFrame (self .df .join (right .df , how , left_on , right_on ))
295362
296363 def explain (self , verbose : bool = False , analyze : bool = False ) -> DataFrame :
297364 """Return a DataFrame with the explanation of its plan so far.
0 commit comments