Skip to content

Commit d9c90d2

Browse files
authored
feat: Add SQL expression for repartition_by_hash (#1285)
* add sql expression for repartition * fix formatting
1 parent a93d614 commit d9c90d2

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

python/datafusion/dataframe.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -917,17 +917,20 @@ def repartition(self, num: int) -> DataFrame:
917917
"""
918918
return DataFrame(self.df.repartition(num))
919919

920-
def repartition_by_hash(self, *exprs: Expr, num: int) -> DataFrame:
920+
def repartition_by_hash(self, *exprs: Expr | str, num: int) -> DataFrame:
921921
"""Repartition a DataFrame using a hash partitioning scheme.
922922
923923
Args:
924-
exprs: Expressions to evaluate and perform hashing on.
924+
exprs: Expressions or a SQL expression string to evaluate
925+
and perform hashing on.
925926
num: Number of partitions to repartition the DataFrame into.
926927
927928
Returns:
928929
Repartitioned DataFrame.
929930
"""
930-
exprs = [expr.expr for expr in exprs]
931+
exprs = [self.parse_sql_expr(e) if isinstance(e, str) else e for e in exprs]
932+
exprs = expr_list_to_raw_expr_list(exprs)
933+
931934
return DataFrame(self.df.repartition_by_hash(*exprs, num=num))
932935

933936
def union(self, other: DataFrame, distinct: bool = False) -> DataFrame:

python/tests/test_dataframe.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,6 +1622,14 @@ def test_repartition_by_hash(df):
16221622
df.repartition_by_hash(column("a"), num=2)
16231623

16241624

1625+
def test_repartition_by_hash_sql_expression(df):
1626+
df.repartition_by_hash("a", num=2)
1627+
1628+
1629+
def test_repartition_by_hash_mix(df):
1630+
df.repartition_by_hash(column("a"), "b", num=2)
1631+
1632+
16251633
def test_intersect():
16261634
ctx = SessionContext()
16271635

0 commit comments

Comments
 (0)