Commit dfe880e

[SPARK-52149] Add String.toExpression extension
### What changes were proposed in this pull request?

This PR aims to introduce a `String.toExpression` extension to simplify the code.

### Why are the changes needed?

To simplify the code by reducing repetition.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #148 from dongjoon-hyun/SPARK-52149.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 9622e8e commit dfe880e
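
For readers skimming the diffs below, the following sketch illustrates the repetition this change removes. It is a minimal, self-contained illustration only: `Expr`, `ExprString`, `toExprString`, and `toExpr` are simplified stand-ins for the generated `Spark_Connect_Expression`, `ExpressionString`, the pre-existing `String.toExpressionString` helper, and the new `String.toExpression` property, not the real protobuf API.

```swift
// Minimal sketch of the refactoring idea; all types and helpers here are
// simplified stand-ins, not the generated Spark Connect protobuf types.
struct ExprString { var expression = "" }
struct Expr { var expressionString = ExprString() }

extension String {
  // Mirrors the pre-existing String.toExpressionString helper.
  var toExprString: ExprString {
    var exprString = ExprString()
    exprString.expression = self
    return exprString
  }

  // Mirrors the new String.toExpression property added by this commit:
  // one place that turns a column/SQL string into a full expression.
  var toExpr: Expr {
    var expr = Expr()
    expr.expressionString = self.toExprString
    return expr
  }
}

let columns = ["dept", "upper(name)"]

// Before: every call site repeated the same construction closure.
let before: [Expr] = columns.map {
  var expr = Expr()
  expr.expressionString = $0.toExprString
  return expr
}

// After: call sites collapse to a single map over the extension property.
let after: [Expr] = columns.map { $0.toExpr }

print(before.map(\.expressionString.expression) == after.map(\.expressionString.expression))  // true
```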

File tree

4 files changed (+16, -40 lines)

Sources/SparkConnect/DataFrameWriterV2.swift

Lines changed: 1 addition & 5 deletions
@@ -72,11 +72,7 @@ public actor DataFrameWriterV2: Sendable {
   /// - Parameter columns: Columns to partition
   /// - Returns: A ``DataFrameWriterV2``.
   public func partitionBy(_ columns: String...) -> DataFrameWriterV2 {
-    self.partitioningColumns = columns.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
+    self.partitioningColumns = columns.map { $0.toExpression }
     return self
   }

Sources/SparkConnect/Extension.swift

Lines changed: 8 additions & 0 deletions
@@ -126,6 +126,14 @@ extension String {
     return expression
   }

+  var toExpression: Spark_Connect_Expression {
+    var expressionString = ExpressionString()
+    expressionString.expression = self
+    var expression = Spark_Connect_Expression()
+    expression.expressionString = expressionString
+    return expression
+  }
+
   var toExplainMode: ExplainMode {
     let mode = switch self {
     case "codegen": ExplainMode.codegen

Sources/SparkConnect/GroupedData.swift

Lines changed: 2 additions & 10 deletions
@@ -32,16 +32,8 @@ public actor GroupedData {
     var aggregate = Aggregate()
     aggregate.input = await (self.df.getPlan() as! Plan).root
     aggregate.groupType = self.groupType
-    aggregate.groupingExpressions = self.groupingCols.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
-    aggregate.aggregateExpressions = exprs.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
+    aggregate.groupingExpressions = self.groupingCols.map { $0.toExpression }
+    aggregate.aggregateExpressions = exprs.map { $0.toExpression }
     var relation = Relation()
     relation.aggregate = aggregate
     var plan = Plan()

Sources/SparkConnect/SparkConnectClient.swift

Lines changed: 5 additions & 25 deletions
@@ -508,11 +508,7 @@ public actor SparkConnectClient {
   static func getProjectExprs(_ child: Relation, _ exprs: [String]) -> Plan {
     var project = Project()
     project.input = child
-    let expressions: [Spark_Connect_Expression] = exprs.map {
-      var expression = Spark_Connect_Expression()
-      expression.exprType = .expressionString($0.toExpressionString)
-      return expression
-    }
+    let expressions: [Spark_Connect_Expression] = exprs.map { $0.toExpression }
     project.expressions = expressions
     var relation = Relation()
     relation.project = project
@@ -908,11 +904,7 @@ public actor SparkConnectClient {
   ) -> Plan {
     var repartitionByExpression = RepartitionByExpression()
     repartitionByExpression.input = child
-    repartitionByExpression.partitionExprs = partitionExprs.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
+    repartitionByExpression.partitionExprs = partitionExprs.map { $0.toExpression }
     if let numPartitions {
       repartitionByExpression.numPartitions = numPartitions
     }
@@ -932,18 +924,10 @@ public actor SparkConnectClient {
   ) -> Plan {
     var unpivot = Spark_Connect_Unpivot()
     unpivot.input = child
-    unpivot.ids = ids.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
+    unpivot.ids = ids.map { $0.toExpression }
     if let values {
       var unpivotValues = Spark_Connect_Unpivot.Values()
-      unpivotValues.values = values.map {
-        var expr = Spark_Connect_Expression()
-        expr.expressionString = $0.toExpressionString
-        return expr
-      }
+      unpivotValues.values = values.map { $0.toExpression }
       unpivot.values = unpivotValues
     }
     unpivot.variableColumnName = variableColumnName
@@ -958,11 +942,7 @@ public actor SparkConnectClient {
   static func getTranspose(_ child: Relation, _ indexColumn: [String]) -> Plan {
     var transpose = Spark_Connect_Transpose()
     transpose.input = child
-    transpose.indexColumns = indexColumn.map {
-      var expr = Spark_Connect_Expression()
-      expr.expressionString = $0.toExpressionString
-      return expr
-    }
+    transpose.indexColumns = indexColumn.map { $0.toExpression }
     var relation = Relation()
     relation.transpose = transpose
     var plan = Plan()
