Skip to content

Commit 7639540

Browse files
committed
[SPARK-51642] Support explain for DataFrame
### What changes were proposed in this pull request? This PR aims to support `explain` for `DataFrame`. ### Why are the changes needed? For the feature parity. ### Does this PR introduce _any_ user-facing change? No, this is a new API addition to the unreleased versions. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #32 from dongjoon-hyun/SPARK-51642. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent cb1e416 commit 7639540

File tree

5 files changed

+59
-0
lines changed

5 files changed

+59
-0
lines changed

Sources/SparkConnect/DataFrame.swift

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,4 +330,29 @@ public actor DataFrame: Sendable {
330330

331331
return self
332332
}
333+
334+
public func explain() async throws {
335+
try await explain("simple")
336+
}
337+
338+
public func explain(_ extended: Bool) async throws {
339+
if (extended) {
340+
try await explain("extended")
341+
} else {
342+
try await explain("simple")
343+
}
344+
}
345+
346+
public func explain(_ mode: String) async throws {
347+
try await withGRPCClient(
348+
transport: .http2NIOPosix(
349+
target: .dns(host: spark.client.host, port: spark.client.port),
350+
transportSecurity: .plaintext
351+
)
352+
) { client in
353+
let service = Spark_Connect_SparkConnectService.Client(wrapping: client)
354+
let response = try await service.analyzePlan(spark.client.getExplain(spark.sessionID, plan, mode))
355+
print(response.explain.explainString)
356+
}
357+
}
333358
}

Sources/SparkConnect/Extension.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@ extension String {
5757
expression.expression = self
5858
return expression
5959
}
60+
61+
var toExplainMode: ExplainMode {
62+
let mode = switch self {
63+
case "codegen": ExplainMode.codegen
64+
case "cost": ExplainMode.cost
65+
case "extended": ExplainMode.extended
66+
case "formatted": ExplainMode.formatted
67+
case "simple": ExplainMode.simple
68+
default: ExplainMode.simple
69+
}
70+
return mode
71+
}
6072
}
6173

6274
extension [String: String] {

Sources/SparkConnect/SparkConnectClient.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,18 @@ public actor SparkConnectClient {
282282
})
283283
}
284284

285+
func getExplain(_ sessionID: String, _ plan: Plan, _ mode: String) async -> AnalyzePlanRequest
286+
{
287+
return analyze(
288+
sessionID,
289+
{
290+
var explain = AnalyzePlanRequest.Explain()
291+
explain.plan = plan
292+
explain.explainMode = mode.toExplainMode
293+
return OneOf_Analyze.explain(explain)
294+
})
295+
}
296+
285297
static func getProject(_ child: Relation, _ cols: [String]) -> Plan {
286298
var project = Project()
287299
project.input = child

Sources/SparkConnect/TypeAliases.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ typealias ConfigRequest = Spark_Connect_ConfigRequest
2222
typealias DataSource = Spark_Connect_Read.DataSource
2323
typealias DataType = Spark_Connect_DataType
2424
typealias ExecutePlanRequest = Spark_Connect_ExecutePlanRequest
25+
typealias ExplainMode = AnalyzePlanRequest.Explain.ExplainMode
2526
typealias ExpressionString = Spark_Connect_Expression.ExpressionString
2627
typealias Filter = Spark_Connect_Filter
2728
typealias KeyValue = Spark_Connect_KeyValue

Tests/SparkConnectTests/DataFrameTests.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ struct DataFrameTests {
7070
await spark.stop()
7171
}
7272

73+
@Test
74+
func explain() async throws {
75+
let spark = try await SparkSession.builder.getOrCreate()
76+
try await spark.range(1).explain()
77+
try await spark.range(1).explain(true)
78+
try await spark.range(1).explain("formatted")
79+
await spark.stop()
80+
}
81+
7382
@Test
7483
func count() async throws {
7584
let spark = try await SparkSession.builder.getOrCreate()

0 commit comments

Comments
 (0)