Skip to content

Commit 0b087ee

Browse files
committed
[SPARK-51676] Support printSchema for DataFrame
### What changes were proposed in this pull request? This PR aims to support `printSchema` for `DataFrame`. ### Why are the changes needed? For feature parity. ### Does this PR introduce _any_ user-facing change? No, this is a new addition to the unreleased version. ### How was this patch tested? Pass the CIs and do the manual check from the log. Or, manually. ``` $ swift test --filter DataFrameTests.printSchema ... 􀟈 Test printSchema() started. root |-- struct(1, 2): struct (nullable = false) | |-- col1: integer (nullable = false) | |-- col2: integer (nullable = false) root |-- struct(1, 2): struct (nullable = false) 􁁛 Test printSchema() passed after 0.044 seconds. 􁁛 Suite DataFrameTests passed after 0.044 seconds. 􁁛 Test run with 1 test passed after 0.044 seconds. ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #35 from dongjoon-hyun/SPARK-51676. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 0436211 commit 0b087ee

File tree

3 files changed

+40
-0
lines changed

3 files changed

+40
-0
lines changed

Sources/SparkConnect/DataFrame.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,4 +355,24 @@ public actor DataFrame: Sendable {
355355
print(response.explain.explainString)
356356
}
357357
}
358+
359+
/// Prints the schema to the console in a nice tree format.
360+
public func printSchema() async throws {
361+
try await printSchema(Int32.max)
362+
}
363+
364+
/// Prints the schema up to the given level to the console in a nice tree format.
365+
/// - Parameter level: A level to be printed.
366+
public func printSchema(_ level: Int32) async throws {
367+
try await withGRPCClient(
368+
transport: .http2NIOPosix(
369+
target: .dns(host: spark.client.host, port: spark.client.port),
370+
transportSecurity: .plaintext
371+
)
372+
) { client in
373+
let service = Spark_Connect_SparkConnectService.Client(wrapping: client)
374+
let response = try await service.analyzePlan(spark.client.getTreeString(spark.sessionID, plan, level))
375+
print(response.treeString.treeString)
376+
}
377+
}
358378
}

Sources/SparkConnect/SparkConnectClient.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,18 @@ public actor SparkConnectClient {
294294
})
295295
}
296296

297+
func getTreeString(_ sessionID: String, _ plan: Plan, _ level: Int32) async -> AnalyzePlanRequest
298+
{
299+
return analyze(
300+
sessionID,
301+
{
302+
var treeString = AnalyzePlanRequest.TreeString()
303+
treeString.plan = plan
304+
treeString.level = level
305+
return OneOf_Analyze.treeString(treeString)
306+
})
307+
}
308+
297309
static func getProject(_ child: Relation, _ cols: [String]) -> Plan {
298310
var project = Project()
299311
project.input = child

Tests/SparkConnectTests/DataFrameTests.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ struct DataFrameTests {
7070
await spark.stop()
7171
}
7272

73+
@Test
74+
func printSchema() async throws {
75+
let spark = try await SparkSession.builder.getOrCreate()
76+
try await spark.sql("SELECT struct(1, 2)").printSchema()
77+
try await spark.sql("SELECT struct(1, 2)").printSchema(1)
78+
await spark.stop()
79+
}
80+
7381
@Test
7482
func explain() async throws {
7583
let spark = try await SparkSession.builder.getOrCreate()

0 commit comments

Comments
 (0)