Skip to content

Commit 6c2d5eb

Browse files
committed
[SPARK-51853] Improve DataFrame.show API to support all signatures
### What changes were proposed in this pull request? This PR improves the `show` API to support all nine signatures. ### Why are the changes needed? Previously, we only supported `show()` with no arguments (1 function signature). After this PR, the following nine signatures will be supported. ```swift spark.range(1000).show() spark.range(1000).show(1) spark.range(1000).show(true) spark.range(1000).show(false) spark.range(1000).show(1, true) spark.range(1000).show(1, false) spark.range(1000).show(1, 20) spark.range(1000).show(1, 20, true) spark.range(1000).show(1, 20, false) ``` ### Does this PR introduce _any_ user-facing change? No. This is a new addition to the `show` API. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #75 from dongjoon-hyun/SPARK-51853. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 2eeefda commit 6c2d5eb

File tree

5 files changed

+148
-27
lines changed

5 files changed

+148
-27
lines changed

Sources/SparkConnect/DataFrame.swift

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -226,35 +226,42 @@ public actor DataFrame: Sendable {
226226
return result
227227
}
228228

229-
/// Execute the plan and show the result.
229+
/// Displays the top 20 rows of ``DataFrame`` in a tabular form.
230230
public func show() async throws {
231-
try await execute()
231+
try await show(20)
232+
}
232233

233-
if let schema = self._schema {
234-
var columns: [TextTableColumn] = []
235-
for f in schema.struct.fields {
236-
columns.append(TextTableColumn(header: f.name))
237-
}
238-
var table = TextTable(columns: columns)
239-
for batch in self.batches {
240-
for i in 0..<batch.length {
241-
var values: [String] = []
242-
for column in batch.columns {
243-
let str = column.array as! AsString
244-
if column.data.isNull(i) {
245-
values.append("NULL")
246-
} else if column.data.type.info == ArrowType.ArrowBinary {
247-
let binary = str.asString(i).utf8.map { String(format: "%02x", $0) }.joined(separator: " ")
248-
values.append("[\(binary)]")
249-
} else {
250-
values.append(str.asString(i))
251-
}
252-
}
253-
table.addRow(values: values)
254-
}
255-
}
256-
print(table.render())
257-
}
234+
/// Displays the top 20 rows of ``DataFrame`` in a tabular form.
235+
/// - Parameter truncate: Whether to truncate long strings. If true, strings longer than 20 characters will be truncated
236+
/// and all cells will be aligned right
237+
public func show(_ truncate: Bool) async throws {
238+
try await show(20, truncate)
239+
}
240+
241+
/// Displays the ``DataFrame`` in a tabular form.
242+
/// - Parameters:
243+
/// - numRows: Number of rows to show
244+
/// - truncate: Whether to truncate long strings. If true, strings longer than 20 characters will be truncated
245+
/// and all cells will be aligned right
246+
public func show(_ numRows: Int32 = 20, _ truncate: Bool = true) async throws {
247+
try await show(numRows, truncate ? 20 : 0)
248+
}
249+
250+
/// Displays the ``DataFrame`` in a tabular form.
251+
/// - Parameters:
252+
/// - numRows: Number of rows to show
253+
/// - truncate: If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right.
254+
/// - vertical: If set to true, prints output rows vertically (one line per column value).
255+
public func show(_ numRows: Int32, _ truncate: Int32, _ vertical: Bool = false) async throws {
256+
let rows = try await showString(numRows, truncate, vertical).collect()
257+
assert(rows.count == 1)
258+
assert(rows[0].length == 1)
259+
print(try rows[0].get(0) as! String)
260+
}
261+
262+
func showString(_ numRows: Int32, _ truncate: Int32, _ vertical: Bool) -> DataFrame {
263+
let plan = SparkConnectClient.getShowString(self.plan.root, numRows, truncate, vertical)
264+
return DataFrame(spark: self.spark, plan: plan)
258265
}
259266

260267
/// Projects a set of expressions and returns a new ``DataFrame``.

Sources/SparkConnect/SparkConnectClient.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,21 @@ public actor SparkConnectClient {
317317
})
318318
}
319319

320+
static func getShowString(
321+
_ child: Relation, _ numRows: Int32, _ truncate: Int32 = 0, _ vertical: Bool = false
322+
) -> Plan {
323+
var showString = ShowString()
324+
showString.input = child
325+
showString.numRows = numRows
326+
showString.truncate = truncate
327+
showString.vertical = vertical
328+
var relation = Relation()
329+
relation.showString = showString
330+
var plan = Plan()
331+
plan.opType = .root(relation)
332+
return plan
333+
}
334+
320335
func getTreeString(_ sessionID: String, _ plan: Plan, _ level: Int32) async -> AnalyzePlanRequest
321336
{
322337
return analyze(

Sources/SparkConnect/TypeAliases.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ typealias Sample = Spark_Connect_Sample
4444
typealias SaveMode = Spark_Connect_WriteOperation.SaveMode
4545
typealias SetOperation = Spark_Connect_SetOperation
4646
typealias SetOpType = SetOperation.SetOpType
47+
typealias ShowString = Spark_Connect_ShowString
4748
typealias SparkConnectService = Spark_Connect_SparkConnectService
4849
typealias Sort = Spark_Connect_Sort
4950
typealias StructType = Spark_Connect_DataType.Struct
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
20+
import Testing
21+
22+
@testable import SparkConnect
23+
24+
/// A test suite for `DataFrame` internal APIs
25+
struct DataFrameInternalTests {
26+
27+
#if !os(Linux)
28+
@Test
29+
func showString() async throws {
30+
let spark = try await SparkSession.builder.getOrCreate()
31+
let rows = try await spark.range(10).showString(2, 0, false).collect()
32+
#expect(rows.count == 1)
33+
#expect(rows[0].length == 1)
34+
#expect(
35+
try rows[0].get(0) as! String == """
36+
+---+
37+
|id |
38+
+---+
39+
|0 |
40+
|1 |
41+
+---+
42+
only showing top 2 rows
43+
""")
44+
await spark.stop()
45+
}
46+
47+
@Test
48+
func showStringTruncate() async throws {
49+
let spark = try await SparkSession.builder.getOrCreate()
50+
let rows = try await spark.sql("SELECT * FROM VALUES ('abc', 'def'), ('ghi', 'jkl')")
51+
.showString(2, 2, false).collect()
52+
#expect(rows.count == 1)
53+
#expect(rows[0].length == 1)
54+
print(try rows[0].get(0) as! String)
55+
#expect(
56+
try rows[0].get(0) as! String == """
57+
+----+----+
58+
|col1|col2|
59+
+----+----+
60+
| ab| de|
61+
| gh| jk|
62+
+----+----+
63+
64+
""")
65+
await spark.stop()
66+
}
67+
68+
@Test
69+
func showStringVertical() async throws {
70+
let spark = try await SparkSession.builder.getOrCreate()
71+
let rows = try await spark.range(10).showString(2, 0, true).collect()
72+
#expect(rows.count == 1)
73+
#expect(rows[0].length == 1)
74+
print(try rows[0].get(0) as! String)
75+
#expect(
76+
try rows[0].get(0) as! String == """
77+
-RECORD 0--
78+
id | 0
79+
-RECORD 1--
80+
id | 1
81+
only showing top 2 rows
82+
""")
83+
await spark.stop()
84+
}
85+
#endif
86+
}

Tests/SparkConnectTests/DataFrameTests.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,18 @@ struct DataFrameTests {
348348
try await spark.sql("SELECT * FROM VALUES (true, false)").show()
349349
try await spark.sql("SELECT * FROM VALUES (1, 2)").show()
350350
try await spark.sql("SELECT * FROM VALUES ('abc', 'def'), ('ghi', 'jkl')").show()
351+
352+
// Check all signatures
353+
try await spark.range(1000).show()
354+
try await spark.range(1000).show(1)
355+
try await spark.range(1000).show(true)
356+
try await spark.range(1000).show(false)
357+
try await spark.range(1000).show(1, true)
358+
try await spark.range(1000).show(1, false)
359+
try await spark.range(1000).show(1, 20)
360+
try await spark.range(1000).show(1, 20, true)
361+
try await spark.range(1000).show(1, 20, false)
362+
351363
await spark.stop()
352364
}
353365

0 commit comments

Comments
 (0)