Skip to content

Commit 6738ff0

Browse files
committed
[SPARK-51853] Improve show to support all signatures
1 parent 2eeefda commit 6738ff0

File tree

5 files changed

+145
-27
lines changed

5 files changed

+145
-27
lines changed

Sources/SparkConnect/DataFrame.swift

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -226,35 +226,42 @@ public actor DataFrame: Sendable {
226226
return result
227227
}
228228

229-
/// Execute the plan and show the result.
229+
/// Displays the top 20 rows of ``DataFrame`` in a tabular form.
230230
public func show() async throws {
231-
try await execute()
231+
try await show(20)
232+
}
232233

233-
if let schema = self._schema {
234-
var columns: [TextTableColumn] = []
235-
for f in schema.struct.fields {
236-
columns.append(TextTableColumn(header: f.name))
237-
}
238-
var table = TextTable(columns: columns)
239-
for batch in self.batches {
240-
for i in 0..<batch.length {
241-
var values: [String] = []
242-
for column in batch.columns {
243-
let str = column.array as! AsString
244-
if column.data.isNull(i) {
245-
values.append("NULL")
246-
} else if column.data.type.info == ArrowType.ArrowBinary {
247-
let binary = str.asString(i).utf8.map { String(format: "%02x", $0) }.joined(separator: " ")
248-
values.append("[\(binary)]")
249-
} else {
250-
values.append(str.asString(i))
251-
}
252-
}
253-
table.addRow(values: values)
254-
}
255-
}
256-
print(table.render())
257-
}
234+
/// Displays the top 20 rows of ``DataFrame`` in a tabular form.
235+
/// - Parameter truncate: Whether to truncate long strings. If true, strings more than 20 characters will be truncated
236+
/// and all cells will be aligned right
237+
public func show(_ truncate: Bool) async throws {
238+
try await show(20, truncate)
239+
}
240+
241+
/// Displays the ``DataFrame`` in a tabular form.
242+
/// - Parameters:
243+
/// - numRows: Number of rows to show
244+
/// - truncate: Whether to truncate long strings. If true, strings more than 20 characters will be truncated
245+
/// and all cells will be aligned right
246+
public func show(_ numRows: Int32 = 20, _ truncate: Bool = true) async throws {
247+
try await show(numRows, truncate ? 20 : 0)
248+
}
249+
250+
/// Displays the ``DataFrame`` in a tabular form.
251+
/// - Parameters:
252+
/// - numRows: Number of rows to show
253+
/// - truncate: If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right.
254+
/// - vertical: If set to true, prints output rows vertically (one line per column value).
255+
public func show(_ numRows: Int32, _ truncate: Int32, _ vertical: Bool = false) async throws {
256+
let rows = try await showString(numRows, truncate, vertical).collect()
257+
assert(rows.count == 1)
258+
assert(rows[0].length == 1)
259+
print(try rows[0].get(0) as! String)
260+
}
261+
262+
func showString(_ numRows: Int32, _ truncate: Int32, _ vertical: Bool) -> DataFrame {
263+
let plan = SparkConnectClient.getShowString(self.plan.root, numRows, truncate, vertical)
264+
return DataFrame(spark: self.spark, plan: plan)
258265
}
259266

260267
/// Projects a set of expressions and returns a new ``DataFrame``.

Sources/SparkConnect/SparkConnectClient.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,21 @@ public actor SparkConnectClient {
317317
})
318318
}
319319

320+
static func getShowString(_ child: Relation, _ numRows: Int32, _ truncate: Int32 = 0, _ vertical: Bool = false)
321+
-> Plan
322+
{
323+
var showString = ShowString()
324+
showString.input = child
325+
showString.numRows = numRows
326+
showString.truncate = truncate
327+
showString.vertical = vertical
328+
var relation = Relation()
329+
relation.showString = showString
330+
var plan = Plan()
331+
plan.opType = .root(relation)
332+
return plan
333+
}
334+
320335
func getTreeString(_ sessionID: String, _ plan: Plan, _ level: Int32) async -> AnalyzePlanRequest
321336
{
322337
return analyze(

Sources/SparkConnect/TypeAliases.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ typealias Sample = Spark_Connect_Sample
4444
typealias SaveMode = Spark_Connect_WriteOperation.SaveMode
4545
typealias SetOperation = Spark_Connect_SetOperation
4646
typealias SetOpType = SetOperation.SetOpType
47+
typealias ShowString = Spark_Connect_ShowString
4748
typealias SparkConnectService = Spark_Connect_SparkConnectService
4849
typealias Sort = Spark_Connect_Sort
4950
typealias StructType = Spark_Connect_DataType.Struct
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
20+
import Testing
21+
22+
@testable import SparkConnect
23+
24+
/// A test suite for `DataFrame` internal APIs
25+
struct DataFrameInternalTests {
26+
@Test
27+
func showString() async throws {
28+
let spark = try await SparkSession.builder.getOrCreate()
29+
let rows = try await spark.range(10).showString(2, 0, false).collect()
30+
#expect(rows.count == 1)
31+
#expect(rows[0].length == 1)
32+
#expect(
33+
try rows[0].get(0) as! String == """
34+
+---+
35+
|id |
36+
+---+
37+
|0 |
38+
|1 |
39+
+---+
40+
only showing top 2 rows
41+
""")
42+
await spark.stop()
43+
}
44+
45+
@Test
46+
func showStringTruncate() async throws {
47+
let spark = try await SparkSession.builder.getOrCreate()
48+
let rows = try await spark.sql("SELECT * FROM VALUES ('abc', 'def'), ('ghi', 'jkl')")
49+
.showString(2, 2, false).collect()
50+
#expect(rows.count == 1)
51+
#expect(rows[0].length == 1)
52+
print(try rows[0].get(0) as! String)
53+
#expect(
54+
try rows[0].get(0) as! String == """
55+
+----+----+
56+
|col1|col2|
57+
+----+----+
58+
| ab| de|
59+
| gh| jk|
60+
+----+----+
61+
62+
""")
63+
await spark.stop()
64+
}
65+
66+
@Test
67+
func showStringVertical() async throws {
68+
let spark = try await SparkSession.builder.getOrCreate()
69+
let rows = try await spark.range(10).showString(2, 0, true).collect()
70+
#expect(rows.count == 1)
71+
#expect(rows[0].length == 1)
72+
print(try rows[0].get(0) as! String)
73+
#expect(
74+
try rows[0].get(0) as! String == """
75+
-RECORD 0--
76+
id | 0
77+
-RECORD 1--
78+
id | 1
79+
only showing top 2 rows
80+
""")
81+
await spark.stop()
82+
}
83+
}

Tests/SparkConnectTests/DataFrameTests.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,18 @@ struct DataFrameTests {
348348
try await spark.sql("SELECT * FROM VALUES (true, false)").show()
349349
try await spark.sql("SELECT * FROM VALUES (1, 2)").show()
350350
try await spark.sql("SELECT * FROM VALUES ('abc', 'def'), ('ghi', 'jkl')").show()
351+
352+
// Check all signatures
353+
try await spark.range(1000).show()
354+
try await spark.range(1000).show(1)
355+
try await spark.range(1000).show(true)
356+
try await spark.range(1000).show(false)
357+
try await spark.range(1000).show(1, true)
358+
try await spark.range(1000).show(1, false)
359+
try await spark.range(1000).show(1, 20)
360+
try await spark.range(1000).show(1, 20, true)
361+
try await spark.range(1000).show(1, 20, false)
362+
351363
await spark.stop()
352364
}
353365

0 commit comments

Comments
 (0)