Skip to content

Commit ddaf9ea

Browse files
committed
[SPARK-51784] Support xml in DataFrame(Reader/Writer)
1 parent 4185eb9 commit ddaf9ea

File tree

4 files changed

+43
-0
lines changed

4 files changed

+43
-0
lines changed

Sources/SparkConnect/DataFrameReader.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,22 @@ public actor DataFrameReader: Sendable {
160160
return load(paths)
161161
}
162162

163+
/// Loads an XML file and returns the result as a `DataFrame`, setting this reader's `source` to `"xml"` before delegating to `load`.
164+
/// - Parameter path: A path string that is passed unchanged to `load`.
165+
/// - Returns: The `DataFrame` returned by `load` for `path`.
166+
public func xml(_ path: String) -> DataFrame {
167+
self.source = "xml"
168+
return load(path)
169+
}
170+
171+
/// Loads XML files and returns the result as a `DataFrame`, setting this reader's `source` to `"xml"` before delegating to `load`.
172+
/// - Parameter paths: Path strings (variadic); the collected array is passed unchanged to `load`.
173+
/// - Returns: The `DataFrame` returned by `load` for `paths`.
174+
public func xml(_ paths: String...) -> DataFrame {
175+
self.source = "xml"
176+
return load(paths)
177+
}
178+
163179
/// Loads an ORC file and returns the result as a `DataFrame`.
164180
/// - Parameter path: A path string
165181
/// - Returns: A `DataFrame`.

Sources/SparkConnect/DataFrameWriter.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,14 @@ public actor DataFrameWriter: Sendable {
171171
return try await save(path)
172172
}
173173

174+
/// Saves the content of the `DataFrame` in XML format at the specified path, setting this writer's `source` to `"xml"` before delegating to `save`.
175+
/// - Parameter path: A path string that is passed unchanged to `save`.
176+
/// - Throws: Any error propagated from `save`.
177+
public func xml(_ path: String) async throws {
178+
self.source = "xml"
179+
return try await save(path)
180+
}
181+
174182
/// Saves the content of the `DataFrame` in ORC format at the specified path.
175183
/// - Parameter path: A path string
176184
/// - Returns: A `DataFrame`.

Tests/SparkConnectTests/DataFrameReaderTests.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ struct DataFrameReaderTests {
4545
await spark.stop()
4646
}
4747

48+
// Reads the sample `people.xml` with `rowTag` set to "person" in three ways and checks the row counts:
// `format("xml").load(path)` and `xml(path)` each expect 3 rows; `xml(path, path)` expects 6.
@Test
49+
func xml() async throws {
50+
let spark = try await SparkSession.builder.getOrCreate()
51+
let path = "../examples/src/main/resources/people.xml"
52+
#expect(try await spark.read.option("rowTag", "person").format("xml").load(path).count() == 3)
53+
#expect(try await spark.read.option("rowTag", "person").xml(path).count() == 3)
54+
#expect(try await spark.read.option("rowTag", "person").xml(path, path).count() == 6)
55+
await spark.stop()
56+
}
57+
4858
@Test
4959
func orc() async throws {
5060
let spark = try await SparkSession.builder.getOrCreate()

Tests/SparkConnectTests/DataFrameWriterTests.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ struct DataFrameWriterTests {
4343
await spark.stop()
4444
}
4545

46+
// Round-trip check: writes `spark.range(2025)` as XML (with `rowTag` "person") to a fresh
// UUID-named directory under /tmp, reads it back with the same `rowTag`, and expects 2025 rows.
// NOTE(review): no cleanup of the temporary directory is visible in this test — confirm whether that is intended.
@Test
47+
func xml() async throws {
48+
let tmpDir = "/tmp/" + UUID().uuidString
49+
let spark = try await SparkSession.builder.getOrCreate()
50+
try await spark.range(2025).write.option("rowTag", "person").xml(tmpDir)
51+
#expect(try await spark.read.option("rowTag", "person").xml(tmpDir).count() == 2025)
52+
await spark.stop()
53+
}
54+
4655
@Test
4756
func orc() async throws {
4857
let tmpDir = "/tmp/" + UUID().uuidString

0 commit comments

Comments
 (0)