From 6d441c327e961e4e8b85965497d7257f1ace52af Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 16 Apr 2025 15:02:37 +0900 Subject: [PATCH 1/2] [SPARK-51815] Add `Row` struct --- Sources/SparkConnect/Catalog.swift | 10 +- Sources/SparkConnect/DataFrame.swift | 20 ++-- Sources/SparkConnect/Row.swift | 92 ++++++++++++++++++ Tests/SparkConnectTests/DataFrameTests.swift | 20 ++-- .../Resources/queries/binary.sql.answer | 1 + .../Resources/queries/binary.sql.json | 1 - .../Resources/queries/cache.sql.answer | 1 + .../Resources/queries/cache.sql.json | 1 - .../Resources/queries/clear_cache.sql.answer | 1 + .../Resources/queries/clear_cache.sql.json | 1 - .../Resources/queries/date.sql.answer | 1 + .../Resources/queries/date.sql.json | 1 - .../queries/describe_database.sql.answer | 5 + .../queries/describe_database.sql.json | 1 - .../queries/describe_function.sql.answer | 3 + .../queries/describe_function.sql.json | 1 - .../queries/describe_query.sql.answer | 3 + .../Resources/queries/describe_query.sql.json | 1 - .../queries/describe_table.sql.answer | 1 + .../Resources/queries/describe_table.sql.json | 1 - .../Resources/queries/explain.sql.answer | 22 +++++ .../Resources/queries/explain.sql.json | 1 - .../Resources/queries/floating.sql.answer | 1 + .../Resources/queries/floating.sql.json | 1 - .../Resources/queries/integral.sql.answer | 1 + .../Resources/queries/integral.sql.json | 1 - .../Resources/queries/pipesyntax.sql.answer | 2 + .../Resources/queries/pipesyntax.sql.json | 1 - .../Resources/queries/select.sql.answer | 1 + .../Resources/queries/select.sql.json | 1 - .../queries/show_databases.sql.answer | 1 + .../Resources/queries/show_databases.sql.json | 1 - .../Resources/queries/show_tables.sql.answer | 1 + .../Resources/queries/show_tables.sql.json | 1 - .../Resources/queries/string.sql.answer | 1 + .../Resources/queries/string.sql.json | 1 - .../Resources/queries/struct.sql.answer | 1 + .../Resources/queries/struct.sql.json | 1 - .../Resources/queries/uncache.sql.answer | 1 + .../Resources/queries/uncache.sql.json | 1 - Tests/SparkConnectTests/RowTests.swift | 95 +++++++++++++++++++ Tests/SparkConnectTests/SQLTests.swift | 5 +- .../SparkConnectTests/SparkSessionTests.swift | 2 +- 43 files changed, 263 insertions(+), 47 deletions(-) create mode 100644 Sources/SparkConnect/Row.swift create mode 100644 Tests/SparkConnectTests/Resources/queries/binary.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/binary.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/cache.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/cache.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/date.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/date.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/describe_database.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/describe_function.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/describe_query.sql.json create mode 100644 
Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/describe_table.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/explain.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/explain.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/floating.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/floating.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/integral.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/integral.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/select.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/select.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/show_databases.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/show_tables.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/string.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/string.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/struct.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/struct.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/uncache.sql.answer delete mode 100644 Tests/SparkConnectTests/Resources/queries/uncache.sql.json create mode 100644 Tests/SparkConnectTests/RowTests.swift diff --git a/Sources/SparkConnect/Catalog.swift b/Sources/SparkConnect/Catalog.swift index ed37bf7..f14bff5 100644 --- a/Sources/SparkConnect/Catalog.swift +++ b/Sources/SparkConnect/Catalog.swift @@ -100,7 +100,7 @@ public actor Catalog: Sendable { catalog.catType = .currentCatalog(Spark_Connect_CurrentCatalog()) return catalog }) - return try await df.collect()[0][0]! + return try await df.collect()[0][0] as! String } /// Sets the current default catalog in this session. @@ -130,7 +130,7 @@ public actor Catalog: Sendable { return catalog }) return try await df.collect().map { - CatalogMetadata(name: $0[0]!, description: $0[1]) + try CatalogMetadata(name: $0[0] as! String, description: $0[1] as? String) } } @@ -142,7 +142,7 @@ public actor Catalog: Sendable { catalog.catType = .currentDatabase(Spark_Connect_CurrentDatabase()) return catalog }) - return try await df.collect()[0][0]! + return try await df.collect()[0][0] as! String } /// Sets the current default database in this session. @@ -173,7 +173,7 @@ public actor Catalog: Sendable { return catalog }) return try await df.collect().map { - Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!) + try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String) } } @@ -189,7 +189,7 @@ public actor Catalog: Sendable { return catalog }) return try await df.collect().map { - Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!) + try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String) }.first! 
} diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift index be5c5e7..80e5692 100644 --- a/Sources/SparkConnect/DataFrame.swift +++ b/Sources/SparkConnect/DataFrame.swift @@ -197,15 +197,15 @@ public actor DataFrame: Sendable { } } - /// Execute the plan and return the result as ``[[String?]]``. - /// - Returns: ``[[String?]]`` - public func collect() async throws -> [[String?]] { + /// Execute the plan and return the result as ``[Row]``. + /// - Returns: ``[Row]`` + public func collect() async throws -> [Row] { try await execute() - var result: [[String?]] = [] + var result: [Row] = [] for batch in self.batches { for i in 0.. [[String?]] { + /// - Returns: ``[Row]`` + public func head(_ n: Int32 = 1) async throws -> [Row] { return try await limit(n).collect() } /// Returns the last `n` rows. /// - Parameter n: The number of rows. - /// - Returns: ``[[String?]]`` - public func tail(_ n: Int32) async throws -> [[String?]] { + /// - Returns: ``[Row]`` + public func tail(_ n: Int32) async throws -> [Row] { let lastN = DataFrame(spark:spark, plan: SparkConnectClient.getTail(self.plan.root, n)) return try await lastN.collect() } diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift new file mode 100644 index 0000000..f6c774a --- /dev/null +++ b/Sources/SparkConnect/Row.swift @@ -0,0 +1,92 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +import Foundation + +public struct Row: Sendable, Equatable { + let values : [Sendable?] + + public init(_ values: Sendable?...) { + self.values = values + } + + public init(valueArray: [Sendable?]) { + self.values = valueArray + } + + public static var empty: Row { + get { + return Row() + } + } + + public var size: Int { get { return length } } + + public var length: Int { get { return values.count } } + + subscript(index: Int) -> Sendable { + get throws { + return try get(index) + } + } + + public func get(_ i: Int) throws -> Sendable { + if (i < 0 || i >= self.length) { + throw SparkConnectError.InvalidArgumentException + } + return values[i] + } + + public static func == (lhs: Row, rhs: Row) -> Bool { + if (lhs.values.count != rhs.values.count) { + return false + } + return lhs.values.elementsEqual(rhs.values) { (x, y) in + if x == nil && y == nil { + return true + } else if let a = x as? Bool, let b = y as? Bool { + return a == b + } else if let a = x as? Int, let b = y as? Int { + return a == b + } else if let a = x as? Int8, let b = y as? Int8 { + return a == b + } else if let a = x as? Int16, let b = y as? Int16 { + return a == b + } else if let a = x as? Int32, let b = y as? Int32 { + return a == b + } else if let a = x as? Int64, let b = y as? Int64 { + return a == b + } else if let a = x as? Float, let b = y as? 
Float { + return a == b + } else if let a = x as? Double, let b = y as? Double { + return a == b + } else if let a = x as? String, let b = y as? String { + return a == b + } else { + return false + } + } + } + + public func toString() -> String { + return "[\(self.values.map { "\($0 ?? "null")" }.joined(separator: ","))]" + } +} + +extension Row { +} diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift index b9c927d..afd182c 100644 --- a/Tests/SparkConnectTests/DataFrameTests.swift +++ b/Tests/SparkConnectTests/DataFrameTests.swift @@ -249,7 +249,7 @@ struct DataFrameTests { @Test func sort() async throws { let spark = try await SparkSession.builder.getOrCreate() - let expected = (1...10).map{ [String($0)] } + let expected = Array((1...10).map{ Row(String($0)) }) #expect(try await spark.range(10, 0, -1).sort("id").collect() == expected) await spark.stop() } @@ -257,7 +257,7 @@ struct DataFrameTests { @Test func orderBy() async throws { let spark = try await SparkSession.builder.getOrCreate() - let expected = (1...10).map{ [String($0)] } + let expected = Array((1...10).map{ Row(String($0)) }) #expect(try await spark.range(10, 0, -1).orderBy("id").collect() == expected) await spark.stop() } @@ -284,7 +284,7 @@ struct DataFrameTests { #expect( try await spark.sql( "SELECT * FROM VALUES (1, true, 'abc'), (null, null, null), (3, false, 'def')" - ).collect() == [["1", "true", "abc"], [nil, nil, nil], ["3", "false", "def"]]) + ).collect() == [Row("1", "true", "abc"), Row(nil, nil, nil), Row("3", "false", "def")]) await spark.stop() } @@ -292,10 +292,10 @@ struct DataFrameTests { func head() async throws { let spark = try await SparkSession.builder.getOrCreate() #expect(try await spark.range(0).head().isEmpty) - #expect(try await spark.range(2).sort("id").head() == [["0"]]) - #expect(try await spark.range(2).sort("id").head(1) == [["0"]]) - #expect(try await spark.range(2).sort("id").head(2) == [["0"], ["1"]]) - #expect(try await spark.range(2).sort("id").head(3) == [["0"], ["1"]]) + #expect(try await spark.range(2).sort("id").head() == [Row("0")]) + #expect(try await spark.range(2).sort("id").head(1) == [Row("0")]) + #expect(try await spark.range(2).sort("id").head(2) == [Row("0"), Row("1")]) + #expect(try await spark.range(2).sort("id").head(3) == [Row("0"), Row("1")]) await spark.stop() } @@ -303,9 +303,9 @@ struct DataFrameTests { func tail() async throws { let spark = try await SparkSession.builder.getOrCreate() #expect(try await spark.range(0).tail(1).isEmpty) - #expect(try await spark.range(2).sort("id").tail(1) == [["1"]]) - #expect(try await spark.range(2).sort("id").tail(2) == [["0"], ["1"]]) - #expect(try await spark.range(2).sort("id").tail(3) == [["0"], ["1"]]) + #expect(try await spark.range(2).sort("id").tail(1) == [Row("1")]) + #expect(try await spark.range(2).sort("id").tail(2) == [Row("0"), Row("1")]) + #expect(try await spark.range(2).sort("id").tail(3) == [Row("0"), Row("1")]) await spark.stop() } diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.answer b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer new file mode 100644 index 0000000..0d42bcd --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer @@ -0,0 +1 @@ +[[61 62 63]] diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.json b/Tests/SparkConnectTests/Resources/queries/binary.sql.json deleted file mode 100644 index 08434e6..0000000 --- a/Tests/SparkConnectTests/Resources/queries/binary.sql.json +++ 
/dev/null @@ -1 +0,0 @@ -[["[61 62 63]"]] diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer @@ -0,0 +1 @@ + diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.json b/Tests/SparkConnectTests/Resources/queries/cache.sql.json deleted file mode 100644 index fe51488..0000000 --- a/Tests/SparkConnectTests/Resources/queries/cache.sql.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer @@ -0,0 +1 @@ + diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json deleted file mode 100644 index fe51488..0000000 --- a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.answer b/Tests/SparkConnectTests/Resources/queries/date.sql.answer new file mode 100644 index 0000000..41ae9ec --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/date.sql.answer @@ -0,0 +1 @@ +[2025-03-15 00:00:00 +0000] diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.json b/Tests/SparkConnectTests/Resources/queries/date.sql.json deleted file mode 100644 index 3fda858..0000000 --- a/Tests/SparkConnectTests/Resources/queries/date.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["2025-03-15 00:00:00 +0000"]] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer new file mode 100644 index 0000000..a44243f --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer @@ -0,0 +1,5 @@ +[Catalog Name,spark_catalog] +[Namespace Name,default] +[Comment,default database] +[Location,*] +[Owner,*] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json deleted file mode 100644 index 614a70d..0000000 --- a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["Catalog Name","spark_catalog"],["Namespace Name","default"],["Comment","default database"],["Location","file:\/opt\/spark\/work-dir\/spark-warehouse"],["Owner","185"]] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer new file mode 100644 index 0000000..3044189 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer @@ -0,0 +1,3 @@ +[Function: abs] +[Class: org.apache.spark.sql.catalyst.expressions.Abs] +[Usage: abs(expr) - Returns the absolute value of the numeric or interval value.] 
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json deleted file mode 100644 index a9a8b67..0000000 --- a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["Function: abs"],["Class: org.apache.spark.sql.catalyst.expressions.Abs"],["Usage: abs(expr) - Returns the absolute value of the numeric or interval value."]] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer new file mode 100644 index 0000000..fba8554 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer @@ -0,0 +1,3 @@ +[id,int,null] +[name,string,null] +[salary,double,null] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json deleted file mode 100644 index 7f3b5be..0000000 --- a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["id","int",null],["name","string",null],["salary","double",null]] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer new file mode 100644 index 0000000..eeec974 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer @@ -0,0 +1 @@ +[col,int,null] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json deleted file mode 100644 index 381060a..0000000 --- a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["col","int",null]] diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.answer b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer new file mode 100644 index 0000000..df0b263 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer @@ -0,0 +1,22 @@ +[== Parsed Logical Plan == +'Aggregate ['k], ['k, unresolvedalias('sum('v))] ++- SubqueryAlias t + +- LocalRelation [k#, v#] + +== Analyzed Logical Plan == +k: int, sum(v): bigint +Aggregate [k#], [k#, sum(v#) AS sum(v)#] ++- SubqueryAlias t + +- LocalRelation [k#, v#] + +== Optimized Logical Plan == +Aggregate [k#], [k#, sum(v#) AS sum(v)#] ++- LocalRelation [k#, v#] + +== Physical Plan == +AdaptiveSparkPlan isFinalPlan=false ++- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#]) + +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=] + +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#]) + +- LocalTableScan [k#, v#] +] diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.json b/Tests/SparkConnectTests/Resources/queries/explain.sql.json deleted file mode 100644 index 4335a7a..0000000 --- a/Tests/SparkConnectTests/Resources/queries/explain.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["== Parsed Logical Plan ==\n'Aggregate ['k], ['k, unresolvedalias('sum('v))]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Analyzed Logical Plan ==\nk: int, sum(v): bigint\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Optimized Logical Plan ==\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- LocalRelation [k#, v#]\n\n== Physical Plan ==\nAdaptiveSparkPlan isFinalPlan=false\n+- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, 
sum(v)#])\n +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=]\n +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#])\n +- LocalTableScan [k#, v#]\n"]] diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.answer b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer new file mode 100644 index 0000000..913b56c --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer @@ -0,0 +1 @@ +[1.0,-2.0,3.0,-4.0,inf,nan,inf,nan] diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.json b/Tests/SparkConnectTests/Resources/queries/floating.sql.json deleted file mode 100644 index 8bedf9a..0000000 --- a/Tests/SparkConnectTests/Resources/queries/floating.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["1.0","-2.0","3.0","-4.0","inf","nan","inf","nan"]] diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.answer b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer new file mode 100644 index 0000000..3933c80 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer @@ -0,0 +1 @@ +[127,-128,32767,-32768,2147483647,-2147483648,9223372036854775807,-9223372036854775808] diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.json b/Tests/SparkConnectTests/Resources/queries/integral.sql.json deleted file mode 100644 index 4c24e38..0000000 --- a/Tests/SparkConnectTests/Resources/queries/integral.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["127","-128","32767","-32768","2147483647","-2147483648","9223372036854775807","-9223372036854775808"]] diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer new file mode 100644 index 0000000..8e79aed --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer @@ -0,0 +1,2 @@ +[0,0] +[1,2] diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json deleted file mode 100644 index bfa3b54..0000000 --- a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["0","0"],["1","2"]] diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.answer b/Tests/SparkConnectTests/Resources/queries/select.sql.answer new file mode 100644 index 0000000..7660873 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/select.sql.answer @@ -0,0 +1 @@ +[1] diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.json b/Tests/SparkConnectTests/Resources/queries/select.sql.json deleted file mode 100644 index 0a0ab46..0000000 --- a/Tests/SparkConnectTests/Resources/queries/select.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["1"]] diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer new file mode 100644 index 0000000..ab109a1 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer @@ -0,0 +1 @@ +[default] diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json deleted file mode 100644 index 621d59f..0000000 --- a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["default"]] diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer new file mode 100644 index 
0000000..9338974 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer @@ -0,0 +1 @@ +[,testcache,true] diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json deleted file mode 100644 index 2785318..0000000 --- a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["","testcache","true"]] diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.answer b/Tests/SparkConnectTests/Resources/queries/string.sql.answer new file mode 100644 index 0000000..8895994 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/string.sql.answer @@ -0,0 +1 @@ +[abc,def] diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.json b/Tests/SparkConnectTests/Resources/queries/string.sql.json deleted file mode 100644 index f5935b2..0000000 --- a/Tests/SparkConnectTests/Resources/queries/string.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["abc","def"]] diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.answer b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer new file mode 100644 index 0000000..cbbff35 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer @@ -0,0 +1 @@ +[{1},{2,{3}}] diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.json b/Tests/SparkConnectTests/Resources/queries/struct.sql.json deleted file mode 100644 index 08ef088..0000000 --- a/Tests/SparkConnectTests/Resources/queries/struct.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["{1}","{2,{3}}"]] diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer @@ -0,0 +1 @@ + diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.json b/Tests/SparkConnectTests/Resources/queries/uncache.sql.json deleted file mode 100644 index fe51488..0000000 --- a/Tests/SparkConnectTests/Resources/queries/uncache.sql.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/Tests/SparkConnectTests/RowTests.swift b/Tests/SparkConnectTests/RowTests.swift new file mode 100644 index 0000000..de923a5 --- /dev/null +++ b/Tests/SparkConnectTests/RowTests.swift @@ -0,0 +1,95 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +import Foundation +import Testing + +import SparkConnect + +/// A test suite for `Row` +struct RowTests { + @Test + func empty() { + #expect(Row.empty.size == 0) + #expect(Row.empty.length == 0) + #expect(throws: SparkConnectError.InvalidArgumentException) { + try Row.empty.get(0) + } + } + + @Test + func create() { + #expect(Row(nil).size == 1) + #expect(Row(1).size == 1) + #expect(Row(1.1).size == 1) + #expect(Row("a").size == 1) + #expect(Row(nil, 1, 1.1, "a", true).size == 5) + #expect(Row(valueArray: [nil, 1, 1.1, "a", true]).size == 5) + } + + @Test + func string() async throws { + #expect(Row(nil, 1, 1.1, "a", true).toString() == "[null,1,1.1,a,true]") + } + + @Test + func get() throws { + let row = Row(1, 1.1, "a", true) + #expect(try row.get(0) as! Int == 1) + #expect(try row.get(1) as! Double == 1.1) + #expect(try row.get(2) as! String == "a") + #expect(try row.get(3) as! Bool == true) + #expect(throws: SparkConnectError.InvalidArgumentException) { + try Row.empty.get(-1) + } + } + + @Test + func compare() { + #expect(Row(nil) != Row()) + #expect(Row(nil) == Row(nil)) + + #expect(Row(1) == Row(1)) + #expect(Row(1) != Row(2)) + #expect(Row(1,2,3) == Row(1,2,3)) + #expect(Row(1,2,3) != Row(1,2,4)) + + #expect(Row(1.0) == Row(1.0)) + #expect(Row(1.0) != Row(2.0)) + + #expect(Row("a") == Row("a")) + #expect(Row("a") != Row("b")) + + #expect(Row(true) == Row(true)) + #expect(Row(true) != Row(false)) + + #expect(Row(1,"a") == Row(1,"a")) + #expect(Row(1,"a") != Row(2,"a")) + #expect(Row(1,"a") != Row(1,"b")) + + #expect(Row(0, 1, 2) == Row(valueArray: [0, 1, 2])) + + #expect(Row(0) == Row(Optional(0))) + #expect(Row(Optional(0)) == Row(Optional(0))) + + #expect([Row(1)] == [Row(1)]) + #expect([Row(1), Row(2)] == [Row(1), Row(2)]) + #expect([Row(1), Row(2)] != [Row(1), Row(3)]) + } +} diff --git a/Tests/SparkConnectTests/SQLTests.swift b/Tests/SparkConnectTests/SQLTests.swift index 997e888..7df5c99 100644 --- a/Tests/SparkConnectTests/SQLTests.swift +++ b/Tests/SparkConnectTests/SQLTests.swift @@ -78,9 +78,8 @@ struct SQLTests { print(name) let sql = try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name)"), encoding: .utf8) - let jsonData = try encoder.encode(try await spark.sql(sql).collect()) - let answer = cleanUp(String(data: jsonData, encoding: .utf8)!) 
- let expected = cleanUp(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).json"), encoding: .utf8)) + let answer = cleanUp(try await spark.sql(sql).collect().map { $0.toString() }.joined(separator: "\n")) + let expected = cleanUp(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).answer"), encoding: .utf8)) #expect(answer == expected.trimmingCharacters(in: .whitespacesAndNewlines)) } await spark.stop() diff --git a/Tests/SparkConnectTests/SparkSessionTests.swift b/Tests/SparkConnectTests/SparkSessionTests.swift index 901a683..f864b09 100644 --- a/Tests/SparkConnectTests/SparkSessionTests.swift +++ b/Tests/SparkConnectTests/SparkSessionTests.swift @@ -91,7 +91,7 @@ struct SparkSessionTests { let spark = try await SparkSession.builder.getOrCreate() #expect(try await spark.time(spark.range(1000).count) == 1000) #if !os(Linux) - #expect(try await spark.time(spark.range(1).collect) == [["0"]]) + #expect(try await spark.time(spark.range(1).collect) == [Row("0")]) try await spark.time(spark.range(10).show) #endif await spark.stop() From d087b50a017afee22b7e86ff19c4e1582f4600ab Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 16 Apr 2025 15:35:42 +0900 Subject: [PATCH 2/2] Re-style --- Sources/SparkConnect/Row.swift | 22 ++++++++++------------ Tests/SparkConnectTests/RowTests.swift | 13 ++++++------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift index f6c774a..0caf505 100644 --- a/Sources/SparkConnect/Row.swift +++ b/Sources/SparkConnect/Row.swift @@ -19,7 +19,7 @@ import Foundation public struct Row: Sendable, Equatable { - let values : [Sendable?] + let values: [Sendable?] public init(_ values: Sendable?...) { self.values = values @@ -30,14 +30,12 @@ public struct Row: Sendable, Equatable { } public static var empty: Row { - get { - return Row() - } + return Row() } - public var size: Int { get { return length } } + public var size: Int { return length } - public var length: Int { get { return values.count } } + public var length: Int { return values.count } subscript(index: Int) -> Sendable { get throws { @@ -46,21 +44,21 @@ public struct Row: Sendable, Equatable { } public func get(_ i: Int) throws -> Sendable { - if (i < 0 || i >= self.length) { + if i < 0 || i >= self.length { throw SparkConnectError.InvalidArgumentException } return values[i] } public static func == (lhs: Row, rhs: Row) -> Bool { - if (lhs.values.count != rhs.values.count) { + if lhs.values.count != rhs.values.count { return false } return lhs.values.elementsEqual(rhs.values) { (x, y) in if x == nil && y == nil { return true } else if let a = x as? Bool, let b = y as? Bool { - return a == b + return a == b } else if let a = x as? Int, let b = y as? Int { return a == b } else if let a = x as? Int8, let b = y as? Int8 { @@ -72,11 +70,11 @@ public struct Row: Sendable, Equatable { } else if let a = x as? Int64, let b = y as? Int64 { return a == b } else if let a = x as? Float, let b = y as? Float { - return a == b + return a == b } else if let a = x as? Double, let b = y as? Double { - return a == b + return a == b } else if let a = x as? String, let b = y as? 
String { - return a == b + return a == b } else { return false } diff --git a/Tests/SparkConnectTests/RowTests.swift b/Tests/SparkConnectTests/RowTests.swift index de923a5..3262686 100644 --- a/Tests/SparkConnectTests/RowTests.swift +++ b/Tests/SparkConnectTests/RowTests.swift @@ -18,9 +18,8 @@ // import Foundation -import Testing - import SparkConnect +import Testing /// A test suite for `Row` struct RowTests { @@ -67,8 +66,8 @@ struct RowTests { #expect(Row(1) == Row(1)) #expect(Row(1) != Row(2)) - #expect(Row(1,2,3) == Row(1,2,3)) - #expect(Row(1,2,3) != Row(1,2,4)) + #expect(Row(1, 2, 3) == Row(1, 2, 3)) + #expect(Row(1, 2, 3) != Row(1, 2, 4)) #expect(Row(1.0) == Row(1.0)) #expect(Row(1.0) != Row(2.0)) @@ -79,9 +78,9 @@ struct RowTests { #expect(Row(true) == Row(true)) #expect(Row(true) != Row(false)) - #expect(Row(1,"a") == Row(1,"a")) - #expect(Row(1,"a") != Row(2,"a")) - #expect(Row(1,"a") != Row(1,"b")) + #expect(Row(1, "a") == Row(1, "a")) + #expect(Row(1, "a") != Row(2, "a")) + #expect(Row(1, "a") != Row(1, "b")) #expect(Row(0, 1, 2) == Row(valueArray: [0, 1, 2]))
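
A minimal usage sketch of the `Row` API introduced by this patch, for reviewers. This is illustrative only and not part of the patch: it assumes the patched `SparkConnect` module is importable, the literal values are made up, and the commented-out lines additionally assume a reachable Spark Connect server.

    import SparkConnect

    // Rows are built from variadic `Sendable?` values or from an array.
    let row = Row(1, 1.1, "a", true)
    assert(row.size == 4)  // `size` is an alias for `length`
    assert(row == Row(valueArray: [1, 1.1, "a", true]))

    // `get(_:)` throws on out-of-range indices, so element access
    // happens inside `do`/`catch`; elements come back as `Sendable`
    // and are downcast by the caller, as in RowTests above.
    do {
      let first = try row.get(0) as! Int
      print(first)            // 1
      print(row.toString())   // [1,1.1,a,true]
    } catch {
      print("invalid index: \(error)")
    }

    // A row holding a single nil is not the same as an empty row.
    assert(Row(nil) != Row.empty)

    // With a running Spark Connect server (assumption), `collect()`
    // now returns `[Row]` instead of `[[String?]]`:
    // let spark = try await SparkSession.builder.getOrCreate()
    // let rows: [Row] = try await spark.sql("SELECT 1, 'abc'").collect()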