diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 00ea132..4ac6085 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -96,9 +96,10 @@ jobs: run: | curl -LO https://dist.apache.org/repos/dist/dev/spark/v4.0.0-rc4-bin/spark-4.0.0-bin-hadoop3.tgz tar xvfz spark-4.0.0-bin-hadoop3.tgz - cd spark-4.0.0-bin-hadoop3/sbin + mv spark-4.0.0-bin-hadoop3 /tmp/spark + cd /tmp/spark/sbin ./start-connect-server.sh - cd ../.. + cd - swift test --no-parallel integration-test-token: @@ -114,9 +115,10 @@ jobs: run: | curl -LO https://dist.apache.org/repos/dist/dev/spark/v4.0.0-rc4-bin/spark-4.0.0-bin-hadoop3.tgz tar xvfz spark-4.0.0-bin-hadoop3.tgz - cd spark-4.0.0-bin-hadoop3/sbin + mv spark-4.0.0-bin-hadoop3 /tmp/spark + cd /tmp/spark/sbin ./start-connect-server.sh - cd ../.. + cd - swift test --no-parallel integration-test-mac-spark3: @@ -135,7 +137,8 @@ jobs: run: | curl -LO https://downloads.apache.org/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz tar xvfz spark-3.5.5-bin-hadoop3.tgz - cd spark-3.5.5-bin-hadoop3/sbin + mv spark-3.5.5-bin-hadoop3 /tmp/spark + cd /tmp/spark/sbin ./start-connect-server.sh --packages org.apache.spark:spark-connect_2.12:3.5.5 - cd ../.. + cd - swift test --no-parallel diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.answer b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer index 52085ed..13eb1f4 100644 --- a/Tests/SparkConnectTests/Resources/queries/binary.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer @@ -1 +1,5 @@ -[abc] ++----------+ +| abc| ++----------+ +|[61 62 63]| ++----------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/cache.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/create_scala_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/create_scala_function.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/create_scala_function.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/create_scala_function.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/create_table_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/create_table_function.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/create_table_function.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/create_table_function.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.answer b/Tests/SparkConnectTests/Resources/queries/date.sql.answer index 41ae9ec..4ad4f95 100644 --- a/Tests/SparkConnectTests/Resources/queries/date.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/date.sql.answer @@ -1 +1,5 @@ -[2025-03-15 00:00:00 +0000] ++-----------------+ +|DATE '2025-03-15'| ++-----------------+ +| 2025-03-15| ++-----------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer index a44243f..28c7c7d 100644 --- a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer @@ -1,5 +1,9 @@ -[Catalog Name,spark_catalog] -[Namespace Name,default] -[Comment,default database] -[Location,*] -[Owner,*] ++--------------+----------------------------------------+ +| info_name| info_value| ++--------------+----------------------------------------+ +| Catalog Name| spark_catalog| +|Namespace Name| default| +| Comment| default database| +| Location|*| +| Owner| *| ++--------------+----------------------------------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer index 3044189..14cad3e 100644 --- a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer @@ -1,3 +1,7 @@ -[Function: abs] -[Class: org.apache.spark.sql.catalyst.expressions.Abs] -[Usage: abs(expr) - Returns the absolute value of the numeric or interval value.] ++-------------------------------------------------------------------------------+ +| function_desc| ++-------------------------------------------------------------------------------+ +| Function: abs| +| Class: org.apache.spark.sql.catalyst.expressions.Abs| +|Usage: abs(expr) - Returns the absolute value of the numeric or interval value.| ++-------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer index fba8554..bb9ec24 100644 --- a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer @@ -1,3 +1,7 @@ -[id,int,null] -[name,string,null] -[salary,double,null] ++--------+---------+-------+ +|col_name|data_type|comment| ++--------+---------+-------+ +| id| int| NULL| +| name| string| NULL| +| salary| double| NULL| ++--------+---------+-------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer index eeec974..216dd46 100644 --- a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer @@ -1 +1,5 @@ -[col,int,null] ++--------+---------+-------+ +|col_name|data_type|comment| ++--------+---------+-------+ +| col| int| NULL| ++--------+---------+-------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/drop_scala_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/drop_scala_function.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/drop_scala_function.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/drop_scala_function.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/drop_table_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/drop_table_function.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/drop_table_function.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/drop_table_function.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.answer b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer index df0b263..e726de8 100644 --- a/Tests/SparkConnectTests/Resources/queries/explain.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer @@ -1,22 +1,5 @@ -[== Parsed Logical Plan == -'Aggregate ['k], ['k, unresolvedalias('sum('v))] -+- SubqueryAlias t - +- LocalRelation [k#, v#] - -== Analyzed Logical Plan == -k: int, sum(v): bigint -Aggregate [k#], [k#, sum(v#) AS sum(v)#] -+- SubqueryAlias t - +- LocalRelation [k#, v#] - -== Optimized Logical Plan == -Aggregate [k#], [k#, sum(v#) AS sum(v)#] -+- LocalRelation [k#, v#] - -== Physical Plan == -AdaptiveSparkPlan isFinalPlan=false -+- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#]) - +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=] - +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#]) - +- LocalTableScan [k#, v#] -] ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan| ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +|== Parsed Logical Plan ==\n'Aggregate ['k], ['k, unresolvedalias('sum('v))]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Analyzed Logical Plan ==\nk: int, sum(v): bigint\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Optimized Logical Plan ==\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- LocalRelation [k#, v#]\n\n== Physical Plan ==\nAdaptiveSparkPlan isFinalPlan=false\n+- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#])\n +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=]\n +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#])\n +- LocalTableScan [k#, v#]\n| ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.answer b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer index 913b56c..c8ed73a 100644 --- a/Tests/SparkConnectTests/Resources/queries/floating.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer @@ -1 +1,5 @@ -[1.0,-2.0,3.0,-4.0,inf,nan,inf,nan] ++---+----+---+----+--------+---+--------+---+ +|1.0|-2.0|3.0|-4.0| inf|NaN| inf|NaN| ++---+----+---+----+--------+---+--------+---+ +|1.0|-2.0|3.0|-4.0|Infinity|NaN|Infinity|NaN| ++---+----+---+----+--------+---+--------+---+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.answer b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer index 3933c80..5ed3bb9 100644 --- a/Tests/SparkConnectTests/Resources/queries/integral.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer @@ -1 +1,5 @@ -[127,-128,32767,-32768,2147483647,-2147483648,9223372036854775807,-9223372036854775808] ++---+----+-----+------+----------+-----------+-------------------+--------------------+ +|127|-128|32767|-32768|2147483647|-2147483648|9223372036854775807|-9223372036854775808| ++---+----+-----+------+----------+-----------+-------------------+--------------------+ +|127|-128|32767|-32768|2147483647|-2147483648|9223372036854775807|-9223372036854775808| ++---+----+-----+------+----------+-----------+-------------------+--------------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer index 8e79aed..1d12e36 100644 --- a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer @@ -1,2 +1,6 @@ -[0,0] -[1,2] ++---+------+ +|col|result| ++---+------+ +| 0| 0| +| 1| 2| ++---+------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.answer b/Tests/SparkConnectTests/Resources/queries/select.sql.answer index 7660873..4ed047e 100644 --- a/Tests/SparkConnectTests/Resources/queries/select.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/select.sql.answer @@ -1 +1,5 @@ -[1] ++---+ +| 1| ++---+ +| 1| ++---+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer index ab109a1..8332f61 100644 --- a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer @@ -1 +1,5 @@ -[default] ++---------+ +|namespace| ++---------+ +| default| ++---------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer index 9338974..9da8c72 100644 --- a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer @@ -1 +1,5 @@ -[,testcache,true] ++---------+---------+-----------+ +|namespace|tableName|isTemporary| ++---------+---------+-----------+ +| |testcache| true| ++---------+---------+-----------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.answer b/Tests/SparkConnectTests/Resources/queries/string.sql.answer index 8895994..0cbe51d 100644 --- a/Tests/SparkConnectTests/Resources/queries/string.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/string.sql.answer @@ -1 +1,5 @@ -[abc,def] ++---+---+ +|abc|def| ++---+---+ +|abc|def| ++---+---+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.answer b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer index cbbff35..ed2a6c9 100644 --- a/Tests/SparkConnectTests/Resources/queries/struct.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer @@ -1 +1,5 @@ -[{1},{2,{3}}] ++---------+--------------------+ +|struct(1)|struct(2, struct(3))| ++---------+--------------------+ +| {1}| {2, {3}}| ++---------+--------------------+ \ No newline at end of file diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer index 8b13789..9723686 100644 --- a/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer +++ b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer @@ -1 +1,4 @@ - +++ +|| +++ +++ \ No newline at end of file diff --git a/Tests/SparkConnectTests/SQLTests.swift b/Tests/SparkConnectTests/SQLTests.swift index f07c409..b81e94a 100644 --- a/Tests/SparkConnectTests/SQLTests.swift +++ b/Tests/SparkConnectTests/SQLTests.swift @@ -20,7 +20,7 @@ import Foundation import Testing -import SparkConnect +@testable import SparkConnect /// A test suite for various SQL statements. struct SQLTests { @@ -49,6 +49,10 @@ struct SQLTests { return str.replacing(regexOwner, with: "*") } + private func normalize(_ str: String) -> String { + return str.replacing(/[-]+/, with: "-").replacing(/[ ]+/, with: " ") + } + @Test func testRemoveID() { #expect(removeID("123") == "123") @@ -69,6 +73,12 @@ struct SQLTests { #expect(removeOwner("185") == "*") } + @Test + func testNormalize() { + #expect(normalize("+------+------------------+") == "+-+-+") + #expect(normalize("+ + +") == "+ + +") + } + let queriesForSpark4Only: [String] = [ "create_scala_function.sql", "create_table_function.sql", @@ -80,6 +90,7 @@ struct SQLTests { @Test func runAll() async throws { let spark = try await SparkSession.builder.getOrCreate() + let MAX = Int32.max for name in try! fm.contentsOfDirectory(atPath: path).sorted() { guard name.hasSuffix(".sql") else { continue } print(name) @@ -89,13 +100,18 @@ struct SQLTests { } let sql = try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name)"), encoding: .utf8) - let answer = cleanUp(try await spark.sql(sql).collect().map { $0.toString() }.joined(separator: "\n")) + let result = try await spark.sql(sql).showString(MAX, MAX, false).collect()[0].get(0) as! String + let answer = cleanUp(result.trimmingCharacters(in: .whitespacesAndNewlines)) if (regenerateGoldenFiles) { let path = "\(FileManager.default.currentDirectoryPath)/Tests/SparkConnectTests/Resources/queries/\(name).answer" - fm.createFile(atPath: path, contents: (answer + "\n").data(using: .utf8)!, attributes: nil) + fm.createFile(atPath: path, contents: answer.data(using: .utf8)!, attributes: nil) } else { let expected = cleanUp(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).answer"), encoding: .utf8)) - #expect(answer == expected.trimmingCharacters(in: .whitespacesAndNewlines)) + .trimmingCharacters(in: .whitespacesAndNewlines) + if (answer != expected) { + print("Try to compare normalized result.") + #expect(normalize(answer) == normalize(expected)) + } } } await spark.stop()