From 3fad56f97d5049fe605c72a9f0de317b1a8f9215 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 28 Mar 2025 08:57:09 -0700 Subject: [PATCH] [SPARK-51743] Add `describe_(database|table)`, `show_(database|table)`, `explain` sql test and answer files --- .../Resources/queries/describe_database.sql | 1 + .../queries/describe_database.sql.json | 1 + .../Resources/queries/describe_table.sql | 1 + .../Resources/queries/describe_table.sql.json | 1 + .../Resources/queries/explain.sql | 1 + .../Resources/queries/explain.sql.json | 1 + .../Resources/queries/show_databases.sql | 1 + .../Resources/queries/show_databases.sql.json | 1 + .../Resources/queries/show_tables.sql | 1 + .../Resources/queries/show_tables.sql.json | 1 + Tests/SparkConnectTests/SQLTests.swift | 20 +++++++++++++++++-- 11 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_database.sql create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_database.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_table.sql create mode 100644 Tests/SparkConnectTests/Resources/queries/describe_table.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/explain.sql create mode 100644 Tests/SparkConnectTests/Resources/queries/explain.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/show_databases.sql create mode 100644 Tests/SparkConnectTests/Resources/queries/show_databases.sql.json create mode 100644 Tests/SparkConnectTests/Resources/queries/show_tables.sql create mode 100644 Tests/SparkConnectTests/Resources/queries/show_tables.sql.json diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql b/Tests/SparkConnectTests/Resources/queries/describe_database.sql new file mode 100644 index 0000000..1d88085 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_database.sql @@ -0,0 +1 @@ +DESCRIBE DATABASE default diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json new file mode 100644 index 0000000..614a70d --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json @@ -0,0 +1 @@ +[["Catalog Name","spark_catalog"],["Namespace Name","default"],["Comment","default database"],["Location","file:\/opt\/spark\/work-dir\/spark-warehouse"],["Owner","185"]] diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql b/Tests/SparkConnectTests/Resources/queries/describe_table.sql new file mode 100644 index 0000000..eaf7ff0 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_table.sql @@ -0,0 +1 @@ +DESCRIBE TABLE testcache diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json new file mode 100644 index 0000000..381060a --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json @@ -0,0 +1 @@ +[["col","int",null]] diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql b/Tests/SparkConnectTests/Resources/queries/explain.sql new file mode 100644 index 0000000..cf8dcc7 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/explain.sql @@ -0,0 +1 @@ +EXPLAIN EXTENDED select k, sum(v) from values (1, 2), (1, 3) t(k, v) group by k diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.json b/Tests/SparkConnectTests/Resources/queries/explain.sql.json new file mode 100644 index 0000000..4335a7a --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/explain.sql.json @@ -0,0 +1 @@ +[["== Parsed Logical Plan ==\n'Aggregate ['k], ['k, unresolvedalias('sum('v))]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Analyzed Logical Plan ==\nk: int, sum(v): bigint\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- SubqueryAlias t\n +- LocalRelation [k#, v#]\n\n== Optimized Logical Plan ==\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- LocalRelation [k#, v#]\n\n== Physical Plan ==\nAdaptiveSparkPlan isFinalPlan=false\n+- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#])\n +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=]\n +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#])\n +- LocalTableScan [k#, v#]\n"]] diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql b/Tests/SparkConnectTests/Resources/queries/show_databases.sql new file mode 100644 index 0000000..da16ca6 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_databases.sql @@ -0,0 +1 @@ +SHOW DATABASES diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json new file mode 100644 index 0000000..621d59f --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json @@ -0,0 +1 @@ +[["default"]] diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql b/Tests/SparkConnectTests/Resources/queries/show_tables.sql new file mode 100644 index 0000000..61b3cfc --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_tables.sql @@ -0,0 +1 @@ +SHOW TABLES diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json new file mode 100644 index 0000000..2785318 --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json @@ -0,0 +1 @@ +[["","testcache","true"]] diff --git a/Tests/SparkConnectTests/SQLTests.swift b/Tests/SparkConnectTests/SQLTests.swift index 172de05..c875a80 100644 --- a/Tests/SparkConnectTests/SQLTests.swift +++ b/Tests/SparkConnectTests/SQLTests.swift @@ -28,6 +28,22 @@ struct SQLTests { let path = Bundle.module.path(forResource: "queries", ofType: "")! let encoder = JSONEncoder() + let regexID = /#\d+L?/ + let regexPlanId = /plan_id=\d+/ + + private func removeID(_ str: String) -> String { + return str.replacing(regexPlanId, with: "plan_id=").replacing(regexID, with: "#") + } + + @Test + func testRemoveID() { + #expect(removeID("123") == "123") + #expect(removeID("123L") == "123L") + #expect(removeID("#123") == "#") + #expect(removeID("#123L") == "#") + #expect(removeID("plan_id=123") == "plan_id=") + } + #if !os(Linux) @Test func runAll() async throws { @@ -38,8 +54,8 @@ struct SQLTests { let sql = try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name)"), encoding: .utf8) let jsonData = try encoder.encode(try await spark.sql(sql).collect()) - let answer = String(data: jsonData, encoding: .utf8)! - let expected = try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).json"), encoding: .utf8) + let answer = removeID(String(data: jsonData, encoding: .utf8)!) + let expected = removeID(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).json"), encoding: .utf8)) #expect(answer == expected.trimmingCharacters(in: .whitespacesAndNewlines)) } await spark.stop()