Skip to content

Commit 7fc8881

Browse files
committed
[SPARK-25296][SQL][TEST] Create ExplainSuite
## What changes were proposed in this pull request?

Move the output verification of the Explain test cases to a new suite, ExplainSuite.

## How was this patch tested?

N/A

Closes apache#22300 from gatorsmile/test3200.

Authored-by: Xiao Li <[email protected]>
Signed-off-by: Xiao Li <[email protected]>
1 parent 339859c commit 7fc8881

File tree

4 files changed

+58
-30
lines changed

4 files changed

+58
-30
lines changed

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,13 +2553,4 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
25532553
}
25542554
}
25552555

2556-
test("SPARK-23034 show rdd names in RDD scan nodes") {
2557-
val rddWithName = spark.sparkContext.parallelize(Row(1, "abc") :: Nil).setName("testRdd")
2558-
val df2 = spark.createDataFrame(rddWithName, StructType.fromDDL("c0 int, c1 string"))
2559-
val output2 = new java.io.ByteArrayOutputStream()
2560-
Console.withOut(output2) {
2561-
df2.explain(extended = false)
2562-
}
2563-
assert(output2.toString.contains("Scan ExistingRDD testRdd"))
2564-
}
25652556
}

sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -206,15 +206,4 @@ class DatasetCacheSuite extends QueryTest with SharedSQLContext with TimeLimits
206206
// first time use, load cache
207207
checkDataset(df5, Row(10))
208208
}
209-
210-
test("SPARK-24850 InMemoryRelation string representation does not include cached plan") {
211-
val df = Seq(1).toDF("a").cache()
212-
val outputStream = new java.io.ByteArrayOutputStream()
213-
Console.withOut(outputStream) {
214-
df.explain(false)
215-
}
216-
assert(outputStream.toString.replaceAll("#\\d+", "#x").contains(
217-
"InMemoryRelation [a#x], StorageLevel(disk, memory, deserialized, 1 replicas)"
218-
))
219-
}
220209
}

sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,16 +1498,6 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
14981498
df.where($"city".contains(new java.lang.Character('A'))),
14991499
Seq(Row("Amsterdam")))
15001500
}
1501-
1502-
test("SPARK-23034 show rdd names in RDD scan nodes") {
1503-
val rddWithName = spark.sparkContext.parallelize(SingleData(1) :: Nil).setName("testRdd")
1504-
val df = spark.createDataFrame(rddWithName)
1505-
val output = new java.io.ByteArrayOutputStream()
1506-
Console.withOut(output) {
1507-
df.explain(extended = false)
1508-
}
1509-
assert(output.toString.contains("Scan testRdd"))
1510-
}
15111501
}
15121502

15131503
case class TestDataUnion(x: Int, y: Int, z: Int)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql
19+
20+
import org.apache.spark.sql.test.SharedSQLContext
21+
import org.apache.spark.sql.types.StructType
22+
23+
class ExplainSuite extends QueryTest with SharedSQLContext {
24+
import testImplicits._
25+
26+
/**
27+
* Runs the plan and makes sure the plans contains all of the keywords.
28+
*/
29+
private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = {
30+
val output = new java.io.ByteArrayOutputStream()
31+
Console.withOut(output) {
32+
df.explain(extended = false)
33+
}
34+
for (key <- keywords) {
35+
assert(output.toString.contains(key))
36+
}
37+
}
38+
39+
test("SPARK-23034 show rdd names in RDD scan nodes (Dataset)") {
40+
val rddWithName = spark.sparkContext.parallelize(Row(1, "abc") :: Nil).setName("testRdd")
41+
val df = spark.createDataFrame(rddWithName, StructType.fromDDL("c0 int, c1 string"))
42+
checkKeywordsExistsInExplain(df, keywords = "Scan ExistingRDD testRdd")
43+
}
44+
45+
test("SPARK-23034 show rdd names in RDD scan nodes (DataFrame)") {
46+
val rddWithName = spark.sparkContext.parallelize(ExplainSingleData(1) :: Nil).setName("testRdd")
47+
val df = spark.createDataFrame(rddWithName)
48+
checkKeywordsExistsInExplain(df, keywords = "Scan testRdd")
49+
}
50+
51+
test("SPARK-24850 InMemoryRelation string representation does not include cached plan") {
52+
val df = Seq(1).toDF("a").cache()
53+
checkKeywordsExistsInExplain(df,
54+
keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized, 1 replicas)")
55+
}
56+
}
57+
58+
case class ExplainSingleData(id: Int)

0 commit comments

Comments (0)