[SPARK-24870][SQL] Cache can't work normally if there are case letters in SQL

eatoncys · gatorsmile · commit 13a67b070d33 · 2018-07-23T23:05:08.000-07:00
## What changes were proposed in this pull request? Modified canonicalization so that the canonicalized result is case-insensitive. Before this PR, the cache did not work correctly if the SQL contained mixed-case letters (e.g. `Key` vs. `key`), for example: sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) USING hive") sql("select key, sum(case when Key > 0 then 1 else 0 end) as positiveNum " + "from src group by key").cache().createOrReplaceTempView("src_cache") sql( s"""select a.key from (select key from src_cache where positiveNum = 1)a left join (select key from src_cache )b on a.key=b.key """).explain The physical plan of this SQL is: ![image](https://user-images.githubusercontent.com/26834091/42979518-3decf0fa-8c05-11e8-9837-d5e4c334cb1f.png) The subquery "select key from src_cache where positiveNum = 1" on the left side of the join can use the cached data, but the subquery "select key from src_cache" on the right side of the join cannot use the cached data. ## How was this patch tested? A newly added test. Author: 10129659 <chen.yanshan@zte.com.cn> Closes apache#21823 from eatoncys/canonicalized.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -284,7 +284,7 @@ object QueryPlan extends PredicateHelper {
         if (ordinal == -1) {
           ar
         } else {
-          ar.withExprId(ExprId(ordinal))
+          ar.withExprId(ExprId(ordinal)).canonicalized
         }
     }.canonicalized.asInstanceOf[T]
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -18,8 +18,11 @@
 package org.apache.spark.sql.execution
 
 import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.IntegerType
 
 /**
  * Tests for the sameResult function for [[SparkPlan]]s.
@@ -58,4 +61,16 @@ class SameResultSuite extends QueryTest with SharedSQLContext {
     val df4 = spark.range(10).agg(sumDistinct($"id"))
     assert(df3.queryExecution.executedPlan.sameResult(df4.queryExecution.executedPlan))
   }
+
+  test("Canonicalized result is case-insensitive") {
+    val a = AttributeReference("A", IntegerType)()
+    val b = AttributeReference("B", IntegerType)()
+    val planUppercase = Project(Seq(a), LocalRelation(a, b))
+
+    val c = AttributeReference("a", IntegerType)()
+    val d = AttributeReference("b", IntegerType)()
+    val planLowercase = Project(Seq(c), LocalRelation(c, d))
+
+    assert(planUppercase.sameResult(planLowercase))
+  }
 }

Original file line number	Diff line number	Diff line change
`@@ -284,7 +284,7 @@ object QueryPlan extends PredicateHelper {`
`284`	`284`	`if (ordinal == -1) {`
`285`	`285`	`ar`
`286`	`286`	`} else {`
`287`		`- ar.withExprId(ExprId(ordinal))`
	`287`	`+ ar.withExprId(ExprId(ordinal)).canonicalized`
`288`	`288`	`}`
`289`	`289`	`}.canonicalized.asInstanceOf[T]`
`290`	`290`	`}`