Skip to content

Commit bb58a97

Browse files
committed
[SPARK-25988][SQL] Keep names unchanged when deduplicating the column names in Analyzer
## What changes were proposed in this pull request? When the queries do not use the column names with the same case, users might hit various errors. Below is a typical test failure they can hit. ``` Expected only partition pruning predicates: ArrayBuffer(isnotnull(tdate#237), (cast(tdate#237 as string) >= 2017-08-15)); org.apache.spark.sql.AnalysisException: Expected only partition pruning predicates: ArrayBuffer(isnotnull(tdate#237), (cast(tdate#237 as string) >= 2017-08-15)); at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.prunePartitionsByFilter(ExternalCatalogUtils.scala:146) at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.listPartitionsByFilter(InMemoryCatalog.scala:560) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.listPartitionsByFilter(SessionCatalog.scala:925) ``` ## How was this patch tested? Added two test cases. Closes apache#22990 from gatorsmile/fix1283. Authored-by: gatorsmile <[email protected]> Signed-off-by: gatorsmile <[email protected]> (cherry picked from commit 657fd00) Signed-off-by: gatorsmile <[email protected]>
1 parent 8b18dc0 commit bb58a97

File tree

4 files changed

+60
-2
lines changed

4 files changed

+60
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,8 @@ class Analyzer(
824824
}
825825

826826
private def dedupAttr(attr: Attribute, attrMap: AttributeMap[Attribute]): Attribute = {
827-
attrMap.get(attr).getOrElse(attr).withQualifier(attr.qualifier)
827+
val exprId = attrMap.getOrElse(attr, attr).exprId
828+
attr.withExprId(exprId)
828829
}
829830

830831
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute with Un
112112
override def withQualifier(newQualifier: Seq[String]): UnresolvedAttribute = this
113113
override def withName(newName: String): UnresolvedAttribute = UnresolvedAttribute.quoted(newName)
114114
override def withMetadata(newMetadata: Metadata): Attribute = this
115+
override def withExprId(newExprId: ExprId): UnresolvedAttribute = this
115116

116117
override def toString: String = s"'$name"
117118

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn
115115
def withQualifier(newQualifier: Seq[String]): Attribute
116116
def withName(newName: String): Attribute
117117
def withMetadata(newMetadata: Metadata): Attribute
118+
def withExprId(newExprId: ExprId): Attribute
118119

119120
override def toAttribute: Attribute = this
120121
def newInstance(): Attribute
@@ -299,7 +300,7 @@ case class AttributeReference(
299300
}
300301
}
301302

302-
def withExprId(newExprId: ExprId): AttributeReference = {
303+
override def withExprId(newExprId: ExprId): AttributeReference = {
303304
if (exprId == newExprId) {
304305
this
305306
} else {
@@ -362,6 +363,8 @@ case class PrettyAttribute(
362363
throw new UnsupportedOperationException
363364
override def qualifier: Seq[String] = throw new UnsupportedOperationException
364365
override def exprId: ExprId = throw new UnsupportedOperationException
366+
override def withExprId(newExprId: ExprId): Attribute =
367+
throw new UnsupportedOperationException
365368
override def nullable: Boolean = true
366369
}
367370

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2856,6 +2856,59 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
28562856
checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), Row(BigDecimal("26.3934994510000")))
28572857
}
28582858
}
2859+
2860+
test("SPARK-25988: self join with aliases on partitioned tables #1") {
2861+
withTempView("tmpView1", "tmpView2") {
2862+
withTable("tab1", "tab2") {
2863+
sql(
2864+
"""
2865+
|CREATE TABLE `tab1` (`col1` INT, `TDATE` DATE)
2866+
|USING CSV
2867+
|PARTITIONED BY (TDATE)
2868+
""".stripMargin)
2869+
spark.table("tab1").where("TDATE >= '2017-08-15'").createOrReplaceTempView("tmpView1")
2870+
sql("CREATE TABLE `tab2` (`TDATE` DATE) USING parquet")
2871+
sql(
2872+
"""
2873+
|CREATE OR REPLACE TEMPORARY VIEW tmpView2 AS
2874+
|SELECT N.tdate, col1 AS aliasCol1
2875+
|FROM tmpView1 N
2876+
|JOIN tab2 Z
2877+
|ON N.tdate = Z.tdate
2878+
""".stripMargin)
2879+
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") {
2880+
sql("SELECT * FROM tmpView2 x JOIN tmpView2 y ON x.tdate = y.tdate").collect()
2881+
}
2882+
}
2883+
}
2884+
}
2885+
2886+
test("SPARK-25988: self join with aliases on partitioned tables #2") {
2887+
withTempView("tmp") {
2888+
withTable("tab1", "tab2") {
2889+
sql(
2890+
"""
2891+
|CREATE TABLE `tab1` (`EX` STRING, `TDATE` DATE)
2892+
|USING parquet
2893+
|PARTITIONED BY (tdate)
2894+
""".stripMargin)
2895+
sql("CREATE TABLE `tab2` (`TDATE` DATE) USING parquet")
2896+
sql(
2897+
"""
2898+
|CREATE OR REPLACE TEMPORARY VIEW TMP as
2899+
|SELECT N.tdate, EX AS new_ex
2900+
|FROM tab1 N
2901+
|JOIN tab2 Z
2902+
|ON N.tdate = Z.tdate
2903+
""".stripMargin)
2904+
sql(
2905+
"""
2906+
|SELECT * FROM TMP x JOIN TMP y
2907+
|ON x.tdate = y.tdate
2908+
""".stripMargin).queryExecution.executedPlan
2909+
}
2910+
}
2911+
}
28592912
}
28602913

28612914
case class Foo(bar: Option[String])

0 commit comments

Comments
 (0)