
Commit b8b80f6

cloud-fan authored and gatorsmile committed
[SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving
## What changes were proposed in this pull request?

This is a regression in Spark 2.2. In Spark 2.2, we introduced a new way to resolve persisted views (https://issues.apache.org/jira/browse/SPARK-18209), but this makes persisted views non-case-preserving, because we store the schema in the Hive metastore directly. We should follow data source tables and store the schema in table properties.

## How was this patch tested?

New regression test.

Author: Wenchen Fan <[email protected]>

Closes apache#18360 from cloud-fan/view.

(cherry picked from commit e862dc9)
Signed-off-by: gatorsmile <[email protected]>
1 parent 514a7e6 commit b8b80f6
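To make the regression concrete, here is a minimal sketch of the behavior this commit fixes (it assumes a Hive-enabled `SparkSession` bound to `spark`, as in `spark-shell`):

```scala
// Minimal sketch of the regression (assumes a Hive-enabled SparkSession
// named `spark`, e.g. in spark-shell). Before this fix, the view schema was
// stored directly in the Hive metastore, which lowercases column names.
spark.sql("CREATE VIEW v AS SELECT 1 AS aBc")

// Before this fix: prints "abc" -- the mixed-case name was lost.
// After this fix:  prints "aBc" -- the schema is restored from table properties.
println(spark.table("v").schema.head.name)
```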

File tree

3 files changed: +56, -42 lines


sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala

Lines changed: 3 additions & 1 deletion
```diff
@@ -159,7 +159,9 @@ case class CreateViewCommand(
       checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
 
       // Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...`
-      catalog.alterTable(prepareTable(sparkSession, analyzedPlan))
+      // Nothing we need to retain from the old view, so just drop and create a new one
+      catalog.dropTable(viewIdent, ignoreIfNotExists = false, purge = false)
+      catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = false)
     } else {
       // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already
       // exists.
```
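The design point here is that `CREATE OR REPLACE VIEW` switches from altering the existing catalog entry in place to dropping it and creating a fresh one, so nothing from the old definition (including a schema the metastore already lowercased) survives. At the SQL level this behaves roughly like the sketch below, though the real path stays inside `SessionCatalog` within a single command:

```scala
// Rough SQL-level equivalent of the new replace path (a sketch only; the
// command itself calls catalog.dropTable and catalog.createTable back to back).
spark.sql("DROP VIEW IF EXISTS v")             // discard the old definition entirely
spark.sql("CREATE VIEW v AS SELECT 2 AS cBa")  // recreate with freshly prepared metadata
```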

sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala

Lines changed: 10 additions & 0 deletions
```diff
@@ -669,4 +669,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
         "positive."))
     }
   }
+
+  test("permanent view should be case-preserving") {
+    withView("v") {
+      sql("CREATE VIEW v AS SELECT 1 as aBc")
+      assert(spark.table("v").schema.head.name == "aBc")
+
+      sql("CREATE OR REPLACE VIEW v AS SELECT 2 as cBa")
+      assert(spark.table("v").schema.head.name == "cBa")
+    }
+  }
 }
```
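For context, `withView` is a test helper from `org.apache.spark.sql.test.SQLTestUtils` that drops the named views once the test body finishes, even if it fails. A simplified sketch of its shape (the real helper handles multiple names and lives in the shared test utilities):

```scala
// Simplified sketch of the SQLTestUtils helper used in the test above;
// `spark` is the SparkSession provided by the enclosing test suite.
protected def withView(viewNames: String*)(f: => Unit): Unit = {
  try f finally {
    viewNames.foreach { name =>
      spark.sql(s"DROP VIEW IF EXISTS $name")
    }
  }
}
```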

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala

Lines changed: 43 additions & 41 deletions
```diff
@@ -224,39 +224,36 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
       throw new TableAlreadyExistsException(db = db, table = table)
     }
 
-    if (tableDefinition.tableType == VIEW) {
-      client.createTable(tableDefinition, ignoreIfExists)
+    // Ideally we should not create a managed table with location, but Hive serde table can
+    // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
+    // to create the table directory and write out data before we create this table, to avoid
+    // exposing a partial written table.
+    val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
+      tableDefinition.storage.locationUri.isEmpty
+
+    val tableLocation = if (needDefaultTableLocation) {
+      Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
     } else {
-      // Ideally we should not create a managed table with location, but Hive serde table can
-      // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
-      // to create the table directory and write out data before we create this table, to avoid
-      // exposing a partial written table.
-      val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
-        tableDefinition.storage.locationUri.isEmpty
-
-      val tableLocation = if (needDefaultTableLocation) {
-        Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
-      } else {
-        tableDefinition.storage.locationUri
-      }
+      tableDefinition.storage.locationUri
+    }
 
-      if (DDLUtils.isHiveTable(tableDefinition)) {
-        val tableWithDataSourceProps = tableDefinition.copy(
-          // We can't leave `locationUri` empty and count on Hive metastore to set a default table
-          // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
-          // table location for tables in default database, while we expect to use the location of
-          // default database.
-          storage = tableDefinition.storage.copy(locationUri = tableLocation),
-          // Here we follow data source tables and put table metadata like table schema, partition
-          // columns etc. in table properties, so that we can work around the Hive metastore issue
-          // about not case preserving and make Hive serde table support mixed-case column names.
-          properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
-        client.createTable(tableWithDataSourceProps, ignoreIfExists)
-      } else {
-        createDataSourceTable(
-          tableDefinition.withNewStorage(locationUri = tableLocation),
-          ignoreIfExists)
-      }
+    if (DDLUtils.isDatasourceTable(tableDefinition)) {
+      createDataSourceTable(
+        tableDefinition.withNewStorage(locationUri = tableLocation),
+        ignoreIfExists)
+    } else {
+      val tableWithDataSourceProps = tableDefinition.copy(
+        // We can't leave `locationUri` empty and count on Hive metastore to set a default table
+        // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
+        // table location for tables in default database, while we expect to use the location of
+        // default database.
+        storage = tableDefinition.storage.copy(locationUri = tableLocation),
+        // Here we follow data source tables and put table metadata like table schema, partition
+        // columns etc. in table properties, so that we can work around the Hive metastore issue
+        // about not case preserving and make Hive serde table and view support mixed-case column
+        // names.
+        properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
+      client.createTable(tableWithDataSourceProps, ignoreIfExists)
     }
   }
 
@@ -669,16 +666,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
 
     var table = inputTable
 
-    if (table.tableType != VIEW) {
-      table.properties.get(DATASOURCE_PROVIDER) match {
-        // No provider in table properties, which means this is a Hive serde table.
-        case None =>
-          table = restoreHiveSerdeTable(table)
-
-        // This is a regular data source table.
-        case Some(provider) =>
-          table = restoreDataSourceTable(table, provider)
-      }
+    table.properties.get(DATASOURCE_PROVIDER) match {
+      case None if table.tableType == VIEW =>
+        // If this is a view created by Spark 2.2 or higher versions, we should restore its schema
+        // from table properties.
+        if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) {
+          table = table.copy(schema = getSchemaFromTableProperties(table))
+        }
+
+      // No provider in table properties, which means this is a Hive serde table.
+      case None =>
+        table = restoreHiveSerdeTable(table)
+
+      // This is a regular data source table.
+      case Some(provider) =>
+        table = restoreDataSourceTable(table, provider)
     }
 
     // construct Spark's statistics from information in Hive metastore
```
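The `tableMetaToTableProps`/`getSchemaFromTableProperties` round trip that views now share with data source tables works by serializing the case-sensitive schema to JSON and splitting it across numbered table properties, since Hive caps the size of a single property value. Below is a hedged sketch of the idea, not the literal `HiveExternalCatalog` code; the property keys follow Spark's `spark.sql.sources.schema.*` convention, and the 4000-character chunk size is an assumption for illustration:

```scala
import org.apache.spark.sql.types.{DataType, StructType}

// Sketch of the schema <-> table-properties round trip. The real logic lives
// in HiveExternalCatalog; the 4000-char chunk size here is an assumption.
def schemaToProps(schema: StructType, chunkSize: Int = 4000): Map[String, String] = {
  // Serialize the case-sensitive schema to JSON and split it into chunks,
  // since Hive limits the length of a single table property value.
  val parts = schema.json.grouped(chunkSize).toSeq
  Map("spark.sql.sources.schema.numParts" -> parts.size.toString) ++
    parts.zipWithIndex.map { case (part, i) =>
      s"spark.sql.sources.schema.part.$i" -> part
    }
}

def schemaFromProps(props: Map[String, String]): StructType = {
  // Reassemble the JSON from the numbered parts and parse it back,
  // preserving the original column-name casing.
  val numParts = props("spark.sql.sources.schema.numParts").toInt
  val json = (0 until numParts).map(i => props(s"spark.sql.sources.schema.part.$i")).mkString
  DataType.fromJson(json).asInstanceOf[StructType]
}
```

Restoring the schema from these properties (rather than from the metastore's own column metadata) is what lets `restoreTableMetadata` hand back `aBc` instead of `abc` for views created by Spark 2.2 or later.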
