[SPARK-49866][SQL] Improve the error message for describe table with partition columns

mihailoale-db · MaxGekk · commit 37f2966b6423 · 2024-10-05T11:23:22.000+02:00
### What changes were proposed in this pull request?
Provide a more user-facing error when a partition column name can't be found in the table schema.

### Why are the changes needed?
There's an issue where a partition column sometimes doesn't match any column in the table schema. When that happens, we throw an assertion error, which is not user-friendly. Because of that, we introduced a new error, `PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA`, in `QueryExecutionErrors` to make the failure more user-facing.

### Does this PR introduce _any_ user-facing change?
Yes, users will get a more user-friendly error message.

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #48338 from mihailoale-db/mihailoale-db/fixdescribepartitioningmessage.

Authored-by: Mihailo Aleksic <mihailo.aleksic@databricks.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -3802,6 +3802,12 @@
     ],
     "sqlState" : "428FT"
   },
+  "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : {
+    "message" : [
+      "Partition column <column> not found in schema <schema>. Please provide the existing column for partitioning."
+    ],
+    "sqlState" : "42000"
+  },
   "PATH_ALREADY_EXISTS" : {
     "message" : [
       "Path <outputPath> already exists. Set mode as \"overwrite\" to overwrite the existing path."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       )
     )
   }
+
+  def partitionColumnNotFoundInTheTableSchemaError(
+      column: Seq[String],
+      schema: StructType): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",
+      messageParameters = Map(
+        "column" -> toSQLId(column),
+        "schema" -> toSQLType(schema)
+      )
+    )
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.{ClusterByTransform, IdentityTransform}
 import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
 
@@ -156,9 +157,12 @@ case class DescribeTableExec(
           .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
           .map { fieldNames =>
             val nestedField = table.schema.findNestedField(fieldNames.toImmutableArraySeq)
-            assert(nestedField.isDefined,
-              s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
-              s"in the table schema ${table.schema().catalogString}.")
+            if (nestedField.isEmpty) {
+              throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+                fieldNames.toSeq,
+                table.schema()
+              )
+            }
             nestedField.get
           }.map { case (path, field) =>
             toCatalystRow(

Original file line number	Diff line number	Diff line change
`@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE`
`2856`	`2856`	`)`
`2857`	`2857`	`)`
`2858`	`2858`	`}`
	`2859`	`+`
	`2860`	`+ def partitionColumnNotFoundInTheTableSchemaError(`
	`2861`	`+ column: Seq[String],`
	`2862`	`+ schema: StructType): SparkRuntimeException = {`
	`2863`	`+ new SparkRuntimeException(`
	`2864`	`+ errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",`
	`2865`	`+ messageParameters = Map(`
	`2866`	`+ "column" -> toSQLId(column),`
	`2867`	`+ "schema" -> toSQLType(schema)`
	`2868`	`+ )`
	`2869`	`+ )`
	`2870`	`+ }`
`2859`	`2871`	`}`