Skip to content

Commit bab70d2

Browse files
fix: fall back on nested types for default values (#1799)
* Fall back on nested types for default values.
* Update compatibility guide.
* Address PR feedback.

Co-authored-by: Andy Grove <[email protected]>

* Fix compilation after applying suggestion.
* Refactor check for nested types in default values.
* Add comment.

---------

Co-authored-by: Andy Grove <[email protected]>
1 parent e0d85ae commit bab70d2

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

docs/source/user-guide/compatibility.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ types (regardless of the logical type). This behavior can be disabled by setting
6565
- There is a known performance issue when pushing filters down to Parquet. See the [Comet Tuning Guide] for more
6666
information.
6767
- There are failures in the Spark SQL test suite when enabling these new scans (tracking issues: [#1542] and [#1545]).
68+
- No support for default values that are nested types (e.g., maps, arrays, structs). Literal default values are supported.
6869

6970
[#1545]: https://github.com/apache/datafusion-comet/issues/1545
7071
[#1542]: https://github.com/apache/datafusion-comet/issues/1542

docs/templates/compatibility-template.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ The new scans currently have the following limitations:
6565
- There is a known performance issue when pushing filters down to Parquet. See the [Comet Tuning Guide] for more
6666
information.
6767
- There are failures in the Spark SQL test suite when enabling these new scans (tracking issues: [#1542] and [#1545]).
68+
- No support for default values that are nested types (e.g., maps, arrays, structs). Literal default values are supported.
6869

6970
[#1545]: https://github.com/apache/datafusion-comet/issues/1545
7071
[#1542]: https://github.com/apache/datafusion-comet/issues/1542

spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,17 @@ package org.apache.comet.rules
2222
import scala.collection.mutable.ListBuffer
2323

2424
import org.apache.spark.sql.SparkSession
25-
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, PlanExpression}
25+
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, PlanExpression}
2626
import org.apache.spark.sql.catalyst.rules.Rule
27-
import org.apache.spark.sql.catalyst.util.MetadataColumnHelper
27+
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MetadataColumnHelper}
28+
import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.getExistenceDefaultValues
2829
import org.apache.spark.sql.comet.{CometBatchScanExec, CometScanExec}
2930
import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
3031
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
3132
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
3233
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
3334
import org.apache.spark.sql.internal.SQLConf
34-
import org.apache.spark.sql.types.{ArrayType, ByteType, DataType, MapType, ShortType, StructType}
35+
import org.apache.spark.sql.types._
3536

3637
import org.apache.comet.{CometConf, DataTypeSupport}
3738
import org.apache.comet.CometConf._
@@ -118,7 +119,20 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] {
118119
return withInfos(scanExec, fallbackReasons.toSet)
119120
}
120121

121-
val typeChecker = new CometScanTypeChecker(scanImpl)
122+
val possibleDefaultValues = getExistenceDefaultValues(scanExec.requiredSchema)
123+
if (possibleDefaultValues.exists(d => {
124+
d != null && (d.isInstanceOf[ArrayBasedMapData] || d
125+
.isInstanceOf[GenericInternalRow] || d.isInstanceOf[GenericArrayData])
126+
})) {
127+
// Spark already converted these to Java-native types, so we can't check SQL types.
128+
// ArrayBasedMapData, GenericInternalRow, GenericArrayData correspond to maps, structs,
129+
// and arrays respectively.
130+
fallbackReasons +=
131+
"Full native scan disabled because nested types for default values are not supported"
132+
return withInfos(scanExec, fallbackReasons.toSet)
133+
}
134+
135+
val typeChecker = CometScanTypeChecker(scanImpl)
122136
val schemaSupported =
123137
typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
124138
val partitionSchemaSupported =

0 commit comments

Comments (0)