@@ -42,12 +42,13 @@ import org.apache.spark.sql.types._
 
 import org.apache.comet.{CometConf, CometNativeException, DataTypeSupport}
 import org.apache.comet.CometConf._
-import org.apache.comet.CometSparkSessionExtensions.{isCometLoaded, withInfo, withInfos}
+import org.apache.comet.CometSparkSessionExtensions.{hasExplainInfo, isCometLoaded, withInfo, withInfos}
 import org.apache.comet.DataTypeSupport.isComplexType
 import org.apache.comet.iceberg.{CometIcebergNativeScanMetadata, IcebergReflection}
 import org.apache.comet.objectstore.NativeConfig
 import org.apache.comet.parquet.{CometParquetScan, Native, SupportsComet}
 import org.apache.comet.parquet.CometParquetUtils.{encryptionEnabled, isEncryptionConfigSupported}
+import org.apache.comet.serde.operator.CometNativeScan
 import org.apache.comet.shims.CometTypeShim
 
 /**
@@ -132,9 +133,6 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
     }
   }
 
-  private def isDynamicPruningFilter(e: Expression): Boolean =
-    e.exists(_.isInstanceOf[PlanExpression[_]])
-
   private def transformV1Scan(scanExec: FileSourceScanExec): SparkPlan = {
 
     if (COMET_DPP_FALLBACK_ENABLED.get() &&
@@ -144,10 +142,8 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
 
     scanExec.relation match {
       case r: HadoopFsRelation =>
-        val fallbackReasons = new ListBuffer[String]()
         if (!CometScanExec.isFileFormatSupported(r.fileFormat)) {
-          fallbackReasons += s"Unsupported file format ${r.fileFormat}"
-          return withInfos(scanExec, fallbackReasons.toSet)
+          return withInfo(scanExec, s"Unsupported file format ${r.fileFormat}")
         }
 
         var scanImpl = COMET_NATIVE_SCAN_IMPL.get()
@@ -160,42 +156,8 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
           scanImpl = selectScan(scanExec, r.partitionSchema, hadoopConf)
         }
 
-        // Native DataFusion doesn't support subqueries/dynamic pruning
-        if (scanImpl == SCAN_NATIVE_DATAFUSION &&
-          scanExec.partitionFilters.exists(isDynamicPruningFilter)) {
-          fallbackReasons += "Native DataFusion scan does not support subqueries/dynamic pruning"
-          return withInfos(scanExec, fallbackReasons.toSet)
-        }
-
-        if (scanImpl == SCAN_NATIVE_DATAFUSION && !COMET_EXEC_ENABLED.get()) {
-          fallbackReasons +=
-            s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} disabled"
-          return withInfos(scanExec, fallbackReasons.toSet)
-        }
-
-        if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION && (SQLConf.get.ignoreCorruptFiles ||
-          scanExec.relation.options
-            .get("ignorecorruptfiles") // Spark sets this to lowercase.
-            .contains("true"))) {
-          fallbackReasons +=
-            "Full native scan disabled because ignoreCorruptFiles enabled"
-          return withInfos(scanExec, fallbackReasons.toSet)
-        }
-
-        if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION && (SQLConf.get.ignoreMissingFiles ||
-          scanExec.relation.options
-            .get("ignoremissingfiles") // Spark sets this to lowercase.
-            .contains("true"))) {
-          fallbackReasons +=
-            "Full native scan disabled because ignoreMissingFiles enabled"
-          return withInfos(scanExec, fallbackReasons.toSet)
-        }
-
-        if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION && scanExec.bucketedScan) {
-          // https://github.com/apache/datafusion-comet/issues/1719
-          fallbackReasons +=
-            "Full native scan disabled because bucketed scan is not supported"
-          return withInfos(scanExec, fallbackReasons.toSet)
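+        // Presumably CometNativeScan.isSupported consolidates the checks removed
+        // above (dynamic pruning, exec enabled, ignoreCorruptFiles/ignoreMissingFiles,
+        // bucketed scans) and tags scanExec with the fallback reason itself.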
+        if (scanImpl == SCAN_NATIVE_DATAFUSION && !CometNativeScan.isSupported(scanExec)) {
+          return scanExec
         }
 
         val possibleDefaultValues = getExistenceDefaultValues(scanExec.requiredSchema)
@@ -206,36 +168,27 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
           // Spark already converted these to Java-native types, so we can't check SQL types.
           // ArrayBasedMapData, GenericInternalRow, GenericArrayData correspond to maps, structs,
           // and arrays respectively.
-          fallbackReasons +=
-            "Full native scan disabled because nested types for default values are not supported"
-          return withInfos(scanExec, fallbackReasons.toSet)
+          withInfo(
+            scanExec,
+            "Full native scan disabled because nested types for default values are not supported")
         }
 
         if (encryptionEnabled(hadoopConf) && scanImpl != CometConf.SCAN_NATIVE_COMET) {
           if (!isEncryptionConfigSupported(hadoopConf)) {
-            return withInfos(scanExec, fallbackReasons.toSet)
+            withInfo(scanExec, s"$scanImpl does not support encryption")
           }
         }
 
-        val typeChecker = CometScanTypeChecker(scanImpl)
-        val schemaSupported =
-          typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
-        val partitionSchemaSupported =
-          typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons)
-
-        if (!schemaSupported) {
-          fallbackReasons += s"Unsupported schema ${scanExec.requiredSchema} for $scanImpl"
-        }
-        if (!partitionSchemaSupported) {
-          fallbackReasons += s"Unsupported partitioning schema ${r.partitionSchema} for $scanImpl"
-        }
+        // check that schema is supported
+        checkSchema(scanExec, scanImpl, r)
 
-        if (schemaSupported && partitionSchemaSupported) {
+        if (hasExplainInfo(scanExec)) {
+          // could not accelerate, and plan is already tagged with fallback reasons
+          scanExec
+        } else {
           // this is confusing, but we always insert a CometScanExec here, which may be replaced
           // with a CometNativeExec when CometExecRule runs, depending on the scanImpl value.
           CometScanExec(scanExec, session, scanImpl)
-        } else {
-          withInfos(scanExec, fallbackReasons.toSet)
         }
 
       case _ =>
@@ -647,6 +600,24 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
     }
   }
 
+  private def isDynamicPruningFilter(e: Expression): Boolean =
+    e.exists(_.isInstanceOf[PlanExpression[_]])
+
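+  // Tags scanExec with fallback reasons when the required schema or the
+  // partition schema contains types the chosen scan implementation cannot handle.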
+  def checkSchema(scanExec: FileSourceScanExec, scanImpl: String, r: HadoopFsRelation): Unit = {
+    val fallbackReasons = new ListBuffer[String]()
+    val typeChecker = CometScanTypeChecker(scanImpl)
+    val schemaSupported =
+      typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
+    if (!schemaSupported) {
+      withInfo(scanExec, s"Unsupported schema ${scanExec.requiredSchema} for $scanImpl")
+    }
+    val partitionSchemaSupported =
+      typeChecker.isSchemaSupported(r.partitionSchema, fallbackReasons)
+    if (!partitionSchemaSupported) {
+      fallbackReasons += s"Unsupported partitioning schema ${r.partitionSchema} for $scanImpl"
+    }
+    withInfos(scanExec, fallbackReasons.toSet)
+  }
 }
 
 case class CometScanTypeChecker(scanImpl: String) extends DataTypeSupport with CometTypeShim {
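
The net effect of this diff is a move away from accumulating reasons in a local ListBuffer and returning early, toward tagging the plan node itself (withInfo/withInfos) and querying those tags at the end (hasExplainInfo). A minimal sketch of that tag-then-check pattern, assuming the helpers in CometSparkSessionExtensions are backed by a Spark TreeNodeTag; the tag plumbing below is illustrative, not Comet's actual implementation:

import org.apache.spark.sql.catalyst.trees.TreeNodeTag
import org.apache.spark.sql.execution.SparkPlan

object FallbackTags {
  // Hypothetical tag key; Comet's real helpers may store explain info differently.
  private val tag = TreeNodeTag[Set[String]]("comet.fallback.reasons")

  // Record a fallback reason on the plan node and return the node unchanged,
  // so checks can tag and fall through instead of returning early.
  def withInfo[T <: SparkPlan](plan: T, reason: String): T = {
    val existing = plan.getTagValue(tag).getOrElse(Set.empty[String])
    plan.setTagValue(tag, existing + reason)
    plan
  }

  // True if any earlier check recorded a fallback reason on this node.
  def hasExplainInfo(plan: SparkPlan): Boolean =
    plan.getTagValue(tag).exists(_.nonEmpty)
}

With helpers of this shape, each precondition tags the scan and falls through, and a single hasExplainInfo test at the end of transformV1Scan decides between returning the untouched scanExec (fallback, with reasons attached for EXPLAIN output) and wrapping it in CometScanExec.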