@@ -21,6 +21,8 @@ package org.apache.comet
 
 import java.nio.ByteOrder
 
+import scala.collection.mutable.ListBuffer
+
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.network.util.ByteUnit
@@ -100,9 +102,6 @@ class CometSparkSessionExtensions
         plan
       } else {
 
-        def isDynamicPruningFilter(e: Expression): Boolean =
-          e.exists(_.isInstanceOf[PlanExpression[_]])
-
         def hasMetadataCol(plan: SparkPlan): Boolean = {
           plan.expressions.exists(_.exists {
             case a: Attribute =>
@@ -116,11 +115,9 @@ class CometSparkSessionExtensions
           withInfo(scan, "Metadata column is not supported")
           scan
 
-        case scanExec: FileSourceScanExec
-            if COMET_DPP_FALLBACK_ENABLED.get() &&
-              scanExec.partitionFilters.exists(isDynamicPruningFilter) =>
-          withInfo(scanExec, "DPP not supported")
-          scanExec
+        // data source V1
+        case scanExec: FileSourceScanExec =>
+          transformV1Scan(scanExec)
 
         // data source V2
         case scanExec: BatchScanExec
@@ -188,69 +185,62 @@ class CometSparkSessionExtensions
             scanExec
         }
 
-        // data source V1
-        case scanExec @ FileSourceScanExec(
-              HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _),
-              _: Seq[_],
-              requiredSchema,
-              _,
-              _,
-              _,
-              _,
-              _,
-              _)
-            if CometScanExec.isFileFormatSupported(fileFormat)
-              && CometNativeScanExec.isSchemaSupported(requiredSchema)
-              && CometNativeScanExec.isSchemaSupported(partitionSchema)
-              // TODO we only enable full native scan if COMET_EXEC_ENABLED is enabled
-              // but this is not really what we want .. we currently insert `CometScanExec`
-              // here and then it gets replaced with `CometNativeScanExec` in `CometExecRule`
-              // but that only happens if `COMET_EXEC_ENABLED` is enabled
-              && COMET_EXEC_ENABLED.get()
-              && COMET_NATIVE_SCAN_IMPL.get() == CometConf.SCAN_NATIVE_DATAFUSION =>
-          logInfo("Comet extension enabled for v1 full native Scan")
-          CometScanExec(scanExec, session)
+      }
+    }
+  }
 
-        // data source V1
-        case scanExec @ FileSourceScanExec(
-              HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _),
-              _: Seq[_],
-              requiredSchema,
-              _,
-              _,
-              _,
-              _,
-              _,
-              _)
-            if CometScanExec.isFileFormatSupported(fileFormat)
-              && CometScanExec.isSchemaSupported(requiredSchema)
-              && CometScanExec.isSchemaSupported(partitionSchema) =>
-          logInfo("Comet extension enabled for v1 Scan")
-          CometScanExec(scanExec, session)
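+  // True when the expression tree contains a PlanExpression (an embedded
+  // subquery), which is how Spark represents DPP filters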
+  private def isDynamicPruningFilter(e: Expression): Boolean =
+    e.exists(_.isInstanceOf[PlanExpression[_]])
 
-        // data source v1 not supported case
-        case scanExec @ FileSourceScanExec(
-              HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _),
-              _: Seq[_],
-              requiredSchema,
-              _,
-              _,
-              _,
-              _,
-              _,
-              _) =>
-          val info1 = createMessage(
-            !CometScanExec.isFileFormatSupported(fileFormat),
-            s"File format $fileFormat is not supported")
-          val info2 = createMessage(
-            !CometScanExec.isSchemaSupported(requiredSchema),
-            s"Schema $requiredSchema is not supported")
-          val info3 = createMessage(
-            !CometScanExec.isSchemaSupported(partitionSchema),
-            s"Partition schema $partitionSchema is not supported")
-          withInfo(scanExec, Seq(info1, info2, info3).flatten.mkString(","))
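+  // Replaces a v1 FileSourceScanExec with a Comet scan when possible, otherwise
+  // records every fallback reason on the Spark plan via withInfo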
+  private def transformV1Scan(scanExec: FileSourceScanExec): SparkPlan = {
+
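+    // DPP filters are not supported, so fall back to Spark when
+    // COMET_DPP_FALLBACK_ENABLED is set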
+    if (COMET_DPP_FALLBACK_ENABLED.get() &&
+      scanExec.partitionFilters.exists(isDynamicPruningFilter)) {
+      withInfo(scanExec, "DPP not supported")
+      return scanExec
+    }
+
+    scanExec.relation match {
+      case r: HadoopFsRelation =>
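+        // Accumulate all fallback reasons so the explain output reports every
+        // unsupported feature at once rather than only the first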
+        val fallbackReasons = new ListBuffer[String]()
+        if (!CometScanExec.isFileFormatSupported(r.fileFormat)) {
+          fallbackReasons += s"Unsupported file format ${r.fileFormat}"
+        }
+
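+        // The fully native DataFusion scan only takes effect when native
+        // execution is enabled, since CometScanExec is only rewritten to
+        // CometNativeScanExec in CometExecRule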
+        val scanImpl = COMET_NATIVE_SCAN_IMPL.get()
+        if (scanImpl == CometConf.SCAN_NATIVE_DATAFUSION && !COMET_EXEC_ENABLED.get()) {
+          fallbackReasons +=
+            s"Full native scan disabled because ${COMET_EXEC_ENABLED.key} is disabled"
+        }
+
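+        // Each scan implementation supports a different set of types, so apply
+        // the schema check that matches the configured implementation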
+        val (schemaSupported, partitionSchemaSupported) = scanImpl match {
+          case CometConf.SCAN_NATIVE_DATAFUSION =>
+            (
+              CometNativeScanExec.isSchemaSupported(scanExec.requiredSchema),
+              CometNativeScanExec.isSchemaSupported(r.partitionSchema))
+          case CometConf.SCAN_NATIVE_COMET | SCAN_NATIVE_ICEBERG_COMPAT =>
+            (
+              CometScanExec.isSchemaSupported(scanExec.requiredSchema),
+              CometScanExec.isSchemaSupported(r.partitionSchema))
+        }
+
+        if (!schemaSupported) {
+          fallbackReasons += s"Unsupported schema ${scanExec.requiredSchema} for $scanImpl"
+        }
+        if (!partitionSchemaSupported) {
+          fallbackReasons += s"Unsupported partitioning schema ${r.partitionSchema} for $scanImpl"
+        }
+
+        if (fallbackReasons.isEmpty) {
+          CometScanExec(scanExec, session)
+        } else {
+          withInfo(scanExec, fallbackReasons.mkString(", "))
           scanExec
-      }
+        }
+
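+      // Defensive: FileSourceScanExec.relation is always a HadoopFsRelation in
+      // current Spark versions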
+      case _ =>
+        withInfo(scanExec, s"Unsupported relation ${scanExec.relation}")
+        scanExec
     }
   }
 }