Skip to content

Commit db2fa02

Browse files
committed
feat: allow native Iceberg scans with non-identity transform residuals
1 parent 3dcd9ad commit db2fa02

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -478,29 +478,33 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
478478
false
479479
}
480480

481-
// Check for unsupported transform functions in residual expressions
482-
// iceberg-rust can only handle identity transforms in residuals; all other transforms
483-
// (truncate, bucket, year, month, day, hour) must fall back to Spark
481+
// Check for transform functions in residual expressions
482+
// Non-identity transforms (truncate, bucket, year, month, day, hour) in residuals
483+
// are now supported - row-group filtering skips them and CometFilter applies them post-scan.
484+
// This is less efficient than row-group filtering but still allows native execution.
484485
val transformFunctionsSupported =
485486
try {
486487
IcebergReflection.findNonIdentityTransformInResiduals(metadata.tasks) match {
487488
case Some(transformType) =>
488-
// Found unsupported transform
489-
fallbackReasons +=
490-
s"Iceberg transform function '$transformType' in residual expression " +
491-
"is not yet supported by iceberg-rust. " +
492-
"Only identity transforms are supported."
493-
false
489+
// Found non-identity transform - log info and continue with native scan
490+
// Row-group filtering will skip these predicates, but post-scan filtering will apply
491+
logInfo(
492+
s"Iceberg residual contains transform '$transformType' - " +
493+
"row-group filtering will skip this predicate, " +
494+
"post-scan filtering by CometFilter will apply instead.")
495+
true // Allow native execution
494496
case None =>
495-
// No unsupported transforms found - safe to use native execution
497+
// No non-identity transforms - optimal row-group filtering will apply
496498
true
497499
}
498500
} catch {
499501
case e: Exception =>
500-
// Reflection failure - cannot verify safety, must fall back
501-
fallbackReasons += "Iceberg reflection failure: Could not check for " +
502-
s"transform functions in residuals: ${e.getMessage}"
503-
false
502+
// Reflection failure - log warning but allow native execution
503+
// The predicate conversion will handle unsupported cases gracefully
504+
logWarning(
505+
s"Could not check for transform functions in residuals: ${e.getMessage}. " +
506+
"Continuing with native scan.")
507+
true
504508
}
505509

506510
// Check for unsupported struct types in delete files

0 commit comments

Comments
 (0)