Commit ca6f113

chore: Improve reporting of fallback reasons for CollectLimit (#1694)
1 parent 542f45b commit ca6f113

3 files changed, +64 -16 lines

3 files changed

+64
-16
lines changed

docs/source/user-guide/iceberg.md

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ This should produce the following output:
 scala> spark.sql(s"SELECT * from t1").show()
 25/04/28 07:29:37 INFO core/src/lib.rs: Comet native library version 0.9.0 initialized
 25/04/28 07:29:37 WARN CometSparkSessionExtensions$CometExecRule: Comet cannot execute some parts of this plan natively (set spark.comet.explainFallback.enabled=false to disable this logging):
-CollectLimit [COMET: CollectLimit is not supported]
+CollectLimit
 +- Project [COMET: toprettystring is not supported]
    +- CometScanWrapper
 

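The documentation change above shows the effect of this commit: the generic "[COMET: CollectLimit is not supported]" tag on the CollectLimit node is dropped, and specific reasons are attached only when one of the new checks actually fails. As the warning itself notes, this logging is controlled by spark.comet.explainFallback.enabled. A minimal sketch of toggling it; the session setup is assumed, and only the config key comes from the log line above:

// Minimal sketch, assuming a local Spark session with Comet on the classpath.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .getOrCreate()

// When enabled, CometExecRule logs why parts of a plan fall back to Spark;
// set it to "false" to silence the warning shown in the transcript above.
spark.conf.set("spark.comet.explainFallback.enabled", "true")
spark.sql("SELECT * FROM t1").show()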
spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala

Lines changed: 32 additions & 14 deletions
@@ -19,14 +19,16 @@
1919

2020
package org.apache.comet.rules
2121

22+
import scala.collection.mutable.ListBuffer
23+
2224
import org.apache.spark.sql.SparkSession
2325
import org.apache.spark.sql.catalyst.expressions.{Divide, DoubleLiteral, EqualNullSafe, EqualTo, Expression, FloatLiteral, GreaterThan, GreaterThanOrEqual, KnownFloatingPointNormalized, LessThan, LessThanOrEqual, NamedExpression, Remainder}
2426
import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, Partial}
2527
import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero
2628
import org.apache.spark.sql.catalyst.rules.Rule
27-
import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometBroadcastHashJoinExec, CometCoalesceExec, CometCollectLimitExec, CometExec, CometExpandExec, CometFilterExec, CometGlobalLimitExec, CometHashAggregateExec, CometHashJoinExec, CometLocalLimitExec, CometNativeExec, CometNativeScanExec, CometPlan, CometProjectExec, CometScanExec, CometScanWrapper, CometSinkPlaceHolder, CometSortExec, CometSortMergeJoinExec, CometSparkToColumnarExec, CometTakeOrderedAndProjectExec, CometUnionExec, CometWindowExec, SerializedPlan}
29+
import org.apache.spark.sql.comet._
2830
import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometNativeShuffle, CometShuffleExchangeExec}
29-
import org.apache.spark.sql.execution.{CoalesceExec, CollectLimitExec, ExpandExec, FilterExec, GlobalLimitExec, LocalLimitExec, ProjectExec, SortExec, SparkPlan, TakeOrderedAndProjectExec, UnionExec}
31+
import org.apache.spark.sql.execution._
3032
import org.apache.spark.sql.execution.adaptive.{AQEShuffleReadExec, BroadcastQueryStageExec, ShuffleQueryStageExec}
3133
import org.apache.spark.sql.execution.aggregate.{BaseAggregateExec, HashAggregateExec, ObjectHashAggregateExec}
3234
import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec}
@@ -36,7 +38,7 @@ import org.apache.spark.sql.types.{DoubleType, FloatType}
3638

3739
import org.apache.comet.{CometConf, ExtendedExplainInfo}
3840
import org.apache.comet.CometConf.COMET_ANSI_MODE_ENABLED
39-
import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometLoaded, isCometNativeShuffleMode, isCometScan, isCometShuffleEnabled, isSpark40Plus, shouldApplySparkToColumnar, withInfo}
41+
import org.apache.comet.CometSparkSessionExtensions._
4042
import org.apache.comet.serde.OperatorOuterClass.Operator
4143
import org.apache.comet.serde.QueryPlanSerde
4244

@@ -201,18 +203,34 @@ case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {
201203
op,
202204
CometGlobalLimitExec(_, op, op.limit, op.child, SerializedPlan(None)))
203205

204-
case op: CollectLimitExec
205-
if isCometNative(op.child) && CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.get(conf)
206-
&& isCometShuffleEnabled(conf)
207-
&& op.offset == 0 =>
208-
QueryPlanSerde
209-
.operator2Proto(op)
210-
.map { nativeOp =>
211-
val cometOp =
212-
CometCollectLimitExec(op, op.limit, op.offset, op.child)
213-
CometSinkPlaceHolder(nativeOp, op, cometOp)
206+
case op: CollectLimitExec =>
207+
val fallbackReasons = new ListBuffer[String]()
208+
if (!CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.get(conf)) {
209+
fallbackReasons += s"${CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.key} is false"
210+
}
211+
if (!isCometShuffleEnabled(conf)) {
212+
fallbackReasons += "Comet shuffle is not enabled"
213+
}
214+
if (op.offset != 0) {
215+
fallbackReasons += "CollectLimit with non-zero offset is not supported"
216+
}
217+
if (fallbackReasons.nonEmpty) {
218+
withInfos(op, fallbackReasons.toSet)
219+
} else {
220+
if (!isCometNative(op.child)) {
221+
// no reason to report reason if child is not native
222+
op
223+
} else {
224+
QueryPlanSerde
225+
.operator2Proto(op)
226+
.map { nativeOp =>
227+
val cometOp =
228+
CometCollectLimitExec(op, op.limit, op.offset, op.child)
229+
CometSinkPlaceHolder(nativeOp, op, cometOp)
230+
}
231+
.getOrElse(op)
214232
}
215-
.getOrElse(op)
233+
}
216234

217235
case op: ExpandExec =>
218236
newPlanWithProto(

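The structural change above is worth spelling out: the old code used a single pattern guard, so any unsupported CollectLimit fell through silently with only the generic message, while the new code accumulates every applicable fallback reason and attaches them all via withInfos. A minimal, self-contained sketch of that pattern follows; Plan and withInfos here are hypothetical stand-ins for Comet's SparkPlan and tagging helper, and the reason strings are the ones introduced above:

import scala.collection.mutable.ListBuffer

// Hypothetical stand-in for a plan node that can carry fallback info.
case class Plan(name: String, offset: Int = 0, info: Set[String] = Set.empty)

// Stand-in for Comet's withInfos: attach fallback reasons to a node.
def withInfos(plan: Plan, reasons: Set[String]): Plan =
  plan.copy(info = plan.info ++ reasons)

// Collect every applicable reason instead of failing a single guard, so
// the explain output can report all of them at once.
def tagCollectLimit(
    op: Plan,
    collectLimitEnabled: Boolean,
    shuffleEnabled: Boolean): Plan = {
  val fallbackReasons = new ListBuffer[String]()
  if (!collectLimitEnabled) {
    fallbackReasons += "spark.comet.exec.collectLimit.enabled is false"
  }
  if (!shuffleEnabled) {
    fallbackReasons += "Comet shuffle is not enabled"
  }
  if (op.offset != 0) {
    fallbackReasons += "CollectLimit with non-zero offset is not supported"
  }
  if (fallbackReasons.nonEmpty) withInfos(op, fallbackReasons.toSet)
  else op // the real rule builds the native CometCollectLimitExec here
}

// Both reasons are reported together, not just the first one encountered:
println(
  tagCollectLimit(
    Plan("CollectLimit", offset = 3),
    collectLimitEnabled = true,
    shuffleEnabled = false).info)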
spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 31 additions & 1 deletion
@@ -1875,7 +1875,37 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
             + "where A.c1 = B.c1 ",
           Set(
             "Comet shuffle is not enabled: spark.comet.exec.shuffle.enabled is not enabled",
-            "make_interval is not supported")))
+            "make_interval is not supported")),
+        (
+          s"select * from $table LIMIT 10 OFFSET 3",
+          Set(
+            "Comet shuffle is not enabled",
+            "CollectLimit with non-zero offset is not supported")))
+        .foreach(test => {
+          val qry = test._1
+          val expected = test._2
+          val df = sql(qry)
+          df.collect() // force an execution
+          checkSparkAnswerAndCompareExplainPlan(df, expected)
+        })
+      }
+    }
+  }
+
+  test("explain: CollectLimit disabled") {
+    withSQLConf(
+      CometConf.COMET_ENABLED.key -> "true",
+      CometConf.COMET_EXEC_ENABLED.key -> "true",
+      CometConf.COMET_EXEC_COLLECT_LIMIT_ENABLED.key -> "false",
+      EXTENDED_EXPLAIN_PROVIDERS_KEY -> "org.apache.comet.ExtendedExplainInfo") {
+      val table = "test"
+      withTable(table) {
+        sql(s"create table $table(c0 int, c1 int , c2 float) using parquet")
+        sql(s"insert into $table values(0, 1, 100.000001)")
+        Seq(
+          (
+            s"select * from $table LIMIT 10",
+            Set("spark.comet.exec.collectLimit.enabled is false")))
         .foreach(test => {
           val qry = test._1
           val expected = test._2

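A note on the test shape: both cases are table-driven, pairing each query with the set of fallback reasons its extended explain output must contain, so adding a new fallback scenario is a one-tuple change. A small self-contained sketch of the same shape, using tuple destructuring instead of test._1/test._2; runQuery and compareExplain are hypothetical stand-ins for the suite's sql(...)/collect() and checkSparkAnswerAndCompareExplainPlan helpers:

// Hypothetical stand-ins so the sketch runs on its own.
def runQuery(q: String): String = q // the suite runs sql(q) and collects it
def compareExplain(handle: String, expected: Set[String]): Unit =
  println(s"explain for '$handle' must mention: ${expected.mkString("; ")}")

val cases: Seq[(String, Set[String])] = Seq(
  ("select * from test LIMIT 10",
    Set("spark.comet.exec.collectLimit.enabled is false")),
  ("select * from test LIMIT 10 OFFSET 3",
    Set(
      "Comet shuffle is not enabled",
      "CollectLimit with non-zero offset is not supported")))

// Destructure each (query, expected reasons) pair directly.
cases.foreach { case (qry, expectedReasons) =>
  val handle = runQuery(qry) // force an execution so the final plan exists
  compareExplain(handle, expectedReasons)
}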