6 changes: 4 additions & 2 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -460,8 +460,10 @@ object CometConf extends ShimCometConf
   val COMET_EXPLAIN_VERBOSE_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explain.verbose.enabled")
       .doc(
-        "When this setting is enabled, Comet will provide a verbose tree representation of " +
-          "the extended information.")
+        "When this setting is enabled, Comet's extended explain output will provide the full " +
+          "query plan annotated with fallback reasons as well as a summary of how much of " +
+          "the plan was accelerated by Comet. When this setting is disabled, a list of fallback " +
+          "reasons will be provided instead.")
       .booleanConf
       .createWithDefault(false)

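For context, a minimal sketch of how this flag is exercised from a Spark session. The `spark.sql.extendedExplainProviders` wiring follows the Comet user guide, but the exact setup (and which explain mode surfaces the provider output) can vary by Spark and Comet version, so treat this as illustrative rather than authoritative:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: enable Comet's extended explain plus the verbose mode added in this
// PR. Assumes Comet is on the classpath and a Spark version that supports
// spark.sql.extendedExplainProviders.
val spark = SparkSession
  .builder()
  .master("local[1]")
  .config("spark.plugins", "org.apache.spark.CometPlugin")
  .config("spark.sql.extendedExplainProviders", "org.apache.comet.ExtendedExplainInfo")
  .config("spark.comet.explain.verbose.enabled", "true")
  .getOrCreate()

// The extended explain output should now include the annotated plan tree plus
// the "Comet accelerated N% of eligible operators" summary.
spark.range(10).selectExpr("sum(id)").explain("extended")
```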
2 changes: 1 addition & 1 deletion docs/source/user-guide/latest/configs.md
@@ -63,7 +63,7 @@ Comet provides the following configuration settings.
 | spark.comet.exec.union.enabled | Whether to enable union by default. | true |
 | spark.comet.exec.window.enabled | Whether to enable window by default. | true |
 | spark.comet.explain.native.enabled | When this setting is enabled, Comet will provide a tree representation of the native query plan before execution and again after execution, with metrics. | false |
-| spark.comet.explain.verbose.enabled | When this setting is enabled, Comet will provide a verbose tree representation of the extended information. | false |
+| spark.comet.explain.verbose.enabled | When this setting is enabled, Comet's extended explain output will provide the full query plan annotated with fallback reasons as well as a summary of how much of the plan was accelerated by Comet. When this setting is disabled, a list of fallback reasons will be provided instead. | false |
 | spark.comet.explainFallback.enabled | When this setting is enabled, Comet will provide logging explaining the reason(s) why a query stage cannot be executed natively. Set this to false to reduce the amount of logging. | false |
 | spark.comet.expression.allowIncompatible | Comet is not currently fully compatible with Spark for all expressions. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
 | spark.comet.logFallbackReasons.enabled | When this setting is enabled, Comet will log warnings for all fallback reasons. | false |
61 changes: 52 additions & 9 deletions spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala
@@ -23,8 +23,10 @@ import scala.collection.mutable
 
 import org.apache.spark.sql.ExtendedExplainGenerator
 import org.apache.spark.sql.catalyst.trees.{TreeNode, TreeNodeTag}
-import org.apache.spark.sql.execution.{InputAdapter, SparkPlan, WholeStageCodegenExec}
-import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec}
+import org.apache.spark.sql.comet.{CometColumnarToRowExec, CometPlan, CometSparkToColumnarExec}
+import org.apache.spark.sql.execution.{ColumnarToRowExec, InputAdapter, RowToColumnarExec, SparkPlan, WholeStageCodegenExec}
+import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AQEShuffleReadExec, QueryStageExec}
+import org.apache.spark.sql.execution.exchange.ReusedExchangeExec
 
 import org.apache.comet.CometExplainInfo.getActualPlan
 
@@ -81,9 +81,14 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator {
   // generates the extended info in a verbose manner, printing each node along with the
   // extended information in a tree display
   def generateVerboseExtendedInfo(plan: SparkPlan): String = {
+    val planStats = new PlanStats()
     val outString = new StringBuilder()
-    generateTreeString(getActualPlan(plan), 0, Seq(), 0, outString)
-    outString.toString()
+    generateTreeString(getActualPlan(plan), 0, Seq(), 0, outString, planStats)
+    val eligible = planStats.sparkOperators + planStats.cometOperators
+    val converted =
+      if (eligible == 0) 0.0 else planStats.cometOperators.toDouble / eligible * 100.0
+    val summary = s"Comet accelerated ${converted.toInt}% of eligible operators ($planStats)."
+    s"${outString.toString()}\n$summary"
   }
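As a quick worked example of the summary arithmetic above, using the counts from the DPP test failure quoted later in this PR (4 Comet operators, 5 Spark operators; wrappers and transitions are excluded from the eligible count):

```scala
// Worked example of the percentage computation in generateVerboseExtendedInfo.
val cometOperators = 4
val sparkOperators = 5
val eligible = sparkOperators + cometOperators // 9
val converted =
  if (eligible == 0) 0.0 else cometOperators.toDouble / eligible * 100.0
println(s"Comet accelerated ${converted.toInt}% of eligible operators")
// prints: Comet accelerated 44% of eligible operators
```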

// Simplified generateTreeString from Spark TreeNode. Appends explain info to the node if any
@@ -92,7 +99,22 @@
       depth: Int,
       lastChildren: Seq[Boolean],
       indent: Int,
-      outString: StringBuilder): Unit = {
+      outString: StringBuilder,
+      planStats: PlanStats): Unit = {
+
+    node match {
+      case _: AdaptiveSparkPlanExec | _: InputAdapter | _: QueryStageExec |
+          _: WholeStageCodegenExec | _: ReusedExchangeExec | _: AQEShuffleReadExec =>
+        planStats.wrappers += 1
+      case _: RowToColumnarExec | _: ColumnarToRowExec | _: CometColumnarToRowExec |
+          _: CometSparkToColumnarExec =>
+        planStats.transitions += 1
+      case _: CometPlan =>
+        planStats.cometOperators += 1
+      case _ =>
+        planStats.sparkOperators += 1
+    }
+
     outString.append(" " * indent)
     if (depth > 0) {
       lastChildren.init.foreach { isLast =>
@@ -119,15 +141,17 @@
             depth + 2,
             lastChildren :+ node.children.isEmpty :+ false,
             indent,
-            outString)
+            outString,
+            planStats)
         case _ =>
       }
       generateTreeString(
         getActualPlan(innerChildrenLocal.last),
         depth + 2,
         lastChildren :+ node.children.isEmpty :+ true,
         indent,
-        outString)
+        outString,
+        planStats)
     }
     if (node.children.nonEmpty) {
       node.children.init.foreach {
@@ -137,18 +161,37 @@
             depth + 1,
             lastChildren :+ false,
             indent,
-            outString)
+            outString,
+            planStats)
         case _ =>
       }
       node.children.last match {
         case c @ (_: TreeNode[_]) =>
-          generateTreeString(getActualPlan(c), depth + 1, lastChildren :+ true, indent, outString)
+          generateTreeString(
+            getActualPlan(c),
+            depth + 1,
+            lastChildren :+ true,
+            indent,
+            outString,
+            planStats)
         case _ =>
       }
     }
   }
 }
 
+class PlanStats {
+  var sparkOperators: Int = 0
+  var cometOperators: Int = 0
+  var wrappers: Int = 0
+  var transitions: Int = 0
+
+  override def toString: String = {
+    s"sparkOperators=$sparkOperators, cometOperators=$cometOperators, " +
Contributor comment: Perhaps we could use a more verbose string here so that the meaning of these stats is a little more obvious.
s"transitions=$transitions, wrappers=$wrappers"
}
}

object CometExplainInfo {
val EXTENSION_INFO = new TreeNodeTag[Set[String]]("CometExtensionInfo")

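Picking up the review comment above about a more verbose stats string: a hypothetical sketch of a wordier rendering of the same counters. The field names match `PlanStats` as defined in this commit; the phrasing borrows from the summary wording visible in the test failure quoted later in this PR, which suggests roughly where the author landed, but this exact code is illustrative only:

```scala
// Hypothetical sketch only: a more descriptive PlanStats summary, as the
// reviewer suggests. Example values are taken from the DPP test failure
// quoted below (4 Comet + 5 Spark eligible operators, 3 transitions).
val stats = new PlanStats()
stats.cometOperators = 4
stats.sparkOperators = 5
stats.transitions = 3

val eligible = stats.sparkOperators + stats.cometOperators
val verbose =
  s"Comet accelerated ${stats.cometOperators} out of $eligible eligible operators " +
    s"(${stats.cometOperators * 100 / eligible}%). Final plan contains " +
    s"${stats.transitions} transitions between Spark and Comet."
// "Comet accelerated 4 out of 9 eligible operators (44%). Final plan
//  contains 3 transitions between Spark and Comet."
```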
CometExecSuite.scala
@@ -118,6 +118,11 @@ class CometExecSuite extends CometTestBase {
       val (_, cometPlan) = checkSparkAnswer(df)
       val infos = new ExtendedExplainInfo().generateExtendedInfo(cometPlan)
       assert(infos.contains("Dynamic Partition Pruning is not supported"))
+
+      withSQLConf(CometConf.COMET_EXPLAIN_VERBOSE_ENABLED.key -> "true") {
+        val extendedExplain = new ExtendedExplainInfo().generateExtendedInfo(cometPlan)
+        assert(extendedExplain.contains("Comet accelerated 33% of eligible operators"))
Contributor comment: Would this number be fluctuating?

Member (author): It is currently stable across all Spark versions that we test with.

Member (author): I was wrong. There is a failure due to a different percentage. I will make the test less specific.

    - DPP fallback *** FAILED *** (1 second, 553 milliseconds)
      "BroadcastHashJoin
      :- ColumnarToRow
      :  +-  Scan parquet  [COMET: Dynamic Partition Pruning is not supported]
      :        +- SubqueryBroadcast
      :           +- BroadcastExchange
      :              +- CometColumnarToRow
      :                 +- CometFilter
      :                    +- CometScan [native_iceberg_compat] parquet
      +- BroadcastExchange
         +- CometColumnarToRow
            +- CometFilter
               +- CometScan [native_iceberg_compat] parquet

      Comet accelerated 4 out of 9 eligible operators (44%). Final plan contains 3 transitions between Spark and Comet." did not contain "Comet accelerated 33% of eligible operators" (CometExecSuite.scala:124)
+      }
     }
   }
 }
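Given the author's note above that the test will be made less specific, one way to do that is to assert on the shape of the summary line without pinning the percentage. This sketch is illustrative only and not necessarily the change that was merged; `extendedExplain` is the value computed in the test above, and the pattern matches the summary phrasing produced by this commit:

```scala
// Sketch: assert the summary line exists without hard-coding the percentage,
// which varies across Spark versions and scan implementations.
val summaryPattern = """Comet accelerated \d+% of eligible operators""".r
assert(summaryPattern.findFirstIn(extendedExplain).isDefined)
```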