Skip to content

Commit f8eb57a

Browse files
authored
docs: Split configuration guide into different sections (scan, exec, shuffle, etc) (#2568)
1 parent 5ea5863 commit f8eb57a

File tree

7 files changed

+436
-128
lines changed

7 files changed

+436
-128
lines changed

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 109 additions & 41 deletions
Large diffs are not rendered by default.

docs/source/user-guide/latest/configs.md

Lines changed: 251 additions & 37 deletions
Large diffs are not rendered by default.

docs/source/user-guide/latest/expressions.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ Comet supports the following Spark expressions. Expressions that are marked as S
2323
natively in Comet and provide the same results as Spark, or will fall back to Spark for cases that would not
2424
be compatible.
2525

26-
All expressions are enabled by default, but can be disabled by setting
26+
All expressions are enabled by default, but most can be disabled by setting
2727
`spark.comet.expression.EXPRNAME.enabled=false`, where `EXPRNAME` is the expression name as specified in
28-
the following tables, such as `Length`, or `StartsWith`.
28+
the following tables, such as `Length`, or `StartsWith`. See the [Comet Configuration Guide] for a full list
29+
of expressions that can be disabled.
2930

3031
Expressions that are not Spark-compatible will fall back to Spark by default and can be enabled by setting
3132
`spark.comet.expression.EXPRNAME.allowIncompatible=true`.
@@ -269,4 +270,5 @@ incompatible expressions.
269270
| ToPrettyString | Yes | |
270271
| UnscaledValue | Yes | |
271272

273+
[Comet Configuration Guide]: configs.md
272274
[Comet Compatibility Guide]: compatibility.md

spark/src/main/scala/org/apache/comet/GenerateDocs.scala

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import scala.collection.mutable.ListBuffer
2626
import org.apache.spark.sql.catalyst.expressions.Cast
2727

2828
import org.apache.comet.expressions.{CometCast, CometEvalMode}
29-
import org.apache.comet.serde.{Compatible, Incompatible}
29+
import org.apache.comet.serde.{Compatible, Incompatible, QueryPlanSerde}
3030

3131
/**
3232
* Utility for generating markdown documentation from the configs.
@@ -37,29 +37,48 @@ object GenerateDocs {
3737

3838
private def userGuideLocation = "docs/source/user-guide/latest/"
3939

40+
val publicConfigs: Set[ConfigEntry[_]] = CometConf.allConfs.filter(_.isPublic).toSet
41+
4042
def main(args: Array[String]): Unit = {
4143
generateConfigReference()
4244
generateCompatibilityGuide()
4345
}
4446

4547
private def generateConfigReference(): Unit = {
48+
49+
val pattern = "<!--BEGIN:CONFIG_TABLE\\[(.*)]-->".r
4650
val filename = s"$userGuideLocation/configs.md"
4751
val lines = readFile(filename)
4852
val w = new BufferedOutputStream(new FileOutputStream(filename))
4953
for (line <- lines) {
5054
w.write(s"${line.stripTrailing()}\n".getBytes)
51-
if (line.trim == "<!--BEGIN:CONFIG_TABLE-->") {
52-
val publicConfigs = CometConf.allConfs.filter(_.isPublic)
53-
val confs = publicConfigs.sortBy(_.key)
54-
w.write("| Config | Description | Default Value |\n".getBytes)
55-
w.write("|--------|-------------|---------------|\n".getBytes)
56-
for (conf <- confs) {
57-
if (conf.defaultValue.isEmpty) {
58-
w.write(s"| ${conf.key} | ${conf.doc.trim} | |\n".getBytes)
59-
} else {
60-
w.write(s"| ${conf.key} | ${conf.doc.trim} | ${conf.defaultValueString} |\n".getBytes)
55+
line match {
56+
case pattern(category) =>
57+
w.write("| Config | Description | Default Value |\n".getBytes)
58+
w.write("|--------|-------------|---------------|\n".getBytes)
59+
category match {
60+
case "enable_expr" =>
61+
for (expr <- QueryPlanSerde.exprSerdeMap.keys.map(_.getSimpleName).toList.sorted) {
62+
val config = s"spark.comet.expression.$expr.enabled"
63+
w.write(s"| $config | Enable Comet acceleration for $expr | true |\n".getBytes)
64+
}
65+
case "enable_agg_expr" =>
66+
for (expr <- QueryPlanSerde.aggrSerdeMap.keys.map(_.getSimpleName).toList.sorted) {
67+
val config = s"spark.comet.expression.$expr.enabled"
68+
w.write(s"| $config | Enable Comet acceleration for $expr | true |\n".getBytes)
69+
}
70+
case _ =>
71+
val confs = publicConfigs.filter(_.category == category).toList.sortBy(_.key)
72+
for (conf <- confs) {
73+
if (conf.defaultValue.isEmpty) {
74+
w.write(s"| ${conf.key} | ${conf.doc.trim} | |\n".getBytes)
75+
} else {
76+
w.write(
77+
s"| ${conf.key} | ${conf.doc.trim} | ${conf.defaultValueString} |\n".getBytes)
78+
}
79+
}
6180
}
62-
}
81+
case _ =>
6382
}
6483
}
6584
w.close()

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
239239
/**
240240
* Mapping of Spark expression class to Comet expression handler.
241241
*/
242-
private val exprSerdeMap: Map[Class[_ <: Expression], CometExpressionSerde[_]] =
242+
val exprSerdeMap: Map[Class[_ <: Expression], CometExpressionSerde[_]] =
243243
mathExpressions ++ hashExpressions ++ stringExpressions ++
244244
conditionalExpressions ++ mapExpressions ++ predicateExpressions ++
245245
structExpressions ++ bitwiseExpressions ++ miscExpressions ++ arrayExpressions ++
@@ -248,7 +248,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
248248
/**
249249
* Mapping of Spark aggregate expression class to Comet expression handler.
250250
*/
251-
private val aggrSerdeMap: Map[Class[_], CometAggregateExpressionSerde[_]] = Map(
251+
val aggrSerdeMap: Map[Class[_], CometAggregateExpressionSerde[_]] = Map(
252252
classOf[Average] -> CometAverage,
253253
classOf[BitAndAgg] -> CometBitAndAgg,
254254
classOf[BitOrAgg] -> CometBitOrAgg,
@@ -578,9 +578,16 @@ object QueryPlanSerde extends Logging with CometExprShim {
578578
val cometExpr = aggrSerdeMap.get(fn.getClass)
579579
cometExpr match {
580580
case Some(handler) =>
581-
handler
582-
.asInstanceOf[CometAggregateExpressionSerde[AggregateFunction]]
583-
.convert(aggExpr, fn, inputs, binding, conf)
581+
val aggHandler = handler.asInstanceOf[CometAggregateExpressionSerde[AggregateFunction]]
582+
val exprConfName = aggHandler.getExprConfigName(fn)
583+
if (!CometConf.isExprEnabled(exprConfName)) {
584+
withInfo(
585+
aggExpr,
586+
"Expression support is disabled. Set " +
587+
s"${CometConf.getExprEnabledConfigKey(exprConfName)}=true to enable it.")
588+
return None
589+
}
590+
aggHandler.convert(aggExpr, fn, inputs, binding, conf)
584591
case _ =>
585592
withInfo(
586593
aggExpr,
@@ -1832,6 +1839,17 @@ trait CometExpressionSerde[T <: Expression] {
18321839
*/
18331840
trait CometAggregateExpressionSerde[T <: AggregateFunction] {
18341841

1842+
/**
1843+
* Get a short name for the expression that can be used as part of a config key related to the
1844+
* expression, such as enabling or disabling that expression.
1845+
*
1846+
* @param expr
1847+
* The Spark expression.
1848+
* @return
1849+
* Short name for the expression, defaulting to the Spark class name
1850+
*/
1851+
def getExprConfigName(expr: T): String = expr.getClass.getSimpleName
1852+
18351853
/**
18361854
* Convert a Spark expression into a protocol buffer representation that can be passed into
18371855
* native code.

spark/src/main/scala/org/apache/comet/serde/aggregates.scala

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -538,32 +538,23 @@ trait CometStddev {
538538
binding: Boolean,
539539
conf: SQLConf): Option[ExprOuterClass.AggExpr] = {
540540
val child = stddev.child
541-
if (CometConf.COMET_EXPR_STDDEV_ENABLED.get(conf)) {
542-
val childExpr = exprToProto(child, inputs, binding)
543-
val dataType = serializeDataType(stddev.dataType)
544-
545-
if (childExpr.isDefined && dataType.isDefined) {
546-
val builder = ExprOuterClass.Stddev.newBuilder()
547-
builder.setChild(childExpr.get)
548-
builder.setNullOnDivideByZero(nullOnDivideByZero)
549-
builder.setDatatype(dataType.get)
550-
builder.setStatsTypeValue(statsType)
551-
552-
Some(
553-
ExprOuterClass.AggExpr
554-
.newBuilder()
555-
.setStddev(builder)
556-
.build())
557-
} else {
558-
withInfo(aggExpr, child)
559-
None
560-
}
541+
val childExpr = exprToProto(child, inputs, binding)
542+
val dataType = serializeDataType(stddev.dataType)
543+
544+
if (childExpr.isDefined && dataType.isDefined) {
545+
val builder = ExprOuterClass.Stddev.newBuilder()
546+
builder.setChild(childExpr.get)
547+
builder.setNullOnDivideByZero(nullOnDivideByZero)
548+
builder.setDatatype(dataType.get)
549+
builder.setStatsTypeValue(statsType)
550+
551+
Some(
552+
ExprOuterClass.AggExpr
553+
.newBuilder()
554+
.setStddev(builder)
555+
.build())
561556
} else {
562-
withInfo(
563-
aggExpr,
564-
"stddev disabled by default because it can be slower than Spark. " +
565-
s"Set ${CometConf.COMET_EXPR_STDDEV_ENABLED}=true to enable it.",
566-
child)
557+
withInfo(aggExpr, child)
567558
None
568559
}
569560
}

spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,7 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
6161
}
6262

6363
test("stddev_pop should return NaN for some cases") {
64-
withSQLConf(
65-
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
66-
CometConf.COMET_EXPR_STDDEV_ENABLED.key -> "true") {
64+
withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") {
6765
Seq(true, false).foreach { nullOnDivideByZero =>
6866
withSQLConf("spark.sql.legacy.statisticalAggregate" -> nullOnDivideByZero.toString) {
6967

@@ -1446,9 +1444,7 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
14461444
}
14471445

14481446
test("stddev_pop and stddev_samp") {
1449-
withSQLConf(
1450-
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
1451-
CometConf.COMET_EXPR_STDDEV_ENABLED.key -> "true") {
1447+
withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") {
14521448
Seq("native", "jvm").foreach { cometShuffleMode =>
14531449
withSQLConf(CometConf.COMET_SHUFFLE_MODE.key -> cometShuffleMode) {
14541450
Seq(true, false).foreach { dictionary =>

0 commit comments

Comments
 (0)