
Commit 2471159 (parent: 8f89c1c)

docs: Documentation updates (#2581)

File tree: 5 files changed, +301 -302 lines

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 23 additions & 22 deletions
@@ -60,6 +60,7 @@ object CometConf extends ShimCometConf {
   private val CATEGORY_SCAN = "scan"
   private val CATEGORY_PARQUET = "parquet"
   private val CATEGORY_EXEC = "exec"
+  private val CATEGORY_EXEC_EXPLAIN = "exec_explain"
   private val CATEGORY_ENABLE_EXEC = "enable_exec"
   private val CATEGORY_SHUFFLE = "shuffle"
   private val CATEGORY_TUNING = "tuning"
@@ -83,7 +84,7 @@ object CometConf extends ShimCometConf {
     .doc(
       "Whether to enable Comet extension for Spark. When this is turned on, Spark will use " +
         "Comet to read Parquet data source. Note that to enable native vectorized execution, " +
-        "both this config and 'spark.comet.exec.enabled' need to be enabled. By default, this " +
+        "both this config and `spark.comet.exec.enabled` need to be enabled. By default, this " +
         "config is the value of the env var `ENABLE_COMET` if set, or true otherwise.")
     .booleanConf
     .createWithDefault(sys.env.getOrElse("ENABLE_COMET", "true").toBoolean)
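
As the doc string notes, both `spark.comet.enabled` and `spark.comet.exec.enabled` must be on for native vectorized execution. A minimal sketch, not part of this commit; the `spark.plugins` setup is assumed from the Comet user guide, and the Comet jar must already be on the classpath:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: enable Comet and its native execution in one session.
val spark = SparkSession
  .builder()
  .appName("comet-enabled-example")
  .config("spark.plugins", "org.apache.spark.CometPlugin") // assumed plugin class
  .config("spark.comet.enabled", "true")
  .config("spark.comet.exec.enabled", "true")
  .getOrCreate()
```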
@@ -94,7 +95,7 @@ object CometConf extends ShimCometConf {
       "Whether to enable native scans. When this is turned on, Spark will use Comet to " +
         "read supported data sources (currently only Parquet is supported natively). Note " +
         "that to enable native vectorized execution, both this config and " +
-        "'spark.comet.exec.enabled' need to be enabled.")
+        "`spark.comet.exec.enabled` need to be enabled.")
     .booleanConf
     .createWithDefault(true)

@@ -106,13 +107,13 @@ object CometConf extends ShimCometConf {
   val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
     .category(CATEGORY_SCAN)
     .doc(
-      s"The implementation of Comet Native Scan to use. Available modes are '$SCAN_NATIVE_COMET'," +
-        s"'$SCAN_NATIVE_DATAFUSION', and '$SCAN_NATIVE_ICEBERG_COMPAT'. " +
-        s"'$SCAN_NATIVE_COMET' is for the original Comet native scan which uses a jvm based " +
+      s"The implementation of Comet Native Scan to use. Available modes are `$SCAN_NATIVE_COMET`," +
+        s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " +
+        s"`$SCAN_NATIVE_COMET` is for the original Comet native scan which uses a jvm based " +
         "parquet file reader and native column decoding. Supports simple types only " +
-        s"'$SCAN_NATIVE_DATAFUSION' is a fully native implementation of scan based on DataFusion" +
-        s"'$SCAN_NATIVE_ICEBERG_COMPAT' is a native implementation that exposes apis to read " +
-        s"parquet columns natively. $SCAN_AUTO chooses the best scan.")
+        s"`$SCAN_NATIVE_DATAFUSION` is a fully native implementation of scan based on DataFusion" +
+        s"`$SCAN_NATIVE_ICEBERG_COMPAT` is a native implementation that exposes apis to read " +
+        s"parquet columns natively. `$SCAN_AUTO` chooses the best scan.")
     .internal()
     .stringConf
     .transform(_.toLowerCase(Locale.ROOT))
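
For illustration, a hedged sketch of selecting a scan mode. The literal value `auto` is assumed to be what `$SCAN_AUTO` interpolates to, and since the entry is marked `.internal()`, this is not a stable public knob:

```scala
// Sketch only: "auto" is assumed from the $SCAN_AUTO constant above.
spark.conf.set("spark.comet.scan.impl", "auto") // let Comet pick the best scan
```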
@@ -156,7 +157,7 @@ object CometConf extends ShimCometConf {
     .category(CATEGORY_PARQUET)
     .doc(
       "When enabled the parallel reader will try to merge ranges of data that are separated " +
-        "by less than 'comet.parquet.read.io.mergeRanges.delta' bytes. Longer continuous reads " +
+        "by less than `comet.parquet.read.io.mergeRanges.delta` bytes. Longer continuous reads " +
         "are faster on cloud storage.")
     .booleanConf
     .createWithDefault(true)
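
A sketch of tuning the behavior described above. Both key names are assumed (the hunk truncates the `conf(...)` call, and the doc string omits the `spark.` prefix, likely a doc typo); the 8 MiB value is purely illustrative:

```scala
// Sketch: merge read ranges separated by less than ~8 MiB into one read.
spark.conf.set("spark.comet.parquet.read.io.mergeRanges", "true") // assumed key
spark.conf.set("spark.comet.parquet.read.io.mergeRanges.delta", (8 * 1024 * 1024).toString)
```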
@@ -185,7 +186,7 @@ object CometConf extends ShimCometConf {
     .doc(
       "When enabled, data from Spark (non-native) Parquet v1 and v2 scans will be converted to " +
         "Arrow format. Note that to enable native vectorized execution, both this config and " +
-        "'spark.comet.exec.enabled' need to be enabled.")
+        "`spark.comet.exec.enabled` need to be enabled.")
     .booleanConf
     .createWithDefault(false)

@@ -195,7 +196,7 @@ object CometConf extends ShimCometConf {
     .doc(
       "When enabled, data from Spark (non-native) JSON v1 and v2 scans will be converted to " +
         "Arrow format. Note that to enable native vectorized execution, both this config and " +
-        "'spark.comet.exec.enabled' need to be enabled.")
+        "`spark.comet.exec.enabled` need to be enabled.")
     .booleanConf
     .createWithDefault(false)

@@ -205,7 +206,7 @@ object CometConf extends ShimCometConf {
     .doc(
       "When enabled, data from Spark (non-native) CSV v1 and v2 scans will be converted to " +
         "Arrow format. Note that to enable native vectorized execution, both this config and " +
-        "'spark.comet.exec.enabled' need to be enabled.")
+        "`spark.comet.exec.enabled` need to be enabled.")
     .booleanConf
     .createWithDefault(false)
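
The Parquet, JSON, and CSV convert configs above share one pattern. A sketch using CSV; the key name is assumed, since the hunks truncate the `conf(...)` declarations:

```scala
// Sketch: convert Spark's non-native CSV scan output to Arrow format.
// Per the doc string, spark.comet.exec.enabled must also be true.
spark.conf.set("spark.comet.convert.csv.enabled", "true") // assumed key name
```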

@@ -215,7 +216,7 @@ object CometConf extends ShimCometConf {
       "Whether to enable Comet native vectorized execution for Spark. This controls whether " +
         "Spark should convert operators into their Comet counterparts and execute them in " +
         "native space. Note: each operator is associated with a separate config in the " +
-        "format of 'spark.comet.exec.<operator_name>.enabled' at the moment, and both the " +
+        "format of `spark.comet.exec.<operator_name>.enabled` at the moment, and both the " +
         "config and this need to be turned on, in order for the operator to be executed in " +
         "native.")
     .booleanConf
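
A sketch of the per-operator pattern the doc string describes; `project` is one illustrative operator name (the full list is not part of this diff):

```scala
// Sketch: the umbrella config and the per-operator config must both be on.
spark.conf.set("spark.comet.exec.enabled", "true")
spark.conf.set("spark.comet.exec.project.enabled", "true") // assumed operator key
```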
@@ -308,9 +309,9 @@ object CometConf extends ShimCometConf {
     .category(CATEGORY_SHUFFLE)
     .doc(
       "Whether to enable Comet native shuffle. " +
-        "Note that this requires setting 'spark.shuffle.manager' to " +
-        "'org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager'. " +
-        "'spark.shuffle.manager' must be set before starting the Spark application and " +
+        "Note that this requires setting `spark.shuffle.manager` to " +
+        "`org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager`. " +
+        "`spark.shuffle.manager` must be set before starting the Spark application and " +
         "cannot be changed during the application.")
     .booleanConf
     .createWithDefault(true)
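
Because `spark.shuffle.manager` cannot change once the application starts, it must be supplied when the SparkContext is created. A sketch; the `spark.comet.exec.shuffle.enabled` key is assumed, since the hunk truncates the `conf(...)` call:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: the shuffle manager is fixed at session creation time.
val sparkWithCometShuffle = SparkSession
  .builder()
  .config(
    "spark.shuffle.manager",
    "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
  .config("spark.comet.exec.shuffle.enabled", "true") // assumed key name
  .getOrCreate()
```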
@@ -498,7 +499,7 @@ object CometConf extends ShimCometConf {

   val COMET_EXPLAIN_VERBOSE_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explain.verbose.enabled")
-      .category(CATEGORY_EXEC)
+      .category(CATEGORY_EXEC_EXPLAIN)
       .doc(
         "When this setting is enabled, Comet's extended explain output will provide the full " +
           "query plan annotated with fallback reasons as well as a summary of how much of " +
@@ -509,7 +510,7 @@ object CometConf extends ShimCometConf {

   val COMET_EXPLAIN_NATIVE_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explain.native.enabled")
-      .category(CATEGORY_EXEC)
+      .category(CATEGORY_EXEC_EXPLAIN)
       .doc(
         "When this setting is enabled, Comet will provide a tree representation of " +
           "the native query plan before execution and again after execution, with " +
@@ -519,7 +520,7 @@ object CometConf extends ShimCometConf {

   val COMET_EXPLAIN_TRANSFORMATIONS: ConfigEntry[Boolean] =
     conf("spark.comet.explain.rules")
-      .category(CATEGORY_EXEC)
+      .category(CATEGORY_EXEC_EXPLAIN)
       .doc("When this setting is enabled, Comet will log all plan transformations performed " +
         "in physical optimizer rules. Default: false")
       .internal()
@@ -528,15 +529,15 @@ object CometConf extends ShimCometConf {

   val COMET_LOG_FALLBACK_REASONS: ConfigEntry[Boolean] =
     conf("spark.comet.logFallbackReasons.enabled")
-      .category(CATEGORY_EXEC)
+      .category(CATEGORY_EXEC_EXPLAIN)
       .doc("When this setting is enabled, Comet will log warnings for all fallback reasons.")
       .booleanConf
       .createWithDefault(
         sys.env.getOrElse("ENABLE_COMET_LOG_FALLBACK_REASONS", "false").toBoolean)

   val COMET_EXPLAIN_FALLBACK_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explainFallback.enabled")
-      .category(CATEGORY_EXEC)
+      .category(CATEGORY_EXEC_EXPLAIN)
       .doc(
         "When this setting is enabled, Comet will provide logging explaining the reason(s) " +
           "why a query stage cannot be executed natively. Set this to false to " +
@@ -680,7 +681,7 @@ object CometConf extends ShimCometConf {
     conf("spark.comet.sparkToColumnar.supportedOperatorList")
       .category(CATEGORY_SCAN)
       .doc("A comma-separated list of operators that will be converted to Arrow columnar " +
-        "format when 'spark.comet.sparkToColumnar.enabled' is true")
+        "format when `spark.comet.sparkToColumnar.enabled` is true")
       .stringConf
       .toSequence
       .createWithDefault(Seq("Range,InMemoryTableScan,RDDScan"))
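
A sketch of overriding the operator list; note the default above is a single comma-separated string:

```scala
// Sketch: restrict Arrow conversion to a subset of the default operators.
spark.conf.set("spark.comet.sparkToColumnar.enabled", "true")
spark.conf.set("spark.comet.sparkToColumnar.supportedOperatorList", "Range,RDDScan")
```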

docs/source/user-guide/latest/compatibility.md

Lines changed: 0 additions & 3 deletions
@@ -83,9 +83,6 @@ The `native_datafusion` scan has some additional limitations:
 Comet will fall back to Spark for the following expressions when ANSI mode is enabled, unless
 `spark.comet.expression.allowIncompatible=true`.

-- IntegralDivide
-- Remainder
-- Round
 - Average
 - Sum
 - Cast (in some cases)
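
Per the updated list, these expressions fall back under ANSI mode unless the opt-in named above is set. A sketch (opting in accepts results that may differ from Spark's):

```scala
// Sketch: allow Comet expressions flagged as incompatible under ANSI mode.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.conf.set("spark.comet.expression.allowIncompatible", "true")
```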
