Skip to content

Commit 382ac93

Browse files
authored
docs: various improvements to tuning guide (#1525)
1 parent 46b162c commit 382ac93

File tree

9 files changed

+170
-122
lines changed

9 files changed

+170
-122
lines changed

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -235,27 +235,28 @@ object CometConf extends ShimCometConf {
235235

236236
val COMET_MEMORY_OVERHEAD: OptionalConfigEntry[Long] = conf("spark.comet.memoryOverhead")
237237
.doc(
238-
"The amount of additional memory to be allocated per executor process for Comet, in MiB. " +
238+
"The amount of additional memory to be allocated per executor process for Comet, in MiB, " +
239+
"when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. " +
239240
"This config is optional. If this is not specified, it will be set to " +
240-
"`spark.comet.memory.overhead.factor` * `spark.executor.memory`. " +
241-
"This is memory that accounts for things like Comet native execution, Comet shuffle, etc.")
241+
s"`spark.comet.memory.overhead.factor` * `spark.executor.memory`. $TUNING_GUIDE.")
242242
.bytesConf(ByteUnit.MiB)
243243
.createOptional
244244

245-
val COMET_MEMORY_OVERHEAD_FACTOR: ConfigEntry[Double] = conf(
246-
"spark.comet.memory.overhead.factor")
247-
.doc(
248-
"Fraction of executor memory to be allocated as additional non-heap memory per executor " +
249-
"process for Comet.")
250-
.doubleConf
251-
.checkValue(
252-
factor => factor > 0,
253-
"Ensure that Comet memory overhead factor is a double greater than 0")
254-
.createWithDefault(0.2)
245+
val COMET_MEMORY_OVERHEAD_FACTOR: ConfigEntry[Double] =
246+
conf("spark.comet.memory.overhead.factor")
247+
.doc("Fraction of executor memory to be allocated as additional memory for Comet " +
248+
"when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. " +
249+
s"$TUNING_GUIDE.")
250+
.doubleConf
251+
.checkValue(
252+
factor => factor > 0,
253+
"Ensure that Comet memory overhead factor is a double greater than 0")
254+
.createWithDefault(0.2)
255255

256256
val COMET_MEMORY_OVERHEAD_MIN_MIB: ConfigEntry[Long] = conf("spark.comet.memory.overhead.min")
257257
.doc("Minimum amount of additional memory to be allocated per executor process for Comet, " +
258-
"in MiB.")
258+
"in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap " +
259+
s"mode. $TUNING_GUIDE.")
259260
.bytesConf(ByteUnit.MiB)
260261
.checkValue(
261262
_ >= 0,
@@ -274,11 +275,10 @@ object CometConf extends ShimCometConf {
274275
.createWithDefault(true)
275276

276277
val COMET_SHUFFLE_MODE: ConfigEntry[String] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.mode")
277-
.doc("The mode of Comet shuffle. This config is only effective if Comet shuffle " +
278-
"is enabled. Available modes are 'native', 'jvm', and 'auto'. " +
279-
"'native' is for native shuffle which has best performance in general. " +
280-
"'jvm' is for jvm-based columnar shuffle which has higher coverage than native shuffle. " +
281-
"'auto' is for Comet to choose the best shuffle mode based on the query plan.")
278+
.doc(
279+
"This is a test config to allow tests to force a particular shuffle implementation to be " +
280+
"used. Valid values are `jvm` for Columnar Shuffle, `native` for Native Shuffle, " +
281+
s"and `auto` to pick the best supported option (`native` has priority). $TUNING_GUIDE.")
282282
.internal()
283283
.stringConf
284284
.transform(_.toLowerCase(Locale.ROOT))
@@ -378,26 +378,16 @@ object CometConf extends ShimCometConf {
378378
val COMET_COLUMNAR_SHUFFLE_MEMORY_SIZE: OptionalConfigEntry[Long] =
379379
conf("spark.comet.columnar.shuffle.memorySize")
380380
.internal()
381-
.doc(
382-
"Test-only config. This is only used to test Comet shuffle with Spark tests. " +
383-
"The optional maximum size of the memory used for Comet columnar shuffle, in MiB. " +
384-
"Note that this config is only used when `spark.comet.exec.shuffle.mode` is " +
385-
"`jvm`. Once allocated memory size reaches this config, the current batch will be " +
386-
"flushed to disk immediately. If this is not configured, Comet will use " +
387-
"`spark.comet.shuffle.memory.factor` * `spark.comet.memoryOverhead` as " +
388-
"shuffle memory size. If final calculated value is larger than Comet memory " +
389-
"overhead, Comet will use Comet memory overhead as shuffle memory size.")
381+
.doc("Amount of memory to reserve for columnar shuffle when running in on-heap mode. " +
382+
s"$TUNING_GUIDE.")
390383
.bytesConf(ByteUnit.MiB)
391384
.createOptional
392385

393386
val COMET_COLUMNAR_SHUFFLE_MEMORY_FACTOR: ConfigEntry[Double] =
394387
conf("spark.comet.columnar.shuffle.memory.factor")
395388
.internal()
396-
.doc(
397-
"Test-only config. This is only used to test Comet shuffle with Spark tests. " +
398-
"Fraction of Comet memory to be allocated per executor process for Comet shuffle. " +
399-
"Comet memory size is specified by `spark.comet.memoryOverhead` or " +
400-
"calculated by `spark.comet.memory.overhead.factor` * `spark.executor.memory`.")
389+
.doc("Fraction of Comet memory to be allocated per executor process for columnar shuffle " +
390+
s"when running in on-heap mode. $TUNING_GUIDE.")
401391
.doubleConf
402392
.checkValue(
403393
factor => factor > 0,

docs/source/user-guide/configs.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ Comet provides the following configuration settings.
7171
| spark.comet.explain.verbose.enabled | When this setting is enabled, Comet will provide a verbose tree representation of the extended information. | false |
7272
| spark.comet.explainFallback.enabled | When this setting is enabled, Comet will provide logging explaining the reason(s) why a query stage cannot be executed natively. Set this to false to reduce the amount of logging. | false |
7373
| spark.comet.expression.allowIncompatible | Comet is not currently fully compatible with Spark for all expressions. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
74-
| spark.comet.memory.overhead.factor | Fraction of executor memory to be allocated as additional non-heap memory per executor process for Comet. | 0.2 |
75-
| spark.comet.memory.overhead.min | Minimum amount of additional memory to be allocated per executor process for Comet, in MiB. | 402653184b |
76-
| spark.comet.memoryOverhead | The amount of additional memory to be allocated per executor process for Comet, in MiB. This config is optional. If this is not specified, it will be set to `spark.comet.memory.overhead.factor` * `spark.executor.memory`. This is memory that accounts for things like Comet native execution, Comet shuffle, etc. | |
74+
| spark.comet.memory.overhead.factor | Fraction of executor memory to be allocated as additional memory for Comet when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 0.2 |
75+
| spark.comet.memory.overhead.min | Minimum amount of additional memory to be allocated per executor process for Comet, in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 402653184b |
76+
| spark.comet.memoryOverhead | The amount of additional memory to be allocated per executor process for Comet, in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. This config is optional. If this is not specified, it will be set to `spark.comet.memory.overhead.factor` * `spark.executor.memory`. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | |
7777
| spark.comet.metrics.updateInterval | The interval in milliseconds to update metrics. If interval is negative, metrics will be updated upon task completion. | 3000 |
7878
| spark.comet.nativeLoadRequired | Whether to require Comet native library to load successfully when Comet is enabled. If not, Comet will silently fallback to Spark when it fails to load the native lib. Otherwise, an error will be thrown and the Spark job will be aborted. | false |
7979
| spark.comet.parquet.enable.directBuffer | Whether to use Java direct byte buffer when reading Parquet. | false |

0 commit comments

Comments
 (0)