chore: Fix some inconsistencies in memory pool configuration (#1561)

andygrove · web-flow · commit badbd376898c · 2025-03-21T12:51:00.000-07:00
## Which issue does this PR close? Closes #1560 ## Rationale for this change - Fix some mistakes I made in #1525 - Make some changes to `fair_unified` pool memory calculations
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -236,17 +236,18 @@ object CometConf extends ShimCometConf {
   val COMET_MEMORY_OVERHEAD: OptionalConfigEntry[Long] = conf("spark.comet.memoryOverhead")
     .doc(
       "The amount of additional memory to be allocated per executor process for Comet, in MiB, " +
-        "when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. " +
+        "when running Spark in on-heap mode. " +
         "This config is optional. If this is not specified, it will be set to " +
         s"`spark.comet.memory.overhead.factor` * `spark.executor.memory`. $TUNING_GUIDE.")
     .bytesConf(ByteUnit.MiB)
     .createOptional
 
   val COMET_MEMORY_OVERHEAD_FACTOR: ConfigEntry[Double] =
     conf("spark.comet.memory.overhead.factor")
-      .doc("Fraction of executor memory to be allocated as additional memory for Comet " +
-        "when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. " +
-        s"$TUNING_GUIDE.")
+      .doc(
+        "Fraction of executor memory to be allocated as additional memory for Comet " +
+          "when running Spark in on-heap mode. " +
+          s"$TUNING_GUIDE.")
       .doubleConf
       .checkValue(
         factor => factor > 0,
@@ -255,8 +256,7 @@ object CometConf extends ShimCometConf {
 
   val COMET_MEMORY_OVERHEAD_MIN_MIB: ConfigEntry[Long] = conf("spark.comet.memory.overhead.min")
     .doc("Minimum amount of additional memory to be allocated per executor process for Comet, " +
-      "in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap " +
-      s"mode. $TUNING_GUIDE.")
+      s"in MiB, when running Spark in on-heap mode. $TUNING_GUIDE.")
     .bytesConf(ByteUnit.MiB)
     .checkValue(
       _ >= 0,
@@ -485,13 +485,14 @@ object CometConf extends ShimCometConf {
       .createWithDefault(false)
 
   val COMET_EXEC_MEMORY_POOL_TYPE: ConfigEntry[String] = conf("spark.comet.exec.memoryPool")
-    .doc(
-      "The type of memory pool to be used for Comet native execution. " +
-        "Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', " +
-        "'fair_spill_task_shared', 'greedy_global', 'fair_spill_global', and `unbounded`. " +
-        "For off-heap types are 'unified' and `fair_unified`.")
+    .doc("The type of memory pool to be used for Comet native execution. " +
+      "When running Spark in on-heap mode, available pool types are 'greedy', 'fair_spill', " +
+      "'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global', 'fair_spill_global', " +
+      "and `unbounded`. When running Spark in off-heap mode, available pool types are " +
+      "'unified' and `fair_unified`. The default pool type is `greedy_task_shared` for on-heap " +
+      s"mode and `unified` for off-heap mode. $TUNING_GUIDE.")
     .stringConf
-    .createWithDefault("greedy_task_shared")
+    .createWithDefault("default")
 
   val COMET_SCAN_PREFETCH_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.scan.preFetch.enabled")
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
@@ -54,7 +54,7 @@ Comet provides the following configuration settings.
 | spark.comet.exec.hashJoin.enabled | Whether to enable hashJoin by default. | true |
 | spark.comet.exec.initCap.enabled | Whether to enable initCap by default. | false |
 | spark.comet.exec.localLimit.enabled | Whether to enable localLimit by default. | true |
-| spark.comet.exec.memoryPool | The type of memory pool to be used for Comet native execution. Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global', 'fair_spill_global', and `unbounded`. For off-heap types are 'unified' and `fair_unified`. | greedy_task_shared |
+| spark.comet.exec.memoryPool | The type of memory pool to be used for Comet native execution. When running Spark in on-heap mode, available pool types are 'greedy', 'fair_spill', 'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global', 'fair_spill_global', and `unbounded`. When running Spark in off-heap mode, available pool types are 'unified' and `fair_unified`. The default pool type is `greedy_task_shared` for on-heap mode and `unified` for off-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | default |
 | spark.comet.exec.project.enabled | Whether to enable project by default. | true |
 | spark.comet.exec.replaceSortMergeJoin | Experimental feature to force Spark to replace SortMergeJoin with ShuffledHashJoin for improved performance. This feature is not stable yet. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | false |
 | spark.comet.exec.shuffle.compression.codec | The codec of Comet native shuffle used to compress shuffle data. lz4, zstd, and snappy are supported. Compression can be disabled by setting spark.shuffle.compress=false. | lz4 |
@@ -71,9 +71,9 @@ Comet provides the following configuration settings.
 | spark.comet.explain.verbose.enabled | When this setting is enabled, Comet will provide a verbose tree representation of the extended information. | false |
 | spark.comet.explainFallback.enabled | When this setting is enabled, Comet will provide logging explaining the reason(s) why a query stage cannot be executed natively. Set this to false to reduce the amount of logging. | false |
 | spark.comet.expression.allowIncompatible | Comet is not currently fully compatible with Spark for all expressions. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
-| spark.comet.memory.overhead.factor | Fraction of executor memory to be allocated as additional memory for Comet when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 0.2 |
-| spark.comet.memory.overhead.min | Minimum amount of additional memory to be allocated per executor process for Comet, in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 402653184b |
-| spark.comet.memoryOverhead | The amount of additional memory to be allocated per executor process for Comet, in MiB, when running in on-heap mode or when using the `fair_unified` pool in off-heap mode. This config is optional. If this is not specified, it will be set to `spark.comet.memory.overhead.factor` * `spark.executor.memory`. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | |
+| spark.comet.memory.overhead.factor | Fraction of executor memory to be allocated as additional memory for Comet when running Spark in on-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 0.2 |
+| spark.comet.memory.overhead.min | Minimum amount of additional memory to be allocated per executor process for Comet, in MiB, when running Spark in on-heap mode. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | 402653184b |
+| spark.comet.memoryOverhead | The amount of additional memory to be allocated per executor process for Comet, in MiB, when running Spark in on-heap mode. This config is optional. If this is not specified, it will be set to `spark.comet.memory.overhead.factor` * `spark.executor.memory`. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | |
 | spark.comet.metrics.updateInterval | The interval in milliseconds to update metrics. If interval is negative, metrics will be updated upon task completion. | 3000 |
 | spark.comet.nativeLoadRequired | Whether to require Comet native library to load successfully when Comet is enabled. If not, Comet will silently fallback to Spark when it fails to load the native lib. Otherwise, an error will be thrown and the Spark job will be aborted. | false |
 | spark.comet.parquet.enable.directBuffer | Whether to use Java direct byte buffer when reading Parquet. | false |
diff --git a/docs/source/user-guide/tuning.md b/docs/source/user-guide/tuning.md
@@ -108,34 +108,29 @@ resource managers respect Apache Spark memory configuration before starting the
 
 Comet implements multiple memory pool implementations. The type of pool can be specified with `spark.comet.exec.memoryPool`.
 
-The valid pool types are:
+The valid pool types for off-heap mode are:
 
 - `unified` (default when `spark.memory.offHeap.enabled=true` is set)
 - `fair_unified`
 
+Both of these pools share off-heap memory between Spark and Comet. This approach is referred to as 
+unified memory management. The size of the pool is specified by `spark.memory.offHeap.size`.
+
 The `unified` pool type implements a greedy first-come first-serve limit. This pool works well for queries that do not
-need to spill or have a single spillable operator. The size of the pool is specified by `spark.memory.offHeap.size` 
-and the pool interacts with Spark's memory pool, effectively sharing the off-heap memory between Spark and Comet. This 
-approach is sometimes referred to as unified memory management.
+need to spill or have a single spillable operator. 
 
 The `fair_unified` pool type prevents operators from using more than an even fraction of the available memory
 (i.e. `pool_size / num_reservations`). This pool works best when you know beforehand
 the query has multiple operators that will likely all need to spill. Sometimes it will cause spills even
 when there is sufficient memory in order to leave enough memory for other operators.
 
-The pool size configuration for the `fair_unified` pool, is a little more complex. The total pool size is computed by 
-multiplying `spark.memory.offHeap.size` by `spark.comet.memory.overhead.factor` with the minimum amount being 
-`spark.comet.memory.overhead.min`. It is also possible to manually specify `spark.comet.memoryOverhead` instead to 
-override this default behavior. Note that the `fair_unified` pool does not use unified memory management to interact 
-with Spark's memory pools, which is why the allocation defaults to a fraction of off-heap memory.
-
 ### Configuring On-Heap Memory Pools
 
 When running in on-heap mode, Comet will use its own dedicated memory pools that are not shared with Spark.
 
 The type of pool can be specified with `spark.comet.exec.memoryPool`. The default setting is `greedy_task_shared`.
 
-The valid pool types are:
+The valid pool types for on-heap mode are:
 
 - `greedy`
 - `greedy_global`
diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs
@@ -302,12 +302,18 @@ fn parse_memory_pool_config(
     let memory_pool_config = if off_heap_mode {
         match memory_pool_type.as_str() {
             "fair_unified" => MemoryPoolConfig::new(MemoryPoolType::FairUnified, pool_size),
-            _ => {
-                // the Unified memory pool interacts with Spark's memory pool to allocate
+            "default" | "unified" => {
+                // the `unified` memory pool interacts with Spark's memory pool to allocate
                 // memory therefore does not need a size to be explicitly set. The pool size
                 // shared with Spark is set by `spark.memory.offHeap.size`.
                 MemoryPoolConfig::new(MemoryPoolType::Unified, 0)
             }
+            _ => {
+                return Err(CometError::Config(format!(
+                    "Unsupported memory pool type for off-heap mode: {}",
+                    memory_pool_type
+                )))
+            }
         }
     } else {
         // Use the memory pool from DF
@@ -316,7 +322,7 @@ fn parse_memory_pool_config(
             "fair_spill_task_shared" => {
                 MemoryPoolConfig::new(MemoryPoolType::FairSpillTaskShared, pool_size_per_task)
             }
-            "greedy_task_shared" => {
+            "default" | "greedy_task_shared" => {
                 MemoryPoolConfig::new(MemoryPoolType::GreedyTaskShared, pool_size_per_task)
             }
             "fair_spill_global" => {
@@ -328,7 +334,7 @@ fn parse_memory_pool_config(
             "unbounded" => MemoryPoolConfig::new(MemoryPoolType::Unbounded, 0),
             _ => {
                 return Err(CometError::Config(format!(
-                    "Unsupported memory pool type: {}",
+                    "Unsupported memory pool type for on-heap mode: {}",
                     memory_pool_type
                 )))
             }
diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
@@ -21,6 +21,7 @@ package org.apache.comet
 
 import org.apache.spark._
 import org.apache.spark.internal.Logging
+import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.sql.comet.CometMetricNode
 import org.apache.spark.sql.vectorized._
 
@@ -63,9 +64,17 @@ class CometExecIterator(
   }.toArray
   private val plan = {
     val conf = SparkEnv.get.conf
-    // Only enable unified memory manager when off-heap mode is enabled. Otherwise,
-    // we'll use the built-in memory pool from DF, and initializes with `memory_limit`
-    // and `memory_fraction` below.
+
+    val offHeapMode = CometSparkSessionExtensions.isOffHeapEnabled(conf)
+    val memoryLimit = if (offHeapMode) {
+      // in unified mode we share off-heap memory with Spark
+      ByteUnit.MiB.toBytes(conf.getSizeAsMb("spark.memory.offHeap.size"))
+    } else {
+      // we'll use the built-in memory pool from DF, which is initialized with `memory_limit`
+      // and `memory_fraction` below.
+      CometSparkSessionExtensions.getCometMemoryOverhead(conf)
+    }
+
     nativeLib.createPlan(
       id,
       cometBatchIterators,
@@ -75,9 +84,9 @@ class CometExecIterator(
       metricsUpdateInterval = COMET_METRICS_UPDATE_INTERVAL.get(),
       new CometTaskMemoryManager(id),
       batchSize = COMET_BATCH_SIZE.get(),
-      offHeapMode = CometSparkSessionExtensions.isOffHeapEnabled(conf),
+      offHeapMode,
       memoryPoolType = COMET_EXEC_MEMORY_POOL_TYPE.get(),
-      memoryLimit = CometSparkSessionExtensions.getCometMemoryOverhead(conf),
+      memoryLimit,
       memoryLimitPerTask = getMemoryLimitPerTask(conf),
       taskAttemptId = TaskContext.get().taskAttemptId,
       debug = COMET_DEBUG_ENABLED.get(),
diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala
@@ -1334,26 +1334,46 @@ object CometSparkSessionExtensions extends Logging {
     CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) == CometConf.SCAN_NATIVE_DATAFUSION
   }
 
-  /** Calculates required memory overhead in MB per executor process for Comet. */
+  /**
+   * Whether we should override Spark memory configuration for Comet. Returns true only when
+   * Comet is enabled, Comet native execution and/or Comet shuffle is enabled, and Spark is not
+   * running in off-heap mode (unified memory manager).
+   */
+  def shouldOverrideMemoryConf(conf: SparkConf): Boolean = {
+    val cometEnabled = getBooleanConf(conf, CometConf.COMET_ENABLED)
+    val cometShuffleEnabled = getBooleanConf(conf, CometConf.COMET_EXEC_SHUFFLE_ENABLED)
+    val cometExecEnabled = getBooleanConf(conf, CometConf.COMET_EXEC_ENABLED)
+    val offHeapMode = CometSparkSessionExtensions.isOffHeapEnabled(conf)
+    cometEnabled && (cometShuffleEnabled || cometExecEnabled) && !offHeapMode
+  }
+
+  /**
+   * Calculates required memory overhead in MiB per executor process for Comet when running in
+   * on-heap mode.
+   *
+   * If `COMET_MEMORY_OVERHEAD` is defined then that value will be used, otherwise the overhead
+   * will be calculated by multiplying executor memory (`spark.executor.memory`) by
+   * `COMET_MEMORY_OVERHEAD_FACTOR`.
+   *
+   * The `COMET_MEMORY_OVERHEAD_MIN_MIB` minimum is applied only to the calculated value.
+   */
   def getCometMemoryOverheadInMiB(sparkConf: SparkConf): Long = {
-    val baseMemoryMiB = if (isOffHeapEnabled(sparkConf)) {
-      ConfigHelpers
-        .byteFromString(sparkConf.get("spark.memory.offHeap.size"), ByteUnit.MiB)
-    } else {
-      // `spark.executor.memory` default value is 1g
-      ConfigHelpers
-        .byteFromString(sparkConf.get("spark.executor.memory", "1024MB"), ByteUnit.MiB)
+    if (isOffHeapEnabled(sparkConf)) {
+      // when running in off-heap mode we use unified memory management to share
+      // off-heap memory with Spark so do not add overhead
+      return 0
     }
 
-    val minimum = ConfigHelpers
-      .byteFromString(
-        sparkConf.get(
-          COMET_MEMORY_OVERHEAD_MIN_MIB.key,
-          COMET_MEMORY_OVERHEAD_MIN_MIB.defaultValueString),
-        ByteUnit.MiB)
-    val overheadFactor = sparkConf.getDouble(
-      COMET_MEMORY_OVERHEAD_FACTOR.key,
-      COMET_MEMORY_OVERHEAD_FACTOR.defaultValue.get)
+    // `spark.executor.memory` default value is 1g
+    val baseMemoryMiB = ConfigHelpers
+      .byteFromString(sparkConf.get("spark.executor.memory", "1024MB"), ByteUnit.MiB)
+
+    val cometMemoryOverheadMinAsString = sparkConf.get(
+      COMET_MEMORY_OVERHEAD_MIN_MIB.key,
+      COMET_MEMORY_OVERHEAD_MIN_MIB.defaultValueString)
+
+    val minimum = ConfigHelpers.byteFromString(cometMemoryOverheadMinAsString, ByteUnit.MiB)
+    val overheadFactor = getDoubleConf(sparkConf, COMET_MEMORY_OVERHEAD_FACTOR)
 
     val overHeadMemFromConf = sparkConf
       .getOption(COMET_MEMORY_OVERHEAD.key)
@@ -1362,7 +1382,16 @@ object CometSparkSessionExtensions extends Logging {
     overHeadMemFromConf.getOrElse(math.max((overheadFactor * baseMemoryMiB).toLong, minimum))
   }
 
-  /** Calculates required memory overhead in bytes per executor process for Comet. */
+  private def getBooleanConf(conf: SparkConf, entry: ConfigEntry[Boolean]) =
+    conf.getBoolean(entry.key, entry.defaultValue.get)
+
+  private def getDoubleConf(conf: SparkConf, entry: ConfigEntry[Double]) =
+    conf.getDouble(entry.key, entry.defaultValue.get)
+
+  /**
+   * Calculates required memory overhead in bytes per executor process for Comet when running in
+   * on-heap mode.
+   */
   def getCometMemoryOverhead(sparkConf: SparkConf): Long = {
     ByteUnit.MiB.toBytes(getCometMemoryOverheadInMiB(sparkConf))
   }
@@ -1391,11 +1420,6 @@ object CometSparkSessionExtensions extends Logging {
     }
   }
 
-  /** Calculates Comet shuffle memory size in MB */
-  def getCometShuffleMemorySizeInMiB(sparkConf: SparkConf, conf: SQLConf = SQLConf.get): Long = {
-    ByteUnit.BYTE.toMiB(getCometShuffleMemorySize(sparkConf, conf))
-  }
-
   def isOffHeapEnabled(sparkConf: SparkConf): Boolean = {
     sparkConf.getBoolean("spark.memory.offHeap.enabled", false)
   }
diff --git a/spark/src/main/scala/org/apache/spark/Plugins.scala b/spark/src/main/scala/org/apache/spark/Plugins.scala
diff --git a/spark/src/test/scala/org/apache/comet/CometSparkSessionExtensionsSuite.scala b/spark/src/test/scala/org/apache/comet/CometSparkSessionExtensionsSuite.scala