perf(cube): Add execution.dont_parallelize_sort_preserving_merge_exec_inputs config option

srh · srh · commit 17a38a46a791 · 2025-07-14T20:02:38.000-07:00
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
@@ -387,6 +387,12 @@ config_namespace! {
         /// We plan to make this the default in the future.
         pub use_row_number_estimates_to_optimize_partitioning: bool, default = false
 
+        /// Cube: If true, makes SortPreservingMergeExec not parallelize merge sort inputs, to save
+        /// memory consumption of intermediate batches (1 in the mpsc channel buffer, and 1 in the
+        /// subtask waiting to be pushed onto the buffer).  If false, maintains upstream DF
+        /// behavior.
+        pub dont_parallelize_sort_preserving_merge_exec_inputs: bool, default = false
+
         /// Should DataFusion enforce batch size in joins or not. By default,
         /// DataFusion will not enforce batch size in joins. Enforcing batch size
         /// in joins can reduce memory usage when joining large
diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
@@ -297,11 +297,18 @@ impl ExecutionPlan for SortPreservingMergeExec {
                 }
             },
             _ => {
+                // Cube: When `dont_parallelize` is set, use each input stream directly rather
+                // than via upstream DF's `spawn_buffered`, which holds one RecordBatch in the mpsc
+                // channel and one more in the blocked subtask waiting to push onto the channel.
+
+                // Cube TODO: If memory tracking can be made to account for those batches, make use of it (in general).
+                let dont_parallelize = context.session_config().options().execution.dont_parallelize_sort_preserving_merge_exec_inputs;
+
                 let receivers = (0..input_partitions)
                     .map(|partition| {
                         let stream =
                             self.input.execute(partition, Arc::clone(&context))?;
-                        Ok(spawn_buffered(stream, 1))
+                        Ok(if dont_parallelize { stream } else { spawn_buffered(stream, 1) })
                     })
                     .collect::<Result<_>>()?;
 
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -184,6 +184,7 @@ datafusion.catalog.newlines_in_values false
 datafusion.execution.batch_size 8192
 datafusion.execution.coalesce_batches true
 datafusion.execution.collect_statistics false
+datafusion.execution.dont_parallelize_sort_preserving_merge_exec_inputs false
 datafusion.execution.enable_recursive_ctes true
 datafusion.execution.enforce_batch_size_in_joins false
 datafusion.execution.keep_partition_by_columns false
@@ -281,6 +282,7 @@ datafusion.catalog.newlines_in_values false Specifies whether newlines in (quote
 datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption
 datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting
 datafusion.execution.collect_statistics false Should DataFusion collect statistics after listing files
+datafusion.execution.dont_parallelize_sort_preserving_merge_exec_inputs false Cube: If true, makes SortPreservingMergeExec not parallelize merge sort inputs, to save memory consumption of intermediate batches (1 in the mpsc channel buffer, and 1 in the subtask waiting to be pushed onto the buffer).  If false, maintains upstream DF behavior.
 datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs
 datafusion.execution.enforce_batch_size_in_joins false Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower.
 datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches