docs: update docs and tuning guide related to native shuffle (#2487)

mbutrovich · web-flow · commit b351e3391a2e · 2025-09-29T12:14:33.000-04:00
diff --git a/docs/source/user-guide/latest/tuning.md b/docs/source/user-guide/latest/tuning.md
@@ -208,14 +208,14 @@ back to Spark for shuffle operations.
 
 #### Native Shuffle
 
-Comet provides a fully native shuffle implementation, which generally provides the best performance. However,
-native shuffle currently only supports `HashPartitioning` and `SinglePartitioning` and has some restrictions on
-supported data types.
+Comet provides a fully native shuffle implementation, which generally provides the best performance. Native shuffle
+supports `HashPartitioning`, `RangePartitioning`, and `SinglePartitioning`, but currently only supports primitive-type
+partitioning keys. Columns that are not partitioning keys may contain complex types like maps, structs, and arrays.
 
 #### Columnar (JVM) Shuffle
 
 Comet Columnar shuffle is JVM-based and supports `HashPartitioning`, `RoundRobinPartitioning`, `RangePartitioning`, and
-`SinglePartitioning`. This shuffle implementation supports more data types than native shuffle.
+`SinglePartitioning`. This shuffle implementation supports complex data types as partitioning keys.
 
 ### Shuffle Compression
 
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
@@ -769,9 +769,9 @@ case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {
     /**
      * Determine which data types are supported as partition columns in native shuffle.
      *
-     * For Hash Partition this defines the key that determines how data should be collocated for
-     * operations like `groupByKey`, `reduceByKey` or `join`. Native code does not support hashing
-     * complex types, see hash_funcs/utils.rs
+     * For HashPartitioning this defines the key that determines how data should be collocated for
+     * operations like `groupByKey`, `reduceByKey`, or `join`. Native code does not support
+     * hashing complex types, see hash_funcs/utils.rs
      */
     def supportedHashPartitioningDataType(dt: DataType): Boolean = dt match {
       case _: BooleanType | _: ByteType | _: ShortType | _: IntegerType | _: LongType |
@@ -782,6 +782,13 @@ case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {
         false
     }
 
+    /**
+     * Determine which data types are supported as partition columns in native shuffle.
+     *
+     * For RangePartitioning this defines the key whose sort order determines how rows are split
+     * into ranges for operations like `orderBy` or `repartitionByRange`. Native code does not
+     * support sorting complex types.
+     */
     def supportedRangePartitioningDataType(dt: DataType): Boolean = dt match {
       case _: BooleanType | _: ByteType | _: ShortType | _: IntegerType | _: LongType |
           _: FloatType | _: DoubleType | _: StringType | _: BinaryType | _: TimestampType |