
Commit 9b9d1e1 (2 parents: cb53370 + 19f07b0)

Merge remote-tracking branch 'upstream/main'

# Conflicts:
#   native/core/src/parquet/parquet_support.rs

File tree: 25 files changed (+1301 / -2730 lines)


common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -614,7 +614,7 @@ object CometConf extends ShimCometConf {
         "Comet is not currently fully compatible with Spark for all datatypes. " +
           s"Set this config to true to allow them anyway. $COMPAT_GUIDE.")
       .booleanConf
-      .createWithDefault(true)
+      .createWithDefault(false)

   val COMET_EXPR_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
     conf("spark.comet.expression.allowIncompatible")
```

dev/changelog/0.6.0.md

Lines changed: 79 additions & 0 deletions
New file contents:

```markdown
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# DataFusion Comet 0.6.0 Changelog

**Fixed bugs:**

- fix: cast timestamp to decimal is unsupported [#1281](https://github.com/apache/datafusion-comet/pull/1281) (wForget)
- fix: partially fix consistency issue of hash functions with decimal input [#1295](https://github.com/apache/datafusion-comet/pull/1295) (wForget)
- fix: Improve testing for array_remove and fallback to Spark for unsupported types [#1308](https://github.com/apache/datafusion-comet/pull/1308) (andygrove)
- fix: address post merge comet-parquet-exec review comments [#1327](https://github.com/apache/datafusion-comet/pull/1327) (parthchandra)
- fix: memory pool error type [#1346](https://github.com/apache/datafusion-comet/pull/1346) (kazuyukitanimura)
- fix: Fall back to Spark when hashing decimals with precision > 18 [#1325](https://github.com/apache/datafusion-comet/pull/1325) (andygrove)
- fix: expressions doc for ArrayRemove [#1356](https://github.com/apache/datafusion-comet/pull/1356) (kazuyukitanimura)
- fix: pass scale to DF round in spark_round [#1341](https://github.com/apache/datafusion-comet/pull/1341) (cht42)
- fix: Mark cast from float/double to decimal as incompatible [#1372](https://github.com/apache/datafusion-comet/pull/1372) (andygrove)
- fix: Passthrough condition in StaticInvoke case block [#1392](https://github.com/apache/datafusion-comet/pull/1392) (EmilyMatt)
- fix: disable checking for uint_8 and uint_16 if complex type readers are enabled [#1376](https://github.com/apache/datafusion-comet/pull/1376) (parthchandra)

**Performance related:**

- perf: improve performance of update metrics [#1329](https://github.com/apache/datafusion-comet/pull/1329) (wForget)
- perf: Use DataFusion FilterExec for experimental native scans [#1395](https://github.com/apache/datafusion-comet/pull/1395) (mbutrovich)

**Implemented enhancements:**

- feat: Add HasRowIdMapping interface [#1288](https://github.com/apache/datafusion-comet/pull/1288) (viirya)
- feat: Upgrade to DataFusion 45 [#1364](https://github.com/apache/datafusion-comet/pull/1364) (andygrove)
- feat: Add fair unified memory pool [#1369](https://github.com/apache/datafusion-comet/pull/1369) (kazuyukitanimura)
- feat: Add unbounded memory pool [#1386](https://github.com/apache/datafusion-comet/pull/1386) (kazuyukitanimura)
- feat: make random seed configurable in fuzz-testing [#1401](https://github.com/apache/datafusion-comet/pull/1401) (wForget)
- feat: override executor overhead memory only when comet unified memory manager is disabled [#1379](https://github.com/apache/datafusion-comet/pull/1379) (wForget)

**Documentation updates:**

- docs: Fix links and provide complete benchmarking scripts [#1284](https://github.com/apache/datafusion-comet/pull/1284) (andygrove)
- doc: update memory tuning guide [#1394](https://github.com/apache/datafusion-comet/pull/1394) (kazuyukitanimura)

**Other:**

- chore: Start 0.6.0 development [#1286](https://github.com/apache/datafusion-comet/pull/1286) (andygrove)
- minor: update compatibility [#1303](https://github.com/apache/datafusion-comet/pull/1303) (kazuyukitanimura)
- chore: extract conversion_funcs, conditional_funcs, bitwise_funcs and array_funcs expressions to folders based on spark grouping [#1223](https://github.com/apache/datafusion-comet/pull/1223) (rluvaton)
- chore: extract math_funcs expressions to folders based on spark grouping [#1219](https://github.com/apache/datafusion-comet/pull/1219) (rluvaton)
- chore: merge comet-parquet-exec branch into main [#1318](https://github.com/apache/datafusion-comet/pull/1318) (andygrove)
- Feat: Support array_intersect function [#1271](https://github.com/apache/datafusion-comet/pull/1271) (erenavsarogullari)
- build(deps): bump pprof from 0.13.0 to 0.14.0 in /native [#1319](https://github.com/apache/datafusion-comet/pull/1319) (dependabot[bot])
- chore: Fix merge conflicts from merging comet-parquet-exec into main [#1320](https://github.com/apache/datafusion-comet/pull/1320) (andygrove)
- chore: Revert accidental re-introduction of off-heap memory requirement [#1326](https://github.com/apache/datafusion-comet/pull/1326) (andygrove)
- chore: Fix merge conflicts from merging comet-parquet-exec into main [#1323](https://github.com/apache/datafusion-comet/pull/1323) (mbutrovich)
- Feat: Support array_join function [#1290](https://github.com/apache/datafusion-comet/pull/1290) (erenavsarogullari)
- Fix missing slash in spark script [#1334](https://github.com/apache/datafusion-comet/pull/1334) (xleoken)
- chore: Refactor QueryPlanSerde to allow logic to be moved to individual classes per expression [#1331](https://github.com/apache/datafusion-comet/pull/1331) (andygrove)
- build: re-enable upload-test-reports for macos-13 runner [#1335](https://github.com/apache/datafusion-comet/pull/1335) (viirya)
- chore: Upgrade to Arrow 53.4.0 [#1338](https://github.com/apache/datafusion-comet/pull/1338) (andygrove)
- Feat: Support arrays_overlap function [#1312](https://github.com/apache/datafusion-comet/pull/1312) (erenavsarogullari)
- chore: Move all array\_\* serde to new framework, use correct INCOMPAT config [#1349](https://github.com/apache/datafusion-comet/pull/1349) (andygrove)
- chore: Prepare for DataFusion 45 (bump to DataFusion rev 5592834 + Arrow 54.0.0) [#1332](https://github.com/apache/datafusion-comet/pull/1332) (andygrove)
- minor: commit compatibility doc [#1358](https://github.com/apache/datafusion-comet/pull/1358) (kazuyukitanimura)
- minor: update fuzz dependency [#1357](https://github.com/apache/datafusion-comet/pull/1357) (kazuyukitanimura)
- chore: Remove redundant processing from exprToProtoInternal [#1351](https://github.com/apache/datafusion-comet/pull/1351) (andygrove)
- chore: Adding an optional `hdfs` crate [#1377](https://github.com/apache/datafusion-comet/pull/1377) (comphead)
- chore: Refactor aggregate expression serde [#1380](https://github.com/apache/datafusion-comet/pull/1380) (andygrove)
```

docs/source/user-guide/compatibility.md

Lines changed: 31 additions & 0 deletions
```diff
@@ -17,12 +17,43 @@ specific language governing permissions and limitations
 under the License.
 -->

+<!--
+TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
+(docs/templates/compatibility-template.md) AND NOT THE GENERATED FILE
+(docs/source/user-guide/compatibility.md) OTHERWISE YOUR CHANGES MAY BE LOST
+-->
+
 # Compatibility Guide

 Comet aims to provide consistent results with the version of Apache Spark that is being used.

 This guide offers information about areas of functionality where there are known differences.

+## Parquet Scans
+
+Comet currently has three distinct implementations of the Parquet scan operator. The configuration property
+`spark.comet.scan.impl` is used to select an implementation.
+
+| Implementation          | Description |
+| ----------------------- | ----------- |
+| `native_comet`          | This is the default implementation. It provides strong compatibility with Spark but does not support complex types. |
+| `native_datafusion`     | This implementation delegates to DataFusion's `ParquetExec`. |
+| `native_iceberg_compat` | This implementation also delegates to DataFusion's `ParquetExec` but uses a hybrid approach of JVM and native code. This scan is designed to be integrated with Iceberg in the future. |
+
+The new (and currently experimental) `native_datafusion` and `native_iceberg_compat` scans are being added to
+provide the following benefits over the `native_comet` implementation:
+
+- Leverage the DataFusion community's ongoing improvements to `ParquetExec`
+- Provide support for reading complex types (structs, arrays, and maps)
+- Remove the use of reusable mutable buffers in Comet, which are complex to maintain
+
+These new implementations are not yet complete. Some of the current limitations are:
+
+- Scanning Parquet files containing unsigned 8-bit or 16-bit integers can produce results that don't match Spark. By default,
+  Comet will fall back to Spark when using these scan implementations to read Parquet files containing 8-bit or 16-bit
+  integers. This behavior can be disabled by setting `spark.comet.scan.allowIncompatible=true`.
+- These implementations do not yet fully support timestamps, decimals, or complex types.
+
 ## ANSI mode

 Comet currently ignores ANSI mode in most cases, and therefore can produce different results than Spark. By default,
```
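
The section above introduces `spark.comet.scan.impl`. A hedged sketch of selecting one of the experimental scans, assuming the property can be set when the session is created (the property name and its values come from the table above; the session setup and file path are illustrative):

```scala
import org.apache.spark.sql.SparkSession

// Sketch: select the experimental DataFusion-backed Parquet scan.
// Values per the table above: native_comet (default), native_datafusion,
// native_iceberg_compat. The path below is a placeholder.
val spark = SparkSession
  .builder()
  .appName("comet-scan-impl")
  .config("spark.comet.scan.impl", "native_datafusion")
  .getOrCreate()

// Parquet reads now go through DataFusion's ParquetExec where supported,
// falling back to Spark for the cases listed under the limitations above.
val df = spark.read.parquet("/path/to/data.parquet")
```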

docs/source/user-guide/configs.md

Lines changed: 7 additions & 1 deletion
```diff
@@ -17,6 +17,12 @@ specific language governing permissions and limitations
 under the License.
 -->

+<!--
+TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
+(docs/templates/configs-template.md) AND NOT THE GENERATED FILE
+(docs/source/user-guide/configs.md) OTHERWISE YOUR CHANGES MAY BE LOST
+-->
+
 # Comet Configuration Settings

 Comet provides the following configuration settings.
@@ -76,7 +82,7 @@ Comet provides the following configuration settings.
 | spark.comet.parquet.read.parallel.io.enabled | Whether to enable Comet's parallel reader for Parquet files. The parallel reader reads ranges of consecutive data in a file in parallel. It is faster for large files and row groups but uses more resources. | true |
 | spark.comet.parquet.read.parallel.io.thread-pool.size | The maximum number of parallel threads the parallel reader will use in a single executor. For executors configured with a smaller number of cores, use a smaller number. | 16 |
 | spark.comet.regexp.allowIncompatible | Comet is not currently fully compatible with Spark for all regular expressions. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
-| spark.comet.scan.allowIncompatible | Comet is not currently fully compatible with Spark for all datatypes. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | true |
+| spark.comet.scan.allowIncompatible | Comet is not currently fully compatible with Spark for all datatypes. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
 | spark.comet.scan.enabled | Whether to enable native scans. When this is turned on, Spark will use Comet to read supported data sources (currently only Parquet is supported natively). Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | true |
 | spark.comet.scan.preFetch.enabled | Whether to enable pre-fetching feature of CometScan. | false |
 | spark.comet.scan.preFetch.threadNum | The number of threads running pre-fetching for CometScan. Effective if spark.comet.scan.preFetch.enabled is enabled. Note that more pre-fetching threads means more memory requirement to store pre-fetched row groups. | 2 |
```
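
Several of the scan-related settings in this table interact. A hedged sketch of tuning the parallel Parquet reader for an executor with few cores, using keys and defaults from the table above (the chosen values are examples, not recommendations):

```scala
import org.apache.spark.SparkConf

// Sketch: shrink the parallel reader's thread pool from its default of 16,
// as the table suggests for executors with fewer cores, and opt into
// pre-fetching. Keys and defaults come from the table; values are examples.
val conf = new SparkConf()
  .set("spark.comet.parquet.read.parallel.io.enabled", "true")         // default: true
  .set("spark.comet.parquet.read.parallel.io.thread-pool.size", "4")   // default: 16
  .set("spark.comet.scan.preFetch.enabled", "true")                    // default: false
  .set("spark.comet.scan.preFetch.threadNum", "2")                     // default: 2
```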

docs/templates/compatibility-template.md

Lines changed: 33 additions & 2 deletions
```diff
@@ -17,12 +17,43 @@
 under the License.
 -->

+<!--
+TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
+(docs/templates/compatibility-template.md) AND NOT THE GENERATED FILE
+(docs/source/user-guide/compatibility.md) OTHERWISE YOUR CHANGES MAY BE LOST
+-->
+
 # Compatibility Guide

 Comet aims to provide consistent results with the version of Apache Spark that is being used.

 This guide offers information about areas of functionality where there are known differences.

+## Parquet Scans
+
+Comet currently has three distinct implementations of the Parquet scan operator. The configuration property
+`spark.comet.scan.impl` is used to select an implementation.
+
+| Implementation          | Description |
+| ----------------------- | ----------- |
+| `native_comet`          | This is the default implementation. It provides strong compatibility with Spark but does not support complex types. |
+| `native_datafusion`     | This implementation delegates to DataFusion's `ParquetExec`. |
+| `native_iceberg_compat` | This implementation also delegates to DataFusion's `ParquetExec` but uses a hybrid approach of JVM and native code. This scan is designed to be integrated with Iceberg in the future. |
+
+The new (and currently experimental) `native_datafusion` and `native_iceberg_compat` scans are being added to
+provide the following benefits over the `native_comet` implementation:
+
+- Leverage the DataFusion community's ongoing improvements to `ParquetExec`
+- Provide support for reading complex types (structs, arrays, and maps)
+- Remove the use of reusable mutable buffers in Comet, which are complex to maintain
+
+These new implementations are not yet complete. Some of the current limitations are:
+
+- Scanning Parquet files containing unsigned 8-bit or 16-bit integers can produce results that don't match Spark. By default,
+  Comet will fall back to Spark when using these scan implementations to read Parquet files containing 8-bit or 16-bit
+  integers. This behavior can be disabled by setting `spark.comet.scan.allowIncompatible=true`.
+- These implementations do not yet fully support timestamps, decimals, or complex types.
+
 ## ANSI mode

 Comet currently ignores ANSI mode in most cases, and therefore can produce different results than Spark. By default,
@@ -47,7 +78,7 @@ will fall back to Spark but can be enabled by setting `spark.comet.expression.al

 ## Array Expressions

-Comet has experimental support for a number of array expressions. These are experimental and currently marked
+Comet has experimental support for a number of array expressions. These are experimental and currently marked
 as incompatible and can be enabled by setting `spark.comet.expression.allowIncompatible=true`.

 ## Regular Expressions
@@ -82,5 +113,5 @@ The following cast operations are not compatible with Spark for all inputs and a

 ### Unsupported Casts

-Any cast not listed in the previous tables is currently unsupported. We are working on adding more. See the
+Any cast not listed in the previous tables is currently unsupported. We are working on adding more. See the
 [tracking issue](https://github.com/apache/datafusion-comet/issues/286) for more details.
```

(The last two hunks appear to be whitespace-only changes; the visible text is unchanged.)

docs/templates/configs-template.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -17,6 +17,12 @@
 under the License.
 -->

+<!--
+TO MODIFY THIS CONTENT MAKE SURE THAT YOU MAKE YOUR CHANGES TO THE TEMPLATE FILE
+(docs/templates/configs-template.md) AND NOT THE GENERATED FILE
+(docs/source/user-guide/configs.md) OTHERWISE YOUR CHANGES MAY BE LOST
+-->
+
 # Comet Configuration Settings

 Comet provides the following configuration settings.
```

fuzz-testing/src/main/scala/org/apache/comet/fuzz/Main.scala

Lines changed: 12 additions & 2 deletions
```diff
@@ -33,6 +33,8 @@ class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
     val numFiles: ScallopOption[Int] =
       opt[Int](required = true, descr = "Number of files to generate")
     val numRows: ScallopOption[Int] = opt[Int](required = true, descr = "Number of rows per file")
+    val randomSeed: ScallopOption[Long] =
+      opt[Long](required = false, descr = "Random seed to use")
     val generateArrays: ScallopOption[Boolean] =
       opt[Boolean](required = false, descr = "Whether to generate arrays")
     val generateStructs: ScallopOption[Boolean] =
@@ -48,6 +50,8 @@ class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
       opt[Int](required = false, descr = "Number of input files to use")
     val numQueries: ScallopOption[Int] =
       opt[Int](required = true, descr = "Number of queries to generate")
+    val randomSeed: ScallopOption[Long] =
+      opt[Long](required = false, descr = "Random seed to use")
   }
   addSubcommand(generateQueries)
   object runQueries extends Subcommand("run") {
@@ -67,11 +71,13 @@ object Main {
       .getOrCreate()

   def main(args: Array[String]): Unit = {
-    val r = new Random(42)
-
     val conf = new Conf(args.toIndexedSeq)
     conf.subcommand match {
       case Some(conf.generateData) =>
+        val r = conf.generateData.randomSeed.toOption match {
+          case Some(seed) => new Random(seed)
+          case None => new Random()
+        }
         val options = DataGenOptions(
           allowNull = true,
           generateArray = conf.generateData.generateArrays(),
@@ -87,6 +93,10 @@ object Main {
           options)
         }
       case Some(conf.generateQueries) =>
+        val r = conf.generateQueries.randomSeed.toOption match {
+          case Some(seed) => new Random(seed)
+          case None => new Random()
+        }
         QueryGen.generateRandomQueries(
           r,
           spark,
```
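
The new `randomSeed` option exists so fuzz runs are reproducible: two `scala.util.Random` instances constructed from the same seed yield identical sequences, so a failing run can be replayed by passing the seed that produced it. A minimal standalone sketch of that property:

```scala
import scala.util.Random

// Same seed => identical sequence, so a failing fuzz run can be replayed
// by re-running with the seed it used. Omitting the seed gives a fresh,
// nondeterministic run (the `case None => new Random()` branch above).
val a = new Random(42L)
val b = new Random(42L)
assert(Seq.fill(5)(a.nextInt()) == Seq.fill(5)(b.nextInt()))
```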

native/Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default.

native/core/Cargo.toml

Lines changed: 0 additions & 1 deletion
```diff
@@ -76,7 +76,6 @@ datafusion-comet-spark-expr = { workspace = true }
 datafusion-comet-proto = { workspace = true }
 object_store = { workspace = true }
 url = { workspace = true }
-chrono = { workspace = true }
 parking_lot = "0.12.3"
 datafusion-comet-objectstore-hdfs = { path = "../hdfs", optional = true}
```
