apache · andygrove · Nov 26, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
@@ -118,6 +118,7 @@ jobs:
               org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
           - name: "parquet"
             value: |
+              org.apache.comet.parquet.CometParquetWriterSuite
               org.apache.comet.parquet.ParquetReadV1Suite
               org.apache.comet.parquet.ParquetReadV2Suite
               org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite

diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml
@@ -83,6 +83,7 @@ jobs:
               org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
           - name: "parquet"
             value: |
+              org.apache.comet.parquet.CometParquetWriterSuite
               org.apache.comet.parquet.ParquetReadV1Suite
               org.apache.comet.parquet.ParquetReadV2Suite
               org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite

diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -100,6 +100,17 @@ object CometConf extends ShimCometConf {
     .booleanConf
     .createWithDefault(true)
 
+  val COMET_NATIVE_PARQUET_WRITE_ENABLED: ConfigEntry[Boolean] =
+    conf("spark.comet.parquet.write.enabled")
+      .category(CATEGORY_TESTING)
+      .doc(
+        "Whether to enable native Parquet write through Comet. When enabled, " +
+          "Comet will intercept Parquet write operations and execute them natively. This " +
+          "feature is highly experimental and only partially implemented. It should not " +
+          "be used in production.")
+      .booleanConf
+      .createWithDefault(false)
+
   val SCAN_NATIVE_COMET = "native_comet"
   val SCAN_NATIVE_DATAFUSION = "native_datafusion"
   val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"

diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md
@@ -142,6 +142,7 @@ These settings can be used to determine which parts of the plan are accelerated
 | `spark.comet.exec.onHeap.enabled` | Whether to allow Comet to run in on-heap mode. Required for running Spark SQL tests. It can be overridden by the environment variable `ENABLE_COMET_ONHEAP`. | false |
 | `spark.comet.exec.onHeap.memoryPool` | The type of memory pool to be used for Comet native execution when running Spark in on-heap mode. Available pool types are `greedy`, `fair_spill`, `greedy_task_shared`, `fair_spill_task_shared`, `greedy_global`, `fair_spill_global`, and `unbounded`. | greedy_task_shared |
 | `spark.comet.memoryOverhead` | The amount of additional memory to be allocated per executor process for Comet, in MiB, when running Spark in on-heap mode. | 1024 MiB |
+| `spark.comet.parquet.write.enabled` | Whether to enable native Parquet write through Comet. When enabled, Comet will intercept Parquet write operations and execute them natively. This feature is highly experimental and only partially implemented. It should not be used in production. | false |
 | `spark.comet.sparkToColumnar.enabled` | Whether to enable Spark to Arrow columnar conversion. When this is turned on, Comet will convert operators in `spark.comet.sparkToColumnar.supportedOperatorList` into Arrow columnar format before processing. This is an experimental feature and has known issues with non-UTC timezones. | false |
 | `spark.comet.sparkToColumnar.supportedOperatorList` | A comma-separated list of operators that will be converted to Arrow columnar format when `spark.comet.sparkToColumnar.enabled` is true. | Range,InMemoryTableScan,RDDScan |
 | `spark.comet.testing.strict` | Experimental option to enable strict testing, which will fail tests that could be more comprehensive, such as checking for a specific fallback reason. It can be overridden by the environment variable `ENABLE_COMET_STRICT_TESTING`. | false |

diff --git a/docs/source/user-guide/latest/operators.md b/docs/source/user-guide/latest/operators.md
@@ -22,25 +22,26 @@
 The following Spark operators are currently replaced with native versions. Query stages that contain any operators
 not supported by Comet will fall back to regular Spark execution.
 
-| Operator                | Spark-Compatible? | Compatibility Notes                                                                                                |
-| ----------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------ |
-| BatchScanExec           | Yes               | Supports Parquet files and Apache Iceberg Parquet scans. See the [Comet Compatibility Guide] for more information. |
-| BroadcastExchangeExec   | Yes               |                                                                                                                    |
-| BroadcastHashJoinExec   | Yes               |                                                                                                                    |
-| ExpandExec              | Yes               |                                                                                                                    |
-| FileSourceScanExec      | Yes               | Supports Parquet files. See the [Comet Compatibility Guide] for more information.                                  |
-| FilterExec              | Yes               |                                                                                                                    |
-| GlobalLimitExec         | Yes               |                                                                                                                    |
-| HashAggregateExec       | Yes               |                                                                                                                    |
-| LocalLimitExec          | Yes               |                                                                                                                    |
-| LocalTableScanExec      | No                | Experimental and disabled by default.                                                                              |
-| ObjectHashAggregateExec | Yes               | Supports a limited number of aggregates, such as `bloom_filter_agg`.                                               |
-| ProjectExec             | Yes               |                                                                                                                    |
-| ShuffleExchangeExec     | Yes               |                                                                                                                    |
-| ShuffledHashJoinExec    | Yes               |                                                                                                                    |
-| SortExec                | Yes               |                                                                                                                    |
-| SortMergeJoinExec       | Yes               |                                                                                                                    |
-| UnionExec               | Yes               |                                                                                                                    |
-| WindowExec              | No                | Disabled by default due to known correctness issues.                                                               |
+| Operator                          | Spark-Compatible? | Compatibility Notes                                                                                                |
+| --------------------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------ |
+| BatchScanExec                     | Yes               | Supports Parquet files and Apache Iceberg Parquet scans. See the [Comet Compatibility Guide] for more information. |
+| BroadcastExchangeExec             | Yes               |                                                                                                                    |
+| BroadcastHashJoinExec             | Yes               |                                                                                                                    |
+| ExpandExec                        | Yes               |                                                                                                                    |
+| FileSourceScanExec                | Yes               | Supports Parquet files. See the [Comet Compatibility Guide] for more information.                                  |
+| FilterExec                        | Yes               |                                                                                                                    |
+| GlobalLimitExec                   | Yes               |                                                                                                                    |
+| HashAggregateExec                 | Yes               |                                                                                                                    |
+| InsertIntoHadoopFsRelationCommand | No                | Experimental support for native Parquet writes. Disabled by default (`spark.comet.parquet.write.enabled`).         |
+| LocalLimitExec                    | Yes               |                                                                                                                    |
+| LocalTableScanExec                | No                | Experimental and disabled by default.                                                                              |
+| ObjectHashAggregateExec           | Yes               | Supports a limited number of aggregates, such as `bloom_filter_agg`.                                               |
+| ProjectExec                       | Yes               |                                                                                                                    |
+| ShuffleExchangeExec               | Yes               |                                                                                                                    |
+| ShuffledHashJoinExec              | Yes               |                                                                                                                    |
+| SortExec                          | Yes               |                                                                                                                    |
+| SortMergeJoinExec                 | Yes               |                                                                                                                    |
+| UnionExec                         | Yes               |                                                                                                                    |
+| WindowExec                        | No                | Disabled by default due to known correctness issues.                                                               |
 
 [Comet Compatibility Guide]: compatibility.md
diff --git a/native/core/src/execution/operators/mod.rs b/native/core/src/execution/operators/mod.rs
@@ -29,6 +29,8 @@ mod copy;
 mod expand;
 pub use expand::ExpandExec;
 mod iceberg_scan;
+mod parquet_writer;
+pub use parquet_writer::ParquetWriterExec;
 mod scan;
 
 /// Error returned during executing operators.