Skip to content

Commit 0e9423a

Browse files
committed
feat: enable iceberg compat tests, more tests for complex types
1 parent 9ef8075 commit 0e9423a

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2723,6 +2723,9 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
27232723
if isCometSink(op) && op.output.forall(a =>
27242724
supportedDataType(
27252725
a.dataType,
2726+
// Complex types are supported if:
2727+
// - Native datafusion reader enabled (experimental) OR
2728+
// - conversion from Parquet/JSON enabled
27262729
allowComplex =
27272730
usingDataFusionParquetExec(conf) || CometConf.COMET_CONVERT_FROM_PARQUET_ENABLED
27282731
.get(conf) || CometConf.COMET_CONVERT_FROM_JSON_ENABLED.get(conf))) =>

spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import org.apache.spark.unsafe.types.UTF8String
4646
import com.google.common.primitives.UnsignedLong
4747

4848
import org.apache.comet.{CometConf, CometSparkSessionExtensions}
49+
import org.apache.comet.CometConf.SCAN_NATIVE_ICEBERG_COMPAT
4950
import org.apache.comet.CometSparkSessionExtensions.{isSpark40Plus, usingDataFusionParquetExec}
5051

5152
abstract class ParquetReadSuite extends CometTestBase {
@@ -121,13 +122,24 @@ abstract class ParquetReadSuite extends CometTestBase {
121122
}
122123

123124
test("unsupported Spark schema") {
124-
Seq(
125-
Seq(StructField("f1", IntegerType), StructField("f2", BooleanType)) -> true,
126-
Seq(StructField("f1", IntegerType), StructField("f2", ArrayType(IntegerType))) -> false,
127-
Seq(
128-
StructField("f1", MapType(keyType = LongType, valueType = StringType)),
129-
StructField("f2", ArrayType(DoubleType))) -> false).foreach { case (schema, expected) =>
125+
val schemaDDLs =
126+
Seq("f1 int, f2 boolean", "f1 int, f2 array<int>", "f1 map<long, string>, f2 array<double>")
127+
.map(s => StructType.fromDDL(s))
128+
129+
// Arrays are supported for native iceberg compat and for Parquet V1
130+
val cometScanExecSupported =
131+
if (sys.env.get("COMET_PARQUET_SCAN_IMPL").contains(SCAN_NATIVE_ICEBERG_COMPAT) && this
132+
.isInstanceOf[ParquetReadV1Suite])
133+
Seq(true, true, false)
134+
else Seq(true, false, false)
135+
136+
val cometBatchScanExecSupported = Seq(true, false, false)
137+
138+
schemaDDLs.zip(cometScanExecSupported).foreach { case (schema, expected) =>
130139
assert(CometScanExec.isSchemaSupported(StructType(schema)) == expected)
140+
}
141+
142+
schemaDDLs.zip(cometBatchScanExecSupported).foreach { case (schema, expected) =>
131143
assert(CometBatchScanExec.isSchemaSupported(StructType(schema)) == expected)
132144
}
133145
}

0 commit comments

Comments
 (0)