@@ -29,7 +29,7 @@ import org.apache.spark.sql.functions._
2929
3030import org .apache .comet .CometSparkSessionExtensions .{isSpark35Plus , isSpark40Plus }
3131import org .apache .comet .DataTypeSupport .isComplexType
32- import org .apache .comet .serde .CometArrayExcept
32+ import org .apache .comet .serde .{ CometArrayExcept , CometArrayRemove , CometFlatten }
3333import org .apache .comet .testing .{DataGenOptions , ParquetGenerator }
3434
3535class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -71,7 +71,11 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
7171 val table = spark.read.parquet(filename)
7272 table.createOrReplaceTempView(" t1" )
7373 // test with array of each column
74- for (fieldName <- table.schema.fieldNames) {
74+ val fieldNames =
75+ table.schema.fields
76+ .filter(field => CometArrayRemove .isTypeSupported(field.dataType))
77+ .map(_.name)
78+ for (fieldName <- fieldNames) {
7579 sql(s " SELECT array( $fieldName, $fieldName) as a, $fieldName as b FROM t1 " )
7680 .createOrReplaceTempView(" t2" )
7781 val df = sql(" SELECT array_remove(a, b) FROM t2" )
@@ -623,4 +627,69 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
623627 }
624628 }
625629 }
630+
test("flatten - test all types (native Parquet reader)") {
  withTempDir { dir =>
    val parquetPath = new Path(dir.toURI.toString, "test.parquet").toString
    val rng = new Random(42)
    // Generate the input file with Comet disabled so Spark's writer produces it.
    withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
      val options = DataGenOptions(
        allowNull = true,
        generateNegativeZero = true,
        generateArray = false,
        generateStruct = false,
        generateMap = false)
      ParquetGenerator.makeParquetFile(rng, spark, parquetPath, 100, options)
    }
    val table = spark.read.parquet(parquetPath)
    table.createOrReplaceTempView("t1")
    // Restrict to element types that Comet's flatten serde declares support for.
    val supportedColumns = table.schema.fields.collect {
      case field if CometFlatten.isTypeSupported(field.dataType) => field.name
    }
    supportedColumns.foreach { fieldName =>
      // Build a nested array column from the source column, then flatten it.
      sql(s"SELECT array(array($fieldName, $fieldName), array($fieldName)) as a FROM t1")
        .createOrReplaceTempView("t2")
      checkSparkAnswerAndOperator(sql("SELECT flatten(a) FROM t2"))
    }
  }
}
662+
test("flatten - test all types (convert from Parquet)") {
  withTempDir { dir =>
    val parquetPath = new Path(dir.toURI.toString, "test.parquet").toString
    val rng = new Random(42)
    // Generate the input file with Comet disabled so Spark's writer produces it.
    withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
      ParquetGenerator.makeParquetFile(
        rng,
        spark,
        parquetPath,
        100,
        DataGenOptions(
          allowNull = true,
          generateNegativeZero = true,
          generateArray = true,
          generateStruct = true,
          generateMap = false))
    }
    // Exercise the Spark-scan-to-Arrow conversion path rather than the native scan.
    withSQLConf(
      CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
      CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true",
      CometConf.COMET_CONVERT_FROM_PARQUET_ENABLED.key -> "true") {
      val table = spark.read.parquet(parquetPath)
      table.createOrReplaceTempView("t1")
      // Restrict to element types that Comet's flatten serde declares support for.
      val supportedColumns = table.schema.fields.collect {
        case field if CometFlatten.isTypeSupported(field.dataType) => field.name
      }
      supportedColumns.foreach { fieldName =>
        // Build a nested array column from the source column, then flatten it.
        sql(s"SELECT array(array($fieldName, $fieldName), array($fieldName)) as a FROM t1")
          .createOrReplaceTempView("t2")
        // Only verify results here; the operator may fall back on complex types.
        checkSparkAnswer(sql("SELECT flatten(a) FROM t2"))
      }
    }
  }
}
626695}
0 commit comments