2020package org .apache .comet
2121
2222import java .io .File
23+ import java .text .SimpleDateFormat
2324
2425import scala .util .Random
2526
@@ -32,6 +33,8 @@ import org.apache.spark.sql.comet.{CometNativeScanExec, CometScanExec}
3233import org .apache .spark .sql .execution .SparkPlan
3334import org .apache .spark .sql .execution .adaptive .AdaptiveSparkPlanHelper
3435import org .apache .spark .sql .internal .SQLConf
36+ import org .apache .spark .sql .internal .SQLConf .ParquetOutputTimestampType
37+ import org .apache .spark .sql .types .{ArrayType , DataType , DataTypes , StructType }
3538
3639import org .apache .comet .testing .{DataGenOptions , ParquetGenerator }
3740
@@ -57,7 +60,13 @@ class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
5760 CometConf .COMET_ENABLED .key -> " false" ,
5861 SQLConf .SESSION_LOCAL_TIMEZONE .key -> defaultTimezone) {
val options =
  DataGenOptions(
    generateArray = true,
    generateStruct = true,
    generateNegativeZero = false,
    // override base date due to known issues with experimental scans
    //
    // The pattern deliberately uses yyyy (calendar year), dd (day of month) and HH (hour 0-23).
    // The upper-case YYYY and DD and the lower-case hh mean week-year, day-of-year and
    // 12-hour clock respectively, which would not parse the literal below to
    // 2024-05-25 12:34:56.
    baseDate =
      new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2024-05-25 12:34:56").getTime)
6170 ParquetGenerator .makeParquetFile(random, spark, filename, 1000 , options)
6271 }
6372 }
@@ -166,6 +175,75 @@ class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
166175 }
167176 }
168177
test("Parquet temporal types written as INT96") {
  // INT96 support has known issues in the new experimental scans, so the test is cancelled
  // (via assume) whenever CometConf reports that an experimental native scan is in use.
  // Tracking issue: https://github.com/apache/datafusion-comet/issues/1441
  assume(!CometConf.isExperimentalNativeScan)

  // Run the shared temporal-type check with INT96 as the Parquet output timestamp type.
  testParquetTemporalTypes(ParquetOutputTimestampType.INT96)
}
186+
test("Parquet temporal types written as TIMESTAMP_MICROS") {
  // Run the shared temporal-type check with TIMESTAMP_MICROS as the Parquet output
  // timestamp type.
  val timestampType = ParquetOutputTimestampType.TIMESTAMP_MICROS
  testParquetTemporalTypes(timestampType)
}
190+
test("Parquet temporal types written as TIMESTAMP_MILLIS") {
  // Run the shared temporal-type check with TIMESTAMP_MILLIS as the Parquet output
  // timestamp type.
  val timestampType = ParquetOutputTimestampType.TIMESTAMP_MILLIS
  testParquetTemporalTypes(timestampType)
}
194+
/**
 * Generates a Parquet file with Comet disabled, using the given output timestamp type, and
 * then reads it back with Comet enabled under every combination of session time zone and
 * the INT96 / TIMESTAMP_NTZ reader settings. For each top-level column whose type contains a
 * date or timestamp, a `SELECT ... ORDER BY ...` query is passed to `checkSparkAnswer`.
 *
 * @param outputTimestampType
 *   value applied to `SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE` while the file is written
 */
private def testParquetTemporalTypes(
    outputTimestampType: ParquetOutputTimestampType.Value): Unit = {

  // True when the type is a date/timestamp, or reaches one through struct fields or
  // array elements.
  def containsTemporal(dataType: DataType): Boolean = dataType match {
    case DataTypes.DateType | DataTypes.TimestampType | DataTypes.TimestampNTZType =>
      true
    case struct: StructType => struct.exists(field => containsTemporal(field.dataType))
    case array: ArrayType => containsTemporal(array.elementType)
    case _ => false
  }

  val genOptions =
    DataGenOptions(generateArray = true, generateStruct = true, generateNegativeZero = false)
  val flags = Seq(true, false)

  withTempPath { path =>
    val parquetPath = path.toString
    val rng = new Random(42)

    // Write phase: Comet is switched off while the file is produced.
    withSQLConf(
      CometConf.COMET_ENABLED.key -> "false",
      SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outputTimestampType.toString,
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
      ParquetGenerator.makeParquetFile(rng, spark, parquetPath, 100, genOptions)
    }

    // Read phase: iterate over the full cross product of reader settings.
    for {
      tz <- Seq(defaultTimezone, "UTC", "America/Denver")
      inferTimestampNtzEnabled <- flags
      int96TimestampConversion <- flags
      int96AsTimestamp <- flags
    } {
      withSQLConf(
        CometConf.COMET_ENABLED.key -> "true",
        SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz,
        SQLConf.PARQUET_INT96_AS_TIMESTAMP.key -> int96AsTimestamp.toString,
        SQLConf.PARQUET_INT96_TIMESTAMP_CONVERSION.key -> int96TimestampConversion.toString,
        SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.key -> inferTimestampNtzEnabled.toString) {

        val df = spark.read.parquet(parquetPath)
        df.createOrReplaceTempView("t1")

        // The schema is re-read under the current settings, so the set of temporal columns
        // is recomputed on every iteration.
        df.schema.fields
          .collect { case field if containsTemporal(field.dataType) => field.name }
          .foreach { col =>
            checkSparkAnswer(s"SELECT $col FROM t1 ORDER BY $col")
          }
      }
    }
  }
}
246+
169247 override protected def test (testName : String , testTags : Tag * )(testFun : => Any )(implicit
170248 pos : Position ): Unit = {
171249 Seq (" native" , " jvm" ).foreach { shuffleMode =>
0 commit comments