@@ -19,65 +19,18 @@
 
 package org.apache.comet
 
-import java.io.File
-import java.text.SimpleDateFormat
-
 import scala.util.Random
 
-import org.scalactic.source.Position
-import org.scalatest.Tag
-
 import org.apache.commons.codec.binary.Hex
-import org.apache.commons.io.FileUtils
-import org.apache.spark.sql.CometTestBase
-import org.apache.spark.sql.comet.{CometNativeScanExec, CometScanExec}
-import org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
 import org.apache.spark.sql.types._
 
 import org.apache.comet.DataTypeSupport.isComplexType
 import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
 
-class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
-
-  private var filename: String = null
-
-  /**
-   * We use Asia/Kathmandu because it has a non-zero number of minutes as the offset, so is an
-   * interesting edge case. Also, this timezone tends to be different from the default system
-   * timezone.
-   *
-   * Represents UTC+5:45
-   */
-  private val defaultTimezone = "Asia/Kathmandu"
-
-  override def beforeAll(): Unit = {
-    super.beforeAll()
-    val tempDir = System.getProperty("java.io.tmpdir")
-    filename = s"$tempDir/CometFuzzTestSuite_${System.currentTimeMillis()}.parquet"
-    val random = new Random(42)
-    withSQLConf(
-      CometConf.COMET_ENABLED.key -> "false",
-      SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
-      val options =
-        DataGenOptions(
-          generateArray = true,
-          generateStruct = true,
-          generateNegativeZero = false,
-          // override base date due to known issues with experimental scans
-          baseDate =
-            new SimpleDateFormat("YYYY-MM-DD hh:mm:ss").parse("2024-05-25 12:34:56").getTime)
-      ParquetGenerator.makeParquetFile(random, spark, filename, 1000, options)
-    }
-  }
-
-  protected override def afterAll(): Unit = {
-    super.afterAll()
-    FileUtils.deleteDirectory(new File(filename))
-  }
+class CometFuzzTestSuite extends CometFuzzTestBase {
 
   test("select *") {
     val df = spark.read.parquet(filename)
@@ -168,18 +121,6 @@ class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  test("count distinct") {
-    val df = spark.read.parquet(filename)
-    df.createOrReplaceTempView("t1")
-    for (col <- df.columns) {
-      val sql = s"SELECT count(distinct $col) FROM t1"
-      val (_, cometPlan) = checkSparkAnswer(sql)
-      if (usingDataSourceExec) {
-        assert(1 == collectNativeScans(cometPlan).length)
-      }
-    }
-  }
-
   test("order by multiple columns") {
     val df = spark.read.parquet(filename)
     df.createOrReplaceTempView("t1")
@@ -192,32 +133,6 @@ class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  test("aggregate group by single column") {
-    val df = spark.read.parquet(filename)
-    df.createOrReplaceTempView("t1")
-    for (col <- df.columns) {
-      // cannot run fully natively due to range partitioning and sort
-      val sql = s"SELECT $col, count(*) FROM t1 GROUP BY $col ORDER BY $col"
-      val (_, cometPlan) = checkSparkAnswer(sql)
-      if (usingDataSourceExec) {
-        assert(1 == collectNativeScans(cometPlan).length)
-      }
-    }
-  }
-
-  test("min/max aggregate") {
-    val df = spark.read.parquet(filename)
-    df.createOrReplaceTempView("t1")
-    for (col <- df.columns) {
-      // cannot run fully native due to HashAggregate
-      val sql = s"SELECT min($col), max($col) FROM t1"
-      val (_, cometPlan) = checkSparkAnswer(sql)
-      if (usingDataSourceExec) {
-        assert(1 == collectNativeScans(cometPlan).length)
-      }
-    }
-  }
-
   test("distribute by single column (complex types)") {
     val df = spark.read.parquet(filename)
     df.createOrReplaceTempView("t1")
@@ -371,36 +286,4 @@ class CometFuzzTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
-      pos: Position): Unit = {
-    Seq("native", "jvm").foreach { shuffleMode =>
-      Seq(
-        CometConf.SCAN_NATIVE_COMET,
-        CometConf.SCAN_NATIVE_DATAFUSION,
-        CometConf.SCAN_NATIVE_ICEBERG_COMPAT).foreach { scanImpl =>
-        super.test(testName + s" ($scanImpl, $shuffleMode shuffle)", testTags: _*) {
-          withSQLConf(
-            CometConf.COMET_NATIVE_SCAN_IMPL.key -> scanImpl,
-            CometConf.COMET_SCAN_ALLOW_INCOMPATIBLE.key -> "true",
-            CometConf.COMET_SHUFFLE_MODE.key -> shuffleMode) {
-            testFun
-          }
-        }
-      }
-    }
-  }
-
-  private def collectNativeScans(plan: SparkPlan): Seq[SparkPlan] = {
-    collect(plan) {
-      case scan: CometScanExec => scan
-      case scan: CometNativeScanExec => scan
-    }
-  }
-
-  private def collectCometShuffleExchanges(plan: SparkPlan): Seq[SparkPlan] = {
-    collect(plan) { case exchange: CometShuffleExchangeExec =>
-      exchange
-    }
-  }
-
 }
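
The new CometFuzzTestBase is added in a part of the change that is not shown in this diff. As a reading aid, here is a minimal sketch of what it presumably contains, assembled from the members deleted above. The file contents, the abstract modifier, and the widening of the deleted private members to protected (which the subclass needs in order to reference filename and collectNativeScans) are assumptions, not confirmed by this hunk.

package org.apache.comet

import java.io.File
import java.text.SimpleDateFormat

import scala.util.Random

import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.CometTestBase
import org.apache.spark.sql.comet.{CometNativeScanExec, CometScanExec}
import org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.internal.SQLConf

import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}

// Hypothetical reconstruction: the real file is not visible in this diff.
abstract class CometFuzzTestBase extends CometTestBase with AdaptiveSparkPlanHelper {

  // Was private in CometFuzzTestSuite; protected is an assumption so that
  // subclasses can read the generated test file.
  protected var filename: String = null

  /**
   * Asia/Kathmandu (UTC+5:45) has a non-zero minutes offset, an interesting
   * edge case, and tends to differ from the default system timezone.
   */
  protected val defaultTimezone = "Asia/Kathmandu"

  override def beforeAll(): Unit = {
    super.beforeAll()
    // Generate a seeded (hence reproducible) random Parquet test file.
    val tempDir = System.getProperty("java.io.tmpdir")
    filename = s"$tempDir/CometFuzzTestSuite_${System.currentTimeMillis()}.parquet"
    val random = new Random(42)
    withSQLConf(
      CometConf.COMET_ENABLED.key -> "false",
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
      val options = DataGenOptions(
        generateArray = true,
        generateStruct = true,
        generateNegativeZero = false,
        // override base date due to known issues with experimental scans
        baseDate =
          new SimpleDateFormat("YYYY-MM-DD hh:mm:ss").parse("2024-05-25 12:34:56").getTime)
      ParquetGenerator.makeParquetFile(random, spark, filename, 1000, options)
    }
  }

  protected override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.deleteDirectory(new File(filename))
  }

  // Fans each test out into one variant per (scan impl, shuffle mode) pair.
  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
      pos: Position): Unit = {
    Seq("native", "jvm").foreach { shuffleMode =>
      Seq(
        CometConf.SCAN_NATIVE_COMET,
        CometConf.SCAN_NATIVE_DATAFUSION,
        CometConf.SCAN_NATIVE_ICEBERG_COMPAT).foreach { scanImpl =>
        super.test(testName + s" ($scanImpl, $shuffleMode shuffle)", testTags: _*) {
          withSQLConf(
            CometConf.COMET_NATIVE_SCAN_IMPL.key -> scanImpl,
            CometConf.COMET_SCAN_ALLOW_INCOMPATIBLE.key -> "true",
            CometConf.COMET_SHUFFLE_MODE.key -> shuffleMode) {
            testFun
          }
        }
      }
    }
  }

  protected def collectNativeScans(plan: SparkPlan): Seq[SparkPlan] = {
    collect(plan) {
      case scan: CometScanExec => scan
      case scan: CometNativeScanExec => scan
    }
  }

  protected def collectCometShuffleExchanges(plan: SparkPlan): Seq[SparkPlan] = {
    collect(plan) { case exchange: CometShuffleExchangeExec =>
      exchange
    }
  }
}

If the moved test override is unchanged, each test declared in CometFuzzTestSuite still expands into six named variants, one per (scan implementation, shuffle mode) pair, e.g. "select * (native_datafusion, jvm shuffle)".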