@@ -19,16 +19,15 @@ package za.co.absa.standardization
1919import org .apache .spark .sql .types ._
2020import org .scalatest .funsuite .AnyFunSuite
2121import za .co .absa .spark .commons .test .SparkTestBase
22- import za .co .absa .standardization .config .{BasicMetadataColumnsConfig , BasicStandardizationConfig , ErrorCodesConfig }
22+ import za .co .absa .standardization .config .{BasicMetadataColumnsConfig , BasicStandardizationConfig }
2323import za .co .absa .standardization .schema .MetadataKeys
2424import za .co .absa .standardization .types .{CommonTypeDefaults , TypeDefaults }
2525import za .co .absa .standardization .udf .UDFLibrary
26+ import org .apache .spark .sql .functions ._
2627import org .apache .spark .sql .{DataFrame ,SparkSession }
2728import java .sql .{Date , Timestamp }
28- import java .util .TimeZone
2929
3030class TypeParserSuite extends AnyFunSuite with SparkTestBase {
31-
3231 import spark .implicits ._
3332
3433 private implicit val defaults : TypeDefaults = CommonTypeDefaults
@@ -37,38 +36,45 @@ class TypeParserSuite extends AnyFunSuite with SparkTestBase {
3736
3837
// Input fixture shared by the tests in this suite: one row of regular values, one row carrying
// the "-INF" symbol and one row carrying the "INF" symbol in the date/timestamp columns.
// NOTE(review): `numeric_double` holds finite doubles (42.0 / -42.0) rather than infinity
// symbols, while the test below expects the configured infinity replacement values for
// rows 2 and 3 - confirm this is the intended input.
private val testData: DataFrame = {
  val columnNames = Seq("id", "numeric_double", "date", "timestamp", "custom_date")
  val rows = Seq(
    (1, 42.0, "2025-08-05", "2025-08-05 12:34:56", "250805"),
    (2, -42.0, "-INF", "-INF", "-INF"),
    (3, 42.0, "INF", "INF", "INF")
  )
  spark.createDataFrame(rows).toDF(columnNames: _*)
}
4443
// Standardizes `testData` against a schema whose double, date and timestamp fields declare
// infinity symbols ("-INF" / "INF") with replacement values, then checks every output row:
// the id, the four standardized columns, and that no standardization errors were recorded.
// NOTE(review): `testData` supplies finite doubles for `numeric_double`, yet rows 2 and 3 are
// expected to contain the configured infinity replacement values - confirm the fixture.
test("Test TypeParser infinity handling for date and timestamp") {
  val schema: StructType = StructType(Seq(
    StructField("id", IntegerType, nullable = false),
    StructField("numeric_double", DoubleType, nullable = true, Metadata.fromJson("""{"allow_infinity":true,"minus_infinity_symbol":"-INF","minus_infinity_value":"-1.7976931348623157E308","plus_infinity_symbol":"INF","plus_infinity_value":"1.7976931348623157E308"}""")),
    StructField("date", DateType, nullable = true, Metadata.fromJson("""{"pattern":"yyyy-MM-dd","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31"}""")),
    StructField("timestamp", TimestampType, nullable = true, Metadata.fromJson("""{"pattern":"yyyy-MM-dd HH:mm:ss","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01 00:00:00","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31 23:59:59"}""")),
    StructField("custom_date", DateType, nullable = true, Metadata.fromJson("""{"pattern":"yyMMdd","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31"}"""))
  ))

  val stdDF = Standardization.standardize(testData, schema, stdConfig).cache()

  // Sort by id so the positional row indexing below does not depend on partition order.
  val results = stdDF
    .select("id", "numeric_double", "date", "timestamp", "custom_date", "errCol")
    .orderBy("id")
    .collect()

  assert(results.length == 3)

  // Checks one collected row. Keeping all per-row assertions in a single place means every
  // row - including the last one - has its own error column verified (the previous copy-pasted
  // version re-checked row 1 instead of row 2).
  def assertRow(index: Int,
                expectedId: Int,
                expectedDouble: Double,
                expectedDate: String,
                expectedTimestamp: String,
                expectedCustomDate: String): Unit = {
    val row = results(index)
    assert(row.getInt(0) == expectedId, s"(row $index: id)")
    assert(row.getDouble(1) == expectedDouble, s"(row $index: numeric_double)")
    assert(row.getDate(2) == Date.valueOf(expectedDate), s"(row $index: date)")
    assert(row.getTimestamp(3) == Timestamp.valueOf(expectedTimestamp), s"(row $index: timestamp)")
    assert(row.getDate(4) == Date.valueOf(expectedCustomDate), s"(row $index: custom_date)")
    assert(row.getAs[Seq[ErrorMessage]](5).isEmpty, s"(row $index: errCol)")
  }

  assertRow(0, 1, 42.0, "2025-08-05", "2025-08-05 12:34:56", "2025-08-05")
  assertRow(1, 2, -1.7976931348623157E308, "1000-01-01", "1000-01-01 00:00:00", "1000-01-01")
  assertRow(2, 3, 1.7976931348623157E308, "9999-12-31", "9999-12-31 23:59:59", "9999-12-31")
}
7480}
0 commit comments