Skip to content

Commit f156f1c

Browse files
test case fix and addition of numeric_double test
1 parent 1b242cb commit f156f1c

File tree

1 file changed

+30
-24
lines changed

1 file changed

+30
-24
lines changed

src/test/scala/za/co/absa/standardization/TypeParserSuite.scala

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,15 @@ package za.co.absa.standardization
1919
import org.apache.spark.sql.types._
2020
import org.scalatest.funsuite.AnyFunSuite
2121
import za.co.absa.spark.commons.test.SparkTestBase
22-
import za.co.absa.standardization.config.{BasicMetadataColumnsConfig, BasicStandardizationConfig, ErrorCodesConfig}
22+
import za.co.absa.standardization.config.{BasicMetadataColumnsConfig, BasicStandardizationConfig}
2323
import za.co.absa.standardization.schema.MetadataKeys
2424
import za.co.absa.standardization.types.{CommonTypeDefaults, TypeDefaults}
2525
import za.co.absa.standardization.udf.UDFLibrary
26+
import org.apache.spark.sql.functions._
2627
import org.apache.spark.sql.{DataFrame,SparkSession}
2728
import java.sql.{Date, Timestamp}
28-
import java.util.TimeZone
2929

3030
class TypeParserSuite extends AnyFunSuite with SparkTestBase {
31-
3231
import spark.implicits._
3332

3433
private implicit val defaults: TypeDefaults = CommonTypeDefaults
@@ -37,38 +36,45 @@ class TypeParserSuite extends AnyFunSuite with SparkTestBase {
3736

3837

3938
private val testData: DataFrame = spark.createDataFrame(Seq(
40-
(1,"2025-08-05","2025-08-05 12:34:56","250805"),
41-
(2,"-INF","-INF","-INF"),
42-
(3,"INF","INF","INF")
43-
)).toDF("id","date","timestamp","custom_date")
39+
(1,42.0,"2025-08-05","2025-08-05 12:34:56","250805"),
40+
(2,-42.0,"-INF","-INF","-INF"),
41+
(3,42.0,"INF","INF","INF")
42+
)).toDF("id","numeric_double","date","timestamp","custom_date")
4443

4544
test("Test TypeParser infinity handling for date and timestamp"){
4645
val schema: StructType = StructType(Seq(
4746
StructField("id",IntegerType, nullable = false),
47+
StructField("numeric_double", DoubleType, nullable = true, Metadata.fromJson("""{"allow_infinity":true,"minus_infinity_symbol":"-INF","minus_infinity_value":"-1.7976931348623157E308","plus_infinity_symbol":"INF","plus_infinity_value":"1.7976931348623157E308"}""")),
4848
StructField("date",DateType, nullable = true, Metadata.fromJson("""{"pattern":"yyyy-MM-dd","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31"}""")),
4949
StructField("timestamp",TimestampType, nullable = true, Metadata.fromJson("""{"pattern":"yyyy-MM-dd HH:mm:ss","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01 00:00:00","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31 23:59:59"}""")),
5050
StructField("custom_date",DateType, nullable = true, Metadata.fromJson("""{"pattern":"yyMMdd","minus_infinity_symbol":"-INF","minus_infinity_value":"1000-01-01","plus_infinity_symbol":"INF","plus_infinity_value":"9999-12-31"}""")),
5151
))
5252

5353
val stdDF = Standardization.standardize(testData,schema,stdConfig).cache()
5454

55-
val results = stdDF.select("id","date", "timestamp", "custom_date","errCol").collect()
56-
57-
58-
assert(results(0).getAs[Date](1) == Date.valueOf("2025-08-05"))
59-
assert(results(0).getAs[Timestamp](2) == Timestamp.valueOf("2025-08-05 12:34:56"))
60-
assert(results(0).getAs[Date](3) == Date.valueOf("2025-08-05"))
61-
assert(results(0).getAs[Seq[ErrorMessage]]("errCol").isEmpty)
62-
63-
assert(results(1).getAs[Date](1) == Date.valueOf("1000-01-01"))
64-
assert(results(1).getAs[Timestamp](2) == Timestamp.valueOf("1000-01-01 00:00:00"))
65-
assert(results(1).getAs[Date](3) == Date.valueOf("1000-01-01"))
66-
assert(results(1).getAs[Seq[ErrorMessage]]("errCol").isEmpty)
67-
68-
assert(results(2).getAs[Date](1) == Date.valueOf("9999-12-31"))
69-
assert(results(2).getAs[Timestamp](2) == Timestamp.valueOf("9999-12-31 23:59:59"))
70-
assert(results(2).getAs[Date](3) == Date.valueOf("9999-12-31"))
71-
assert(results(2).getAs[Seq[ErrorMessage]]("errCol").isEmpty)
55+
val results = stdDF.select("id","numeric_double","date", "timestamp", "custom_date","errCol").collect()
56+
57+
58+
assert(results(0).getInt(0) == 1)
59+
assert(results(0).getDouble(1) == 42.0)
60+
assert(results(0).getDate(2) == Date.valueOf("2025-08-05"))
61+
assert(results(0).getTimestamp(3) == Timestamp.valueOf("2025-08-05 12:34:56"))
62+
assert(results(0).getDate(4) == Date.valueOf("2025-08-05"))
63+
assert(results(0).getAs[Seq[ErrorMessage]](5).isEmpty)
64+
65+
assert(results(1).getInt(0) == 2)
66+
assert(results(1).getDouble(1) == -1.7976931348623157E308)
67+
assert(results(1).getDate(2) == Date.valueOf("1000-01-01"))
68+
assert(results(1).getTimestamp(3) == Timestamp.valueOf("1000-01-01 00:00:00"))
69+
assert(results(1).getDate(4) == Date.valueOf("1000-01-01"))
70+
assert(results(1).getAs[Seq[ErrorMessage]](5).isEmpty)
71+
72+
assert(results(2).getInt(0) == 3)
73+
assert(results(2).getDouble(1) == 1.7976931348623157E308)
74+
assert(results(2).getDate(2) == Date.valueOf("9999-12-31"))
75+
assert(results(2).getTimestamp(3) == Timestamp.valueOf("9999-12-31 23:59:59"))
76+
assert(results(2).getDate(4) == Date.valueOf("9999-12-31"))
77+
assert(results(2).getAs[Seq[ErrorMessage]](5).isEmpty) // third row (id 3, "INF" inputs): errCol must be empty; index 1 is already checked above
7278

7379
}
7480
}

0 commit comments

Comments
 (0)