Skip to content

Commit 968f4ad

Browse files
committed
#23 Add more tests for nestedUnstruct(), cover more edge cases.
1 parent 0cce970 commit 968f4ad

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

src/test/scala/za/co/absa/spark/hats/transformations/DeepArrayTransformationSuite.scala

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,87 @@ class DeepArrayTransformationSuite extends FunSuite with SparkTestBase {
10011001
assertResults(actualResults, expectedResults)
10021002
}
10031003

1004+
// Checks nestedUnstruct("employee") on a DataFrame built from structOfStructSampleN.
// The expected output has `name` and `address` as top-level columns next to `id`,
// while `address` is still a struct holding `city` and `street`.
// NOTE(review): structOfStructSampleN is defined outside this excerpt; its shape is
// only inferred from the expectations below - confirm against its definition.
test("Test unstruct on the root level") {
1005+
// Expected literals are normalized from CRLF to LF so the comparison does not
// depend on the line endings of this source file.
val expectedSchema =
1006+
"""root
1007+
| |-- id: integer (nullable = false)
1008+
| |-- name: string (nullable = true)
1009+
| |-- address: struct (nullable = true)
1010+
| | |-- city: string (nullable = true)
1011+
| | |-- street: string (nullable = true)
1012+
|""".stripMargin.replace("\r\n", "\n")
1013+
// One JSON document per expected output row, separated by newlines.
val expectedResults =
1014+
"""{"id":1,"name":"Martin","address":{"city":"Olomuc","street":"Vodickova"}}
1015+
|{"id":1,"name":"Petr","address":{"city":"Ostrava","street":"Vlavska"}}
1016+
|{"id":1,"name":"Vojta","address":{"city":"Plzen","street":"Kralova"}}"""
1017+
.stripMargin.replace("\r\n", "\n")
1018+
1019+
// Build the input DataFrame from the shared sample rows.
val df = spark.sparkContext.parallelize(structOfStructSampleN).toDF
1020+
1021+
// Operation under test: unstruct the top-level `employee` column.
val dfOut = df
1022+
.nestedUnstruct("employee")
1023+
1024+
// Render the schema as tree text and the rows as newline-joined JSON,
// i.e. the same textual form as the expected literals above.
val actualSchema = dfOut.schema.treeString
1025+
val actualResults = dfOut.toJSON.collect.mkString("\n")
1026+
1027+
// Both the resulting schema and the resulting data must match.
assertSchema(actualSchema, expectedSchema)
1028+
assertResults(actualResults, expectedResults)
1029+
}
1030+
1031+
// Checks nestedUnstruct("employee.address") on a DataFrame built from structOfStructSampleN.
// The expected output keeps the `employee` struct but has `city` and `street` directly
// inside it, next to `name`, with no `address` field left.
// NOTE(review): structOfStructSampleN is defined outside this excerpt; its shape is
// only inferred from the expectations below - confirm against its definition.
test("Test unstruct on the nested level") {
1032+
// Expected literals are normalized from CRLF to LF so the comparison does not
// depend on the line endings of this source file.
val expectedSchema =
1033+
"""root
1034+
| |-- id: integer (nullable = false)
1035+
| |-- employee: struct (nullable = false)
1036+
| | |-- name: string (nullable = true)
1037+
| | |-- city: string (nullable = true)
1038+
| | |-- street: string (nullable = true)
1039+
|""".stripMargin.replace("\r\n", "\n")
1040+
// One JSON document per expected output row, separated by newlines.
val expectedResults =
1041+
"""{"id":1,"employee":{"name":"Martin","city":"Olomuc","street":"Vodickova"}}
1042+
|{"id":1,"employee":{"name":"Petr","city":"Ostrava","street":"Vlavska"}}
1043+
|{"id":1,"employee":{"name":"Vojta","city":"Plzen","street":"Kralova"}}"""
1044+
.stripMargin.replace("\r\n", "\n")
1045+
1046+
// Build the input DataFrame from the shared sample rows.
val df = spark.sparkContext.parallelize(structOfStructSampleN).toDF
1047+
1048+
// Operation under test: unstruct the `address` struct nested inside `employee`.
val dfOut = df
1049+
.nestedUnstruct("employee.address")
1050+
1051+
// Render the schema as tree text and the rows as newline-joined JSON,
// i.e. the same textual form as the expected literals above.
val actualSchema = dfOut.schema.treeString
1052+
val actualResults = dfOut.toJSON.collect.mkString("\n")
1053+
1054+
// Both the resulting schema and the resulting data must match.
assertSchema(actualSchema, expectedSchema)
1055+
assertResults(actualResults, expectedResults)
1056+
}
1057+
1058+
// Checks two chained nestedUnstruct calls on a DataFrame built from structOfStructSampleN:
// first on "employee.address", then on "employee". The expected output is fully flat,
// with `id`, `name`, `city` and `street` all at the root and no struct columns left.
// NOTE(review): structOfStructSampleN is defined outside this excerpt; its shape is
// only inferred from the expectations below - confirm against its definition.
test("Test unstruct 2 times") {
1059+
// Expected literals are normalized from CRLF to LF so the comparison does not
// depend on the line endings of this source file.
val expectedSchema =
1060+
"""root
1061+
| |-- id: integer (nullable = false)
1062+
| |-- name: string (nullable = true)
1063+
| |-- city: string (nullable = true)
1064+
| |-- street: string (nullable = true)
1065+
|""".stripMargin.replace("\r\n", "\n")
1066+
// One JSON document per expected output row, separated by newlines.
val expectedResults =
1067+
"""{"id":1,"name":"Martin","city":"Olomuc","street":"Vodickova"}
1068+
|{"id":1,"name":"Petr","city":"Ostrava","street":"Vlavska"}
1069+
|{"id":1,"name":"Vojta","city":"Plzen","street":"Kralova"}"""
1070+
.stripMargin.replace("\r\n", "\n")
1071+
1072+
// Build the input DataFrame from the shared sample rows.
val df = spark.sparkContext.parallelize(structOfStructSampleN).toDF
1073+
1074+
// Operations under test, applied inner-first: the nested `employee.address`
// struct is unstructed before the enclosing `employee` struct.
val dfOut = df
1075+
.nestedUnstruct("employee.address")
1076+
.nestedUnstruct("employee")
1077+
1078+
// Render the schema as tree text and the rows as newline-joined JSON,
// i.e. the same textual form as the expected literals above.
val actualSchema = dfOut.schema.treeString
1079+
val actualResults = dfOut.toJSON.collect.mkString("\n")
1080+
1081+
// Both the resulting schema and the resulting data must match.
assertSchema(actualSchema, expectedSchema)
1082+
assertResults(actualResults, expectedResults)
1083+
}
1084+
10041085
test("Test unstruct within an array") {
10051086
val expectedSchema =
10061087
"""root

0 commit comments

Comments
 (0)