1
1
package org.jetbrains.kotlinx.dataframe.io
2
2
3
+ import com.beust.klaxon.JsonArray
4
+ import com.beust.klaxon.JsonObject
3
5
import io.kotest.matchers.collections.shouldBeIn
4
6
import io.kotest.matchers.shouldBe
5
7
import io.kotest.matchers.string.shouldContain
@@ -22,10 +24,19 @@ import org.jetbrains.kotlinx.dataframe.api.schema
22
24
import org.jetbrains.kotlinx.dataframe.api.toDouble
23
25
import org.jetbrains.kotlinx.dataframe.api.toMap
24
26
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
27
+ import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
25
28
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
26
29
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
27
30
import org.jetbrains.kotlinx.dataframe.impl.nothingType
28
31
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.*
32
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.COLUMNS
33
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.DATA
34
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KIND
35
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KOTLIN_DATAFRAME
36
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.METADATA
37
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NCOL
38
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NROW
39
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.VERSION
29
40
import org.jetbrains.kotlinx.dataframe.type
30
41
import org.jetbrains.kotlinx.dataframe.values
31
42
import org.junit.Test
@@ -951,4 +962,97 @@ class JsonTests {
951
962
val df = dataFrameOf(" a" , " b" )(" 1" , null , " 2" , 12 )
952
963
df.toJson(canonical = true ) shouldContain " \" b\" :null"
953
964
}
965
+
966
/**
 * Serializes a flat, single-row frame with `toJsonWithMetadata` and verifies:
 *  - the serialization version field,
 *  - the top-level metadata (row count, column count, ordered column names),
 *  - that the payload stored under [KOTLIN_DATAFRAME] parses back into a frame equal to the source.
 */
@Test
@Suppress("UNCHECKED_CAST")
fun `json with metadata flat table`() {
    @Language("json")
    val data = """
        [{"id":3602279,"node_id":"MDEwOlJlcG9zaXRvcnkzNjAyMjc5","name":"kotlin-web-demo","full_name":"JetBrains/kotlin-web-demo"}]
    """.trimIndent()
    val df = DataFrame.readJsonStr(data)
    val json = df.toJsonWithMetadata(df.rowsCount())
    json[VERSION] shouldBe SERIALIZATION_VERSION

    val metadata = json[METADATA] as JsonObject
    metadata[NROW] shouldBe 1
    metadata[NCOL] shouldBe 4
    // Column names must match the JSON keys exactly and keep their input order.
    val columns = metadata[COLUMNS] as List<String>
    columns shouldBe listOf("id", "node_id", "name", "full_name")

    // Round trip: re-read the serialized rows and compare with the original frame.
    val decodedData = json[KOTLIN_DATAFRAME] as JsonArray<*>
    val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString())
    decodedDf shouldBe df
}
987
+
988
/**
 * Serializes a frame whose only column is a column group and verifies that the group cell is
 * emitted as an object carrying a [METADATA] entry with kind [ColumnKind.Group] and a [DATA]
 * entry holding the original nested values.
 */
@Test
fun `json with metadata column group`() {
    @Language("json")
    val data = """
        [{"permissions":{"admin":false,"maintain":false,"push":false,"triage":false,"pull":true}}]
    """.trimIndent()
    val df = DataFrame.readJsonStr(data)
    val json = df.toJsonWithMetadata(df.rowsCount())

    val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject

    // Keys must match the JSON field names exactly, otherwise the lookup yields null.
    val permissions = row["permissions"] as JsonObject
    val metadata = permissions[METADATA] as JsonObject
    metadata[KIND] shouldBe ColumnKind.Group.name

    val decodedData = permissions[DATA] as JsonObject

    decodedData["admin"] shouldBe false
    decodedData["maintain"] shouldBe false
    decodedData["push"] shouldBe false
    decodedData["triage"] shouldBe false
    decodedData["pull"] shouldBe true
}
1011
+
1012
/**
 * Serializes a frame whose only column is a frame column (29 contributor rows, 8 fields each)
 * and verifies that the cell is emitted as an object with:
 *  - a [METADATA] entry reporting kind [ColumnKind.Frame], 8 columns and 29 rows,
 *  - a [DATA] array of 29 elements that parses back into a frame equal to the nested source frame.
 */
@Test
fun `json with metadata frame column`() {
    // The fixture is kept on a single line: a line break inside a JSON token would make it invalid.
    @Language("json")
    val data = """
        [{"contributors":[{"login":"satamas","id":5521317,"node_id":"MDQ6VXNlcjU1MjEzMTc=","gravatar_id":"","url":"https://api.github.com/users/satamas","type":"User","site_admin":false,"contributions":998},{"login":"NataliaUkhorskaya","id":968879,"node_id":"MDQ6VXNlcjk2ODg3OQ==","gravatar_id":"","url":"https://api.github.com/users/NataliaUkhorskaya","type":"User","site_admin":false,"contributions":371},{"login":"AlexanderPrendota","id":10503748,"node_id":"MDQ6VXNlcjEwNTAzNzQ4","gravatar_id":"","url":"https://api.github.com/users/AlexanderPrendota","type":"User","site_admin":false,"contributions":190},{"login":"svtk","id":1447386,"node_id":"MDQ6VXNlcjE0NDczODY=","gravatar_id":"","url":"https://api.github.com/users/svtk","type":"User","site_admin":false,"contributions":53},{"login":"zarechenskiy","id":3757088,"node_id":"MDQ6VXNlcjM3NTcwODg=","gravatar_id":"","url":"https://api.github.com/users/zarechenskiy","type":"User","site_admin":false,"contributions":18},{"login":"abreslav","id":888318,"node_id":"MDQ6VXNlcjg4ODMxOA==","gravatar_id":"","url":"https://api.github.com/users/abreslav","type":"User","site_admin":false,"contributions":13},{"login":"yole","id":46553,"node_id":"MDQ6VXNlcjQ2NTUz","gravatar_id":"","url":"https://api.github.com/users/yole","type":"User","site_admin":false,"contributions":11},{"login":"zoobestik","id":242514,"node_id":"MDQ6VXNlcjI0MjUxNA==","gravatar_id":"","url":"https://api.github.com/users/zoobestik","type":"User","site_admin":false,"contributions":5},{"login":"ilya-g","id":4257577,"node_id":"MDQ6VXNlcjQyNTc1Nzc=","gravatar_id":"","url":"https://api.github.com/users/ilya-g","type":"User","site_admin":false,"contributions":5},{"login":"pTalanov","id":442640,"node_id":"MDQ6VXNlcjQ0MjY0MA==","gravatar_id":"","url":"https://api.github.com/users/pTalanov","type":"User","site_admin":false,"contributions":4},{"login":"bashor","id":485321,"node_id":"MDQ6VXNlcjQ4NTMyMQ==","gravatar_id":"","url":"https://api.github.com/users/bashor","type":"User","site_admin":false,"contributions":3},{"login":"nikpachoo","id":3338311,"node_id":"MDQ6VXNlcjMzMzgzMTE=","gravatar_id":"","url":"https://api.github.com/users/nikpachoo","type":"User","site_admin":false,"contributions":3},{"login":"udalov","id":292714,"node_id":"MDQ6VXNlcjI5MjcxNA==","gravatar_id":"","url":"https://api.github.com/users/udalov","type":"User","site_admin":false,"contributions":2},{"login":"anton-bannykh","id":1115872,"node_id":"MDQ6VXNlcjExMTU4NzI=","gravatar_id":"","url":"https://api.github.com/users/anton-bannykh","type":"User","site_admin":false,"contributions":2},{"login":"rayshade","id":5259872,"node_id":"MDQ6VXNlcjUyNTk4NzI=","gravatar_id":"","url":"https://api.github.com/users/rayshade","type":"User","site_admin":false,"contributions":2},{"login":"yu-ishicawa","id":843678,"node_id":"MDQ6VXNlcjg0MzY3OA==","gravatar_id":"","url":"https://api.github.com/users/yu-ishicawa","type":"User","site_admin":false,"contributions":2},{"login":"gildor","id":186017,"node_id":"MDQ6VXNlcjE4NjAxNw==","gravatar_id":"","url":"https://api.github.com/users/gildor","type":"User","site_admin":false,"contributions":1},{"login":"AndreOnCrypto","id":3066457,"node_id":"MDQ6VXNlcjMwNjY0NTc=","gravatar_id":"","url":"https://api.github.com/users/AndreOnCrypto","type":"User","site_admin":false,"contributions":1},{"login":"DipanshKhandelwal","id":24923974,"node_id":"MDQ6VXNlcjI0OTIzOTc0","gravatar_id":"","url":"https://api.github.com/users/DipanshKhandelwal","type":"User","site_admin":false,"contributions":1},{"login":"dsavvinov","id":6999635,"node_id":"MDQ6VXNlcjY5OTk2MzU=","gravatar_id":"","url":"https://api.github.com/users/dsavvinov","type":"User","site_admin":false,"contributions":1},{"login":"Noia","id":397736,"node_id":"MDQ6VXNlcjM5NzczNg==","gravatar_id":"","url":"https://api.github.com/users/Noia","type":"User","site_admin":false,"contributions":1},{"login":"gzoritchak","id":1110254,"node_id":"MDQ6VXNlcjExMTAyNTQ=","gravatar_id":"","url":"https://api.github.com/users/gzoritchak","type":"User","site_admin":false,"contributions":1},{"login":"Harmitage","id":44910736,"node_id":"MDQ6VXNlcjQ0OTEwNzM2","gravatar_id":"","url":"https://api.github.com/users/Harmitage","type":"User","site_admin":false,"contributions":1},{"login":"JLLeitschuh","id":1323708,"node_id":"MDQ6VXNlcjEzMjM3MDg=","gravatar_id":"","url":"https://api.github.com/users/JLLeitschuh","type":"User","site_admin":false,"contributions":1},{"login":"dalinaum","id":145585,"node_id":"MDQ6VXNlcjE0NTU4NQ==","gravatar_id":"","url":"https://api.github.com/users/dalinaum","type":"User","site_admin":false,"contributions":1},{"login":"robstoll","id":5557885,"node_id":"MDQ6VXNlcjU1NTc4ODU=","gravatar_id":"","url":"https://api.github.com/users/robstoll","type":"User","site_admin":false,"contributions":1},{"login":"tginsberg","id":432945,"node_id":"MDQ6VXNlcjQzMjk0NQ==","gravatar_id":"","url":"https://api.github.com/users/tginsberg","type":"User","site_admin":false,"contributions":1},{"login":"joeldudleyr3","id":24230167,"node_id":"MDQ6VXNlcjI0MjMwMTY3","gravatar_id":"","url":"https://api.github.com/users/joeldudleyr3","type":"User","site_admin":false,"contributions":1},{"login":"ligi","id":111600,"node_id":"MDQ6VXNlcjExMTYwMA==","gravatar_id":"","url":"https://api.github.com/users/ligi","type":"User","site_admin":false,"contributions":1}]}]
    """.trimIndent()
    val df = DataFrame.readJsonStr(data)
    val json = df.toJsonWithMetadata(df.rowsCount())
    val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject

    // Key must match the JSON field name exactly, otherwise the lookup yields null.
    val contributors = row["contributors"] as JsonObject

    val metadata = contributors[METADATA] as JsonObject
    metadata[KIND] shouldBe ColumnKind.Frame.name
    metadata[NCOL] shouldBe 8
    metadata[NROW] shouldBe 29

    val decodedData = contributors[DATA] as JsonArray<*>
    decodedData.size shouldBe 29

    // Round trip: the serialized nested rows must parse back into the original nested frame.
    val decodedDf = DataFrame.readJsonStr(decodedData.toJsonString())
    decodedDf shouldBe (df[0]["contributors"] as AnyFrame)
}
1035
+
1036
/**
 * Serializes a frame with a 29-row frame column while passing a nested-frame row limit of 20 as
 * the second argument of `toJsonWithMetadata`, and verifies that:
 *  - the nested [METADATA] still reports kind [ColumnKind.Frame], 8 columns and the full 29 rows,
 *  - the nested [DATA] array contains only `nestedFrameRowLimit` elements.
 */
@Test
fun `json with metadata test row limit`() {
    // The fixture is kept on a single line: a line break inside a JSON token would make it invalid.
    @Language("json")
    val data = """
        [{"contributors":[{"login":"satamas","id":5521317,"node_id":"MDQ6VXNlcjU1MjEzMTc=","gravatar_id":"","url":"https://api.github.com/users/satamas","type":"User","site_admin":false,"contributions":998},{"login":"NataliaUkhorskaya","id":968879,"node_id":"MDQ6VXNlcjk2ODg3OQ==","gravatar_id":"","url":"https://api.github.com/users/NataliaUkhorskaya","type":"User","site_admin":false,"contributions":371},{"login":"AlexanderPrendota","id":10503748,"node_id":"MDQ6VXNlcjEwNTAzNzQ4","gravatar_id":"","url":"https://api.github.com/users/AlexanderPrendota","type":"User","site_admin":false,"contributions":190},{"login":"svtk","id":1447386,"node_id":"MDQ6VXNlcjE0NDczODY=","gravatar_id":"","url":"https://api.github.com/users/svtk","type":"User","site_admin":false,"contributions":53},{"login":"zarechenskiy","id":3757088,"node_id":"MDQ6VXNlcjM3NTcwODg=","gravatar_id":"","url":"https://api.github.com/users/zarechenskiy","type":"User","site_admin":false,"contributions":18},{"login":"abreslav","id":888318,"node_id":"MDQ6VXNlcjg4ODMxOA==","gravatar_id":"","url":"https://api.github.com/users/abreslav","type":"User","site_admin":false,"contributions":13},{"login":"yole","id":46553,"node_id":"MDQ6VXNlcjQ2NTUz","gravatar_id":"","url":"https://api.github.com/users/yole","type":"User","site_admin":false,"contributions":11},{"login":"zoobestik","id":242514,"node_id":"MDQ6VXNlcjI0MjUxNA==","gravatar_id":"","url":"https://api.github.com/users/zoobestik","type":"User","site_admin":false,"contributions":5},{"login":"ilya-g","id":4257577,"node_id":"MDQ6VXNlcjQyNTc1Nzc=","gravatar_id":"","url":"https://api.github.com/users/ilya-g","type":"User","site_admin":false,"contributions":5},{"login":"pTalanov","id":442640,"node_id":"MDQ6VXNlcjQ0MjY0MA==","gravatar_id":"","url":"https://api.github.com/users/pTalanov","type":"User","site_admin":false,"contributions":4},{"login":"bashor","id":485321,"node_id":"MDQ6VXNlcjQ4NTMyMQ==","gravatar_id":"","url":"https://api.github.com/users/bashor","type":"User","site_admin":false,"contributions":3},{"login":"nikpachoo","id":3338311,"node_id":"MDQ6VXNlcjMzMzgzMTE=","gravatar_id":"","url":"https://api.github.com/users/nikpachoo","type":"User","site_admin":false,"contributions":3},{"login":"udalov","id":292714,"node_id":"MDQ6VXNlcjI5MjcxNA==","gravatar_id":"","url":"https://api.github.com/users/udalov","type":"User","site_admin":false,"contributions":2},{"login":"anton-bannykh","id":1115872,"node_id":"MDQ6VXNlcjExMTU4NzI=","gravatar_id":"","url":"https://api.github.com/users/anton-bannykh","type":"User","site_admin":false,"contributions":2},{"login":"rayshade","id":5259872,"node_id":"MDQ6VXNlcjUyNTk4NzI=","gravatar_id":"","url":"https://api.github.com/users/rayshade","type":"User","site_admin":false,"contributions":2},{"login":"yu-ishicawa","id":843678,"node_id":"MDQ6VXNlcjg0MzY3OA==","gravatar_id":"","url":"https://api.github.com/users/yu-ishicawa","type":"User","site_admin":false,"contributions":2},{"login":"gildor","id":186017,"node_id":"MDQ6VXNlcjE4NjAxNw==","gravatar_id":"","url":"https://api.github.com/users/gildor","type":"User","site_admin":false,"contributions":1},{"login":"AndreOnCrypto","id":3066457,"node_id":"MDQ6VXNlcjMwNjY0NTc=","gravatar_id":"","url":"https://api.github.com/users/AndreOnCrypto","type":"User","site_admin":false,"contributions":1},{"login":"DipanshKhandelwal","id":24923974,"node_id":"MDQ6VXNlcjI0OTIzOTc0","gravatar_id":"","url":"https://api.github.com/users/DipanshKhandelwal","type":"User","site_admin":false,"contributions":1},{"login":"dsavvinov","id":6999635,"node_id":"MDQ6VXNlcjY5OTk2MzU=","gravatar_id":"","url":"https://api.github.com/users/dsavvinov","type":"User","site_admin":false,"contributions":1},{"login":"Noia","id":397736,"node_id":"MDQ6VXNlcjM5NzczNg==","gravatar_id":"","url":"https://api.github.com/users/Noia","type":"User","site_admin":false,"contributions":1},{"login":"gzoritchak","id":1110254,"node_id":"MDQ6VXNlcjExMTAyNTQ=","gravatar_id":"","url":"https://api.github.com/users/gzoritchak","type":"User","site_admin":false,"contributions":1},{"login":"Harmitage","id":44910736,"node_id":"MDQ6VXNlcjQ0OTEwNzM2","gravatar_id":"","url":"https://api.github.com/users/Harmitage","type":"User","site_admin":false,"contributions":1},{"login":"JLLeitschuh","id":1323708,"node_id":"MDQ6VXNlcjEzMjM3MDg=","gravatar_id":"","url":"https://api.github.com/users/JLLeitschuh","type":"User","site_admin":false,"contributions":1},{"login":"dalinaum","id":145585,"node_id":"MDQ6VXNlcjE0NTU4NQ==","gravatar_id":"","url":"https://api.github.com/users/dalinaum","type":"User","site_admin":false,"contributions":1},{"login":"robstoll","id":5557885,"node_id":"MDQ6VXNlcjU1NTc4ODU=","gravatar_id":"","url":"https://api.github.com/users/robstoll","type":"User","site_admin":false,"contributions":1},{"login":"tginsberg","id":432945,"node_id":"MDQ6VXNlcjQzMjk0NQ==","gravatar_id":"","url":"https://api.github.com/users/tginsberg","type":"User","site_admin":false,"contributions":1},{"login":"joeldudleyr3","id":24230167,"node_id":"MDQ6VXNlcjI0MjMwMTY3","gravatar_id":"","url":"https://api.github.com/users/joeldudleyr3","type":"User","site_admin":false,"contributions":1},{"login":"ligi","id":111600,"node_id":"MDQ6VXNlcjExMTYwMA==","gravatar_id":"","url":"https://api.github.com/users/ligi","type":"User","site_admin":false,"contributions":1}]}]
    """.trimIndent()
    val df = DataFrame.readJsonStr(data)

    val nestedFrameRowLimit = 20
    val json = df.toJsonWithMetadata(df.rowsCount(), nestedFrameRowLimit)
    val row = (json[KOTLIN_DATAFRAME] as JsonArray<*>)[0] as JsonObject

    // Key must match the JSON field name exactly, otherwise the lookup yields null.
    val contributors = row["contributors"] as JsonObject

    // Metadata is expected to describe the full nested frame, not the truncated payload.
    val metadata = contributors[METADATA] as JsonObject
    metadata[KIND] shouldBe ColumnKind.Frame.name
    metadata[NCOL] shouldBe 8
    metadata[NROW] shouldBe 29

    // Only the first `nestedFrameRowLimit` rows should be serialized.
    val decodedData = contributors[DATA] as JsonArray<*>
    decodedData.size shouldBe nestedFrameRowLimit
}
954
1058
}
0 commit comments