@@ -49,6 +49,14 @@ import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
49
49
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
50
50
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
51
51
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
52
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.COLUMNS
53
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.DATA
54
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KIND
55
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KOTLIN_DATAFRAME
56
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.METADATA
57
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NCOL
58
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NROW
59
+ import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.VERSION
52
60
import org.jetbrains.kotlinx.dataframe.ncol
53
61
import org.jetbrains.kotlinx.dataframe.nrow
54
62
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
@@ -859,42 +867,103 @@ private val valueTypes =
859
867
860
868
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name.
 *
 * Column groups are encoded recursively as nested objects, frame columns as nested
 * arrays (via [encodeFrame]) and every other column via [encodePrimitiveData].
 *
 * @param frame container whose columns supply the values of the row.
 * @param index index of the row to encode.
 * @return the encoded object, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded = when (column) {
            is ColumnGroup<*> -> encodeRow(column, index)
            is FrameColumn<*> -> encodeFrame(column[index])
            else -> encodePrimitiveData(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
868
879
869
- col.typeClass in valueTypes -> {
870
- val v = col[index]
871
- if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) {
872
- v.toString()
873
- } else v
880
/**
 * JSON key names used by the metadata-carrying serialization format
 * (see [toJsonWithMetadata], [encodeRowWithMetadata]).
 *
 * The keys must not contain padding whitespace: they are emitted verbatim as
 * JSON object keys.
 */
internal object SerializationKeys {
    const val DATA = "data"
    const val METADATA = "metadata"
    const val KIND = "kind"
    const val NCOL = "ncol"
    const val NROW = "nrow"

    // The dollar sign is escaped so it is a literal character, not a string template.
    const val VERSION = "\$version"
    const val COLUMNS = "columns"
    const val KOTLIN_DATAFRAME = "kotlin_dataframe"
}
890
+
891
/** Format version string written under the version key by `toJsonWithMetadata`. */
internal const val SERIALIZATION_VERSION = "2.0.0"
892
+
893
/**
 * Encodes the row at [index] of [frame] as a JSON object keyed by column name,
 * wrapping nested columns together with metadata describing them.
 *
 * - A column group becomes `{ data: <encoded row>, metadata: { kind } }`.
 * - A frame column becomes `{ data: <encoded frame>, metadata: { kind, ncol, nrow } }`;
 *   `ncol`/`nrow` describe the full nested frame even when its data is truncated.
 * - Any other column is encoded via [encodePrimitiveData] without a wrapper.
 *
 * @param frame container whose columns supply the values of the row.
 * @param index index of the row to encode.
 * @param rowLimit when non-null, nested frames are cut to their first [rowLimit] rows
 *   before encoding; the same limit is passed down to deeper levels.
 * @return the encoded object, or `null` when [frame] has no columns.
 */
internal fun KlaxonJson.encodeRowWithMetadata(
    frame: ColumnsContainer<*>,
    index: Int,
    rowLimit: Int? = null
): JsonObject? {
    val entries = frame.columns().map { column ->
        val encoded = when (column) {
            is ColumnGroup<*> -> obj(
                DATA to encodeRowWithMetadata(column, index, rowLimit),
                METADATA to obj(KIND to ColumnKind.Group.name)
            )

            is FrameColumn<*> -> {
                val nested = column[index]
                val nestedData = if (rowLimit == null) {
                    encodeFrameWithMetadata(nested)
                } else {
                    encodeFrameWithMetadata(nested.rows().take(rowLimit).toDataFrame(), rowLimit)
                }
                obj(
                    DATA to nestedData,
                    METADATA to obj(
                        KIND to ColumnKind.Frame.name,
                        NCOL to nested.ncol,
                        NROW to nested.nrow
                    )
                )
            }

            else -> encodePrimitiveData(column, index)
        }
        column.name to encoded
    }
    return if (entries.isEmpty()) null else obj(entries)
}
882
924
883
- internal fun KlaxonJson.encodeFrame (frame : AnyFrame ): JsonArray <* > {
884
- val allColumns = frame.columns()
925
/**
 * Encodes the cell at [index] of a non-group, non-frame column [col].
 *
 * - List columns yield a JSON array (an empty array for a `null` cell).
 * - Columns whose type is one of `valueTypes` yield the raw value, except that
 *   `NaN` doubles/floats are replaced by their string form.
 * - Everything else yields `toString()` of the cell, or `null` for a `null` cell.
 */
internal fun KlaxonJson.encodePrimitiveData(col: AnyCol, index: Int): Any? {
    if (col.isList()) {
        val cell = col[index]
        return if (cell == null) array() else array(cell as List<*>)
    }

    if (col.typeClass !in valueTypes) {
        return col[index]?.toString()
    }

    val cell = col[index]
    return when {
        cell is Double && cell.isNaN() -> cell.toString()
        cell is Float && cell.isNaN() -> cell.toString()
        else -> cell
    }
}
936
+
937
/**
 * Encodes [frame] as a JSON array with one element per row, using the
 * metadata-carrying row format of [encodeRowWithMetadata].
 *
 * For each row the first applicable rule wins:
 * 1. a non-null cell of the column returned by `extractValueColumn()` is emitted as is;
 * 2. a non-null cell of the column returned by `extractArrayColumn()` is emitted as a
 *    nested encoded frame, but only when that column is a frame column;
 * 3. otherwise the whole row is encoded via [encodeRowWithMetadata].
 *
 * @param frame the frame to encode.
 * @param rowLimit optional row limit forwarded to nested frames.
 */
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()
    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    val encodedRows = frame.indices().map { rowIndex ->
        val unnamedValue = valueColumn?.get(rowIndex)
        // The array column is consulted only when the value column gave nothing.
        val unnamedArray = if (unnamedValue == null) arrayColumn?.get(rowIndex) else null
        when {
            unnamedValue != null -> unnamedValue
            unnamedArray != null && arraysAreFrames -> encodeFrameWithMetadata(unnamedArray as AnyFrame, rowLimit)
            else -> encodeRowWithMetadata(frame, rowIndex, rowLimit)
        }
    }

    return array(encodedRows)
}
955
+
956
+ internal fun AnyFrame.extractValueColumn (): DataColumn <* >? {
957
+ val allColumns = columns()
958
+
959
+ return allColumns.filter { it.name.startsWith(valueColumnName) }
891
960
.takeIf { isPossibleToFindUnnamedColumns }
892
961
?.maxByOrNull { it.name }?.let { valueCol ->
893
962
if (valueCol.kind() != ColumnKind .Value ) { // check that value in this column is not null only when other values are null
894
963
null
895
964
} else {
896
965
// check that value in this column is not null only when other values are null
897
- val isValidValueColumn = frame. rows().all { row ->
966
+ val isValidValueColumn = rows().all { row ->
898
967
if (valueCol[row] != null ) {
899
968
allColumns.all { col ->
900
969
if (col.name != valueCol.name) col[row] == null
@@ -906,14 +975,24 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
906
975
else null
907
976
}
908
977
}
978
+ }
979
+
980
/**
 * Whether this frame may contain an unnamed value/array column at all.
 *
 * With exactly one column the "valid value column" check is always true, yet a frame
 * such as `dataFrameOf("value")(1, 2, 3)` was created by the user and must not be
 * treated as an unnamed column, so single-column frames are excluded.
 */
internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean
    get() {
        return columns().size != 1
    }
985
+
986
+ internal fun AnyFrame.extractArrayColumn (): DataColumn <* >? {
987
+ val allColumns = columns()
909
988
910
- val arrayColumn = frame. columns().filter { it.name.startsWith(arrayColumnName) }
989
+ return columns().filter { it.name.startsWith(arrayColumnName) }
911
990
.takeIf { isPossibleToFindUnnamedColumns }
912
991
?.maxByOrNull { it.name }?.let { arrayCol ->
913
992
if (arrayCol.kind() == ColumnKind .Group ) null
914
993
else {
915
994
// check that value in this column is not null only when other values are null
916
- val isValidArrayColumn = frame. rows().all { row ->
995
+ val isValidArrayColumn = rows().all { row ->
917
996
if (arrayCol[row] != null ) {
918
997
allColumns.all { col ->
919
998
if (col.name != arrayCol.name) col[row] == null
@@ -925,13 +1004,24 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
925
1004
else null
926
1005
}
927
1006
}
1007
+ }
1008
+
1009
/**
 * Encodes [frame] as a JSON array with one element per row.
 *
 * For each row the first applicable rule wins:
 * 1. a non-null cell of the column returned by `extractValueColumn()` is emitted as is;
 * 2. a non-null cell of the column returned by `extractArrayColumn()` is emitted as a
 *    nested encoded frame, but only when that column is a frame column;
 * 3. otherwise the whole row is encoded via [encodeRow].
 */
internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()
    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    val encodedRows = frame.indices().map { rowIndex ->
        val unnamedValue = valueColumn?.get(rowIndex)
        // The array column is consulted only when the value column gave nothing.
        val unnamedArray = if (unnamedValue == null) arrayColumn?.get(rowIndex) else null
        when {
            unnamedValue != null -> unnamedValue
            unnamedArray != null && arraysAreFrames -> encodeFrame(unnamedArray as AnyFrame)
            else -> encodeRow(frame, rowIndex)
        }
    }

    return array(encodedRows)
}
937
1027
@@ -941,6 +1031,26 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = fa
941
1031
}.toJsonString(prettyPrint, canonical)
942
1032
}
943
1033
1034
/**
 * Serializes this frame to a JSON object that carries metadata alongside the data.
 *
 * The result has three entries: the format version ([SERIALIZATION_VERSION]),
 * a metadata object with the column names and the row/column counts of the
 * *whole* frame, and the encoded data of at most the first [rowLimit] rows.
 *
 * @param rowLimit number of top-level rows to include in the encoded data.
 * @param nestedRowLimit optional limit applied to the rows of nested frames;
 *   `null` leaves nested frames untruncated.
 * @return the assembled [JsonObject].
 */
public fun AnyFrame.toJsonWithMetadata(
    rowLimit: Int,
    nestedRowLimit: Int? = null
): JsonObject = json {
    val frameMetadata = obj(
        COLUMNS to columnNames(),
        NROW to rowsCount(),
        NCOL to columnsCount()
    )
    val truncated = rows().take(rowLimit).toDataFrame()

    obj(
        VERSION to SERIALIZATION_VERSION,
        METADATA to frameMetadata,
        KOTLIN_DATAFRAME to encodeFrameWithMetadata(truncated, rowLimit = nestedRowLimit),
    )
}
1053
+
944
1054
public fun AnyRow.toJson (prettyPrint : Boolean = false, canonical : Boolean = false): String {
945
1055
return json {
946
1056
encodeRow(df(), index())
0 commit comments