Skip to content

Commit 15a55e5

Browse files
committed
Update serialization for rendering of dataframe in Kotlin notebooks plugin
* Added the metadata required by the Kotlin notebooks plugin to the serialization format of the dataframe, so that the dataframe can be rendered there.
* Introduced the method `toJsonWithMetadata`, which includes this metadata when serializing the dataframe.
1 parent 8ac0ace commit 15a55e5

File tree

4 files changed

+288
-118
lines changed

4 files changed

+288
-118
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 134 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
4949
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
5050
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
5151
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
52+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.COLUMNS
53+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.DATA
54+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KIND
55+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.KOTLIN_DATAFRAME
56+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.METADATA
57+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NCOL
58+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.NROW
59+
import org.jetbrains.kotlinx.dataframe.io.SerializationKeys.VERSION
5260
import org.jetbrains.kotlinx.dataframe.ncol
5361
import org.jetbrains.kotlinx.dataframe.nrow
5462
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
@@ -859,42 +867,103 @@ private val valueTypes =
859867

860868
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
861869
val values = frame.columns().map { col ->
862-
when {
863-
col is ColumnGroup<*> -> encodeRow(col, index)
864-
col is FrameColumn<*> -> encodeFrame(col[index])
865-
col.isList() -> {
866-
col[index]?.let { array(it as List<*>) } ?: array()
867-
}
870+
when (col) {
871+
is ColumnGroup<*> -> encodeRow(col, index)
872+
is FrameColumn<*> -> encodeFrame(col[index])
873+
else -> encodePrimitiveData(col, index)
874+
}.let { col.name to it }
875+
}
876+
if (values.isEmpty()) return null
877+
return obj(values)
878+
}
868879

869-
col.typeClass in valueTypes -> {
870-
val v = col[index]
871-
if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) {
872-
v.toString()
873-
} else v
880+
internal object SerializationKeys {
881+
const val DATA = "data"
882+
const val METADATA = "metadata"
883+
const val KIND = "kind"
884+
const val NCOL = "ncol"
885+
const val NROW = "nrow"
886+
const val VERSION = "\$version"
887+
const val COLUMNS = "columns"
888+
const val KOTLIN_DATAFRAME = "kotlin_dataframe"
889+
}
890+
891+
internal const val SERIALIZATION_VERSION = "2.0.0"
892+
893+
internal fun KlaxonJson.encodeRowWithMetadata(
894+
frame: ColumnsContainer<*>,
895+
index: Int,
896+
rowLimit: Int? = null
897+
): JsonObject? {
898+
val values = frame.columns().map { col ->
899+
when (col) {
900+
is ColumnGroup<*> -> obj(
901+
DATA to encodeRowWithMetadata(col, index, rowLimit),
902+
METADATA to obj(KIND to ColumnKind.Group.name)
903+
)
904+
905+
is FrameColumn<*> -> {
906+
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index])
907+
else encodeFrameWithMetadata(col[index].rows().take(rowLimit).toDataFrame(), rowLimit)
908+
obj(
909+
DATA to data,
910+
METADATA to obj(
911+
KIND to ColumnKind.Frame.name,
912+
NCOL to col[index].ncol,
913+
NROW to col[index].nrow
914+
)
915+
)
874916
}
875917

876-
else -> col[index]?.toString()
918+
else -> encodePrimitiveData(col, index)
877919
}.let { col.name to it }
878920
}
879921
if (values.isEmpty()) return null
880922
return obj(values)
881923
}
882924

883-
internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
884-
val allColumns = frame.columns()
925+
internal fun KlaxonJson.encodePrimitiveData(col: AnyCol, index: Int): Any? = when {
926+
col.isList() -> col[index]?.let { array(it as List<*>) } ?: array()
927+
col.typeClass in valueTypes -> {
928+
val v = col[index]
929+
if ((v is Double && v.isNaN()) || (v is Float && v.isNaN())) {
930+
v.toString()
931+
} else v
932+
}
933+
934+
else -> col[index]?.toString()
935+
}
936+
937+
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
938+
val valueColumn = frame.extractValueColumn()
939+
val arrayColumn = frame.extractArrayColumn()
940+
941+
val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame
885942

886-
// if there is only 1 column, then `isValidValueColumn` always true.
887-
// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like unnamed column
888-
// because it was created by user.
889-
val isPossibleToFindUnnamedColumns = allColumns.size != 1
890-
val valueColumn = allColumns.filter { it.name.startsWith(valueColumnName) }
943+
val data = frame.indices().map { rowIndex ->
944+
valueColumn
945+
?.get(rowIndex)
946+
?: arrayColumn?.get(rowIndex)
947+
?.let {
948+
if (arraysAreFrames) encodeFrameWithMetadata(it as AnyFrame, rowLimit) else null
949+
}
950+
?: encodeRowWithMetadata(frame, rowIndex, rowLimit)
951+
}
952+
953+
return array(data)
954+
}
955+
956+
internal fun AnyFrame.extractValueColumn(): DataColumn<*>? {
957+
val allColumns = columns()
958+
959+
return allColumns.filter { it.name.startsWith(valueColumnName) }
891960
.takeIf { isPossibleToFindUnnamedColumns }
892961
?.maxByOrNull { it.name }?.let { valueCol ->
893962
if (valueCol.kind() != ColumnKind.Value) { // check that value in this column is not null only when other values are null
894963
null
895964
} else {
896965
// check that value in this column is not null only when other values are null
897-
val isValidValueColumn = frame.rows().all { row ->
966+
val isValidValueColumn = rows().all { row ->
898967
if (valueCol[row] != null) {
899968
allColumns.all { col ->
900969
if (col.name != valueCol.name) col[row] == null
@@ -906,14 +975,24 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
906975
else null
907976
}
908977
}
978+
}
979+
980+
// if there is only 1 column, then `isValidValueColumn` always true.
981+
// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like unnamed column
982+
// because it was created by user.
983+
internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean
984+
get() = columns().size != 1
985+
986+
internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? {
987+
val allColumns = columns()
909988

910-
val arrayColumn = frame.columns().filter { it.name.startsWith(arrayColumnName) }
989+
return columns().filter { it.name.startsWith(arrayColumnName) }
911990
.takeIf { isPossibleToFindUnnamedColumns }
912991
?.maxByOrNull { it.name }?.let { arrayCol ->
913992
if (arrayCol.kind() == ColumnKind.Group) null
914993
else {
915994
// check that value in this column is not null only when other values are null
916-
val isValidArrayColumn = frame.rows().all { row ->
995+
val isValidArrayColumn = rows().all { row ->
917996
if (arrayCol[row] != null) {
918997
allColumns.all { col ->
919998
if (col.name != arrayCol.name) col[row] == null
@@ -925,13 +1004,24 @@ internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
9251004
else null
9261005
}
9271006
}
1007+
}
1008+
1009+
internal fun KlaxonJson.encodeFrame(frame: AnyFrame): JsonArray<*> {
1010+
val valueColumn = frame.extractValueColumn()
1011+
val arrayColumn = frame.extractArrayColumn()
9281012

9291013
val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame
9301014

9311015
val data = frame.indices().map { rowIndex ->
932-
valueColumn?.get(rowIndex) ?: arrayColumn?.get(rowIndex)
933-
?.let { if (arraysAreFrames) encodeFrame(it as AnyFrame) else null } ?: encodeRow(frame, rowIndex)
1016+
valueColumn
1017+
?.get(rowIndex)
1018+
?: arrayColumn?.get(rowIndex)
1019+
?.let {
1020+
if (arraysAreFrames) encodeFrame(it as AnyFrame) else null
1021+
}
1022+
?: encodeRow(frame, rowIndex)
9341023
}
1024+
9351025
return array(data)
9361026
}
9371027

@@ -941,6 +1031,26 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = fa
9411031
}.toJsonString(prettyPrint, canonical)
9421032
}
9431033

1034+
public fun AnyFrame.toJsonWithMetadata(
1035+
rowLimit: Int,
1036+
nestedRowLimit: Int? = null
1037+
): JsonObject {
1038+
return json {
1039+
obj(
1040+
VERSION to SERIALIZATION_VERSION,
1041+
METADATA to obj(
1042+
COLUMNS to columnNames(),
1043+
NROW to rowsCount(),
1044+
NCOL to columnsCount()
1045+
),
1046+
KOTLIN_DATAFRAME to encodeFrameWithMetadata(
1047+
rows().take(rowLimit).toDataFrame(),
1048+
rowLimit = nestedRowLimit
1049+
),
1050+
)
1051+
}
1052+
}
1053+
9441054
public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String {
9451055
return json {
9461056
encodeRow(df(), index())

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
package org.jetbrains.kotlinx.dataframe.jupyter
22

3-
import com.beust.klaxon.json
4-
import org.jetbrains.kotlinx.dataframe.api.rows
5-
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
63
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
74
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
8-
import org.jetbrains.kotlinx.dataframe.io.encodeFrame
95
import org.jetbrains.kotlinx.dataframe.io.toHTML
6+
import org.jetbrains.kotlinx.dataframe.io.toJsonWithMetadata
107
import org.jetbrains.kotlinx.dataframe.io.toStaticHtml
118
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame
129
import org.jetbrains.kotlinx.dataframe.nrow
13-
import org.jetbrains.kotlinx.dataframe.size
1410
import org.jetbrains.kotlinx.jupyter.api.HtmlData
1511
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType
1612
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion
@@ -60,21 +56,18 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
6056
val staticHtml = df.toStaticHtml(reifiedDisplayConfiguration, DefaultCellRenderer).toJupyterHtmlData()
6157

6258
if (notebook.kernelVersion >= KotlinKernelVersion.from(MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI)!!) {
63-
val jsonEncodedDf = json {
64-
obj(
65-
"nrow" to df.size.nrow,
66-
"ncol" to df.size.ncol,
67-
"columns" to df.columnNames(),
68-
"kotlin_dataframe" to encodeFrame(df.rows().take(limit).toDataFrame()),
69-
)
70-
}.toJsonString()
71-
notebook.renderAsIFrameAsNeeded(html, staticHtml, jsonEncodedDf)
59+
val jsonEncodedDf = df.toJsonWithMetadata(limit, reifiedDisplayConfiguration.rowsLimit)
60+
notebook.renderAsIFrameAsNeeded(html, staticHtml, jsonEncodedDf.toJsonString())
7261
} else {
7362
notebook.renderHtmlAsIFrameIfNeeded(html)
7463
}
7564
}
7665

77-
internal fun Notebook.renderAsIFrameAsNeeded(data: HtmlData, staticData: HtmlData, jsonEncodedDf: String): MimeTypedResult {
66+
internal fun Notebook.renderAsIFrameAsNeeded(
67+
data: HtmlData,
68+
staticData: HtmlData,
69+
jsonEncodedDf: String
70+
): MimeTypedResult {
7871
val textHtml = if (jupyterClientType == JupyterClientType.KOTLIN_NOTEBOOK) {
7972
data.generateIframePlaneText(currentColorScheme) +
8073
staticData.toString(currentColorScheme)

0 commit comments

Comments
 (0)