Skip to content

Commit d4aa42f

Browse files
committed
Refactor encoding options to use custom encoders
Replace `EncodingOptions` with `CustomEncoder` to improve flexibility and clarity in the JSON serialization process. This update introduces the `CustomEncoder` interface and dedicated encoders for DataFrameConvertable and BufferedImage values, along with the necessary adjustments across related classes and tests.
1 parent 6a999b0 commit d4aa42f

File tree

13 files changed

+221
-187
lines changed

13 files changed

+221
-187
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/CellKind.kt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
package org.jetbrains.kotlinx.dataframe.columns
22

3-
public enum class CellKind {
3+
/**
4+
* Represents special kinds of elements that can be found within a Column.
5+
* This is similar to the [ColumnKind], but it applies to specific elements of the Column.
6+
* Its main use is to provide metadata during serialization for visualization within the Kotlin Notebook (KTNB) plugin.
7+
*/
8+
internal enum class CellKind {
9+
/**
10+
* Represents a cell kind within a Column that is specifically convertible to a DataFrame.
11+
*/
412
DataFrameConvertable {
513
override fun toString(): String = "DataFrameConvertable"
614
},

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt

Lines changed: 66 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
3838
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
3939
import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME
4040
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
41-
import org.jetbrains.kotlinx.dataframe.io.DataframeConvertableEncodingOptions
42-
import org.jetbrains.kotlinx.dataframe.io.EncodingOptions
41+
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
4342
import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME
4443
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils
4544
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.isDataframeConvertable
@@ -53,7 +52,7 @@ import java.io.IOException
5352

5453
// See docs/serialization_format.md for a description of
5554
// serialization versions and format.
56-
internal const val SERIALIZATION_VERSION = "2.1.0"
55+
internal const val SERIALIZATION_VERSION = "2.1.1"
5756

5857
internal object SerializationKeys {
5958
const val DATA = "data"
@@ -113,14 +112,14 @@ internal fun encodeRowWithMetadata(
113112
frame: ColumnsContainer<*>,
114113
index: Int,
115114
rowLimit: Int? = null,
116-
encodingOptions: List<EncodingOptions>,
115+
customEncoders: List<CustomEncoder> = emptyList(),
117116
): JsonElement? {
118117
val values: List<Pair<String, JsonElement>> = frame.columns().map { col ->
119118
when (col) {
120119
is ColumnGroup<*> -> {
121120
val schema = col.schema()
122121
buildJsonObject {
123-
put(DATA, encodeRowWithMetadata(col, index, rowLimit, encodingOptions) ?: JsonPrimitive(null))
122+
put(DATA, encodeRowWithMetadata(col, index, rowLimit, customEncoders) ?: JsonPrimitive(null))
124123
putJsonObject(METADATA) {
125124
put(KIND, JsonPrimitive(ColumnKind.Group.toString()))
126125
put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys))
@@ -137,9 +136,9 @@ internal fun encodeRowWithMetadata(
137136

138137
is FrameColumn<*> -> {
139138
val data = if (rowLimit == null) {
140-
encodeFrameWithMetadata(col[index], null, encodingOptions)
139+
encodeFrameWithMetadata(col[index], null, customEncoders)
141140
} else {
142-
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, encodingOptions)
141+
encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, customEncoders)
143142
}
144143
val schema = col.schema.value
145144
buildJsonObject {
@@ -160,32 +159,18 @@ internal fun encodeRowWithMetadata(
160159
}
161160
}
162161

163-
else -> encodeValue(col, index, encodingOptions)
162+
else -> encodeValue(col, index, customEncoders)
164163
}.let { col.name to it }
165164
}
166165
if (values.isEmpty()) return null
167166
return JsonObject(values.toMap())
168167
}
169168

170-
internal fun encodeValue(col: AnyCol, index: Int, encodingOptions: List<EncodingOptions>): JsonElement =
169+
internal fun encodeValue(col: AnyCol, index: Int, customEncoders: List<CustomEncoder> = emptyList()): JsonElement =
171170
when {
172-
isDataframeConvertable(col[index]) && encodingOptions.get<DataframeConvertableEncodingOptions>() != null ->
173-
if (col[index] == null) {
174-
JsonPrimitive(null)
175-
} else {
176-
val options = encodingOptions.get<DataframeConvertableEncodingOptions>()!!
177-
val data = encodeFrameWithMetadata(
178-
KotlinNotebookPluginUtils.convertToDataFrame(col[index]!!),
179-
options.rowsLimit,
180-
encodingOptions,
181-
)
182-
buildJsonObject {
183-
put(DATA, data)
184-
putJsonObject(METADATA) {
185-
put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString()))
186-
}
187-
}
188-
}
171+
customEncoders.any { it.canEncode(col[index]) } -> {
172+
customEncoders.first { it.canEncode(col[index]) }.encode(col[index])
173+
}
189174

190175
col.isList() -> col[index]?.let { list ->
191176
val values = (list as List<*>).map { convert(it) }
@@ -194,43 +179,61 @@ internal fun encodeValue(col: AnyCol, index: Int, encodingOptions: List<Encoding
194179

195180
col.typeClass in valueTypes -> convert(col[index])
196181

197-
col.typeClass == BufferedImage::class && encodingOptions.get<Base64ImageEncodingOptions>() != null ->
198-
col[index]?.let { image ->
199-
JsonPrimitive(
200-
encodeBufferedImageAsBase64(
201-
image as BufferedImage,
202-
encodingOptions.get<Base64ImageEncodingOptions>()!!,
203-
),
204-
)
205-
} ?: JsonPrimitive("")
206-
207182
else -> JsonPrimitive(col[index]?.toString())
208183
}
209184

210-
@Suppress("UNCHECKED_CAST")
211-
private inline fun <reified T : EncodingOptions> List<EncodingOptions>.get(): T? = this.find { it is T } as T?
212-
213-
private fun encodeBufferedImageAsBase64(
214-
image: BufferedImage,
215-
imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(),
216-
): String? =
217-
try {
218-
val preparedImage = if (imageEncodingOptions.isLimitSizeOn) {
219-
image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)
220-
} else {
221-
image
222-
}
185+
internal class DataframeConvertableEncoder(
186+
private val encoders: List<CustomEncoder>,
187+
private val rowLimit: Int? = null,
188+
) : CustomEncoder {
189+
override fun canEncode(input: Any?): Boolean = isDataframeConvertable(input)
190+
191+
override fun encode(input: Any?): JsonElement =
192+
input?.let {
193+
val data = encodeFrameWithMetadata(
194+
KotlinNotebookPluginUtils.convertToDataFrame(input),
195+
rowLimit,
196+
encoders,
197+
)
198+
buildJsonObject {
199+
put(DATA, data)
200+
putJsonObject(METADATA) {
201+
put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString()))
202+
}
203+
}
204+
} ?: JsonPrimitive(null)
205+
}
223206

224-
val bytes = if (imageEncodingOptions.isGzipOn) {
225-
preparedImage.toByteArray().encodeGzip()
226-
} else {
227-
preparedImage.toByteArray()
228-
}
207+
internal class BufferedImageEncoder(private val options: Base64ImageEncodingOptions) : CustomEncoder {
208+
override fun canEncode(input: Any?): Boolean = input is BufferedImage
229209

230-
bytes.toBase64()
231-
} catch (e: IOException) {
232-
null
233-
}
210+
override fun encode(input: Any?): JsonElement =
211+
JsonPrimitive(
212+
input?.let { image -> encodeBufferedImageAsBase64(image as BufferedImage, options) } ?: "",
213+
)
214+
215+
private fun encodeBufferedImageAsBase64(
216+
image: BufferedImage,
217+
imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(),
218+
): String =
219+
try {
220+
val preparedImage = if (imageEncodingOptions.isLimitSizeOn) {
221+
image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)
222+
} else {
223+
image
224+
}
225+
226+
val bytes = if (imageEncodingOptions.isGzipOn) {
227+
preparedImage.toByteArray().encodeGzip()
228+
} else {
229+
preparedImage.toByteArray()
230+
}
231+
232+
bytes.toBase64()
233+
} catch (_: IOException) {
234+
""
235+
}
236+
}
234237

235238
private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject =
236239
JsonObject(
@@ -244,7 +247,7 @@ private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject =
244247
internal fun encodeFrameWithMetadata(
245248
frame: AnyFrame,
246249
rowLimit: Int? = null,
247-
encodingOptions: List<EncodingOptions>,
250+
customEncoders: List<CustomEncoder> = emptyList(),
248251
): JsonArray {
249252
val valueColumn = frame.extractValueColumn()
250253
val arrayColumn = frame.extractArrayColumn()
@@ -258,13 +261,13 @@ internal fun encodeFrameWithMetadata(
258261
encodeFrameWithMetadata(
259262
it as AnyFrame,
260263
rowLimit,
261-
encodingOptions,
264+
customEncoders,
262265
)
263266
} else {
264267
null
265268
}
266269
}
267-
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, encodingOptions)
270+
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, customEncoders)
268271
}
269272

270273
return buildJsonArray { addAll(data.map { convert(it) }) }
@@ -372,7 +375,7 @@ internal fun encodeDataFrameWithMetadata(
372375
frame: AnyFrame,
373376
rowLimit: Int,
374377
nestedRowLimit: Int? = null,
375-
encodingOptions: List<EncodingOptions>,
378+
customEncoders: List<CustomEncoder> = emptyList(),
376379
): JsonObject =
377380
buildJsonObject {
378381
put(VERSION, JsonPrimitive(SERIALIZATION_VERSION))
@@ -393,7 +396,7 @@ internal fun encodeDataFrameWithMetadata(
393396
encodeFrameWithMetadata(
394397
frame = frame.take(rowLimit),
395398
rowLimit = nestedRowLimit,
396-
encodingOptions = encodingOptions,
399+
customEncoders = customEncoders,
397400
),
398401
)
399402
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata
2222
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
2323
import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow
2424
import org.jetbrains.kotlinx.dataframe.impl.io.readJson
25-
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON
26-
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON
2725
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
2826
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
2927
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
@@ -303,7 +301,7 @@ public fun AnyFrame.toJsonWithMetadata(
303301
rowLimit: Int,
304302
nestedRowLimit: Int? = null,
305303
prettyPrint: Boolean = false,
306-
encodingOptions: List<EncodingOptions> = emptyList(),
304+
customEncoders: List<CustomEncoder> = emptyList(),
307305
): String {
308306
val json = Json {
309307
this.prettyPrint = prettyPrint
@@ -312,18 +310,32 @@ public fun AnyFrame.toJsonWithMetadata(
312310
}
313311
return json.encodeToString(
314312
JsonElement.serializer(),
315-
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, encodingOptions),
313+
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, customEncoders),
316314
)
317315
}
318316

319-
internal const val DEFAULT_IMG_SIZE = 600
320-
321317
/**
322-
* Interface representing encoding options that can be applied when converting a data structure to JSON format.
323-
* Implementations of this interface can provide specific behaviors for encoding various data types,
324-
* such as images or data frames, when serializing to JSON.
318+
* Interface for defining a custom encoder that is applied to values during DataFrame JSON serialization.
325319
*/
326-
public interface EncodingOptions
320+
public interface CustomEncoder {
321+
/**
322+
* Determines whether this encoder can encode the given input.
323+
*
324+
* @param input The input object to be checked for suitability.
325+
* @return `true` if the input can be encoded, otherwise `false`.
326+
*/
327+
public fun canEncode(input: Any?): Boolean
328+
329+
/**
330+
* Encodes the provided input into a JSON element.
331+
*
332+
* @param input The input object to be encoded.
333+
* @return A JsonElement representing the encoded input.
334+
*/
335+
public fun encode(input: Any?): JsonElement
336+
}
337+
338+
internal const val DEFAULT_IMG_SIZE = 600
327339

328340
/**
329341
* Class representing the options for encoding images.
@@ -334,7 +346,7 @@ public interface EncodingOptions
334346
public class Base64ImageEncodingOptions(
335347
public val imageSizeLimit: Int = DEFAULT_IMG_SIZE,
336348
private val options: Int = GZIP_ON or LIMIT_SIZE_ON,
337-
) : EncodingOptions {
349+
) {
338350
public val isGzipOn: Boolean
339351
get() = options and GZIP_ON == GZIP_ON
340352

@@ -348,14 +360,6 @@ public class Base64ImageEncodingOptions(
348360
}
349361
}
350362

351-
/**
352-
* Represents encoding options for converting to JSON objects that can be convertible to DataFrame
353-
*
354-
* @param rowsLimit Optional limit on the number of rows to be included in the JSON output.
355-
* Default is null, meaning no limit is imposed.
356-
*/
357-
public class DataframeConvertableEncodingOptions(public val rowsLimit: Int? = null) : EncodingOptions
358-
359363
public fun AnyRow.toJson(prettyPrint: Boolean = false): String {
360364
val json = Json {
361365
this.prettyPrint = prettyPrint

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterHtmlRenderer.kt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,17 @@ import kotlinx.serialization.json.buildJsonObject
66
import kotlinx.serialization.json.put
77
import kotlinx.serialization.json.putJsonArray
88
import org.jetbrains.kotlinx.dataframe.api.take
9+
import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder
10+
import org.jetbrains.kotlinx.dataframe.impl.io.DataframeConvertableEncoder
911
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
1012
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
1113
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
1214
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
1315
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
1416
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
17+
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
1518
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
16-
import org.jetbrains.kotlinx.dataframe.io.DataframeConvertableEncodingOptions
1719
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
18-
import org.jetbrains.kotlinx.dataframe.io.EncodingOptions
1920
import org.jetbrains.kotlinx.dataframe.io.toHTML
2021
import org.jetbrains.kotlinx.dataframe.io.toJsonWithMetadata
2122
import org.jetbrains.kotlinx.dataframe.io.toStaticHtml
@@ -88,19 +89,19 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
8889
}
8990

9091
else -> {
91-
val encodingOptions = buildList<EncodingOptions> {
92+
val encoders = buildList<CustomEncoder> {
9293
if (ideBuildNumber.supportsDataFrameConvertableValues()) {
93-
add(DataframeConvertableEncodingOptions())
94+
add(DataframeConvertableEncoder(this))
9495
}
9596
if (ideBuildNumber.supportsImageViewer()) {
96-
add(Base64ImageEncodingOptions())
97+
add(BufferedImageEncoder(Base64ImageEncodingOptions()))
9798
}
9899
}
99100

100101
df.toJsonWithMetadata(
101102
rowLimit = limit,
102103
nestedRowLimit = reifiedDisplayConfiguration.rowsLimit,
103-
encodingOptions = encodingOptions,
104+
customEncoders = encoders,
104105
)
105106
}
106107
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import kotlinx.serialization.json.jsonArray
77
import kotlinx.serialization.json.jsonObject
88
import kotlinx.serialization.json.jsonPrimitive
99
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
10+
import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder
1011
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
1112
import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio
1213
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF
@@ -65,7 +66,7 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp
6566
val jsonStr = df.toJsonWithMetadata(
6667
20,
6768
nestedRowLimit = 20,
68-
encodingOptions = if (encodingOptions != null) listOf(encodingOptions) else emptyList(),
69+
customEncoders = listOfNotNull(encodingOptions?.let { BufferedImageEncoder(encodingOptions) }),
6970
)
7071

7172
return parseJsonStr(jsonStr)

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ class RenderingTests : JupyterReplTestCase() {
235235
val expectedOutput =
236236
"""
237237
{
238-
"${'$'}version": "2.1.0",
238+
"${'$'}version": "2.1.1",
239239
"metadata": {
240240
"columns": ["group", "col3", "col4"],
241241
"types": [{

0 commit comments

Comments
 (0)