Skip to content

Commit b9f7e53

Browse files
committed
Add image encoding options to JSON serialization
The commit introduces an ImageEncodingOptions class that controls how images are encoded when serializing a DataFrame to JSON with metadata. The options cover whether to encode images as Base64, a size limit used when resizing images, and optional GZIP compression.
1 parent ee7cc61 commit b9f7e53

File tree

4 files changed

+178
-24
lines changed

4 files changed

+178
-24
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
2323
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
2424
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
2525
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
26+
import org.jetbrains.kotlinx.dataframe.io.ImageEncodingOptions
2627
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName
2728
import org.jetbrains.kotlinx.dataframe.io.valueColumnName
2829
import org.jetbrains.kotlinx.dataframe.ncol
2930
import org.jetbrains.kotlinx.dataframe.nrow
3031
import org.jetbrains.kotlinx.dataframe.typeClass
32+
import java.awt.image.BufferedImage
33+
import java.io.IOException
3134

3235
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
3336
val values = frame.columns().map { col ->
@@ -57,18 +60,19 @@ internal const val SERIALIZATION_VERSION = "2.0.0"
5760
internal fun KlaxonJson.encodeRowWithMetadata(
5861
frame: ColumnsContainer<*>,
5962
index: Int,
60-
rowLimit: Int? = null
63+
rowLimit: Int? = null,
64+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
6165
): JsonObject? {
6266
val values = frame.columns().map { col ->
6367
when (col) {
6468
is ColumnGroup<*> -> obj(
65-
DATA to encodeRowWithMetadata(col, index, rowLimit),
69+
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
6670
METADATA to obj(KIND to ColumnKind.Group.toString())
6771
)
6872

6973
is FrameColumn<*> -> {
70-
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index])
71-
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit)
74+
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
75+
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
7276
obj(
7377
DATA to data,
7478
METADATA to obj(
@@ -79,7 +83,7 @@ internal fun KlaxonJson.encodeRowWithMetadata(
7983
)
8084
}
8185

82-
else -> encodeValue(col, index)
86+
else -> encodeValue(col, index, imageEncodingOptions)
8387
}.let { col.name to it }
8488
}
8589
if (values.isEmpty()) return null
@@ -89,7 +93,11 @@ internal fun KlaxonJson.encodeRowWithMetadata(
8993
private val valueTypes =
9094
setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class)
9195

92-
internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? = when {
96+
internal fun KlaxonJson.encodeValue(
97+
col: AnyCol,
98+
index: Int,
99+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions(encodeAsBase64 = false)
100+
): Any? = when {
93101
col.isList() -> col[index]?.let { list ->
94102
val values = (list as List<*>).map {
95103
when (it) {
@@ -108,10 +116,45 @@ internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? = when {
108116
} else v
109117
}
110118

119+
col.typeClass == BufferedImage::class -> col[index]?.let { image ->
120+
encodeBufferedImage(image as BufferedImage, imageEncodingOptions)
121+
} ?: ""
122+
111123
else -> col[index]?.toString()
112124
}
113125

114-
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
126+
private fun encodeBufferedImage(
127+
image: BufferedImage,
128+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
129+
): String? {
130+
if (!imageEncodingOptions.encodeAsBase64) {
131+
return image.toString()
132+
}
133+
134+
return try {
135+
val preparedImage = if (imageEncodingOptions.isLimitSizeOn) {
136+
image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)
137+
} else {
138+
image
139+
}
140+
141+
val bytes = if (imageEncodingOptions.isGzipOn) {
142+
preparedImage.toByteArray().encodeGzip()
143+
} else {
144+
preparedImage.toByteArray()
145+
}
146+
147+
bytes.toBase64()
148+
} catch (e: IOException) {
149+
null
150+
}
151+
}
152+
153+
internal fun KlaxonJson.encodeFrameWithMetadata(
154+
frame: AnyFrame,
155+
rowLimit: Int? = null,
156+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
157+
): JsonArray<*> {
115158
val valueColumn = frame.extractValueColumn()
116159
val arrayColumn = frame.extractArrayColumn()
117160

@@ -122,9 +165,13 @@ internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int?
122165
?.get(rowIndex)
123166
?: arrayColumn?.get(rowIndex)
124167
?.let {
125-
if (arraysAreFrames) encodeFrameWithMetadata(it as AnyFrame, rowLimit) else null
168+
if (arraysAreFrames) encodeFrameWithMetadata(
169+
it as AnyFrame,
170+
rowLimit,
171+
imageEncodingOptions
172+
) else null
126173
}
127-
?: encodeRowWithMetadata(frame, rowIndex, rowLimit)
174+
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, imageEncodingOptions)
128175
}
129176

130177
return array(data)
@@ -206,6 +253,7 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
206253
frame: AnyFrame,
207254
rowLimit: Int,
208255
nestedRowLimit: Int? = null,
256+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
209257
): JsonObject {
210258
return obj(
211259
VERSION to SERIALIZATION_VERSION,
@@ -216,7 +264,8 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
216264
),
217265
KOTLIN_DATAFRAME to encodeFrameWithMetadata(
218266
frame.take(rowLimit),
219-
rowLimit = nestedRowLimit
267+
rowLimit = nestedRowLimit,
268+
imageEncodingOptions
220269
),
221270
)
222271
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,20 +276,48 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = fa
276276
* Applied for each frame column recursively
277277
* @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
278278
* @param canonical Specifies whether the output JSON should be in a canonical form.
279+
* @param imageEncodingOptions The options for encoding images in the DataFrame. Defaults to `ImageEncodingOptions(encodeAsBase64 = true)`, i.e. images are encoded as Base64.
279280
*
280281
* @return The DataFrame converted to a JSON string with metadata.
281282
*/
282283
public fun AnyFrame.toJsonWithMetadata(
283284
rowLimit: Int,
284285
nestedRowLimit: Int? = null,
285286
prettyPrint: Boolean = false,
286-
canonical: Boolean = false
287+
canonical: Boolean = false,
288+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions(encodeAsBase64 = true)
287289
): String {
288290
return json {
289-
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit)
291+
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, imageEncodingOptions)
290292
}.toJsonString(prettyPrint, canonical)
291293
}
292294

295+
internal const val DEFAULT_IMG_SIZE = 600
296+
297+
/**
298+
* Class representing the options for encoding images.
299+
*
300+
* @property encodeAsBase64 Specifies whether the images should be encoded as Base64. Defaults to false.
301+
* @property imageSizeLimit The maximum size to which images should be resized. Defaults to the value of DEFAULT_IMG_SIZE.
302+
* @property options Bit flags combined with bitwise OR from the [GZIP_ON] and [LIMIT_SIZE_ON] constants. Defaults to `GZIP_ON or LIMIT_SIZE_ON`, i.e. both flags set.
303+
*/
304+
public class ImageEncodingOptions(
305+
public val encodeAsBase64: Boolean = false,
306+
public val imageSizeLimit: Int = DEFAULT_IMG_SIZE,
307+
private val options: Int = GZIP_ON or LIMIT_SIZE_ON
308+
) {
309+
public val isGzipOn: Boolean
310+
get() = options and GZIP_ON == GZIP_ON
311+
312+
public val isLimitSizeOn: Boolean
313+
get() = options and LIMIT_SIZE_ON == LIMIT_SIZE_ON
314+
315+
public companion object {
316+
public const val GZIP_ON: Int = 1 // 2^0
317+
public const val LIMIT_SIZE_ON: Int = 2 // 2^1
318+
}
319+
}
320+
293321
public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String {
294322
return json {
295323
encodeRow(df(), index())

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
2323
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
2424
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
2525
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
26+
import org.jetbrains.kotlinx.dataframe.io.ImageEncodingOptions
2627
import org.jetbrains.kotlinx.dataframe.io.arrayColumnName
2728
import org.jetbrains.kotlinx.dataframe.io.valueColumnName
2829
import org.jetbrains.kotlinx.dataframe.ncol
2930
import org.jetbrains.kotlinx.dataframe.nrow
3031
import org.jetbrains.kotlinx.dataframe.typeClass
32+
import java.awt.image.BufferedImage
33+
import java.io.IOException
3134

3235
internal fun KlaxonJson.encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject? {
3336
val values = frame.columns().map { col ->
@@ -57,18 +60,19 @@ internal const val SERIALIZATION_VERSION = "2.0.0"
5760
internal fun KlaxonJson.encodeRowWithMetadata(
5861
frame: ColumnsContainer<*>,
5962
index: Int,
60-
rowLimit: Int? = null
63+
rowLimit: Int? = null,
64+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
6165
): JsonObject? {
6266
val values = frame.columns().map { col ->
6367
when (col) {
6468
is ColumnGroup<*> -> obj(
65-
DATA to encodeRowWithMetadata(col, index, rowLimit),
69+
DATA to encodeRowWithMetadata(col, index, rowLimit, imageEncodingOptions),
6670
METADATA to obj(KIND to ColumnKind.Group.toString())
6771
)
6872

6973
is FrameColumn<*> -> {
70-
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index])
71-
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit)
74+
val data = if (rowLimit == null) encodeFrameWithMetadata(col[index], null, imageEncodingOptions)
75+
else encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, imageEncodingOptions)
7276
obj(
7377
DATA to data,
7478
METADATA to obj(
@@ -79,7 +83,7 @@ internal fun KlaxonJson.encodeRowWithMetadata(
7983
)
8084
}
8185

82-
else -> encodeValue(col, index)
86+
else -> encodeValue(col, index, imageEncodingOptions)
8387
}.let { col.name to it }
8488
}
8589
if (values.isEmpty()) return null
@@ -89,7 +93,11 @@ internal fun KlaxonJson.encodeRowWithMetadata(
8993
private val valueTypes =
9094
setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class)
9195

92-
internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? = when {
96+
internal fun KlaxonJson.encodeValue(
97+
col: AnyCol,
98+
index: Int,
99+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions(encodeAsBase64 = false)
100+
): Any? = when {
93101
col.isList() -> col[index]?.let { list ->
94102
val values = (list as List<*>).map {
95103
when (it) {
@@ -108,10 +116,45 @@ internal fun KlaxonJson.encodeValue(col: AnyCol, index: Int): Any? = when {
108116
} else v
109117
}
110118

119+
col.typeClass == BufferedImage::class -> col[index]?.let { image ->
120+
encodeBufferedImage(image as BufferedImage, imageEncodingOptions)
121+
} ?: ""
122+
111123
else -> col[index]?.toString()
112124
}
113125

114-
internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int? = null): JsonArray<*> {
126+
private fun encodeBufferedImage(
127+
image: BufferedImage,
128+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
129+
): String? {
130+
if (!imageEncodingOptions.encodeAsBase64) {
131+
return image.toString()
132+
}
133+
134+
return try {
135+
val preparedImage = if (imageEncodingOptions.isLimitSizeOn) {
136+
image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)
137+
} else {
138+
image
139+
}
140+
141+
val bytes = if (imageEncodingOptions.isGzipOn) {
142+
preparedImage.toByteArray().encodeGzip()
143+
} else {
144+
preparedImage.toByteArray()
145+
}
146+
147+
bytes.toBase64()
148+
} catch (e: IOException) {
149+
null
150+
}
151+
}
152+
153+
internal fun KlaxonJson.encodeFrameWithMetadata(
154+
frame: AnyFrame,
155+
rowLimit: Int? = null,
156+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
157+
): JsonArray<*> {
115158
val valueColumn = frame.extractValueColumn()
116159
val arrayColumn = frame.extractArrayColumn()
117160

@@ -122,9 +165,13 @@ internal fun KlaxonJson.encodeFrameWithMetadata(frame: AnyFrame, rowLimit: Int?
122165
?.get(rowIndex)
123166
?: arrayColumn?.get(rowIndex)
124167
?.let {
125-
if (arraysAreFrames) encodeFrameWithMetadata(it as AnyFrame, rowLimit) else null
168+
if (arraysAreFrames) encodeFrameWithMetadata(
169+
it as AnyFrame,
170+
rowLimit,
171+
imageEncodingOptions
172+
) else null
126173
}
127-
?: encodeRowWithMetadata(frame, rowIndex, rowLimit)
174+
?: encodeRowWithMetadata(frame, rowIndex, rowLimit, imageEncodingOptions)
128175
}
129176

130177
return array(data)
@@ -206,6 +253,7 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
206253
frame: AnyFrame,
207254
rowLimit: Int,
208255
nestedRowLimit: Int? = null,
256+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions()
209257
): JsonObject {
210258
return obj(
211259
VERSION to SERIALIZATION_VERSION,
@@ -216,7 +264,8 @@ internal fun KlaxonJson.encodeDataFrameWithMetadata(
216264
),
217265
KOTLIN_DATAFRAME to encodeFrameWithMetadata(
218266
frame.take(rowLimit),
219-
rowLimit = nestedRowLimit
267+
rowLimit = nestedRowLimit,
268+
imageEncodingOptions
220269
),
221270
)
222271
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,20 +276,48 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false, canonical: Boolean = fa
276276
* Applied for each frame column recursively
277277
* @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
278278
* @param canonical Specifies whether the output JSON should be in a canonical form.
279+
* @param imageEncodingOptions The options for encoding images in the DataFrame. Defaults to `ImageEncodingOptions(encodeAsBase64 = true)`, i.e. images are encoded as Base64.
279280
*
280281
* @return The DataFrame converted to a JSON string with metadata.
281282
*/
282283
public fun AnyFrame.toJsonWithMetadata(
283284
rowLimit: Int,
284285
nestedRowLimit: Int? = null,
285286
prettyPrint: Boolean = false,
286-
canonical: Boolean = false
287+
canonical: Boolean = false,
288+
imageEncodingOptions: ImageEncodingOptions = ImageEncodingOptions(encodeAsBase64 = true)
287289
): String {
288290
return json {
289-
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit)
291+
encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, imageEncodingOptions)
290292
}.toJsonString(prettyPrint, canonical)
291293
}
292294

295+
internal const val DEFAULT_IMG_SIZE = 600
296+
297+
/**
298+
* Class representing the options for encoding images.
299+
*
300+
* @property encodeAsBase64 Specifies whether the images should be encoded as Base64. Defaults to false.
301+
* @property imageSizeLimit The maximum size to which images should be resized. Defaults to the value of DEFAULT_IMG_SIZE.
302+
* @property options Bit flags combined with bitwise OR from the [GZIP_ON] and [LIMIT_SIZE_ON] constants. Defaults to `GZIP_ON or LIMIT_SIZE_ON`, i.e. both flags set.
303+
*/
304+
public class ImageEncodingOptions(
305+
public val encodeAsBase64: Boolean = false,
306+
public val imageSizeLimit: Int = DEFAULT_IMG_SIZE,
307+
private val options: Int = GZIP_ON or LIMIT_SIZE_ON
308+
) {
309+
public val isGzipOn: Boolean
310+
get() = options and GZIP_ON == GZIP_ON
311+
312+
public val isLimitSizeOn: Boolean
313+
get() = options and LIMIT_SIZE_ON == LIMIT_SIZE_ON
314+
315+
public companion object {
316+
public const val GZIP_ON: Int = 1 // 2^0
317+
public const val LIMIT_SIZE_ON: Int = 2 // 2^1
318+
}
319+
}
320+
293321
public fun AnyRow.toJson(prettyPrint: Boolean = false, canonical: Boolean = false): String {
294322
return json {
295323
encodeRow(df(), index())

0 commit comments

Comments
 (0)