1
1
package org.jetbrains.kotlinx.dataframe.impl.io
2
2
3
- import com.beust.klaxon.JsonArray
4
- import com.beust.klaxon.JsonObject
3
+ import kotlinx.serialization.json.JsonArray
4
+ import kotlinx.serialization.json.JsonNull
5
+ import kotlinx.serialization.json.JsonObject
6
+ import kotlinx.serialization.json.JsonPrimitive
7
+ import kotlinx.serialization.json.boolean
8
+ import kotlinx.serialization.json.booleanOrNull
9
+ import kotlinx.serialization.json.double
10
+ import kotlinx.serialization.json.doubleOrNull
11
+ import kotlinx.serialization.json.float
12
+ import kotlinx.serialization.json.floatOrNull
13
+ import kotlinx.serialization.json.int
14
+ import kotlinx.serialization.json.intOrNull
15
+ import kotlinx.serialization.json.jsonArray
16
+ import kotlinx.serialization.json.jsonPrimitive
17
+ import kotlinx.serialization.json.long
18
+ import kotlinx.serialization.json.longOrNull
5
19
import org.jetbrains.kotlinx.dataframe.AnyCol
6
20
import org.jetbrains.kotlinx.dataframe.AnyFrame
7
21
import org.jetbrains.kotlinx.dataframe.DataColumn
@@ -73,8 +87,8 @@ internal fun readJson(
73
87
val df: AnyFrame = when (typeClashTactic) {
74
88
ARRAY_AND_VALUE_COLUMNS -> {
75
89
when (parsed) {
76
- is JsonArray < * > -> fromJsonListArrayAndValueColumns(
77
- records = parsed.value ,
90
+ is JsonArray -> fromJsonListArrayAndValueColumns(
91
+ records = parsed,
78
92
header = header,
79
93
keyValuePaths = keyValuePaths,
80
94
)
@@ -88,8 +102,8 @@ internal fun readJson(
88
102
89
103
ANY_COLUMNS -> {
90
104
when (parsed) {
91
- is JsonArray < * > -> fromJsonListAnyColumns(
92
- records = parsed.value ,
105
+ is JsonArray -> fromJsonListAnyColumns(
106
+ records = parsed,
93
107
header = header,
94
108
keyValuePaths = keyValuePaths,
95
109
)
@@ -126,18 +140,16 @@ internal fun fromJsonListAnyColumns(
126
140
127
141
// list element type can be JsonObject, JsonArray or primitive
128
142
val nameGenerator = ColumnNameGenerator ()
129
- records.forEach {
130
- when (it ) {
143
+ records.forEach { record ->
144
+ when (record ) {
131
145
is JsonObject -> {
132
146
hasObject = true
133
- it.entries.forEach {
134
- nameGenerator.addIfAbsent(it.key)
135
- }
147
+ record.entries.forEach { nameGenerator.addIfAbsent(it.key) }
136
148
}
137
149
138
- is JsonArray < * > -> hasArray = true
139
- null -> Unit
140
- else -> hasPrimitive = true
150
+ is JsonArray -> hasArray = true
151
+ is JsonNull , null -> Unit
152
+ is JsonPrimitive -> hasPrimitive = true
141
153
}
142
154
}
143
155
@@ -155,7 +167,7 @@ internal fun fromJsonListAnyColumns(
155
167
156
168
@Suppress(" KotlinConstantConditions" )
157
169
val columns: List <AnyCol > = when {
158
- // Create one column of type Any? (or guessed primitive type) from all the records
170
+ // Create one column of type Any? (or a guessed primitive type) from all the records
159
171
colType == AnyColType .ANY -> {
160
172
val collector: DataCollectorBase <Any ?> =
161
173
if (justPrimitives) createDataCollector(records.size) // guess the type
@@ -177,7 +189,7 @@ internal fun fromJsonListAnyColumns(
177
189
)
178
190
}
179
191
180
- is JsonArray < * > -> {
192
+ is JsonArray -> {
181
193
val parsed = fromJsonListAnyColumns(
182
194
records = v,
183
195
keyValuePaths = keyValuePaths,
@@ -189,9 +201,21 @@ internal fun fromJsonListAnyColumns(
189
201
)
190
202
}
191
203
192
- " NaN" -> {
193
- nanIndices.add(i)
194
- collector.add(null )
204
+ is JsonPrimitive -> {
205
+ when {
206
+ v.content == " NaN" -> {
207
+ nanIndices.add(i)
208
+ collector.add(null )
209
+ }
210
+
211
+ v.isString -> collector.add(v.content)
212
+ v.booleanOrNull != null -> collector.add(v.boolean)
213
+ v.intOrNull != null -> collector.add(v.int)
214
+ v.longOrNull != null -> collector.add(v.long)
215
+ v.doubleOrNull != null -> collector.add(v.double)
216
+ v.floatOrNull != null -> collector.add(v.float)
217
+ v.jsonPrimitive is JsonNull -> collector.add(null )
218
+ }
195
219
}
196
220
197
221
else -> collector.add(v)
@@ -227,8 +251,8 @@ internal fun fromJsonListAnyColumns(
227
251
records.forEach {
228
252
startIndices.add(values.size)
229
253
when (it) {
230
- is JsonArray < * > -> values.addAll(it.value )
231
- null -> Unit
254
+ is JsonArray -> values.addAll(it)
255
+ is JsonNull , null -> Unit
232
256
else -> error(" Expected JsonArray, got $it " )
233
257
}
234
258
}
@@ -242,10 +266,10 @@ internal fun fromJsonListAnyColumns(
242
266
parsed.isSingleUnnamedColumn() -> {
243
267
val col = (parsed.getColumn(0 ) as UnnamedColumn ).col
244
268
val elementType = col.type
245
- val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
269
+ val columnValues = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
246
270
DataColumn .createValueColumn(
247
271
name = arrayColumnName,
248
- values = values ,
272
+ values = columnValues ,
249
273
type = List ::class .createType(listOf (KTypeProjection .invariant(elementType))),
250
274
)
251
275
}
@@ -263,10 +287,10 @@ internal fun fromJsonListAnyColumns(
263
287
colType == AnyColType .OBJECTS && isKeyValue -> {
264
288
// collect the value types to make sure Value columns with lists and other values aren't all turned into lists
265
289
val valueTypes = mutableSetOf<KType >()
266
- val dataFrames = records.map {
267
- when (it ) {
290
+ val dataFrames = records.map { record ->
291
+ when (record ) {
268
292
is JsonObject -> {
269
- val map = it.map .mapValues { (key, value) ->
293
+ val map = record .mapValues { (key, value) ->
270
294
val parsed = fromJsonListAnyColumns(
271
295
records = listOf (value),
272
296
keyValuePaths = keyValuePaths,
@@ -288,8 +312,8 @@ internal fun fromJsonListAnyColumns(
288
312
)
289
313
}
290
314
291
- null -> DataFrame .emptyOf<AnyKeyValueProperty >()
292
- else -> error(" Expected JsonObject, got $it " )
315
+ is JsonNull , null -> DataFrame .emptyOf<AnyKeyValueProperty >()
316
+ else -> error(" Expected JsonObject, got $record " )
293
317
}
294
318
}
295
319
@@ -328,7 +352,7 @@ internal fun fromJsonListAnyColumns(
328
352
records.forEach {
329
353
when (it) {
330
354
is JsonObject -> values.add(it[colName])
331
- null -> values.add(null )
355
+ is JsonNull , null -> values.add(null )
332
356
else -> error(" Expected JsonObject, got $it " )
333
357
}
334
358
}
@@ -395,24 +419,24 @@ internal fun fromJsonListArrayAndValueColumns(
395
419
396
420
// list element type can be JsonObject, JsonArray or primitive
397
421
// So first, we gather all properties of objects to merge including "array" and "value" if needed
398
- // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be
422
+ // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be
399
423
// { array: List<String>, value: Int?, a: Int?, b: Int? }
400
424
// and instances will look like
401
425
// { "array": [], "value": 123, "a": null, "b": null }
402
426
403
427
val nameGenerator = ColumnNameGenerator ()
404
- records.forEach {
405
- when (it ) {
406
- is JsonObject -> it .entries.forEach {
428
+ records.forEach { record ->
429
+ when (record ) {
430
+ is JsonObject -> record .entries.forEach {
407
431
nameGenerator.addIfAbsent(it.key)
408
432
}
409
433
410
- is JsonArray < * > -> hasArray = true
411
- null -> Unit
412
- else -> hasPrimitive = true
434
+ is JsonArray -> hasArray = true
435
+ is JsonNull , null -> Unit
436
+ is JsonPrimitive -> hasPrimitive = true
413
437
}
414
438
}
415
- if (records.all { it == null }) hasPrimitive = true
439
+ if (records.all { it == null || it is JsonNull }) hasPrimitive = true
416
440
417
441
// Add a value column to the collected names if needed
418
442
val valueColumn = if (hasPrimitive || records.isEmpty()) {
@@ -433,10 +457,10 @@ internal fun fromJsonListArrayAndValueColumns(
433
457
val columns: List <AnyCol > = when {
434
458
// instead of using the names, generate a single key/value frame column
435
459
isKeyValue -> {
436
- val dataFrames = records.map {
437
- when (it ) {
460
+ val dataFrames = records.map { record ->
461
+ when (record ) {
438
462
is JsonObject -> {
439
- val map = it.map .mapValues { (key, value) ->
463
+ val map = record .mapValues { (key, value) ->
440
464
val parsed = fromJsonListArrayAndValueColumns(
441
465
records = listOf (value),
442
466
keyValuePaths = keyValuePaths,
@@ -459,8 +483,8 @@ internal fun fromJsonListArrayAndValueColumns(
459
483
)
460
484
}
461
485
462
- null -> DataFrame .emptyOf<AnyKeyValueProperty >()
463
- else -> error(" Expected JsonObject, got $it " )
486
+ is JsonNull , null -> DataFrame .emptyOf<AnyKeyValueProperty >()
487
+ else -> error(" Expected JsonObject, got $record " )
464
488
}
465
489
}
466
490
@@ -488,10 +512,23 @@ internal fun fromJsonListArrayAndValueColumns(
488
512
records.forEachIndexed { i, v ->
489
513
when (v) {
490
514
is JsonObject -> collector.add(null )
491
- is JsonArray <* > -> collector.add(null )
492
- " NaN" -> {
493
- nanIndices.add(i)
494
- collector.add(null )
515
+ is JsonArray -> collector.add(null )
516
+ is JsonPrimitive -> {
517
+ when {
518
+ v.content == " NaN" -> {
519
+ nanIndices.add(i)
520
+ collector.add(null )
521
+ }
522
+
523
+ v.isString -> collector.add(v.content)
524
+ v.booleanOrNull != null -> collector.add(v.boolean)
525
+ v.intOrNull != null -> collector.add(v.int)
526
+ v.longOrNull != null -> collector.add(v.long)
527
+ v.doubleOrNull != null -> collector.add(v.double)
528
+ v.floatOrNull != null -> collector.add(v.float)
529
+ v is JsonNull -> collector.add(null )
530
+ else -> collector.add(v)
531
+ }
495
532
}
496
533
497
534
else -> collector.add(v)
@@ -526,7 +563,7 @@ internal fun fromJsonListArrayAndValueColumns(
526
563
val startIndices = ArrayList <Int >()
527
564
records.forEach {
528
565
startIndices.add(values.size)
529
- if (it is JsonArray < * > ) values.addAll(it.value )
566
+ if (it is JsonArray ) values.addAll(it.jsonArray )
530
567
}
531
568
val parsed = fromJsonListArrayAndValueColumns(
532
569
records = values,
@@ -538,10 +575,11 @@ internal fun fromJsonListArrayAndValueColumns(
538
575
parsed.isSingleUnnamedColumn() -> {
539
576
val col = (parsed.getColumn(0 ) as UnnamedColumn ).col
540
577
val elementType = col.type
541
- val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList()
578
+ val columnValues =
579
+ col.values.asList().splitByIndices(startIndices.asSequence()).toList()
542
580
DataColumn .createValueColumn(
543
581
name = colName,
544
- values = values ,
582
+ values = columnValues ,
545
583
type = List ::class .createType(listOf (KTypeProjection .invariant(elementType))),
546
584
)
547
585
}
0 commit comments