Skip to content

Commit 7fe4d6c

Browse files
committed
fixed edge cases unify numbers for json and fixed tests
1 parent ff833c1 commit 7fe4d6c

File tree

4 files changed

+34
-25
lines changed
  • core/src
  • tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api

4 files changed

+34
-25
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,10 +472,15 @@ internal fun guessValueType(
472472
!nullsInCollection
473473

474474
if (unifyNumbers) {
475-
val usedNumberClasses = classes.filter { it.isSubclassOf(Number::class) }
476-
val unifiedNumberClass = usedNumberClasses.unifiedNumberClass() as KClass<Number>
477-
classes -= usedNumberClasses
478-
classes += unifiedNumberClass
475+
val nothingClass = Nothing::class
476+
val usedNumberClasses = classes.filter {
477+
it.isSubclassOf(Number::class) && it != nothingClass
478+
}
479+
if (usedNumberClasses.isNotEmpty()) {
480+
val unifiedNumberClass = usedNumberClasses.unifiedNumberClass() as KClass<Number>
481+
classes -= usedNumberClasses
482+
classes += unifiedNumberClass
483+
}
479484
}
480485

481486
return when {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl
3636
import org.jetbrains.kotlinx.dataframe.impl.api.createConverter
3737
import org.jetbrains.kotlinx.dataframe.impl.asList
3838
import org.jetbrains.kotlinx.dataframe.impl.guessValueType
39+
import org.jetbrains.kotlinx.dataframe.impl.isNothing
3940
import org.jetbrains.kotlinx.dataframe.impl.replaceGenericTypeParametersWithUpperbound
4041
import org.jetbrains.kotlinx.dataframe.index
4142
import org.jetbrains.kotlinx.dataframe.nrow
@@ -257,7 +258,7 @@ internal fun <T> createColumnGuessingType(
257258
to = targetType,
258259
) as (Number) -> Number?
259260

260-
return { value -> if (value is Number) converter(value) else value }
261+
return { value -> if (value != null && value is Number) converter(value) else value }
261262
}
262263

263264
return when (type.classifier!! as KClass<*>) {
@@ -292,7 +293,9 @@ internal fun <T> createColumnGuessingType(
292293
var isListOfRows: Boolean? = null
293294
val subType = type.arguments.first().type!! // List<T> -> T
294295

295-
val needsNumberConversion = unifyNumbers && subType.isSubtypeOf(typeOf<Number?>())
296+
val needsNumberConversion = unifyNumbers &&
297+
subType.isSubtypeOf(typeOf<Number?>()) &&
298+
!subType.isNothing
296299
val numberConverter: (Any?) -> Any? by lazy { getSafeNumberConverter(subType) }
297300

298301
val lists = values.map { value ->
@@ -334,7 +337,9 @@ internal fun <T> createColumnGuessingType(
334337
}
335338

336339
else -> {
337-
val needsNumberConversion = unifyNumbers && type.isSubtypeOf(typeOf<Number?>())
340+
val needsNumberConversion = unifyNumbers &&
341+
type.isSubtypeOf(typeOf<Number?>()) &&
342+
!type.isNothing
338343
val numberConverter by lazy { getSafeNumberConverter(type) }
339344

340345
DataColumn.createValueColumn(

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.DataRow
2222
import org.jetbrains.kotlinx.dataframe.alsoDebug
2323
import org.jetbrains.kotlinx.dataframe.api.JsonPath
2424
import org.jetbrains.kotlinx.dataframe.api.allNulls
25+
import org.jetbrains.kotlinx.dataframe.api.colsOf
2526
import org.jetbrains.kotlinx.dataframe.api.columnsCount
2627
import org.jetbrains.kotlinx.dataframe.api.convert
2728
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
@@ -47,13 +48,15 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
4748
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
4849
import org.jetbrains.kotlinx.dataframe.impl.io.readJson
4950
import org.jetbrains.kotlinx.dataframe.impl.nothingType
51+
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
5052
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
5153
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
5254
import org.jetbrains.kotlinx.dataframe.parseJsonStr
5355
import org.jetbrains.kotlinx.dataframe.testJson
5456
import org.jetbrains.kotlinx.dataframe.type
5557
import org.jetbrains.kotlinx.dataframe.values
5658
import org.junit.Test
59+
import kotlin.Double
5760
import kotlin.reflect.typeOf
5861

5962
@Suppress("ktlint:standard:argument-list-wrapping")
@@ -122,7 +125,7 @@ class JsonTests {
122125
df.rowsCount() shouldBe 2
123126
df["a"].type() shouldBe typeOf<Int>()
124127
df["b"].type() shouldBe typeOf<Comparable<*>>()
125-
df["c"].type() shouldBe typeOf<Double?>()
128+
df["c"].type() shouldBe typeOf<Float?>()
126129
}
127130

128131
@Test
@@ -140,7 +143,7 @@ class JsonTests {
140143
df.rowsCount() shouldBe 2
141144
df["a"].type() shouldBe typeOf<Int>()
142145
df["b"].type() shouldBe typeOf<Comparable<*>>()
143-
df["c"].type() shouldBe typeOf<Double?>()
146+
df["c"].type() shouldBe typeOf<Float?>()
144147
}
145148

146149
@Test
@@ -199,7 +202,7 @@ class JsonTests {
199202
val df = DataFrame.readJsonStr(json).alsoDebug()
200203
df.columnsCount() shouldBe 1
201204
df.rowsCount() shouldBe 3
202-
df["a"].type() shouldBe typeOf<List<Number>>()
205+
df["a"].type() shouldBe typeOf<List<Double>>()
203206
df[1]["a"] shouldBe emptyList<Int>()
204207
}
205208

@@ -217,7 +220,7 @@ class JsonTests {
217220
val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug()
218221
df.columnsCount() shouldBe 1
219222
df.rowsCount() shouldBe 3
220-
df["a"].type() shouldBe typeOf<List<Number>>()
223+
df["a"].type() shouldBe typeOf<List<Double>>()
221224
df[1]["a"] shouldBe emptyList<Int>()
222225
}
223226

@@ -249,7 +252,7 @@ class JsonTests {
249252
group[1].alsoDebug().let {
250253
it.columnsCount() shouldBe 3
251254
it.rowsCount() shouldBe 2
252-
it["b"].type() shouldBe typeOf<Int?>()
255+
it["b"].type() shouldBe typeOf<Double?>()
253256
it["c"].type() shouldBe typeOf<Int?>()
254257
it["d"].type() shouldBe typeOf<Int?>()
255258
it["b"].values.toList() shouldBe listOf(4, null)
@@ -383,7 +386,7 @@ class JsonTests {
383386
).alsoDebug("df:")
384387

385388
val res = DataFrame.readJsonStr(df.toJson()).alsoDebug("res:")
386-
res shouldBe df
389+
res shouldBe df.convert { colsOf<Double?>() }.toFloat()
387390
}
388391

389392
@Test
@@ -396,21 +399,17 @@ class JsonTests {
396399

397400
val res =
398401
DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).alsoDebug("res:")
399-
res shouldBe df
400-
}
401-
402-
@Test
403-
fun `NaN double serialization`() {
404-
val df = dataFrameOf("v")(1.1, Double.NaN)
405-
df["v"].type() shouldBe typeOf<Double>()
406-
DataFrame.readJsonStr(df.toJson()) shouldBe df
402+
res shouldBe df.convert { colsOf<Double?>() }.toFloat()
407403
}
408404

409405
@Test
410406
fun `NaN double serialization Any`() {
411407
val df = dataFrameOf("v")(1.1, Double.NaN)
412408
df["v"].type() shouldBe typeOf<Double>()
413-
DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df
409+
410+
val df2 = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS)
411+
df2["v"].type() shouldBe typeOf<Float>()
412+
df2 shouldBe df.convert("v").toFloat()
414413
}
415414

416415
@Test
@@ -583,7 +582,7 @@ class JsonTests {
583582
val group = df["a"] as ColumnGroup<*>
584583
group.columnsCount() shouldBe 6
585584
group["b"].type() shouldBe typeOf<Int?>()
586-
group["value"].type() shouldBe typeOf<Double?>()
585+
group["value"].type() shouldBe typeOf<Float?>()
587586
group["value1"].type() shouldBe typeOf<String?>()
588587
group["array"].type() shouldBe nothingType(nullable = true)
589588

@@ -922,7 +921,7 @@ class JsonTests {
922921

923922
it["b"].type() shouldBe typeOf<Any?>()
924923
it["c"].type() shouldBe typeOf<Int?>()
925-
it["d"].type() shouldBe typeOf<Any?>()
924+
it["d"].type() shouldBe nullableNothingType
926925

927926
it[0].toMap() shouldBe mapOf("b" to 1, "c" to null, "d" to null)
928927
it[1].toMap() shouldBe mapOf("b" to listOf(1, 2, 3), "c" to 2, "d" to null)

tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class Read {
6262
val row = DataRow.readJson(file)
6363
// SampleEnd
6464
row.columnNames() shouldBe listOf("A", "B", "C", "D")
65-
row.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<Int>(), typeOf<Double>(), typeOf<Boolean>())
65+
row.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<Int>(), typeOf<Float>(), typeOf<Boolean>())
6666
}
6767

6868
@Test

0 commit comments

Comments
 (0)