Skip to content

Commit e003d88

Browse files
committed
Fix bugs with nullability handling in convert and convertTo. Add special exceptions for type conversions. Add DSL for parsers and converters configuration in convertTo.
1 parent bea3202 commit e003d88

File tree

16 files changed

+429
-88
lines changed

16 files changed

+429
-88
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ import kotlin.reflect.typeOf
1919

2020
public fun <T> AnyFrame.cast(): DataFrame<T> = this as DataFrame<T>
2121

22-
public inline fun <reified T> AnyFrame.cast(verify: Boolean = true): DataFrame<T> = if (verify) convertToImpl(typeOf<T>(), allowConversion = false, ExtraColumns.Keep).cast()
22+
public inline fun <reified T> AnyFrame.cast(verify: Boolean = true): DataFrame<T> = if (verify) convertToImpl(
23+
typeOf<T>(),
24+
allowConversion = false,
25+
ExtraColumns.Keep
26+
).cast()
2327
else cast()
2428

2529
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public inline fun <T, C, reified R> Convert<T, C?>.notNull(crossinline expressio
7474
else expression(this, it)
7575
}
7676

77-
public data class Convert<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>) {
77+
public data class Convert<T, out C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>) {
7878
public fun <R> cast(): Convert<T, R> = Convert(df, columns as ColumnsSelector<T, R>)
7979

8080
public inline fun <reified D> to(): DataFrame<T> = to(typeOf<D>())
@@ -289,13 +289,33 @@ public fun <T> Convert<T, *>.toLocalDateTime(): DataFrame<T> = to { it.convertTo
289289

290290
// endregion
291291

292-
public fun <T> Convert<T, *>.toInt(): DataFrame<T> = to<Int>()
293-
public fun <T> Convert<T, *>.toLong(): DataFrame<T> = to<Long>()
294-
public fun <T> Convert<T, *>.toStr(): DataFrame<T> = to<String>()
295-
public fun <T> Convert<T, *>.toDouble(): DataFrame<T> = to<Double>()
296-
public fun <T> Convert<T, *>.toFloat(): DataFrame<T> = to<Float>()
297-
public fun <T> Convert<T, *>.toBigDecimal(): DataFrame<T> = to<BigDecimal>()
298-
public fun <T> Convert<T, *>.toBoolean(): DataFrame<T> = to<Boolean>()
292+
@JvmName("toIntTAny")
293+
public fun <T> Convert<T, Any>.toInt(): DataFrame<T> = to<Int>()
294+
public fun <T> Convert<T, Any?>.toInt(): DataFrame<T> = to<Int?>()
295+
296+
@JvmName("toLongTAny")
297+
public fun <T> Convert<T, Any>.toLong(): DataFrame<T> = to<Long>()
298+
public fun <T> Convert<T, Any?>.toLong(): DataFrame<T> = to<Long?>()
299+
300+
@JvmName("toStrTAny")
301+
public fun <T> Convert<T, Any>.toStr(): DataFrame<T> = to<String>()
302+
public fun <T> Convert<T, Any?>.toStr(): DataFrame<T> = to<String?>()
303+
304+
@JvmName("toDoubleTAny")
305+
public fun <T> Convert<T, Any>.toDouble(): DataFrame<T> = to<Double>()
306+
public fun <T> Convert<T, Any?>.toDouble(): DataFrame<T> = to<Double?>()
307+
308+
@JvmName("toFloatTAny")
309+
public fun <T> Convert<T, Any>.toFloat(): DataFrame<T> = to<Float>()
310+
public fun <T> Convert<T, Any?>.toFloat(): DataFrame<T> = to<Float?>()
311+
312+
@JvmName("toBigDecimalTAny")
313+
public fun <T> Convert<T, Any>.toBigDecimal(): DataFrame<T> = to<BigDecimal>()
314+
public fun <T> Convert<T, Any?>.toBigDecimal(): DataFrame<T> = to<BigDecimal?>()
315+
316+
@JvmName("toBooleanTAny")
317+
public fun <T> Convert<T, Any>.toBoolean(): DataFrame<T> = to<Boolean>()
318+
public fun <T> Convert<T, Any?>.toBoolean(): DataFrame<T> = to<Boolean?>()
299319

300320
public fun <T, C> Convert<T, List<List<C>>>.toDataFrames(containsColumns: Boolean = false): DataFrame<T> =
301321
to { it.toDataFrames(containsColumns) }
Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,70 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.AnyFrame
4+
import org.jetbrains.kotlinx.dataframe.DataColumn
45
import org.jetbrains.kotlinx.dataframe.DataFrame
6+
import org.jetbrains.kotlinx.dataframe.exceptions.*
57
import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl
8+
import kotlin.reflect.KProperty
69
import kotlin.reflect.KType
710
import kotlin.reflect.typeOf
811

912
public enum class ExtraColumns { Remove, Keep, Fail }
1013

14+
public interface ConvertSchemaDsl<in T> {
15+
16+
public fun <A, B> convert(from: KType, to: KType, converter: (A) -> B)
17+
}
18+
19+
/**
20+
* Defines how to convert `String` values into the given type [C].
21+
*/
22+
public inline fun <reified C> ConvertSchemaDsl<*>.parser(noinline parser: (String) -> C): Unit = convert<String>().with(parser)
23+
24+
/**
25+
* Defines how to convert values of given type [C]
26+
*/
27+
public inline fun <reified C> ConvertSchemaDsl<*>.convert(): ConvertType<C> = ConvertType(this, typeOf<C>())
28+
29+
/**
30+
* Defines how to convert values of type [C] into type [R]
31+
*/
32+
public inline fun <C, reified R> ConvertType<C>.with(noinline converter: (C) -> R): Unit = dsl.convert(from, typeOf<R>(), converter)
33+
34+
public class ConvertType<T>(
35+
@PublishedApi internal val dsl: ConvertSchemaDsl<*>,
36+
@PublishedApi internal val from: KType,
37+
internal val property: KProperty<T>? = null
38+
)
39+
1140
// region DataFrame
1241

13-
public inline fun <reified T> AnyFrame.convertTo(extraColumnsBehavior: ExtraColumns = ExtraColumns.Keep): DataFrame<T> = convertTo(typeOf<T>(), extraColumnsBehavior).cast()
42+
/**
43+
* Converts values in [DataFrame] to match the given column schema [T].
44+
*
45+
* Original columns are mapped to destination columns by column [path][DataColumn.path].
46+
*
47+
* Type converters for every column are selected automatically. See [convert] operation for details.
48+
*
49+
* To specify custom type converters for particular types, use [ConvertSchemaDsl].
50+
*
51+
* @param [T] class that defines target schema for conversion.
52+
* @param [excessiveColumnsBehavior] how to handle excessive columns in the original [DataFrame].
53+
* @throws [ColumnNotFoundException] if [DataFrame] doesn't contain columns that are required by destination schema.
54+
* @throws [ExcessiveColumnsException] if [DataFrame] contains columns that are not required by destination schema and [excessiveColumnsBehavior] is set to [ExtraColumns.Fail].
55+
* @throws [TypeConverterNotFoundException] if suitable type converter for some column was not found.
56+
* @throws [TypeConversionException] if type converter failed to convert column values.
57+
* @return converted [DataFrame].
58+
*/
59+
public inline fun <reified T : Any> AnyFrame.convertTo(
60+
excessiveColumnsBehavior: ExtraColumns = ExtraColumns.Keep,
61+
noinline body: ConvertSchemaDsl<T>.() -> Unit = {}
62+
): DataFrame<T> = convertTo(typeOf<T>(), excessiveColumnsBehavior, body).cast()
1463

15-
public fun AnyFrame.convertTo(schemaType: KType, extraColumnsBehavior: ExtraColumns = ExtraColumns.Keep): AnyFrame = convertToImpl(schemaType, true, extraColumnsBehavior)
64+
public fun AnyFrame.convertTo(
65+
schemaType: KType,
66+
extraColumnsBehavior: ExtraColumns = ExtraColumns.Keep,
67+
body: ConvertSchemaDsl<Any>.() -> Unit = {}
68+
): AnyFrame = convertToImpl(schemaType, true, extraColumnsBehavior, body)
1669

1770
// endregion
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
package org.jetbrains.kotlinx.dataframe.exceptions
2+
3+
public class ColumnNotFoundException(public val columnName: String, public override val message: String) : RuntimeException()
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package org.jetbrains.kotlinx.dataframe.exceptions
2+
3+
public class ExcessiveColumnsException(public val columns: List<String>) : RuntimeException() {
4+
5+
override val message: String
6+
get() = "Excess columns in DataFrame: $columns"
7+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package org.jetbrains.kotlinx.dataframe.exceptions
2+
3+
import kotlin.reflect.*
4+
5+
public class TypeConversionException(public val value: Any?, public val from: KType, public val to: KType) : RuntimeException() {
6+
7+
override val message: String
8+
get() = "Failed to convert '$value' from $from to $to"
9+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package org.jetbrains.kotlinx.dataframe.exceptions
2+
3+
import kotlin.reflect.*
4+
5+
public class TypeConverterNotFoundException(public val from: KType, public val to: KType) : IllegalArgumentException() {
6+
7+
override val message: String
8+
get() = "Type converter from $from to $to is not found"
9+
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ import org.jetbrains.kotlinx.dataframe.api.to
2525
import org.jetbrains.kotlinx.dataframe.columns.values
2626
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
2727
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
28-
import org.jetbrains.kotlinx.dataframe.hasNulls
28+
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
29+
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
2930
import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnInternal
3031
import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn
3132
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
@@ -59,33 +60,46 @@ internal fun <T, C, R> Convert<T, C>.convertRowColumnImpl(
5960
): DataFrame<T> =
6061
to { col -> df.newColumn(type, col.name, infer) { rowConverter(it, col) } }
6162

62-
internal fun AnyCol.convertToTypeImpl(newType: KType): AnyCol {
63+
internal fun AnyCol.convertToTypeImpl(to: KType): AnyCol {
6364
val from = type
64-
val tartypeOf = newType.withNullability(hasNulls)
65+
66+
val nullsAreAllowed = to.isMarkedNullable
67+
68+
var nullsFound = false
69+
70+
fun Any?.checkNulls() = when {
71+
this != null -> this
72+
nullsAreAllowed -> {
73+
nullsFound = true
74+
null
75+
}
76+
else -> throw TypeConversionException(null, from, to)
77+
}
78+
6579
return when {
66-
from == newType -> this
67-
from.isSubtypeOf(newType) -> (this as DataColumnInternal<*>).changeType(tartypeOf)
68-
else -> when (val converter = getConverter(from, newType)) {
80+
from == to -> this
81+
from.isSubtypeOf(to) -> (this as DataColumnInternal<*>).changeType(to.withNullability(hasNulls()))
82+
else -> when (val converter = getConverter(from, to)) {
6983
null -> when (from.classifier) {
7084
Any::class, Number::class, java.io.Serializable::class -> {
85+
// find converter for every value
7186
val values = values.map {
72-
if (it == null) null else {
87+
it?.let {
7388
val clazz = it.javaClass.kotlin
7489
val type = clazz.createStarProjectedType(false)
75-
val conv = getConverter(type, newType) ?: error("Can't find converter from $type to $newType")
76-
conv(it) ?: error("Can't convert '$it' to '$newType'")
77-
}
90+
val converter = getConverter(type, to) ?: throw TypeConverterNotFoundException(from, to)
91+
converter(it)
92+
}.checkNulls()
7893
}
79-
DataColumn.createValueColumn(name, values, tartypeOf)
94+
DataColumn.createValueColumn(name, values, to.withNullability(nullsFound))
8095
}
81-
else -> error("Can't find converter from $from to $newType")
96+
else -> throw TypeConverterNotFoundException(from, to)
8297
}
8398
else -> {
8499
val values = values.map {
85-
if (it == null) null
86-
else converter(it) ?: error("Can't convert '$it' to $newType")
100+
it?.let { converter(it) }.checkNulls()
87101
}
88-
DataColumn.createValueColumn(name, values, tartypeOf)
102+
DataColumn.createValueColumn(name, values, to.withNullability(nullsFound))
89103
}
90104
}
91105
}
@@ -100,8 +114,8 @@ internal typealias TypeConverter = (Any) -> Any?
100114
internal fun Any.convertTo(type: KType): Any? {
101115
val clazz = javaClass.kotlin
102116
if (clazz.isSubclassOf(type.jvmErasure)) return this
103-
val converter = getConverter(clazz.createStarProjectedType(false), type)
104-
require(converter != null) { "Can not convert `$this` to $type" }
117+
val from = clazz.createStarProjectedType(false)
118+
val converter = getConverter(from, type) ?: throw TypeConverterNotFoundException(from, type)
105119
return converter(this)
106120
}
107121

@@ -125,9 +139,12 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
125139
toClass.primaryConstructor ?: error("Value type $toClass doesn't have primary constructor")
126140
val underlyingType = constructor.parameters.single().type
127141
val converter = getConverter(from, underlyingType)
128-
?: error("Can't find converter from $underlyingType to $to")
142+
?: throw TypeConverterNotFoundException(from, underlyingType)
129143
return convert<Any> {
130144
val converted = converter(it)
145+
if (converted == null && !underlyingType.isMarkedNullable) {
146+
throw TypeConversionException(it, from, underlyingType)
147+
}
131148
constructor.call(converted)
132149
}
133150
}
@@ -138,12 +155,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
138155
val constructorParameter = constructor.parameters.single()
139156
val underlyingType = constructorParameter.type
140157
val converter = getConverter(underlyingType, to)
141-
?: error("Can't find converter from $underlyingType to $to")
158+
?: throw TypeConverterNotFoundException(underlyingType, to)
142159
val property = fromClass.memberProperties.single { it.name == constructorParameter.name } as kotlin.reflect.KProperty1<Any, *>
143160

144161
return convert<Any> {
145-
val value = property.get(it)!!
146-
converter(value)
162+
property.get(it)?.let {
163+
converter(it)
164+
}
147165
}
148166
}
149167

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,63 @@ package org.jetbrains.kotlinx.dataframe.impl.api
22

33
import org.jetbrains.kotlinx.dataframe.AnyFrame
44
import org.jetbrains.kotlinx.dataframe.DataColumn
5+
import org.jetbrains.kotlinx.dataframe.api.ConvertSchemaDsl
56
import org.jetbrains.kotlinx.dataframe.api.ExtraColumns
7+
import org.jetbrains.kotlinx.dataframe.api.Infer
68
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
79
import org.jetbrains.kotlinx.dataframe.api.convertTo
10+
import org.jetbrains.kotlinx.dataframe.api.map
811
import org.jetbrains.kotlinx.dataframe.api.name
912
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1013
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
1114
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
15+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
16+
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
17+
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
1218
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
19+
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
1320
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1421
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
1522
import org.jetbrains.kotlinx.dataframe.kind
1623
import org.jetbrains.kotlinx.dataframe.ncol
1724
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
1825
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
1926
import kotlin.reflect.KType
27+
import kotlin.reflect.full.withNullability
2028
import kotlin.reflect.jvm.jvmErasure
2129

30+
private class Converter(val transform: (Any?) -> Any?, val skipNulls: Boolean)
31+
32+
private class ConvertSchemaDslImpl<T> : ConvertSchemaDsl<T> {
33+
val converters = mutableMapOf<Pair<KType, KType>, Converter>()
34+
35+
override fun <A, B> convert(from: KType, to: KType, converter: (A) -> B) {
36+
converters[from.withNullability(false) to to.withNullability(false)] = Converter(converter as (Any?) -> Any?, !from.isMarkedNullable)
37+
}
38+
39+
fun getConverter(from: KType, to: KType): Converter? {
40+
return converters[from.withNullability(false) to to.withNullability(false)]
41+
}
42+
}
43+
2244
@PublishedApi
23-
internal fun AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extraColumns: ExtraColumns): AnyFrame {
24-
fun AnyFrame.convertToSchema(schema: DataFrameSchema): AnyFrame {
45+
internal fun AnyFrame.convertToImpl(
46+
type: KType,
47+
allowConversion: Boolean,
48+
extraColumns: ExtraColumns,
49+
body: ConvertSchemaDsl<Any>.() -> Unit = {}
50+
): AnyFrame {
51+
val dsl = ConvertSchemaDslImpl<Any>()
52+
dsl.body()
53+
54+
fun AnyFrame.convertToSchema(schema: DataFrameSchema, path: ColumnPath): AnyFrame {
2555
if (ncol == 0) return schema.createEmptyDataFrame()
2656
var visited = 0
2757
val newColumns = columns().mapNotNull {
2858
val targetColumn = schema.columns[it.name()]
2959
if (targetColumn == null) {
3060
when (extraColumns) {
31-
ExtraColumns.Fail -> throw IllegalArgumentException("Column `${it.name}` is not present in target class")
61+
ExtraColumns.Fail -> throw ExcessiveColumnsException(listOf(it.name))
3262
ExtraColumns.Keep -> it
3363
ExtraColumns.Remove -> null
3464
}
@@ -39,13 +69,20 @@ internal fun AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extra
3969
targetColumn == currentSchema -> it
4070
!allowConversion -> throw IllegalArgumentException("Column `${it.name}` has type `${it.type()}` that differs from target type `${targetColumn.type}`")
4171
else -> {
72+
val columnPath = path + it.name
4273
when (targetColumn.kind) {
4374
ColumnKind.Value -> {
44-
val tartypeOf = targetColumn.type
45-
require(!it.hasNulls() || tartypeOf.isMarkedNullable) {
46-
"Column `${it.name}` has nulls and can not be converted to non-nullable type `$tartypeOf`"
47-
}
48-
it.convertTo(tartypeOf)
75+
val from = it.type()
76+
val to = targetColumn.type
77+
val converter = dsl.getConverter(from, to)
78+
if (converter != null) {
79+
val nullsAllowed = to.isMarkedNullable
80+
it.map(to, Infer.Nulls) {
81+
val result = if (it != null || !converter.skipNulls) converter.transform(it) else it
82+
if (!nullsAllowed && result == null) throw TypeConversionException(it, from, to)
83+
result
84+
}
85+
} else it.convertTo(to)
4986
}
5087
ColumnKind.Group -> {
5188
require(it.kind == ColumnKind.Group) {
@@ -54,7 +91,7 @@ internal fun AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extra
5491
val columnGroup = it.asColumnGroup()
5592
DataColumn.createColumnGroup(
5693
it.name(),
57-
columnGroup.convertToSchema((targetColumn as ColumnSchema.Group).schema)
94+
columnGroup.convertToSchema((targetColumn as ColumnSchema.Group).schema, columnPath)
5895
)
5996
}
6097
ColumnKind.Frame -> {
@@ -63,7 +100,7 @@ internal fun AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extra
63100
}
64101
val frameColumn = it.asAnyFrameColumn()
65102
val frameSchema = (targetColumn as ColumnSchema.Frame).schema
66-
val frames = frameColumn.values().map { it.convertToSchema(frameSchema) }
103+
val frames = frameColumn.values().map { it.convertToSchema(frameSchema, columnPath) }
67104
DataColumn.createFrameColumn(it.name(), frames, schema = lazy { frameSchema })
68105
}
69106
}
@@ -81,5 +118,5 @@ internal fun AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extra
81118

82119
val clazz = type.jvmErasure
83120
val marker = MarkersExtractor[clazz]
84-
return convertToSchema(marker.schema)
121+
return convertToSchema(marker.schema, emptyPath())
85122
}

0 commit comments

Comments
 (0)