Skip to content

Commit 50fa7d7

Browse files
authored
Merge pull request #207 from nikitinas/convertTo-fill
Support custom filling of missing columns in `convertTo`
2 parents 1c95cf4 + 2b8c5de commit 50fa7d7

File tree

16 files changed

+215
-67
lines changed

16 files changed

+215
-67
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
2929
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
3030
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
3131
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
32+
import org.jetbrains.kotlinx.dataframe.path
3233
import java.math.BigDecimal
3334
import java.net.URL
3435
import java.time.LocalTime
@@ -165,10 +166,10 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
165166
try {
166167
return mapIndexed { row, value ->
167168
currentRow = row
168-
value?.let { parser(value.trim()) ?: throw TypeConversionException(value, typeOf<String>(), typeOf<Double>()) }
169+
value?.let { parser(value.trim()) ?: throw TypeConversionException(value, typeOf<String>(), typeOf<Double>(), path) }
169170
}
170171
} catch (e: TypeConversionException) {
171-
throw CellConversionException(e.value, e.from, e.to, this.name(), currentRow, e)
172+
throw CellConversionException(e.value, e.from, e.to, path, currentRow, e)
172173
}
173174
}
174175

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,64 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.AnyFrame
4+
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
45
import org.jetbrains.kotlinx.dataframe.DataColumn
56
import org.jetbrains.kotlinx.dataframe.DataFrame
7+
import org.jetbrains.kotlinx.dataframe.RowExpression
68
import org.jetbrains.kotlinx.dataframe.exceptions.ColumnNotFoundException
79
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
810
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
911
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
12+
import org.jetbrains.kotlinx.dataframe.impl.api.ConvertSchemaDslInternal
1013
import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl
1114
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
1215
import kotlin.reflect.KProperty
1316
import kotlin.reflect.KType
1417
import kotlin.reflect.typeOf
1518

16-
public enum class ExcessiveColumns { Remove, Keep, Fail }
19+
/**
20+
* Specifies how to handle columns in the original dataframe that were not matched to any column in the destination dataframe schema.
21+
*/
22+
public enum class ExcessiveColumns {
23+
/**
24+
* Remove excessive columns from resulting dataframe
25+
*/
26+
Remove,
27+
28+
/**
29+
* Keep excessive columns in resulting dataframe
30+
*/
31+
Keep,
32+
33+
/**
34+
* Throw [ExcessiveColumnsException] if any excessive columns were found in the original dataframe
35+
*/
36+
Fail
37+
}
38+
39+
/**
40+
* Holds data context for [fill] operation
41+
*/
42+
public data class ConvertToFill<T, C>(
43+
internal val dsl: ConvertSchemaDsl<T>,
44+
val columns: ColumnsSelector<T, C>
45+
)
1746

1847
/** Provides access to [fromType] and [toSchema] in the flexible [ConvertSchemaDsl.convertIf] method. */
1948
public class ConverterScope(public val fromType: KType, public val toSchema: ColumnSchema)
2049

21-
/** Dsl to define how specific type conversion should occur.
50+
/**
51+
* Dsl to customize column conversion
2252
*
2353
* Example:
2454
* ```kotlin
2555
* df.convertTo<SomeSchema> {
2656
* // defines how to convert Int? -> String
2757
* convert<Int?>().with { it?.toString() ?: "No input given" }
58+
* // defines how to convert String -> SomeType
59+
* parser { SomeType(it) }
60+
* // fill missing column `sum` with expression `a + b`
61+
* fill { sum }.with { a + b }
2862
* }
2963
* ```
3064
*/
@@ -56,6 +90,17 @@ public interface ConvertSchemaDsl<in T> {
5690
)
5791
}
5892

93+
/**
94+
* Defines how to fill specified columns in destination schema that were not found in original dataframe.
95+
* All [fill] operations for missing columns are executed after successful conversion of matched columns, so converted values of matched columns can be safely used in [with] expression.
96+
* @param columns target columns in destination dataframe schema to be filled
97+
*/
98+
public inline fun <T, reified C> ConvertSchemaDsl<T>.fill(noinline columns: ColumnsSelector<T, C>): ConvertToFill<T, C> = ConvertToFill(this, columns)
99+
100+
public fun <T, C> ConvertToFill<T, C>.with(expr: RowExpression<T, C>) {
101+
(dsl as ConvertSchemaDslInternal<T>).fill(columns as ColumnsSelector<*, C>, expr as RowExpression<*, C>)
102+
}
103+
59104
/**
60105
* Defines how to convert `String` values into given type [C].
61106
*/
@@ -95,6 +140,10 @@ public class ConvertType<T>(
95140
* df.convertTo<SomeSchema> {
96141
* // defines how to convert Int? -> String
97142
* convert<Int?>().with { it?.toString() ?: "No input given" }
143+
* // defines how to convert String -> SomeType
144+
* parser { SomeType(it) }
145+
* // fill missing column `sum` with expression `a + b`
146+
* fill { sum }.with { a + b }
98147
* }
99148
* ```
100149
*
@@ -109,8 +158,8 @@ public class ConvertType<T>(
109158
*/
110159
public inline fun <reified T : Any> AnyFrame.convertTo(
111160
excessiveColumnsBehavior: ExcessiveColumns = ExcessiveColumns.Keep,
112-
noinline body: ConvertSchemaDsl<T>.() -> Unit = {},
113-
): DataFrame<T> = convertTo(typeOf<T>(), excessiveColumnsBehavior, body).cast()
161+
noinline body: ConvertSchemaDsl<T>.() -> Unit = {}
162+
): DataFrame<T> = convertToImpl(typeOf<T>(), true, excessiveColumnsBehavior, body).cast()
114163

115164
/**
116165
* Converts values in [DataFrame] to match given column schema [schemaType].
@@ -126,6 +175,10 @@ public inline fun <reified T : Any> AnyFrame.convertTo(
126175
* df.convertTo<SomeSchema> {
127176
* // defines how to convert Int? -> String
128177
* convert<Int?>().with { it?.toString() ?: "No input given" }
178+
* // defines how to convert String -> SomeType
179+
* parser { SomeType(it) }
180+
* // fill missing column `sum` with expression `a + b`
181+
* fill { sum }.with { a + b }
129182
* }
130183
* ```
131184
*

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,7 @@ public data class ColumnPath(val path: List<String>) : List<String> by path, Col
5151

5252
override fun toString(): String = path.toString()
5353

54+
public fun joinToString(separator: String = "/"): String = path.joinToString(separator)
55+
5456
override fun <C> get(column: ColumnReference<C>): ColumnAccessor<C> = ColumnAccessorImpl(this + column.path())
5557
}
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
package org.jetbrains.kotlinx.dataframe.exceptions
22

3+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
34
import kotlin.reflect.KType
45

56
public class CellConversionException(
67
value: Any?,
78
from: KType,
89
to: KType,
9-
public val column: String,
10+
column: ColumnPath,
1011
public val row: Int?,
1112
override val cause: Throwable?
12-
) : TypeConversionException(value, from, to) {
13+
) : TypeConversionException(value, from, to, column) {
1314
override val message: String
1415
get() = "${super.message} in column $column, row $row"
1516
}
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
package org.jetbrains.kotlinx.dataframe.exceptions
22

3+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
4+
import kotlin.reflect.*
35
import kotlin.reflect.KType
46

5-
public open class TypeConversionException(public val value: Any?, public val from: KType, public val to: KType) : RuntimeException() {
7+
public open class TypeConversionException(
8+
public val value: Any?,
9+
public val from: KType,
10+
public val to: KType,
11+
public val column: ColumnPath?
12+
) : RuntimeException() {
613

714
override val message: String
8-
get() = "Failed to convert '$value' from $from to $to"
15+
get() = "Failed to convert '$value' from $from to $to" + (column?.let { " in column ${it.joinToString()}" } ?: "")
916
}
Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
package org.jetbrains.kotlinx.dataframe.exceptions
22

3+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
4+
import kotlin.reflect.*
35
import kotlin.reflect.KType
46

5-
public class TypeConverterNotFoundException(public val from: KType, public val to: KType) : IllegalArgumentException() {
7+
public class TypeConverterNotFoundException(
8+
public val from: KType,
9+
public val to: KType,
10+
public val column: ColumnPath?
11+
) : IllegalArgumentException() {
612

713
override val message: String
8-
get() = "Type converter from $from to $to is not found"
14+
get() = "Type converter from $from to $to is not found" + (column?.let { " for column ${it.joinToString()}" } ?: "")
915
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
3434
import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnInternal
3535
import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn
3636
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
37+
import org.jetbrains.kotlinx.dataframe.path
3738
import org.jetbrains.kotlinx.dataframe.type
3839
import java.math.BigDecimal
3940
import java.net.URL
@@ -78,8 +79,7 @@ internal fun AnyCol.convertToTypeImpl(to: KType): AnyCol {
7879
nullsFound = true
7980
null
8081
}
81-
82-
else -> throw TypeConversionException(null, from, to)
82+
else -> throw TypeConversionException(null, from, to, path)
8383
}
8484

8585
fun applyConverter(converter: TypeConverter): AnyCol {
@@ -91,7 +91,7 @@ internal fun AnyCol.convertToTypeImpl(to: KType): AnyCol {
9191
}
9292
return DataColumn.createValueColumn(name, values, to.withNullability(nullsFound))
9393
} catch (e: TypeConversionException) {
94-
throw CellConversionException(e.value, e.from, e.to, this.name(), currentRow, e)
94+
throw CellConversionException(e.value, e.from, e.to, path, currentRow, e)
9595
}
9696
}
9797

@@ -107,16 +107,16 @@ internal fun AnyCol.convertToTypeImpl(to: KType): AnyCol {
107107
val clazz = it.javaClass.kotlin
108108
val type = clazz.createStarProjectedType(false)
109109
val converter = getConverter(type, to, ParserOptions(locale = Locale.getDefault()))
110-
?: throw TypeConverterNotFoundException(from, to)
110+
?: throw TypeConverterNotFoundException(from, to, path)
111111
converter(it)
112112
}.checkNulls()
113113
}
114114
DataColumn.createValueColumn(name, values, to.withNullability(nullsFound))
115115
}
116-
else -> throw TypeConverterNotFoundException(from, to)
116+
else -> throw TypeConverterNotFoundException(from, to, path)
117117
}
118118
} catch (e: TypeConversionException) {
119-
throw CellConversionException(e.value, e.from, e.to, this.name(), currentRow, e)
119+
throw CellConversionException(e.value, e.from, e.to, path, currentRow, e)
120120
}
121121
}
122122

@@ -146,7 +146,7 @@ internal fun Any.convertTo(type: KType): Any? {
146146
val clazz = javaClass.kotlin
147147
if (clazz.isSubclassOf(type.jvmErasure)) return this
148148
val from = clazz.createStarProjectedType(false)
149-
val converter = getConverter(from, type) ?: throw TypeConverterNotFoundException(from, type)
149+
val converter = getConverter(from, type) ?: throw TypeConverterNotFoundException(from, type, null)
150150
return converter(this)
151151
}
152152

@@ -170,11 +170,11 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
170170
toClass.primaryConstructor ?: error("Value type $toClass doesn't have primary constructor")
171171
val underlyingType = constructor.parameters.single().type
172172
val converter = getConverter(from, underlyingType)
173-
?: throw TypeConverterNotFoundException(from, underlyingType)
173+
?: throw TypeConverterNotFoundException(from, underlyingType, null)
174174
return convert<Any> {
175175
val converted = converter(it)
176176
if (converted == null && !underlyingType.isMarkedNullable) {
177-
throw TypeConversionException(it, from, underlyingType)
177+
throw TypeConversionException(it, from, underlyingType, null)
178178
}
179179
constructor.call(converted)
180180
}
@@ -217,14 +217,15 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
217217
val constructorParameter = constructor.parameters.single()
218218
val underlyingType = constructorParameter.type
219219
val converter = getConverter(underlyingType, to)
220-
?: throw TypeConverterNotFoundException(underlyingType, to)
220+
?: throw TypeConverterNotFoundException(underlyingType, to, null)
221221
val property =
222222
fromClass.memberProperties.single { it.name == constructorParameter.name } as kotlin.reflect.KProperty1<Any, *>
223223
if (property.visibility != kotlin.reflect.KVisibility.PUBLIC) {
224224
throw TypeConversionException(
225225
"Not public member property in primary constructor of value type",
226226
from,
227-
to
227+
to,
228+
null
228229
)
229230
}
230231

0 commit comments

Comments
 (0)