Skip to content

Commit 6257027

Browse files
committed
Add infer argument to map, convert and column API.
1 parent 82bb7c5 commit 6257027

File tree

13 files changed

+93
-68
lines changed

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,17 @@ public fun <T> ColumnGroupReference.column(): ColumnDelegate<T> = ColumnDelegate
3636
public fun <T> ColumnGroupReference.column(name: String): ColumnAccessor<T> = ColumnAccessorImpl(path() + name)
3737
public fun <T> ColumnGroupReference.column(path: ColumnPath): ColumnAccessor<T> = ColumnAccessorImpl(this.path() + path)
3838

39-
public inline fun <reified T> column(name: String = "", noinline expression: RowExpression<Any?, T>): ColumnReference<T> = createComputedColumnReference(name, typeOf<T>(), expression)
40-
public inline fun <T, reified C> column(df: DataFrame<T>, name: String = "", noinline expression: RowExpression<T, C>): ColumnReference<C> = createComputedColumnReference(name, typeOf<C>(), expression as RowExpression<Any?, C>)
39+
public inline fun <reified T> column(
40+
name: String = "",
41+
infer: Infer = Infer.Nulls,
42+
noinline expression: RowExpression<Any?, T>
43+
): ColumnReference<T> = createComputedColumnReference(name, typeOf<T>(), infer, expression)
44+
public inline fun <T, reified C> column(
45+
df: DataFrame<T>,
46+
name: String = "",
47+
infer: Infer = Infer.Nulls,
48+
noinline expression: RowExpression<T, C>
49+
): ColumnReference<C> = createComputedColumnReference(name, typeOf<C>(), infer, expression as RowExpression<Any?, C>)
4150

4251
public fun columnGroup(): ColumnDelegate<AnyRow> = column()
4352
public fun columnGroup(name: String): ColumnAccessor<AnyRow> = column(name)

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -47,23 +47,26 @@ public fun <T, C> DataFrame<T>.convert(vararg columns: ColumnReference<C>): Conv
4747
public inline fun <T, C, reified R> DataFrame<T>.convert(
4848
firstCol: ColumnReference<C>,
4949
vararg cols: ColumnReference<C>,
50+
infer: Infer = Infer.Nulls,
5051
noinline expression: RowValueExpression<T, C, R>
5152
): DataFrame<T> =
52-
convert(*headPlusArray(firstCol, cols)).with(inferType = false, expression)
53+
convert(*headPlusArray(firstCol, cols)).with(infer, expression)
5354

5455
public inline fun <T, C, reified R> DataFrame<T>.convert(
5556
firstCol: KProperty<C>,
5657
vararg cols: KProperty<C>,
58+
infer: Infer = Infer.Nulls,
5759
noinline expression: RowValueExpression<T, C, R>
5860
): DataFrame<T> =
59-
convert(*headPlusArray(firstCol, cols)).with(inferType = false, expression)
61+
convert(*headPlusArray(firstCol, cols)).with(infer, expression)
6062

6163
public inline fun <T, reified R> DataFrame<T>.convert(
6264
firstCol: String,
6365
vararg cols: String,
66+
infer: Infer = Infer.Nulls,
6467
noinline expression: RowValueExpression<T, Any?, R>
6568
): DataFrame<T> =
66-
convert(*headPlusArray(firstCol, cols)).with(inferType = false, expression)
69+
convert(*headPlusArray(firstCol, cols)).with(infer, expression)
6770

6871
public inline fun <T, C, reified R> Convert<T, C?>.notNull(crossinline expression: RowValueExpression<T, C, R>): DataFrame<T> =
6972
with {
@@ -80,17 +83,16 @@ public data class Convert<T, C>(val df: DataFrame<T>, val columns: ColumnsSelect
8083
public fun <T> Convert<T, *>.to(type: KType): DataFrame<T> = to { it.convertTo(type) }
8184

8285
public inline fun <T, C, reified R> Convert<T, C>.with(
83-
// TODO: replace with `Infer`
84-
inferType: Boolean = false,
86+
infer: Infer = Infer.Nulls,
8587
noinline rowConverter: RowValueExpression<T, C, R>
8688
): DataFrame<T> =
87-
withRowCellImpl(if (inferType) null else typeOf<R>(), rowConverter)
89+
withRowCellImpl(typeOf<R>(), infer, rowConverter)
8890

8991
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
90-
inferType: Boolean = false,
92+
infer: Infer = Infer.Nulls,
9193
noinline expression: RowColumnExpression<T, C, R>
9294
): DataFrame<T> =
93-
convertRowColumnImpl(if (inferType) null else typeOf<R>(), expression)
95+
convertRowColumnImpl(typeOf<R>(), infer, expression)
9496

9597
public fun <T, C> Convert<T, C>.to(columnConverter: DataFrame<T>.(DataColumn<C>) -> AnyCol): DataFrame<T> =
9698
df.replace(columns).with { columnConverter(df, it) }

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt

Lines changed: 5 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,30 +9,24 @@ import org.jetbrains.kotlinx.dataframe.Selector
99
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1010
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1111
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
12-
import org.jetbrains.kotlinx.dataframe.columns.size
13-
import org.jetbrains.kotlinx.dataframe.columns.values
1412
import org.jetbrains.kotlinx.dataframe.impl.columnName
1513
import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference
1614
import org.jetbrains.kotlinx.dataframe.impl.columns.newColumn
17-
import org.jetbrains.kotlinx.dataframe.impl.columns.newColumnWithActualType
1815
import kotlin.reflect.KProperty
1916
import kotlin.reflect.KType
2017
import kotlin.reflect.typeOf
2118

2219
// region ColumnReference
2320

24-
public inline fun <C, reified R> ColumnReference<C>.map(noinline transform: (C) -> R): ColumnReference<R> =
25-
map(typeOf<R>(), transform)
26-
27-
public fun <C, R> ColumnReference<C>.map(tartypeOf: KType?, transform: (C) -> R): ColumnReference<R> =
28-
createComputedColumnReference(this.name, tartypeOf) { transform(this@map()) }
21+
public inline fun <C, reified R> ColumnReference<C>.map(infer: Infer = Infer.Nulls, noinline transform: (C) -> R): ColumnReference<R> =
22+
createComputedColumnReference(name(), typeOf<R>(), infer) { transform(this@map()) }
2923

3024
// endregion
3125

3226
// region DataColumn
3327

3428
public inline fun <T, reified R> DataColumn<T>.map(
35-
infer: Infer = if (typeOf<R>().isMarkedNullable) Infer.Nulls else Infer.None,
29+
infer: Infer = Infer.Nulls,
3630
crossinline transform: (T) -> R
3731
): DataColumn<R> {
3832
val newValues = Array(size()) { transform(get(it)) }.asList()
@@ -41,7 +35,7 @@ public inline fun <T, reified R> DataColumn<T>.map(
4135

4236
public fun <T, R> DataColumn<T>.mapTo(
4337
type: KType,
44-
infer: Infer = if (type.isMarkedNullable) Infer.Nulls else Infer.None,
38+
infer: Infer = Infer.Nulls,
4539
transform: (T) -> R
4640
): DataColumn<R> {
4741
val values = Array<Any?>(size()) { transform(get(it)) }.asList()
@@ -62,11 +56,7 @@ public inline fun <T, reified R> ColumnsContainer<T>.map(
6256
name: String,
6357
infer: Infer = Infer.Nulls,
6458
noinline body: AddExpression<T, R>
65-
): DataColumn<R> = when (infer) {
66-
Infer.Type -> newColumnWithActualType(name, body)
67-
Infer.Nulls -> newColumn(typeOf<R>(), name, true, body)
68-
Infer.None -> newColumn(typeOf<R>(), name, false, body)
69-
}
59+
): DataColumn<R> = newColumn(typeOf<R>(), name, infer, body)
7060

7161
public inline fun <T, reified R> ColumnsContainer<T>.map(
7262
column: ColumnAccessor<R>,

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt

Lines changed: 30 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -10,19 +10,22 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1010
import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl
1111
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
1212
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
13+
import org.jetbrains.kotlinx.dataframe.impl.getListType
1314
import org.jetbrains.kotlinx.dataframe.impl.nameGenerator
1415
import kotlin.reflect.KProperty
16+
import kotlin.reflect.KType
17+
import kotlin.reflect.typeOf
1518

1619
public fun <T, C> DataFrame<T>.merge(selector: ColumnsSelector<T, C>): Merge<T, C, List<C>> =
17-
Merge(this, selector, false, { it })
20+
Merge(this, selector, false, { it }, typeOf<Any?>(), Infer.Type)
1821

1922
public fun <T> DataFrame<T>.merge(vararg columns: String): Merge<T, Any?, List<Any?>> =
2023
merge { columns.toColumns() }
2124

22-
public fun <T, C> DataFrame<T>.merge(vararg columns: ColumnReference<C>): Merge<T, C, List<C>> =
25+
public inline fun <T, reified C> DataFrame<T>.merge(vararg columns: ColumnReference<C>): Merge<T, C, List<C>> =
2326
merge { columns.toColumns() }
2427

25-
public fun <T, C> DataFrame<T>.merge(vararg columns: KProperty<C>): Merge<T, C, List<C>> =
28+
public inline fun <T, reified C> DataFrame<T>.merge(vararg columns: KProperty<C>): Merge<T, C, List<C>> =
2629
merge { columns.toColumns() }
2730

2831
public data class Merge<T, C, R>(
@@ -34,12 +37,16 @@ public data class Merge<T, C, R>(
3437
internal val notNull: Boolean,
3538
@PublishedApi
3639
internal val transform: DataRow<T>.(List<C>) -> R,
40+
@PublishedApi
41+
internal val resultType: KType,
42+
@PublishedApi
43+
internal val infer: Infer,
3744
)
3845

3946
public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C, R> = copy(notNull = true)
4047

4148
public fun <T, C, R> Merge<T, C, R>.into(columnName: String): DataFrame<T> = into(pathOf(columnName))
42-
public fun <T, C, R> Merge<T, C, R>.into(column: ColumnAccessor<R>): DataFrame<T> = into(column.path())
49+
public fun <T, C, R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())
4350

4451
public fun <T, C, R> Merge<T, C, R>.intoList(): List<R> =
4552
df.select(selector).rows().map { transform(it, it.values() as List<C>) }
@@ -51,7 +58,7 @@ public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
5158
// move columns into group
5259
val grouped = df.move(selector).under { mergePath }
5360

54-
var res = grouped.convert { getColumnGroup(mergePath) }.withRowCellImpl(null) {
61+
var res = grouped.convert { getColumnGroup(mergePath) }.withRowCellImpl(resultType, infer) {
5562
val srcRow = df[index()]
5663
var values = it.values() as List<C>
5764
if (notNull) {
@@ -77,15 +84,22 @@ public fun <T, C, R> Merge<T, C, R>.by(
7784
limit: Int = -1,
7885
truncated: CharSequence = "..."
7986
): Merge<T, C, String> =
80-
Merge(df, selector, notNull) {
81-
it.joinToString(
82-
separator = separator,
83-
prefix = prefix,
84-
postfix = postfix,
85-
limit = limit,
86-
truncated = truncated
87-
)
88-
}
87+
Merge(
88+
df, selector, notNull,
89+
transform = {
90+
it.joinToString(
91+
separator = separator,
92+
prefix = prefix,
93+
postfix = postfix,
94+
limit = limit,
95+
truncated = truncated
96+
)
97+
},
98+
typeOf<String>(), Infer.Nulls
99+
)
89100

90-
public inline fun <T, C, R, reified V> Merge<T, C, R>.by(crossinline transform: DataRow<T>.(R) -> V): Merge<T, C, V> =
91-
Merge(df, selector, notNull) { transform(this@by.transform(this, it)) }
101+
public inline fun <T, C, R, reified V> Merge<T, C, R>.by(
102+
infer: Infer = Infer.Nulls,
103+
crossinline transform: DataRow<T>.(R) -> V
104+
): Merge<T, C, V> =
105+
Merge(df, selector, notNull, { transform(this@by.transform(this, it)) }, typeOf<V>(), infer)

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ import org.jetbrains.kotlinx.dataframe.impl.asList
1616
import org.jetbrains.kotlinx.dataframe.impl.columnName
1717
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1818
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
19-
import org.jetbrains.kotlinx.dataframe.impl.createTypeWithArgument
19+
import org.jetbrains.kotlinx.dataframe.impl.getListType
2020
import kotlin.reflect.KProperty
2121
import kotlin.reflect.KType
2222
import kotlin.reflect.typeOf
@@ -274,8 +274,8 @@ public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.intoRows(dropEmpty
274274
public fun <T, C : AnyFrame> Split<T, C>.intoRows(dropEmpty: Boolean = true): DataFrame<T> =
275275
by { it.rows() }.intoRows(dropEmpty)
276276

277-
internal fun <T, C, R> Convert<T, C?>.splitInplace(tartypeOf: KType, transform: DataRow<T>.(C) -> Iterable<R>) =
278-
withRowCellImpl(List::class.createTypeWithArgument(tartypeOf)) { if (it == null) emptyList() else transform(it).asList() }
277+
internal fun <T, C, R> Convert<T, C?>.splitInplace(type: KType, transform: DataRow<T>.(C) -> Iterable<R>) =
278+
withRowCellImpl(getListType(type), Infer.None) { if (it == null) emptyList() else transform(it).asList() }
279279

280280
public fun <T, C, R> SplitWithTransform<T, C, R>.intoRows(dropEmpty: Boolean = true): DataFrame<T> {
281281
val paths = df.getColumnPaths(columns).toColumnSet()

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Cache.kt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,5 @@ import kotlin.reflect.KType
44

55
private val listTypes = mutableMapOf<KType, KType>()
66

7+
@PublishedApi
78
internal fun getListType(valueType: KType): KType = listTypes.getOrPut(valueType) { List::class.createTypeWithArgument(valueType) }

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
1616
import org.jetbrains.kotlinx.dataframe.RowColumnExpression
1717
import org.jetbrains.kotlinx.dataframe.RowValueExpression
1818
import org.jetbrains.kotlinx.dataframe.api.Convert
19+
import org.jetbrains.kotlinx.dataframe.api.Infer
1920
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
2021
import org.jetbrains.kotlinx.dataframe.api.name
2122
import org.jetbrains.kotlinx.dataframe.api.to
@@ -39,15 +40,20 @@ import kotlin.reflect.full.withNullability
3940
import kotlin.reflect.jvm.jvmErasure
4041

4142
@PublishedApi
42-
internal fun <T, C, R> Convert<T, C>.withRowCellImpl(type: KType?, rowConverter: RowValueExpression<T, C, R>): DataFrame<T> =
43-
to { col -> df.newColumn(type, col.name) { rowConverter(it, it[col]) } }
43+
internal fun <T, C, R> Convert<T, C>.withRowCellImpl(
44+
type: KType,
45+
infer: Infer,
46+
rowConverter: RowValueExpression<T, C, R>
47+
): DataFrame<T> =
48+
to { col -> df.newColumn(type, col.name, infer) { rowConverter(it, it[col]) } }
4449

4550
@PublishedApi
4651
internal fun <T, C, R> Convert<T, C>.convertRowColumnImpl(
47-
type: KType?,
52+
type: KType,
53+
infer: Infer,
4854
rowConverter: RowColumnExpression<T, C, R>
4955
): DataFrame<T> =
50-
to { col -> df.newColumn(type, col.name) { rowConverter(it, col) } }
56+
to { col -> df.newColumn(type, col.name, infer) { rowConverter(it, col) } }
5157

5258
internal fun AnyCol.convertToTypeImpl(newType: KType): AnyCol {
5359
val from = type

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ComputedColumnReference.kt

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,28 +2,30 @@ package org.jetbrains.kotlinx.dataframe.impl.columns
22

33
import org.jetbrains.kotlinx.dataframe.AnyRow
44
import org.jetbrains.kotlinx.dataframe.RowExpression
5+
import org.jetbrains.kotlinx.dataframe.api.Infer
56
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
67
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
78
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
89
import kotlin.reflect.KType
910

1011
internal class ComputedColumnReference<R>(
1112
val name: String,
12-
val type: KType?,
13+
val type: KType,
14+
val infer: Infer,
1315
val compute: RowExpression<Any?, R>
1416
) :
1517
ColumnReference<R> {
1618

1719
override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<R> {
18-
return context.df.newColumn(type, name) { compute(it, it) }.addPath(context.df)
20+
return context.df.newColumn(type, name, infer) { compute(it, it) }.addPath(context.df)
1921
}
2022

2123
override fun name() = name
2224

23-
override fun rename(newName: String) = ComputedColumnReference(newName, type, compute)
25+
override fun rename(newName: String) = ComputedColumnReference(newName, type, infer, compute)
2426

2527
override fun getValue(row: AnyRow) = compute(row, row)
2628
}
2729

2830
@PublishedApi
29-
internal fun <R> createComputedColumnReference(name: String, type: KType?, compute: RowExpression<Any?, R>): ColumnReference<R> = ComputedColumnReference(name, type, compute)
31+
internal fun <R> createComputedColumnReference(name: String, type: KType, infer: Infer, compute: RowExpression<Any?, R>): ColumnReference<R> = ComputedColumnReference(name, type, infer, compute)

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -42,17 +42,16 @@ import kotlin.reflect.full.withNullability
4242

4343
@PublishedApi
4444
internal fun <T, R> ColumnsContainer<T>.newColumn(
45-
type: KType?,
45+
type: KType,
4646
name: String = "",
47-
inferNulls: Boolean = true,
47+
infer: Infer = Infer.Nulls,
4848
expression: AddExpression<T, R>
4949
): DataColumn<R> {
5050
val (nullable, values) = computeValues(this as DataFrame<T>, expression)
51-
if (type == null) return guessColumnType(name, values)
52-
return when (type.classifier) {
53-
DataFrame::class -> DataColumn.createFrameColumn(name, values as List<AnyFrame>) as DataColumn<R>
54-
DataRow::class -> DataColumn.createColumnGroup(name, (values as List<AnyRow>).concat()) as DataColumn<R>
55-
else -> DataColumn.createValueColumn(name, values, if (inferNulls) type.withNullability(nullable) else type)
51+
return when (infer) {
52+
Infer.Nulls -> DataColumn.create(name, values, type.withNullability(nullable), Infer.None)
53+
Infer.Type -> DataColumn.createWithTypeInference(name, values, nullable)
54+
Infer.None -> DataColumn.create(name, values, type, Infer.None)
5655
}
5756
}
5857

src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ class InferTypeTests {
2222
fun `infer type with argument`() {
2323
val col by columnOf(1)
2424
val df = dataFrameOf(col)
25-
val converted = df.convert(col).with(inferType = false) {
25+
val converted = df.convert(col).with(Infer.None) {
2626
B(it) as A<Int>
2727
}
2828
converted[col].type() shouldBe typeOf<A<Int>>()

0 commit comments