Skip to content

Commit 5e014f4

Browse files
committed
ground work for recursively()
1 parent ae2ee7a commit 5e014f4

File tree

34 files changed

+834
-656
lines changed

34 files changed

+834
-656
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,8 @@
11
package org.jetbrains.kotlinx.dataframe
22

3-
import org.jetbrains.kotlinx.dataframe.api.Infer
4-
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
5-
import org.jetbrains.kotlinx.dataframe.api.cast
6-
import org.jetbrains.kotlinx.dataframe.api.concat
7-
import org.jetbrains.kotlinx.dataframe.api.filter
8-
import org.jetbrains.kotlinx.dataframe.api.schema
9-
import org.jetbrains.kotlinx.dataframe.api.take
10-
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
11-
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
12-
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
13-
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
14-
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
15-
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
16-
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
17-
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
18-
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
19-
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
20-
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
21-
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
22-
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
23-
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
3+
import org.jetbrains.kotlinx.dataframe.api.*
4+
import org.jetbrains.kotlinx.dataframe.columns.*
5+
import org.jetbrains.kotlinx.dataframe.impl.columns.*
246
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
257
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
268
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
@@ -54,7 +36,7 @@ public interface DataColumn<out T> : BaseColumn<T> {
5436
values: List<T>,
5537
type: KType,
5638
infer: Infer = Infer.None,
57-
defaultValue: T? = null
39+
defaultValue: T? = null,
5840
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
5941

6042
/**
@@ -67,7 +49,11 @@ public interface DataColumn<out T> : BaseColumn<T> {
6749
* @param values list of column values
6850
* @param infer column type inference mode
6951
*/
70-
public inline fun <reified T> createValueColumn(name: String, values: List<T>, infer: Infer = Infer.None): ValueColumn<T> = createValueColumn(
52+
public inline fun <reified T> createValueColumn(
53+
name: String,
54+
values: List<T>,
55+
infer: Infer = Infer.None,
56+
): ValueColumn<T> = createValueColumn(
7157
name, values,
7258
getValuesType(
7359
values,
@@ -81,17 +67,21 @@ public interface DataColumn<out T> : BaseColumn<T> {
8167
public fun <T> createFrameColumn(
8268
name: String,
8369
df: DataFrame<T>,
84-
startIndices: Iterable<Int>
70+
startIndices: Iterable<Int>,
8571
): FrameColumn<T> =
8672
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
8773

8874
public fun <T> createFrameColumn(
8975
name: String,
9076
groups: List<DataFrame<T>>,
91-
schema: Lazy<DataFrameSchema>? = null
77+
schema: Lazy<DataFrameSchema>? = null,
9278
): FrameColumn<T> = FrameColumnImpl(name, groups, schema)
9379

94-
public fun <T> createWithTypeInference(name: String, values: List<T>, nullable: Boolean? = null): DataColumn<T> = guessColumnType(name, values, nullable = nullable)
80+
public fun <T> createWithTypeInference(
81+
name: String,
82+
values: List<T>,
83+
nullable: Boolean? = null,
84+
): DataColumn<T> = guessColumnType(name, values, nullable = nullable)
9585

9686
public fun <T> create(name: String, values: List<T>, type: KType, infer: Infer = Infer.None): DataColumn<T> {
9787
return when (type.toColumnKind()) {
@@ -101,7 +91,8 @@ public interface DataColumn<out T> : BaseColumn<T> {
10191
}
10292
}
10393

104-
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> = create(name, values, typeOf<T>(), infer)
94+
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> =
95+
create(name, values, typeOf<T>(), infer)
10596

10697
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
10798
}
@@ -116,7 +107,16 @@ public interface DataColumn<out T> : BaseColumn<T> {
116107

117108
override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<T>? = this.addPath()
118109

119-
override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn<T> = super.getValue(thisRef, property) as DataColumn<T>
110+
override fun resolveSingleAfter(
111+
context: ColumnResolutionContext,
112+
conversion: (List<ColumnWithPath<T>>) -> List<ColumnWithPath<@UnsafeVariance T>>,
113+
): ColumnWithPath<T>? = this
114+
.transform(conversion)
115+
.let { it as DataColumn<T> }
116+
.addPath()
117+
118+
override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn<T> =
119+
super.getValue(thisRef, property) as DataColumn<T>
120120

121121
public operator fun iterator(): Iterator<T> = values().iterator()
122122

@@ -133,6 +133,8 @@ public val AnyCol.indices: IntRange get() = indices()
133133

134134
public val AnyCol.type: KType get() = type()
135135
public val AnyCol.kind: ColumnKind get() = kind()
136-
public val AnyCol.typeClass: KClass<*> get() = type.classifier as? KClass<*> ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type")
136+
public val AnyCol.typeClass: KClass<*>
137+
get() = type.classifier as? KClass<*>
138+
?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type")
137139

138140
public fun AnyBaseCol.indices(): IntRange = 0 until size()

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 65 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,14 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
1010
import org.jetbrains.kotlinx.dataframe.DataFrame
1111
import org.jetbrains.kotlinx.dataframe.DataRow
1212
import org.jetbrains.kotlinx.dataframe.Predicate
13-
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
14-
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
15-
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
16-
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
17-
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
18-
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
19-
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
20-
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
21-
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
22-
import org.jetbrains.kotlinx.dataframe.columns.renamedReference
23-
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
13+
import org.jetbrains.kotlinx.dataframe.columns.*
2414
import org.jetbrains.kotlinx.dataframe.documentation.AccessApi
2515
import org.jetbrains.kotlinx.dataframe.documentation.ColumnExpression
2616
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
2717
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
28-
import org.jetbrains.kotlinx.dataframe.hasNulls
2918
import org.jetbrains.kotlinx.dataframe.impl.aggregation.toColumns
3019
import org.jetbrains.kotlinx.dataframe.impl.columnName
31-
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList
32-
import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
33-
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
34-
import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept
35-
import org.jetbrains.kotlinx.dataframe.impl.columns.changePath
36-
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet
37-
import org.jetbrains.kotlinx.dataframe.impl.columns.getAt
38-
import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt
39-
import org.jetbrains.kotlinx.dataframe.impl.columns.singleImpl
40-
import org.jetbrains.kotlinx.dataframe.impl.columns.top
41-
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
42-
import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle
43-
import org.jetbrains.kotlinx.dataframe.impl.columns.transformWithContext
20+
import org.jetbrains.kotlinx.dataframe.impl.columns.*
4421
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs
4522
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
4623
import kotlin.reflect.KProperty
@@ -736,8 +713,9 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
736713
*/
737714
public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> =
738715
object : ColumnSet<Any?> {
739-
override fun resolve(context: ColumnResolutionContext): List<ColumnWithPath<Any?>> {
740-
val startPath = this@rangeTo.resolveSingle(context)!!.path
716+
717+
private fun process(col: AnyColumnReference, context: ColumnResolutionContext): List<ColumnWithPath<Any?>> {
718+
val startPath = col.resolveSingle(context)!!.path
741719
val endPath = endInclusive.resolveSingle(context)!!.path
742720
val parentPath = startPath.parent()!!
743721
require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" }
@@ -750,6 +728,15 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
750728
}
751729
}
752730
}
731+
732+
override fun resolve(context: ColumnResolutionContext): List<ColumnWithPath<Any?>> =
733+
process(this@rangeTo, context)
734+
735+
override fun resolveAfterTransform(
736+
context: ColumnResolutionContext,
737+
transform: (List<ColumnWithPath<Any?>>) -> List<ColumnWithPath<Any?>>,
738+
): List<ColumnWithPath<Any?>> =
739+
process(this@rangeTo.transform(transform) as AnyColumnReference, context)
753740
}
754741

755742
/**
@@ -3925,16 +3912,32 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
39253912

39263913
// region dfs
39273914

3915+
@Deprecated(
3916+
message = "dfs is deprecated, use recursively instead.",
3917+
replaceWith = ReplaceWith("this.cols(predicate).recursively()"),
3918+
level = DeprecationLevel.WARNING,
3919+
)
39283920
public fun <C> ColumnSet<C>.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<Any?> = dfsInternal(predicate)
39293921

3922+
@Deprecated(
3923+
message = "dfs is deprecated, use recursively instead.",
3924+
replaceWith = ReplaceWith("this.cols(predicate).recursively()"),
3925+
level = DeprecationLevel.WARNING,
3926+
)
39303927
public fun String.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate)
39313928

3929+
@Deprecated(
3930+
message = "dfs is deprecated, use recursively instead.",
3931+
replaceWith = ReplaceWith("this.cols(predicate).recursively()"),
3932+
level = DeprecationLevel.WARNING,
3933+
)
39323934
public fun <C> KProperty<C>.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> =
39333935
toColumnAccessor().dfs(predicate)
39343936

39353937
// endregion
39363938

39373939
// region all
3940+
public fun ColumnSet<*>.all(): ColumnSet<*> = wrap()
39383941

39393942
public fun SingleColumn<*>.all(): ColumnSet<*> = transformSingle { it.children() }
39403943

@@ -3944,11 +3947,26 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
39443947

39453948
// region allDfs
39463949

3950+
@Deprecated(
3951+
message = "allDfs is deprecated, use recursively instead.",
3952+
replaceWith = ReplaceWith("this.allRecursively(includeGroups)"),
3953+
level = DeprecationLevel.WARNING,
3954+
)
39473955
public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet<Any?> =
39483956
if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() }
39493957

3958+
@Deprecated(
3959+
message = "allDfs is deprecated, use recursively instead.",
3960+
replaceWith = ReplaceWith("this.allRecursively(includeGroups)"),
3961+
level = DeprecationLevel.WARNING,
3962+
)
39503963
public fun String.allDfs(includeGroups: Boolean = false): ColumnSet<Any?> = toColumnAccessor().allDfs(includeGroups)
39513964

3965+
@Deprecated(
3966+
message = "allDfs is deprecated, use recursively instead.",
3967+
replaceWith = ReplaceWith("this.allRecursively(includeGroups)"),
3968+
level = DeprecationLevel.WARNING,
3969+
)
39523970
public fun KProperty<*>.allDfs(includeGroups: Boolean = false): ColumnSet<Any?> =
39533971
toColumnAccessor().allDfs(includeGroups)
39543972

@@ -4140,7 +4158,8 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
41404158
public fun <C> ColumnSet<C>.except(vararg other: ColumnSet<*>): ColumnSet<*> = except(other.toColumnSet())
41414159
public fun <C> ColumnSet<C>.except(vararg other: String): ColumnSet<*> = except(other.toColumnSet())
41424160

4143-
public fun <C> ColumnSet<C?>.withoutNulls(): ColumnSet<C> = transform { it.filter { !it.hasNulls } } as ColumnSet<C>
4161+
public fun <C> ColumnSet<C?>.withoutNulls(): ColumnSet<C> =
4162+
transform { it.filter { !it.hasNulls() } } as ColumnSet<C>
41444163

41454164
public infix fun <C> ColumnSet<C>.except(other: ColumnSet<*>): ColumnSet<*> =
41464165
createColumnSet { resolve(it).allColumnsExcept(other.resolve(it)) }
@@ -4338,12 +4357,30 @@ internal fun <T, C> ColumnsSelector<T, C>.filter(predicate: (ColumnWithPath<C>)
43384357
internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>) =
43394358
transform { it.flatMap { it.children().filter { predicate(it) } } }
43404359

4360+
@Deprecated("Replaced with recursively()")
43414361
internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) =
43424362
transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } }
43434363

4364+
@Deprecated(
4365+
message = "Use recursively() instead",
4366+
replaceWith = ReplaceWith(
4367+
"this.colsOf(type, predicate).recursively()",
4368+
"org.jetbrains.kotlinx.dataframe.columns.recursively",
4369+
"org.jetbrains.kotlinx.dataframe.columns.recursively",
4370+
"org.jetbrains.kotlinx.dataframe.api.colsOf",
4371+
),
4372+
)
43444373
public fun <C> ColumnSet<*>.dfsOf(type: KType, predicate: (ColumnWithPath<C>) -> Boolean = { true }): ColumnSet<*> =
43454374
dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) }
43464375

4376+
@Deprecated(
4377+
message = "Use recursively() instead",
4378+
replaceWith = ReplaceWith(
4379+
"this.colsOf<C>(filter).recursively()",
4380+
"org.jetbrains.kotlinx.dataframe.columns.recursively",
4381+
"org.jetbrains.kotlinx.dataframe.api.colsOf",
4382+
),
4383+
)
43474384
public inline fun <reified C> ColumnSet<*>.dfsOf(noinline filter: (ColumnWithPath<C>) -> Boolean = { true }): ColumnSet<C> =
43484385
dfsOf(typeOf<C>(), filter) as ColumnSet<C>
43494386

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,7 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.ColumnsContainer
44
import org.jetbrains.kotlinx.dataframe.DataFrame
5-
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
6-
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
7-
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
8-
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
9-
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
5+
import org.jetbrains.kotlinx.dataframe.columns.*
106
import org.jetbrains.kotlinx.dataframe.impl.api.joinImpl
117
import kotlin.reflect.KProperty
128

@@ -112,6 +108,13 @@ public class ColumnMatch<C>(public val left: ColumnReference<C>, public val righ
112108
override fun resolve(context: ColumnResolutionContext): List<ColumnWithPath<C>> {
113109
throw UnsupportedOperationException()
114110
}
111+
112+
override fun resolveAfterTransform(
113+
context: ColumnResolutionContext,
114+
transform: (List<ColumnWithPath<C>>) -> List<ColumnWithPath<C>>,
115+
): List<ColumnWithPath<C>> {
116+
throw UnsupportedOperationException()
117+
}
115118
}
116119

117120
public typealias JoinColumnsSelector<A, B> = JoinDsl<A, B>.(ColumnsContainer<A>) -> ColumnSet<*>
Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
package org.jetbrains.kotlinx.dataframe.columns
22

3-
import org.jetbrains.kotlinx.dataframe.AnyFrame
4-
import org.jetbrains.kotlinx.dataframe.AnyRow
5-
import org.jetbrains.kotlinx.dataframe.DataColumn
6-
import org.jetbrains.kotlinx.dataframe.DataFrame
7-
import org.jetbrains.kotlinx.dataframe.DataRow
3+
import org.jetbrains.kotlinx.dataframe.*
4+
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
85
import org.jetbrains.kotlinx.dataframe.api.name
6+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
97
import org.jetbrains.kotlinx.dataframe.impl.columnName
108
import org.jetbrains.kotlinx.dataframe.impl.columns.RenamedColumnReference
119
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
1210
import org.jetbrains.kotlinx.dataframe.impl.columns.getColumn
11+
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
1312
import kotlin.reflect.KProperty
1413

1514
/**
@@ -20,7 +19,8 @@ import kotlin.reflect.KProperty
2019
*/
2120
public interface ColumnReference<out C> : SingleColumn<C> {
2221

23-
public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference<C> = renamedReference(property.columnName)
22+
public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference<C> =
23+
renamedReference(property.columnName)
2424

2525
public fun name(): String
2626

@@ -32,13 +32,28 @@ public interface ColumnReference<out C> : SingleColumn<C> {
3232

3333
public fun getValueOrNull(row: AnyRow): C? = resolveFor(row.df())?.get(row.index())
3434

35-
override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<C>? {
36-
return context.df.getColumn<C>(path(), context.unresolvedColumnsPolicy)?.addPath(path())
37-
}
35+
override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<C>? =
36+
context.df
37+
.getColumn<C>(path(), context.unresolvedColumnsPolicy)
38+
?.addPath(path())
39+
40+
override fun resolveSingleAfter(
41+
context: ColumnResolutionContext,
42+
conversion: (List<ColumnWithPath<C>>) -> List<ColumnWithPath<@UnsafeVariance C>>,
43+
): ColumnWithPath<C>? =
44+
context.df
45+
.asColumnGroup()
46+
.transform { conversion(it as List<ColumnWithPath<C>>) }
47+
.resolve(context)
48+
.toDataFrame()
49+
.getColumn<C>(path(), context.unresolvedColumnsPolicy)
50+
?.addPath(path())
3851
}
3952

40-
internal fun <C> ColumnReference<C>.renamedReference(newName: String): ColumnReference<C> = RenamedColumnReference(this, newName)
53+
internal fun <C> ColumnReference<C>.renamedReference(newName: String): ColumnReference<C> =
54+
RenamedColumnReference(this, newName)
4155

4256
internal fun ColumnReference<*>.shortPath() = ColumnPath(name)
4357

44-
internal fun <C> ColumnReference<C>.resolveFor(df: AnyFrame): ColumnWithPath<C>? = resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip))
58+
internal fun <C> ColumnReference<C>.resolveFor(df: AnyFrame): ColumnWithPath<C>? =
59+
resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip))

0 commit comments

Comments
 (0)