|
| 1 | +package org.jetbrains.kotlinx.dataframe |
| 2 | + |
| 3 | +import org.jetbrains.kotlinx.dataframe.api.Infer |
| 4 | +import org.jetbrains.kotlinx.dataframe.api.asDataColumn |
| 5 | +import org.jetbrains.kotlinx.dataframe.api.cast |
| 6 | +import org.jetbrains.kotlinx.dataframe.api.concat |
| 7 | +import org.jetbrains.kotlinx.dataframe.api.filter |
| 8 | +import org.jetbrains.kotlinx.dataframe.api.schema |
| 9 | +import org.jetbrains.kotlinx.dataframe.api.take |
| 10 | +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn |
| 11 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup |
| 12 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind |
| 13 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnPath |
| 14 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext |
| 15 | +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath |
| 16 | +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn |
| 17 | +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn |
| 18 | +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl |
| 19 | +import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl |
| 20 | +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl |
| 21 | +import org.jetbrains.kotlinx.dataframe.impl.columns.addPath |
| 22 | +import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType |
| 23 | +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind |
| 24 | +import org.jetbrains.kotlinx.dataframe.impl.getValuesType |
| 25 | +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices |
| 26 | +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema |
| 27 | +import kotlin.reflect.KClass |
| 28 | +import kotlin.reflect.KProperty |
| 29 | +import kotlin.reflect.KType |
| 30 | +import kotlin.reflect.typeOf |
| 31 | + |
/**
 * A named column that stores values of one declared [type].
 *
 * [ValueColumn] and [FrameColumn] extend this interface; [ColumnGroup] does not. Even so, the implementations
 * of all three [column kinds][ColumnKind] derive from DataColumn, so casting any of them to it is safe.
 * Column operations whose signatures would clash with the [DataFrame] API ([filter], [take], [map] etc.)
 * are declared for [DataColumn] rather than for [BaseColumn].
 *
 * @param T type of values in the column.
 */
public interface DataColumn<out T> : BaseColumn<T> {

    public companion object {

        /**
         * Builds a [ValueColumn] from the given [name], [values] and [type].
         *
         * The type stored in the column is whatever [getValuesType] returns for [values], [type] and [infer].
         *
         * @param name name of the column
         * @param values list of column values
         * @param type type of the column
         * @param infer column type inference mode
         * @param defaultValue passed through unchanged to the column implementation; `null` when omitted
         */
        public fun <T> createValueColumn(
            name: String,
            values: List<T>,
            type: KType,
            infer: Infer = Infer.None,
            defaultValue: T? = null
        ): ValueColumn<T> {
            val resolvedType = getValuesType(values, type, infer)
            return ValueColumnImpl(values, name, resolvedType, defaultValue)
        }

        /**
         * Builds a [ValueColumn] from the given [name] and [values], taking the column type from the reified [T].
         *
         * The type is obtained at compile time through `typeOf<T>()`, run through [getValuesType] together
         * with [infer], and then handed to the [KType]-based overload.
         *
         * @param T type of the column
         * @param name name of the column
         * @param values list of column values
         * @param infer column type inference mode
         */
        public inline fun <reified T> createValueColumn(
            name: String,
            values: List<T>,
            infer: Infer = Infer.None
        ): ValueColumn<T> {
            val resolvedType = getValuesType(values, typeOf<T>(), infer)
            return createValueColumn(name, values, resolvedType)
        }

        /**
         * Wraps [df] into a [ColumnGroup] called [name].
         *
         * @param name name of the column group
         * @param df data frame whose columns become the content of the group
         */
        public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> {
            return ColumnGroupImpl(name, df)
        }

        /**
         * Builds a [FrameColumn] called [name] by splitting [df] at [startIndices].
         *
         * The schema of the resulting column is computed lazily from [df].
         *
         * @param name name of the frame column
         * @param df data frame to split into the individual frames of the column
         * @param startIndices indices forwarded to [splitByIndices] to cut [df] into pieces
         */
        public fun <T> createFrameColumn(
            name: String,
            df: DataFrame<T>,
            startIndices: Iterable<Int>
        ): FrameColumn<T> {
            val groups = df.splitByIndices(startIndices.asSequence()).toList()
            return FrameColumnImpl(name, groups, lazy { df.schema() })
        }

        /**
         * Builds a [FrameColumn] called [name] from already prepared [groups].
         *
         * @param name name of the frame column
         * @param groups data frames that become the values of the column
         * @param schema optional lazily evaluated schema, passed as is to the column implementation
         */
        public fun <T> createFrameColumn(
            name: String,
            groups: List<DataFrame<T>>,
            schema: Lazy<DataFrameSchema>? = null
        ): FrameColumn<T> {
            return FrameColumnImpl(name, groups, schema)
        }

        /**
         * Builds a column called [name] from [values], delegating the choice of column type to [guessColumnType].
         *
         * @param name name of the column
         * @param values list of column values
         * @param nullable forwarded to [guessColumnType] as its `nullable` argument
         */
        public fun <T> createWithTypeInference(
            name: String,
            values: List<T>,
            nullable: Boolean? = null
        ): DataColumn<T> {
            return guessColumnType(name, values, nullable = nullable)
        }

        /**
         * Builds a column whose [kind][ColumnKind] is derived from [type].
         *
         * - [ColumnKind.Value]: a [ValueColumn] built from [values], [type] and [infer];
         * - [ColumnKind.Group]: [values] are treated as rows and concatenated into a [ColumnGroup];
         * - [ColumnKind.Frame]: [values] are treated as data frames and stored in a [FrameColumn].
         *
         * @param name name of the column
         * @param values list of column values
         * @param type type of the column; determines the kind of column that gets created
         * @param infer column type inference mode, used only for value columns
         */
        public fun <T> create(name: String, values: List<T>, type: KType, infer: Infer = Infer.None): DataColumn<T> =
            when (type.toColumnKind()) {
                ColumnKind.Value -> createValueColumn(name, values, type, infer)

                ColumnKind.Group -> {
                    val rows = values as List<AnyRow?>
                    createColumnGroup(name, rows.concat()).asDataColumn().cast()
                }

                ColumnKind.Frame -> {
                    val frames = values as List<AnyFrame>
                    createFrameColumn(name, frames).asDataColumn().cast()
                }
            }

        /**
         * Builds a column whose kind is derived from the reified type [T].
         *
         * @param T type of the column
         * @param name name of the column
         * @param values list of column values
         * @param infer column type inference mode
         */
        public inline fun <reified T> create(
            name: String,
            values: List<T>,
            infer: Infer = Infer.None
        ): DataColumn<T> {
            return create(name, values, typeOf<T>(), infer)
        }

        /**
         * Builds a value column of type [Unit] that contains no values.
         *
         * @param name name of the column, an empty string by default
         */
        public fun empty(name: String = ""): AnyCol {
            return createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
        }
    }

    /** Tells whether the column type is marked nullable; the check looks at the type only. */
    public fun hasNulls(): Boolean {
        return type().isMarkedNullable
    }

    /** Overridden so that the result is typed as [DataColumn]. */
    override fun distinct(): DataColumn<T>

    /** Overridden so that the result is typed as [DataColumn]. */
    override fun get(indices: Iterable<Int>): DataColumn<T>

    /** Overridden so that the result is typed as [DataColumn]. */
    override fun rename(newName: String): DataColumn<T>

    /** Resolves this column to itself with a path attached; [context] is not consulted. */
    override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath<T>? {
        return this.addPath()
    }

    /** Property delegate support: returns the inherited result cast to [DataColumn]. */
    override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn<T> {
        val delegated = super.getValue(thisRef, property)
        return delegated as DataColumn<T>
    }

    /** Iterates over the values of the column, which makes it usable in `for` loops. */
    public operator fun iterator(): Iterator<T> {
        return values().iterator()
    }

    /** Overridden so that the result is typed as [DataColumn]. */
    public override operator fun get(range: IntRange): DataColumn<T>
}
| 125 | + |
/** Property-style shortcut for calling `name()` on the column. */
public val AnyCol.name: String
    get() = name()

/** Property-style shortcut for calling `path()` on the column. */
public val AnyCol.path: ColumnPath
    get() = path()
| 128 | + |
/** Property-style shortcut for calling `values()` on the column. */
public val <T> DataColumn<T>.values: Iterable<T>
    get() = values()

/** Property-style shortcut for calling `hasNulls()` on the column. */
public val AnyCol.hasNulls: Boolean
    get() = hasNulls()

/** Property-style shortcut for calling `size()` on the column. */
public val AnyCol.size: Int
    get() = size()

/** Property-style shortcut for calling `indices()` on the column. */
public val AnyCol.indices: IntRange
    get() = indices()
| 133 | + |
/** Property-style shortcut for calling `type()` on the column. */
public val AnyCol.type: KType
    get() = type()

/** Property-style shortcut for calling `kind()` on the column. */
public val AnyCol.kind: ColumnKind
    get() = kind()

/**
 * Class behind the column [type].
 *
 * Fails with an error when the classifier of the type is not a [KClass].
 */
public val AnyCol.typeClass: KClass<*>
    get() = when (val classifier = type.classifier) {
        is KClass<*> -> classifier
        else -> error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type")
    }
| 137 | + |
/** Range of row indices of this column: from `0` up to, but excluding, `size()`. */
public fun AnyBaseCol.indices(): IntRange {
    return 0 until size()
}
0 commit comments