Skip to content

Commit dfaf46e

Browse files
committed
refactored suggestedType+guessTypeWithSuggestedAsUpperbound into TypeSuggestion, working on feedback
1 parent 42e2d67 commit dfaf46e

File tree

11 files changed

+119
-73
lines changed

11 files changed

+119
-73
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1515
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
1616
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1717
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
18+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
1819
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
1920
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
2021
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
@@ -73,11 +74,10 @@ public interface DataColumn<out T> : BaseColumn<T> {
7374
/**
7475
* Creates [ValueColumn] using given [name], [values] and reified column [type].
7576
*
76-
* Be careful; values are NOT checked to adhere to [type] for efficiency,
77+
* The column [type] will be defined at compile-time using [T] argument.
78+
* Be careful with casting; values are NOT checked to adhere to `reified` type [T] for efficiency,
7779
* unless you specify [infer].
7880
*
79-
* Note, that column [type] will be defined at compile-time using [T] argument
80-
*
8181
* @param T type of the column
8282
* @param name name of the column
8383
* @param values list of column values
@@ -114,7 +114,8 @@ public interface DataColumn<out T> : BaseColumn<T> {
114114
/**
115115
* Creates [FrameColumn] using the given [name] and list of dataframes [groups].
116116
*
117-
* Be careful; [groups] must be a non-null list of [DataFrames][DataFrame].
117+
* [groups] must be a non-null list of [DataFrames][DataFrame], as [FrameColumn] does
118+
* not allow `null` values.
118119
* This is NOT checked at runtime for efficiency, nor is the validity of given [schema].
119120
*
120121
* @param name name of the frame column
@@ -141,21 +142,21 @@ public interface DataColumn<out T> : BaseColumn<T> {
141142
*
142143
* @param name name of the column
143144
* @param values the values to represent each row in the column
145+
* @param suggestedType optional suggested type for values. Default is [TypeSuggestion.Infer].
146+
* See [TypeSuggestion] for more information.
144147
* @param nullable optionally, you can specify whether [values] contains nulls; if `null`, it is inferred.
145-
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
146-
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
147148
*/
148149
public fun <T> createWithTypeInference(
149150
name: String,
150151
values: List<T>,
152+
suggestedType: TypeSuggestion = TypeSuggestion.Infer,
151153
nullable: Boolean? = null,
152-
allColsMakesColGroup: Boolean = false,
153154
): DataColumn<T> =
154155
createColumnGuessingType(
155156
name = name,
156157
values = values,
158+
suggestedType = suggestedType,
157159
nullable = nullable,
158-
allColsMakesColGroup = allColsMakesColGroup,
159160
)
160161

161162
/**

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1515
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1616
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1717
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
18+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
1819
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
1920
import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
2021
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
@@ -225,8 +226,7 @@ public class ColumnDelegate<T>(private val parent: ColumnGroupReference? = null)
225226
public inline fun <reified T> columnOf(vararg values: T): DataColumn<T> =
226227
createColumnGuessingType(
227228
values = values.asIterable(),
228-
suggestedType = typeOf<T>(),
229-
guessTypeWithSuggestedAsUpperbound = true,
229+
suggestedType = TypeSuggestion.InferWithUpperbound(typeOf<T>()),
230230
listifyValues = false,
231231
allColsMakesColGroup = true,
232232
).forceResolve()
@@ -252,8 +252,7 @@ public fun <T> columnOf(frames: Iterable<DataFrame<T>>): FrameColumn<T> =
252252
public inline fun <reified T> column(values: Iterable<T>): DataColumn<T> =
253253
createColumnGuessingType(
254254
values = values,
255-
suggestedType = typeOf<T>(),
256-
guessTypeWithSuggestedAsUpperbound = false,
255+
suggestedType = TypeSuggestion.Use(typeOf<T>()),
257256
allColsMakesColGroup = true,
258257
).forceResolve()
259258

@@ -305,8 +304,7 @@ public inline fun <T, reified C> dataFrameOf(header: Iterable<T>, fill: (T) -> I
305304
createColumnGuessingType(
306305
name = value.toString(),
307306
values = fill(value).asList(),
308-
suggestedType = typeOf<C>(),
309-
guessTypeWithSuggestedAsUpperbound = true,
307+
suggestedType = TypeSuggestion.InferWithUpperbound(typeOf<C>()),
310308
)
311309
}.toDataFrame()
312310

@@ -346,8 +344,7 @@ public class DataFrameBuilder(private val header: List<String>) {
346344
createColumnGuessingType(
347345
name = name,
348346
values = valuesBuilder(name).asList(),
349-
suggestedType = typeOf<T>(),
350-
guessTypeWithSuggestedAsUpperbound = true,
347+
suggestedType = TypeSuggestion.InferWithUpperbound(typeOf<T>()),
351348
)
352349
}
353350

@@ -387,12 +384,12 @@ public class DataFrameBuilder(private val header: List<String>) {
387384
)
388385
}
389386

390-
private inline fun <reified C> fillNotNull(nrow: Int, crossinline init: (Int) -> C & Any) =
387+
private inline fun <reified C : Any> fillNotNull(nrow: Int, crossinline init: (Int) -> C) =
391388
withColumns { name ->
392389
DataColumn.createValueColumn(
393390
name = name,
394391
values = List(nrow, init),
395-
type = typeOf<C>().withNullability(false),
392+
type = typeOf<C>(),
396393
)
397394
}
398395

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,18 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55
import org.jetbrains.kotlinx.dataframe.DataColumn
66
import org.jetbrains.kotlinx.dataframe.DataFrame
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
8+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
89
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
910
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
1011
import org.jetbrains.kotlinx.dataframe.type
1112
import kotlin.reflect.KProperty
1213

13-
public fun AnyCol.inferType(): DataColumn<*> = createColumnGuessingType(name, toList(), type, true)
14+
public fun AnyCol.inferType(): DataColumn<*> =
15+
createColumnGuessingType(
16+
name = name,
17+
values = toList(),
18+
suggestedType = TypeSuggestion.InferWithUpperbound(type),
19+
)
1420

1521
// region DataFrame
1622

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt renamed to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,14 +233,14 @@ public inline fun <reified T> Iterable<T>.toValueColumn(column: KProperty<T>): V
233233
public enum class Infer {
234234

235235
/**
236-
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type].
236+
* Use `reified` type argument of an inline [DataFrame] operation as [DataColumn.type].
237237
*
238238
* This is the most efficient but least safe option.
239239
*/
240240
None,
241241

242242
/**
243-
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type],
243+
* Use `reified` type argument of an inline [DataFrame] operation as [DataColumn.type],
244244
* but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of `null` values.
245245
*/
246246
Nulls,
@@ -250,6 +250,10 @@ public enum class Infer {
250250
* base type as an upper bound.
251251
*
252252
* This is the least efficient but safest option.
253+
*
254+
* It's useful, for instance,
255+
* if you have a column of type `Any?` and want its schema type to be inferred based on the actual values.
256+
* In many cases, letting the library infer by `reified` types is enough and more efficient.
253257
*/
254258
Type,
255259

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package org.jetbrains.kotlinx.dataframe.columns
2+
3+
import kotlin.reflect.KType
4+
5+
/**
6+
* The suggestion of how to find a column type.
7+
*
8+
* The suggestion can either be:
9+
*
10+
* - [Infer] - {@include [Infer]}
11+
* - [InferWithUpperbound] - {@include [InferWithUpperbound]}
12+
* - [Use] - {@include [Use]}
13+
*
14+
* It can be either an [exact type][Use], an [upper bound][InferWithUpperbound] of possible types
15+
* after which the library will infer the exact type, or [no type at all][Infer], leaving inference to the library.
16+
*/
17+
public sealed interface TypeSuggestion {
18+
19+
public companion object {
20+
21+
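/** Creates a [TypeSuggestion]: [InferWithUpperbound] if [suggestedType] is given and [guessType] is `true`, [Use] if it is given and [guessType] is `false`, [Infer] if it is `null` and [guessType] is `true`; throws [IllegalStateException] otherwise. */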
22+
public fun create(suggestedType: KType?, guessType: Boolean): TypeSuggestion =
23+
when {
24+
suggestedType != null && guessType -> InferWithUpperbound(suggestedType)
25+
suggestedType != null && !guessType -> Use(suggestedType)
26+
suggestedType == null && guessType -> Infer
27+
else -> error("Cannot create TypeSuggestion with no suggested type and no guessing allowed.")
28+
}
29+
}
30+
31+
/** The library will try to infer the type by checking all the values. */
32+
public data object Infer : TypeSuggestion
33+
34+
/** The library will infer the type by checking all the values taking a given upper bound into account. */
35+
public data class InferWithUpperbound(val upperbound: KType) : TypeSuggestion
36+
37+
/** The library will use the specified type without inference. */
38+
public data class Use(val type: KType) : TypeSuggestion
39+
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue
77
import org.jetbrains.kotlinx.dataframe.api.filter
88
import org.jetbrains.kotlinx.dataframe.api.isComparable
99
import org.jetbrains.kotlinx.dataframe.api.isNumber
10+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
1011
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
1112

1213
internal inline fun <T> Aggregatable<T>.remainingColumns(
@@ -22,7 +23,6 @@ internal fun NamedValue.toColumnWithPath() =
2223
path to createColumnGuessingType(
2324
name = path.last(),
2425
values = listOf(value),
25-
suggestedType = type,
26-
guessTypeWithSuggestedAsUpperbound = guessType,
26+
suggestedType = TypeSuggestion.create(type, guessType),
2727
defaultValue = default,
2828
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.api.cast
88
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
99
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
1010
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
11+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
1112
import org.jetbrains.kotlinx.dataframe.hasNulls
1213
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
1314
import org.jetbrains.kotlinx.dataframe.impl.commonType
@@ -76,8 +77,7 @@ internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>?>, columnS
7677
return createColumnGuessingType(
7778
name = name,
7879
values = list,
79-
suggestedType = tartypeOf,
80-
guessTypeWithSuggestedAsUpperbound = guessType,
80+
suggestedType = TypeSuggestion.create(tartypeOf, guessType),
8181
defaultValue = defaultValue,
8282
).cast()
8383
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
2727
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
2828
import org.jetbrains.kotlinx.dataframe.api.toColumn
2929
import org.jetbrains.kotlinx.dataframe.api.tryParse
30+
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
3031
import org.jetbrains.kotlinx.dataframe.columns.size
3132
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3233
import org.jetbrains.kotlinx.dataframe.hasNulls
3334
import org.jetbrains.kotlinx.dataframe.impl.canParse
3435
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
35-
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
3636
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
3737
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3838
import org.jetbrains.kotlinx.dataframe.io.isURL
@@ -529,7 +529,11 @@ internal fun DataColumn<String?>.tryParseImpl(options: ParserOptions?): DataColu
529529

530530
// Create a new column with the parsed values,
531531
// createColumnGuessingType is used to handle unifying values if needed
532-
return createColumnGuessingType(name(), parsedValues, type)
532+
return DataColumn.createWithTypeInference(
533+
name = name(),
534+
values = parsedValues,
535+
suggestedType = TypeSuggestion.Use(type),
536+
)
533537
}
534538

535539
internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: ParserOptions?): DataColumn<T?> {
@@ -539,7 +543,11 @@ internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: Par
539543
handler(it.trim()) ?: throw IllegalStateException("Couldn't parse '$it' into type ${parser.type}")
540544
}
541545
}
542-
return createColumnGuessingType(name(), parsedValues, parser.type.withNullability(hasNulls))
546+
return DataColumn.createWithTypeInference(
547+
name = name(),
548+
values = parsedValues,
549+
suggestedType = TypeSuggestion.Use(parser.type.withNullability(hasNulls)),
550+
)
543551
}
544552

545553
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ internal fun convertToDataFrame(
263263
val shouldCreateColumnGroup = kClass == DataRow::class
264264

265265
when {
266-
hasExceptions -> DataColumn.createWithTypeInference(it.columnName, values, nullable)
266+
hasExceptions -> DataColumn.createWithTypeInference(it.columnName, values, nullable = nullable)
267267

268268
shouldCreateValueCol ->
269269
DataColumn.createValueColumn(

0 commit comments

Comments
 (0)