Skip to content
59 changes: 42 additions & 17 deletions core/api/core.api

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.api.JsonPath
import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.io.JSON

/**
Expand Down Expand Up @@ -80,4 +81,6 @@ public annotation class JsonOptions(
* `["""\$["store"]["book"][*]["author"]"""]`
*/
public val keyValuePaths: Array<String> = [],
/** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */
public val unifyNumbers: Boolean = true,
)
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,30 @@ internal fun <T> getValuesType(values: List<T>, type: KType, infer: Infer): KTyp
@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, listifyValues: Boolean = false): KType =
    // Hidden three-parameter overload: forwards to the full overload with the two newer
    // options spelled out, so both `allColsMakesRow` and `unifyNumbers` stay switched off here.
    guessValueType(
        values = values,
        upperBound = upperBound,
        listifyValues = listifyValues,
        allColsMakesRow = false,
        unifyNumbers = false,
    )

/**
 * Hidden overload without the `unifyNumbers` parameter, kept just for binary compatibility
 * since the function is `@PublishedApi`.
 *
 * Every argument is forwarded unchanged to the full overload, with `unifyNumbers = false`.
 */
@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun guessValueType(
    values: Sequence<Any?>,
    upperBound: KType? = null,
    listifyValues: Boolean = false,
    allColsMakesRow: Boolean = false,
): KType {
    return guessValueType(
        values = values,
        upperBound = upperBound,
        listifyValues = listifyValues,
        allColsMakesRow = allColsMakesRow,
        unifyNumbers = false,
    )
}

/**
* Returns the guessed value type of the given [values] sequence.
Expand All @@ -381,13 +404,18 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
* @param allColsMakesRow if true, then, if all values are non-null columns, we assume
* that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`,
* so the function will return [DataRow].
* @param unifyNumbers if true, then all number types encountered will be unified to the smallest possible
* number-type that can losslessly hold all number values in [values]. See [commonNumberClass].
* Unsigned numbers are not supported.
* If false, the result of encountering multiple number types would be [Number].
*/
@PublishedApi
internal fun guessValueType(
values: Sequence<Any?>,
upperBound: KType? = null,
listifyValues: Boolean = false,
allColsMakesRow: Boolean = false,
unifyNumbers: Boolean = false,
): KType {
val classes = mutableSetOf<KClass<*>>()
val collectionClasses = mutableSetOf<KClass<out Collection<*>>>()
Expand Down Expand Up @@ -443,6 +471,18 @@ internal fun guessValueType(
classesInCollection.all { it.isSubclassOf(DataRow::class) } &&
!nullsInCollection

if (unifyNumbers) {
val nothingClass = Nothing::class
val usedNumberClasses = classes.filter {
it.isSubclassOf(Number::class) && it != nothingClass
}
if (usedNumberClasses.isNotEmpty()) {
val unifiedNumberClass = usedNumberClasses.unifiedNumberClass() as KClass<Number>
classes -= usedNumberClasses
classes += unifiedNumberClass
}
}

return when {
classes.isNotEmpty() -> {
if (hasRows) classes.add(DataRow::class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,23 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver
import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl
import org.jetbrains.kotlinx.dataframe.impl.api.createConverter
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.guessValueType
import org.jetbrains.kotlinx.dataframe.impl.isNothing
import org.jetbrains.kotlinx.dataframe.impl.replaceGenericTypeParametersWithUpperbound
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN
import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

// region create DataColumn

Expand Down Expand Up @@ -193,6 +198,9 @@ internal fun Array<out String>.toNumberColumns() = toColumnsSetOf<Number>()
* Note: this parameter is ignored if another [Collection] is present in the values.
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
* @param unifyNumbers if `true`, then all numbers encountered in [values] will be converted to the smallest possible
* number-type that can losslessly hold all the values. Unsigned numbers are not supported. See [UnifyingNumbers].
* For example, if the values are `[1, 2f, 3.0]`, then all values will be converted to [Double].
*/
@PublishedApi
internal fun <T> createColumnGuessingType(
Expand All @@ -202,6 +210,7 @@ internal fun <T> createColumnGuessingType(
nullable: Boolean? = null,
listifyValues: Boolean = false,
allColsMakesColGroup: Boolean = false,
unifyNumbers: Boolean = false,
): DataColumn<T> =
createColumnGuessingType(
name = "",
Expand All @@ -211,6 +220,7 @@ internal fun <T> createColumnGuessingType(
nullable = nullable,
listifyValues = listifyValues,
allColsMakesColGroup = allColsMakesColGroup,
unifyNumbers = unifyNumbers,
)

/**
Expand All @@ -226,6 +236,7 @@ internal fun <T> createColumnGuessingType(
nullable: Boolean? = null,
listifyValues: Boolean = false,
allColsMakesColGroup: Boolean = false,
unifyNumbers: Boolean = false,
): DataColumn<T> {
val type = when (suggestedType) {
is TypeSuggestion.Infer, is TypeSuggestion.InferWithUpperbound ->
Expand All @@ -234,11 +245,23 @@ internal fun <T> createColumnGuessingType(
upperBound = (suggestedType as? TypeSuggestion.InferWithUpperbound)?.upperbound,
listifyValues = listifyValues,
allColsMakesRow = allColsMakesColGroup,
unifyNumbers = unifyNumbers,
)

is TypeSuggestion.Use -> suggestedType.type
}

// Builds a null-tolerant converter to [targetType]; only needs to be used when unifyNumbers == true.
// Number values are passed through a Number -> targetType converter obtained from createConverter;
// every other value (including null) is returned unchanged.
@Suppress("UNCHECKED_CAST")
fun getSafeNumberConverter(targetType: KType): (Any?) -> Any? {
    val converter = createConverter(
        from = typeOf<Number>(),
        to = targetType,
    ) as (Number) -> Number?

    // `is Number` already excludes null, so no separate null check is needed.
    return { value -> if (value is Number) converter(value) else value }
}

return when (type.classifier!! as KClass<*>) {
// guessValueType can only return DataRow if all values are `AnyRow?`
// or allColsMakesColGroup == true, and all values are `AnyCol`
Expand Down Expand Up @@ -269,18 +292,29 @@ internal fun <T> createColumnGuessingType(
List::class -> {
val nullable = type.isMarkedNullable
var isListOfRows: Boolean? = null
val lists = values.map {
when (it) {
val subType = type.arguments.first().type!! // List<T> -> T

val needsNumberConversion = unifyNumbers &&
subType.isSubtypeOf(typeOf<Number?>()) &&
!subType.isNothing
val numberConverter: (Any?) -> Any? by lazy { getSafeNumberConverter(subType) }

val lists = values.map { value ->
when (value) {
null -> if (nullable) null else emptyList()

is List<*> -> {
if (isListOfRows != false && it.isNotEmpty()) isListOfRows = it.all { it is AnyRow }
it
if (isListOfRows != false && value.isNotEmpty()) isListOfRows = value.all { it is AnyRow }

if (needsNumberConversion) value.map(numberConverter) else value
}

else -> { // if !detectType and suggestedType is a list, we wrap the values in lists
if (isListOfRows != false) isListOfRows = it is AnyRow
listOf(it)
if (isListOfRows != false) isListOfRows = value is AnyRow

listOf(
if (needsNumberConversion) numberConverter(value) else value,
)
}
}
}
Expand All @@ -303,10 +337,15 @@ internal fun <T> createColumnGuessingType(
}
}

else ->
else -> {
val needsNumberConversion = unifyNumbers &&
type.isSubtypeOf(typeOf<Number?>()) &&
!type.isNothing
val numberConverter by lazy { getSafeNumberConverter(type) }

DataColumn.createValueColumn(
name = name,
values = values.asList(),
values = if (needsNumberConversion) values.map(numberConverter) as List<T> else values.asList(),
type = if (nullable != null) type.withNullability(nullable) else type,
infer = when {
// even though an exact type is suggested,
Expand All @@ -318,6 +357,7 @@ internal fun <T> createColumnGuessingType(
},
defaultValue = defaultValue,
)
}
}
}

Expand All @@ -332,6 +372,7 @@ internal fun <T> createColumn(values: Iterable<T>, suggestedType: KType, guessTy
values = values,
suggestedType = TypeSuggestion.create(suggestedType, guessType),
allColsMakesColGroup = true,
unifyNumbers = false,
)

/** Just for binary compatibility, since it's @PublishedApi. */
Expand All @@ -355,4 +396,48 @@ internal fun <T> guessColumnType(
allColsMakesColGroup = false,
)

/**
 * Hidden overload without the `unifyNumbers` parameter, kept just for binary compatibility
 * since the function is `@PublishedApi`.
 *
 * Every argument is forwarded unchanged to the current overload, with `unifyNumbers = false`.
 */
@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun <T> createColumnGuessingType(
    values: Iterable<T>,
    suggestedType: TypeSuggestion = TypeSuggestion.Infer,
    defaultValue: T? = null,
    nullable: Boolean? = null,
    listifyValues: Boolean = false,
    allColsMakesColGroup: Boolean = false,
): DataColumn<T> {
    return createColumnGuessingType(
        values = values,
        suggestedType = suggestedType,
        defaultValue = defaultValue,
        nullable = nullable,
        listifyValues = listifyValues,
        allColsMakesColGroup = allColsMakesColGroup,
        unifyNumbers = false,
    )
}

/**
 * Hidden named-column overload without the `unifyNumbers` parameter, kept just for binary
 * compatibility since the function is `@PublishedApi`.
 *
 * Every argument, including [name], is forwarded unchanged to the current overload,
 * with `unifyNumbers = false`.
 */
@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun <T> createColumnGuessingType(
    name: String,
    values: Iterable<T>,
    suggestedType: TypeSuggestion = TypeSuggestion.Infer,
    defaultValue: T? = null,
    nullable: Boolean? = null,
    listifyValues: Boolean = false,
    allColsMakesColGroup: Boolean = false,
): DataColumn<T> {
    return createColumnGuessingType(
        name = name,
        values = values,
        suggestedType = suggestedType,
        defaultValue = defaultValue,
        nullable = nullable,
        listifyValues = listifyValues,
        allColsMakesColGroup = allColsMakesColGroup,
        unifyNumbers = false,
    )
}

// endregion
Loading