diff --git a/.editorconfig b/.editorconfig index be0f4587f3..a1d81313c6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -33,6 +33,7 @@ ktlint_standard_chain-method-continuation = disabled ktlint_ignore_back_ticked_identifier = true ktlint_standard_multiline-expression-wrapping = disabled ktlint_standard_when-entry-bracing = disabled +ktlint_standard_function-expression-body = disabled [{*/build/**/*,**/*keywords*/**,**/*.Generated.kt,**/*$Extensions.kt}] ktlint = disabled diff --git a/core/api/core.api b/core/api/core.api index afb2d7b7ef..aa45c54174 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -5877,8 +5877,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt { } public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt { - public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KType;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun getCanBeUnfolded (Lkotlin/reflect/KClass;)Z public static final fun getHasProperties (Lkotlin/reflect/KClass;)Z public static final fun isValueType (Lkotlin/reflect/KClass;)Z @@ -5889,7 +5889,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt { } public final class org/jetbrains/kotlinx/dataframe/impl/api/UnfoldKt { - public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; } public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index b532a29368..fdbbed1312 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator -import org.jetbrains.kotlinx.dataframe.impl.api.canBeUnfolded import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columnName @@ -21,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.KCallable import kotlin.reflect.KClass import kotlin.reflect.KProperty +import kotlin.reflect.typeOf // region read DataFrame from objects @@ -28,21 +28,13 @@ import kotlin.reflect.KProperty @Interpretable("toDataFrameDefault") public inline fun Iterable.toDataFrame(): DataFrame = toDataFrame { - // check if type is value: primitives, primitive arrays, datetime types etc., - // or has no properties - if (!T::class.canBeUnfolded) { - // create a single `value` column - ValueProperty::value.name from { it } - } else { - // otherwise creates columns based on properties - properties() - } + properties() } @Refine @Interpretable("toDataFrameDsl") public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = - createDataFrameImpl(T::class, body) + createDataFrameImpl(typeOf(), body) @Refine @Interpretable("toDataFrame") diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt index e0b9f17520..49e984fd8e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -13,9 +13,10 @@ import org.jetbrains.kotlinx.dataframe.impl.api.unfoldImpl import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.KCallable import kotlin.reflect.KProperty +import kotlin.reflect.typeOf public inline fun DataColumn.unfold(vararg roots: KCallable<*>, maxDepth: Int = 0): AnyCol = - unfoldImpl { properties(roots = roots, maxDepth) } + unfoldImpl(typeOf()) { properties(roots = roots, maxDepth) } @Refine @Interpretable("DataFrameUnfold") @@ -23,7 +24,7 @@ public fun DataFrame.unfold( vararg roots: KCallable<*>, maxDepth: Int = 0, columns: ColumnsSelector, -): DataFrame = replace(columns).with { it.unfoldImpl { properties(roots = roots, maxDepth) } } +): DataFrame = replace(columns).with { it.unfoldImpl(it.type()) { properties(roots = roots, maxDepth) } } public fun DataFrame.unfold(vararg columns: String): DataFrame = unfold { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt index 948525a9ad..cf6670c4e3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt @@ -28,6 +28,7 @@ import java.time.temporal.TemporalAmount import kotlin.reflect.KCallable import kotlin.reflect.KClass import kotlin.reflect.KProperty +import kotlin.reflect.KType import kotlin.reflect.KVisibility import kotlin.reflect.full.isSubclassOf import kotlin.reflect.full.memberFunctions @@ -98,13 +99,11 @@ internal val KClass<*>.isValueType: Boolean */ @PublishedApi internal val KClass<*>.hasProperties: Boolean - get() = this.memberProperties.any { it.visibility == KVisibility.PUBLIC } || - // check pojo-like classes - this.memberFunctions.any { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } + get() = properties().isNotEmpty() internal class CreateDataFrameDslImpl( override val source: Iterable, - private val clazz: KClass<*>, + private val type: KType, private val prefix: ColumnPath = emptyPath(), private val configuration: TraverseConfiguration = TraverseConfiguration(), ) : CreateDataFrameDsl(), @@ -119,7 +118,7 @@ internal class CreateDataFrameDslImpl( } override operator fun String.invoke(builder: CreateDataFrameDsl.() -> Unit) { - val child = CreateDataFrameDslImpl(source, clazz, prefix + this) + val child = CreateDataFrameDslImpl(source, type, prefix + this) builder(child) columns.addAll(child.columns) } @@ -182,11 +181,12 @@ internal class CreateDataFrameDslImpl( } val df = convertToDataFrame( data = source, - clazz = clazz, + type = type, roots = roots.toList(), excludes = dsl.excludeProperties, preserveClasses = dsl.preserveClasses, preserveProperties = dsl.preserveProperties, + excludeClasses = dsl.excludeClasses, maxDepth = maxDepth, ) df.columns().forEach { @@ -197,10 +197,10 @@ internal class CreateDataFrameDslImpl( @PublishedApi internal fun Iterable.createDataFrameImpl( - clazz: KClass<*>, + type: KType, body: CreateDataFrameDslImpl.() -> Unit, ): DataFrame { - val builder = CreateDataFrameDslImpl(this, clazz) + val builder = CreateDataFrameDslImpl(this, type) builder.body() return builder.columns.toDataFrameFromPairs() } @@ -208,22 +208,24 @@ internal fun Iterable.createDataFrameImpl( @PublishedApi internal fun convertToDataFrame( data: Iterable<*>, - clazz: KClass<*>, + type: KType, roots: List>, excludes: Set>, preserveClasses: Set>, preserveProperties: Set>, + excludeClasses: Set>, maxDepth: Int, ): AnyFrame { + val clazz = type.classifierOrAny() + // this check relies on later recursive calls having roots = emptyList() + if (roots.isEmpty() && !clazz.canBeUnfolded) { + val column = DataColumn.createByType("value", data.toList(), type) + return dataFrameOf(column) + } + val properties: List> = roots .ifEmpty { - clazz.memberProperties - .filter { it.visibility == KVisibility.PUBLIC } - } - // fall back to getter functions for pojo-like classes if no member properties were found - .ifEmpty { - clazz.memberFunctions - .filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } + clazz.properties() } // sort properties by order in constructor .sortWithConstructor(clazz) @@ -231,10 +233,12 @@ internal fun convertToDataFrame( val columns = properties.mapNotNull { val property = it if (excludes.contains(property)) return@mapNotNull null + val klass = it.returnType.classifier as? KClass<*> + if (excludeClasses.contains(klass)) return@mapNotNull null class ValueClassConverter(val unbox: Method, val box: Method) - val valueClassConverter = (it.returnType.classifier as? KClass<*>)?.let { kClass -> + val valueClassConverter = klass?.let { kClass -> if (!kClass.isValue) return@let null val constructor = requireNotNull(kClass.primaryConstructor) { @@ -302,10 +306,11 @@ internal fun convertToDataFrame( val keepSubtree = maxDepth <= 0 && !fieldKind.shouldBeConvertedToFrameColumn && !fieldKind.shouldBeConvertedToColumnGroup val shouldCreateValueCol = keepSubtree || - kClass == Any::class || kClass in preserveClasses || property in preserveProperties || - kClass.isValueType + !kClass.canBeUnfolded && + !fieldKind.shouldBeConvertedToFrameColumn && + !fieldKind.shouldBeConvertedToColumnGroup val shouldCreateFrameCol = kClass == DataFrame::class && !nullable val shouldCreateColumnGroup = kClass == DataRow::class @@ -368,11 +373,12 @@ internal fun convertToDataFrame( require(it is Iterable<*>) convertToDataFrame( data = it, - clazz = elementClass, + type = elementType, roots = emptyList(), excludes = excludes, preserveClasses = preserveClasses, preserveProperties = preserveProperties, + excludeClasses = excludeClasses, maxDepth = maxDepth - 1, ) } @@ -386,11 +392,12 @@ internal fun convertToDataFrame( else -> { val df = convertToDataFrame( data = values, - clazz = kClass, + type = returnType, roots = emptyList(), excludes = excludes, preserveClasses = preserveClasses, preserveProperties = preserveProperties, + excludeClasses = excludeClasses, maxDepth = maxDepth - 1, ) DataColumn.createColumnGroup(name = it.columnName, df = df) @@ -403,3 +410,15 @@ internal fun convertToDataFrame( dataFrameOf(columns) } } + +private fun KType.classifierOrAny(): KClass<*> = classifier as? KClass<*> ?: Any::class + +private fun KClass<*>.properties(): List> { + return memberProperties + .filter { it.visibility == KVisibility.PUBLIC } + // fall back to getter functions for pojo-like classes if no member properties were found + .ifEmpty { + memberFunctions + .filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt index 24503f7e6c..701c7db304 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt @@ -7,9 +7,10 @@ import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataColumn import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.typeClass +import kotlin.reflect.KType @PublishedApi -internal fun DataColumn.unfoldImpl(body: CreateDataFrameDsl.() -> Unit): AnyCol = +internal fun DataColumn.unfoldImpl(type: KType, body: CreateDataFrameDsl.() -> Unit): AnyCol = when (kind()) { ColumnKind.Group, ColumnKind.Frame -> this @@ -17,7 +18,7 @@ internal fun DataColumn.unfoldImpl(body: CreateDataFrameDsl.() -> Unit !typeClass.canBeUnfolded -> this else -> values() - .createDataFrameImpl(typeClass) { (this as CreateDataFrameDsl).body() } + .createDataFrameImpl(type) { (this as CreateDataFrameDsl).body() } .asColumnGroup(name()) .asDataColumn() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index c0e4d055bf..a5abd4a098 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -156,8 +156,11 @@ class CreateDataFrameTests { df.a[0].v shouldBe 7 val df2 = data.toDataFrame { - preserve(B::row) - properties { preserve(DataFrame::class) } + preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - works + properties { + preserve(DataFrame::class) // this@properties: TraversePropertiesDsl - always works + } + preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - doesn't } df2.frame.kind shouldBe ColumnKind.Value df2.frame.type shouldBe typeOf>() @@ -186,6 +189,24 @@ class CreateDataFrameTests { res.schema() shouldBe data.toDataFrame(maxDepth = 0).schema() } + class NestedExcludeClasses(val s: String, val list1: List) + + class ExcludeClasses(val i: Int, val list: List, val nested: NestedExcludeClasses) + + @Test + fun `exclude classes`() { + val list = listOf( + ExcludeClasses(1, listOf(1, 2, 3), NestedExcludeClasses("str", listOf("foo", "bar"))), + ) + val df = list.toDataFrame { + properties(maxDepth = 2) { + exclude(List::class) + } + } + + df shouldBe list.toDataFrame(maxDepth = 2).remove { "list" and "nested"["list1"] } + } + enum class DummyEnum { A } @Test @@ -213,8 +234,7 @@ class CreateDataFrameTests { df.rowsCount() shouldBe 1 val childCol = df[Entry::child] - childCol.kind() shouldBe ColumnKind.Group - childCol.asColumnGroup().columnsCount() shouldBe 0 + childCol.kind() shouldBe ColumnKind.Value } @Test @@ -632,4 +652,43 @@ class CreateDataFrameTests { val df = files.toDataFrame(columnName = "files") df["files"][0] shouldBe File("data.csv") } + + class MyEmptyDeclaration + + class TestItem(val name: String, val containingDeclaration: MyEmptyDeclaration, val test: Int) + + @Test + fun `preserve empty interface consistency`() { + val df = listOf(MyEmptyDeclaration(), MyEmptyDeclaration()).toDataFrame() + df["value"].type() shouldBe typeOf() + } + + @Test + fun `preserve nested empty interface consistency`() { + val df = List(10) { + TestItem( + "Test1", + MyEmptyDeclaration(), + 123, + ) + }.toDataFrame(maxDepth = 2) + + df["containingDeclaration"].type() shouldBe typeOf() + } + + @Test + fun `preserve value type consistency`() { + val list = listOf(mapOf("a" to 1)) + val df = list.toDataFrame(maxDepth = 1) + df["value"].type() shouldBe typeOf>() + } + + class MapContainer(val map: Map) + + @Test + fun `preserve nested value type consistency`() { + val list = listOf(MapContainer(mapOf("a" to 1))) + val df = list.toDataFrame(maxDepth = 2) + df["map"].type() shouldBe typeOf>() + } }