Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ ktlint_standard_chain-method-continuation = disabled
ktlint_ignore_back_ticked_identifier = true
ktlint_standard_multiline-expression-wrapping = disabled
ktlint_standard_when-entry-bracing = disabled
ktlint_standard_function-expression-body = disabled
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turning this off project-wide disables the quick-fix too. I use this quite often :( I'd rather recommend suppressing it on a class/file/function basis if you really need it for clarity. The KtLint plugin has a quick-fix option for suppressing.


[{*/build/**/*,**/*keywords*/**,**/*.Generated.kt,**/*$Extensions.kt}]
ktlint = disabled
6 changes: 3 additions & 3 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5877,8 +5877,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KType;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun getCanBeUnfolded (Lkotlin/reflect/KClass;)Z
public static final fun getHasProperties (Lkotlin/reflect/KClass;)Z
public static final fun isValueType (Lkotlin/reflect/KClass;)Z
Expand All @@ -5889,7 +5889,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt {
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/UnfoldKt {
public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.canBeUnfolded
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
Expand All @@ -21,28 +20,21 @@ import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KCallable
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
import kotlin.reflect.typeOf

// region read DataFrame from objects

@Refine
@Interpretable("toDataFrameDefault")
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> =
toDataFrame {
// check if type is value: primitives, primitive arrays, datetime types etc.,
// or has no properties
if (!T::class.canBeUnfolded) {
// create a single `value` column
ValueProperty<T>::value.name from { it }
} else {
// otherwise creates columns based on properties
properties()
}
properties()
}

@Refine
@Interpretable("toDataFrameDsl")
public inline fun <reified T> Iterable<T>.toDataFrame(noinline body: CreateDataFrameDsl<T>.() -> Unit): DataFrame<T> =
createDataFrameImpl(T::class, body)
createDataFrameImpl(typeOf<T>(), body)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice! We should really do a sweep of ::class usage in DF. typeOf<T>() is cheaper I believe


@Refine
@Interpretable("toDataFrame")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,18 @@ import org.jetbrains.kotlinx.dataframe.impl.api.unfoldImpl
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KCallable
import kotlin.reflect.KProperty
import kotlin.reflect.typeOf

public inline fun <reified T> DataColumn<T>.unfold(vararg roots: KCallable<*>, maxDepth: Int = 0): AnyCol =
unfoldImpl { properties(roots = roots, maxDepth) }
unfoldImpl(typeOf<T>()) { properties(roots = roots, maxDepth) }

@Refine
@Interpretable("DataFrameUnfold")
public fun <T> DataFrame<T>.unfold(
vararg roots: KCallable<*>,
maxDepth: Int = 0,
columns: ColumnsSelector<T, *>,
): DataFrame<T> = replace(columns).with { it.unfoldImpl { properties(roots = roots, maxDepth) } }
): DataFrame<T> = replace(columns).with { it.unfoldImpl(it.type()) { properties(roots = roots, maxDepth) } }

public fun <T> DataFrame<T>.unfold(vararg columns: String): DataFrame<T> = unfold { columns.toColumnSet() }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import java.time.temporal.TemporalAmount
import kotlin.reflect.KCallable
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.KVisibility
import kotlin.reflect.full.isSubclassOf
import kotlin.reflect.full.memberFunctions
Expand Down Expand Up @@ -98,13 +99,11 @@ internal val KClass<*>.isValueType: Boolean
*/
@PublishedApi
internal val KClass<*>.hasProperties: Boolean
get() = this.memberProperties.any { it.visibility == KVisibility.PUBLIC } ||
// check pojo-like classes
this.memberFunctions.any { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
get() = properties().isNotEmpty()

internal class CreateDataFrameDslImpl<T>(
override val source: Iterable<T>,
private val clazz: KClass<*>,
private val type: KType,
private val prefix: ColumnPath = emptyPath(),
private val configuration: TraverseConfiguration = TraverseConfiguration(),
) : CreateDataFrameDsl<T>(),
Expand All @@ -119,7 +118,7 @@ internal class CreateDataFrameDslImpl<T>(
}

override operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit) {
val child = CreateDataFrameDslImpl(source, clazz, prefix + this)
val child = CreateDataFrameDslImpl(source, type, prefix + this)
builder(child)
columns.addAll(child.columns)
}
Expand Down Expand Up @@ -182,11 +181,12 @@ internal class CreateDataFrameDslImpl<T>(
}
val df = convertToDataFrame(
data = source,
clazz = clazz,
type = type,
roots = roots.toList(),
excludes = dsl.excludeProperties,
preserveClasses = dsl.preserveClasses,
preserveProperties = dsl.preserveProperties,
excludeClasses = dsl.excludeClasses,
maxDepth = maxDepth,
)
df.columns().forEach {
Expand All @@ -197,44 +197,48 @@ internal class CreateDataFrameDslImpl<T>(

@PublishedApi
internal fun <T> Iterable<T>.createDataFrameImpl(
clazz: KClass<*>,
type: KType,
body: CreateDataFrameDslImpl<T>.() -> Unit,
): DataFrame<T> {
val builder = CreateDataFrameDslImpl(this, clazz)
val builder = CreateDataFrameDslImpl(this, type)
builder.body()
return builder.columns.toDataFrameFromPairs()
}

@PublishedApi
internal fun convertToDataFrame(
data: Iterable<*>,
clazz: KClass<*>,
type: KType,
roots: List<KCallable<*>>,
excludes: Set<KCallable<*>>,
preserveClasses: Set<KClass<*>>,
preserveProperties: Set<KCallable<*>>,
excludeClasses: Set<KClass<*>>,
maxDepth: Int,
): AnyFrame {
val clazz = type.classifierOrAny()
// this check relies on later recursive calls having roots = emptyList()
if (roots.isEmpty() && !clazz.canBeUnfolded) {
val column = DataColumn.createByType("value", data.toList(), type)
return dataFrameOf(column)
}

val properties: List<KCallable<*>> = roots
.ifEmpty {
clazz.memberProperties
.filter { it.visibility == KVisibility.PUBLIC }
}
// fall back to getter functions for pojo-like classes if no member properties were found
.ifEmpty {
clazz.memberFunctions
.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
clazz.properties()
}
// sort properties by order in constructor
.sortWithConstructor(clazz)

val columns = properties.mapNotNull {
val property = it
if (excludes.contains(property)) return@mapNotNull null
val klass = it.returnType.classifier as? KClass<*>
if (excludeClasses.contains(klass)) return@mapNotNull null

class ValueClassConverter(val unbox: Method, val box: Method)

val valueClassConverter = (it.returnType.classifier as? KClass<*>)?.let { kClass ->
val valueClassConverter = klass?.let { kClass ->
if (!kClass.isValue) return@let null

val constructor = requireNotNull(kClass.primaryConstructor) {
Expand Down Expand Up @@ -302,10 +306,11 @@ internal fun convertToDataFrame(
val keepSubtree =
maxDepth <= 0 && !fieldKind.shouldBeConvertedToFrameColumn && !fieldKind.shouldBeConvertedToColumnGroup
val shouldCreateValueCol = keepSubtree ||
kClass == Any::class ||
kClass in preserveClasses ||
property in preserveProperties ||
kClass.isValueType
!kClass.canBeUnfolded &&
!fieldKind.shouldBeConvertedToFrameColumn &&
!fieldKind.shouldBeConvertedToColumnGroup

val shouldCreateFrameCol = kClass == DataFrame::class && !nullable
val shouldCreateColumnGroup = kClass == DataRow::class
Expand Down Expand Up @@ -368,11 +373,12 @@ internal fun convertToDataFrame(
require(it is Iterable<*>)
convertToDataFrame(
data = it,
clazz = elementClass,
type = elementType,
roots = emptyList(),
excludes = excludes,
preserveClasses = preserveClasses,
preserveProperties = preserveProperties,
excludeClasses = excludeClasses,
maxDepth = maxDepth - 1,
)
}
Expand All @@ -386,11 +392,12 @@ internal fun convertToDataFrame(
else -> {
val df = convertToDataFrame(
data = values,
clazz = kClass,
type = returnType,
roots = emptyList(),
excludes = excludes,
preserveClasses = preserveClasses,
preserveProperties = preserveProperties,
excludeClasses = excludeClasses,
maxDepth = maxDepth - 1,
)
DataColumn.createColumnGroup(name = it.columnName, df = df)
Expand All @@ -403,3 +410,15 @@ internal fun convertToDataFrame(
dataFrameOf(columns)
}
}

private fun KType.classifierOrAny(): KClass<*> = classifier as? KClass<*> ?: Any::class

private fun KClass<*>.properties(): List<KCallable<*>> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd either rewrite it declaratively/functionally, like:

private fun KClass<*>.properties(): List<KCallable<*>> =
    memberProperties
        .filter { it.visibility == KVisibility.PUBLIC }
        // fall back to getter functions for pojo-like classes if no member properties were found
        .ifEmpty {
            memberFunctions
                .filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
        }

or imperatively:

private fun KClass<*>.properties(): List<KCallable<*>> {
    val publicMembers = memberProperties.filter { it.visibility == KVisibility.PUBLIC }
    return if (publicMembers.isEmpty()) {
        // fall back to getter functions for pojo-like classes if no member properties were found
        memberFunctions.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
    } else {
        publicMembers
    }
}

(or a variant of either). In either case the linter won't complain while sticking to one code paradigm.

return memberProperties
.filter { it.visibility == KVisibility.PUBLIC }
// fall back to getter functions for pojo-like classes if no member properties were found
.ifEmpty {
memberFunctions
.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@ import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.typeClass
import kotlin.reflect.KType

@PublishedApi
internal fun <T> DataColumn<T>.unfoldImpl(body: CreateDataFrameDsl<T>.() -> Unit): AnyCol =
internal fun <T> DataColumn<T>.unfoldImpl(type: KType, body: CreateDataFrameDsl<T>.() -> Unit): AnyCol =
when (kind()) {
ColumnKind.Group, ColumnKind.Frame -> this

else -> when {
!typeClass.canBeUnfolded -> this

else -> values()
.createDataFrameImpl(typeClass) { (this as CreateDataFrameDsl<T>).body() }
.createDataFrameImpl(type) { (this as CreateDataFrameDsl<T>).body() }
.asColumnGroup(name())
.asDataColumn()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ class CreateDataFrameTests {
df.a[0].v shouldBe 7

val df2 = data.toDataFrame {
preserve(B::row)
properties { preserve(DataFrame::class) }
preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - works
properties {
preserve(DataFrame::class) // this@properties: TraversePropertiesDsl - always works
}
preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - doesn't
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we deprecate this function here? since it doesn't always work

}
df2.frame.kind shouldBe ColumnKind.Value
df2.frame.type shouldBe typeOf<DataFrame<A>>()
Expand Down Expand Up @@ -186,6 +189,24 @@ class CreateDataFrameTests {
res.schema() shouldBe data.toDataFrame(maxDepth = 0).schema()
}

class NestedExcludeClasses(val s: String, val list1: List<String>)

class ExcludeClasses(val i: Int, val list: List<Int>, val nested: NestedExcludeClasses)

@Test
fun `exclude classes`() {
val list = listOf(
ExcludeClasses(1, listOf(1, 2, 3), NestedExcludeClasses("str", listOf("foo", "bar"))),
)
val df = list.toDataFrame {
properties(maxDepth = 2) {
exclude(List::class)
}
}

df shouldBe list.toDataFrame(maxDepth = 2).remove { "list" and "nested"["list1"] }
}

enum class DummyEnum { A }

@Test
Expand Down Expand Up @@ -213,8 +234,7 @@ class CreateDataFrameTests {
df.rowsCount() shouldBe 1

val childCol = df[Entry::child]
childCol.kind() shouldBe ColumnKind.Group
childCol.asColumnGroup().columnsCount() shouldBe 0
childCol.kind() shouldBe ColumnKind.Value
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that was weird, didn't know!

}

@Test
Expand Down Expand Up @@ -632,4 +652,43 @@ class CreateDataFrameTests {
val df = files.toDataFrame(columnName = "files")
df["files"][0] shouldBe File("data.csv")
}

class MyEmptyDeclaration

class TestItem(val name: String, val containingDeclaration: MyEmptyDeclaration, val test: Int)

@Test
fun `preserve empty interface consistency`() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it work as well for interfaces, data classes, and objects?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No for data classes - they require at least one property. But in general anything T that has no member properties / getter like functions, here we rely on Kotlin reflection

val df = listOf(MyEmptyDeclaration(), MyEmptyDeclaration()).toDataFrame()
df["value"].type() shouldBe typeOf<MyEmptyDeclaration>()
}

@Test
fun `preserve nested empty interface consistency`() {
val df = List(10) {
TestItem(
"Test1",
MyEmptyDeclaration(),
123,
)
}.toDataFrame(maxDepth = 2)

df["containingDeclaration"].type() shouldBe typeOf<MyEmptyDeclaration>()
}

@Test
fun `preserve value type consistency`() {
val list = listOf(mapOf("a" to 1))
val df = list.toDataFrame(maxDepth = 1)
df["value"].type() shouldBe typeOf<Map<String, Int>>()
}

class MapContainer(val map: Map<String, Int>)

@Test
fun `preserve nested value type consistency`() {
val list = listOf(MapContainer(mapOf("a" to 1)))
val df = list.toDataFrame(maxDepth = 2)
df["map"].type() shouldBe typeOf<Map<String, Int>>()
}
}
Loading