-
Notifications
You must be signed in to change notification settings - Fork 75
.toDataFrame
fixes
#1475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
.toDataFrame
fixes
#1475
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable | |
import org.jetbrains.kotlinx.dataframe.annotations.Refine | ||
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath | ||
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator | ||
import org.jetbrains.kotlinx.dataframe.impl.api.canBeUnfolded | ||
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl | ||
import org.jetbrains.kotlinx.dataframe.impl.asList | ||
import org.jetbrains.kotlinx.dataframe.impl.columnName | ||
|
@@ -21,28 +20,21 @@ import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API | |
import kotlin.reflect.KCallable | ||
import kotlin.reflect.KClass | ||
import kotlin.reflect.KProperty | ||
import kotlin.reflect.typeOf | ||
|
||
// region read DataFrame from objects | ||
|
||
@Refine | ||
@Interpretable("toDataFrameDefault") | ||
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> = | ||
toDataFrame { | ||
// check if type is value: primitives, primitive arrays, datetime types etc., | ||
// or has no properties | ||
if (!T::class.canBeUnfolded) { | ||
// create a single `value` column | ||
ValueProperty<T>::value.name from { it } | ||
} else { | ||
// otherwise creates columns based on properties | ||
properties() | ||
} | ||
properties() | ||
} | ||
|
||
@Refine | ||
@Interpretable("toDataFrameDsl") | ||
public inline fun <reified T> Iterable<T>.toDataFrame(noinline body: CreateDataFrameDsl<T>.() -> Unit): DataFrame<T> = | ||
createDataFrameImpl(T::class, body) | ||
createDataFrameImpl(typeOf<T>(), body) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice! We should really do a sweep of |
||
|
||
@Refine | ||
@Interpretable("toDataFrame") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,7 @@ import java.time.temporal.TemporalAmount | |
import kotlin.reflect.KCallable | ||
import kotlin.reflect.KClass | ||
import kotlin.reflect.KProperty | ||
import kotlin.reflect.KType | ||
import kotlin.reflect.KVisibility | ||
import kotlin.reflect.full.isSubclassOf | ||
import kotlin.reflect.full.memberFunctions | ||
|
@@ -98,13 +99,11 @@ internal val KClass<*>.isValueType: Boolean | |
*/ | ||
@PublishedApi | ||
internal val KClass<*>.hasProperties: Boolean | ||
get() = this.memberProperties.any { it.visibility == KVisibility.PUBLIC } || | ||
// check pojo-like classes | ||
this.memberFunctions.any { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } | ||
get() = properties().isNotEmpty() | ||
|
||
internal class CreateDataFrameDslImpl<T>( | ||
override val source: Iterable<T>, | ||
private val clazz: KClass<*>, | ||
private val type: KType, | ||
private val prefix: ColumnPath = emptyPath(), | ||
private val configuration: TraverseConfiguration = TraverseConfiguration(), | ||
) : CreateDataFrameDsl<T>(), | ||
|
@@ -119,7 +118,7 @@ internal class CreateDataFrameDslImpl<T>( | |
} | ||
|
||
override operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit) { | ||
val child = CreateDataFrameDslImpl(source, clazz, prefix + this) | ||
val child = CreateDataFrameDslImpl(source, type, prefix + this) | ||
builder(child) | ||
columns.addAll(child.columns) | ||
} | ||
|
@@ -182,11 +181,12 @@ internal class CreateDataFrameDslImpl<T>( | |
} | ||
val df = convertToDataFrame( | ||
data = source, | ||
clazz = clazz, | ||
type = type, | ||
roots = roots.toList(), | ||
excludes = dsl.excludeProperties, | ||
preserveClasses = dsl.preserveClasses, | ||
preserveProperties = dsl.preserveProperties, | ||
excludeClasses = dsl.excludeClasses, | ||
maxDepth = maxDepth, | ||
) | ||
df.columns().forEach { | ||
|
@@ -197,44 +197,48 @@ internal class CreateDataFrameDslImpl<T>( | |
|
||
@PublishedApi | ||
internal fun <T> Iterable<T>.createDataFrameImpl( | ||
clazz: KClass<*>, | ||
type: KType, | ||
body: CreateDataFrameDslImpl<T>.() -> Unit, | ||
): DataFrame<T> { | ||
val builder = CreateDataFrameDslImpl(this, clazz) | ||
val builder = CreateDataFrameDslImpl(this, type) | ||
builder.body() | ||
return builder.columns.toDataFrameFromPairs() | ||
} | ||
|
||
@PublishedApi | ||
internal fun convertToDataFrame( | ||
data: Iterable<*>, | ||
clazz: KClass<*>, | ||
type: KType, | ||
roots: List<KCallable<*>>, | ||
excludes: Set<KCallable<*>>, | ||
preserveClasses: Set<KClass<*>>, | ||
preserveProperties: Set<KCallable<*>>, | ||
excludeClasses: Set<KClass<*>>, | ||
maxDepth: Int, | ||
): AnyFrame { | ||
val clazz = type.classifierOrAny() | ||
// this check relies on later recursive calls having roots = emptyList() | ||
if (roots.isEmpty() && !clazz.canBeUnfolded) { | ||
val column = DataColumn.createByType("value", data.toList(), type) | ||
return dataFrameOf(column) | ||
} | ||
|
||
val properties: List<KCallable<*>> = roots | ||
.ifEmpty { | ||
clazz.memberProperties | ||
.filter { it.visibility == KVisibility.PUBLIC } | ||
} | ||
// fall back to getter functions for pojo-like classes if no member properties were found | ||
.ifEmpty { | ||
clazz.memberFunctions | ||
.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } | ||
clazz.properties() | ||
} | ||
// sort properties by order in constructor | ||
.sortWithConstructor(clazz) | ||
|
||
val columns = properties.mapNotNull { | ||
val property = it | ||
if (excludes.contains(property)) return@mapNotNull null | ||
val klass = it.returnType.classifier as? KClass<*> | ||
if (excludeClasses.contains(klass)) return@mapNotNull null | ||
|
||
class ValueClassConverter(val unbox: Method, val box: Method) | ||
|
||
val valueClassConverter = (it.returnType.classifier as? KClass<*>)?.let { kClass -> | ||
val valueClassConverter = klass?.let { kClass -> | ||
if (!kClass.isValue) return@let null | ||
|
||
val constructor = requireNotNull(kClass.primaryConstructor) { | ||
|
@@ -302,10 +306,11 @@ internal fun convertToDataFrame( | |
val keepSubtree = | ||
maxDepth <= 0 && !fieldKind.shouldBeConvertedToFrameColumn && !fieldKind.shouldBeConvertedToColumnGroup | ||
val shouldCreateValueCol = keepSubtree || | ||
kClass == Any::class || | ||
kClass in preserveClasses || | ||
property in preserveProperties || | ||
kClass.isValueType | ||
!kClass.canBeUnfolded && | ||
!fieldKind.shouldBeConvertedToFrameColumn && | ||
!fieldKind.shouldBeConvertedToColumnGroup | ||
|
||
val shouldCreateFrameCol = kClass == DataFrame::class && !nullable | ||
val shouldCreateColumnGroup = kClass == DataRow::class | ||
|
@@ -368,11 +373,12 @@ internal fun convertToDataFrame( | |
require(it is Iterable<*>) | ||
convertToDataFrame( | ||
data = it, | ||
clazz = elementClass, | ||
type = elementType, | ||
roots = emptyList(), | ||
excludes = excludes, | ||
preserveClasses = preserveClasses, | ||
preserveProperties = preserveProperties, | ||
excludeClasses = excludeClasses, | ||
maxDepth = maxDepth - 1, | ||
) | ||
} | ||
|
@@ -386,11 +392,12 @@ internal fun convertToDataFrame( | |
else -> { | ||
val df = convertToDataFrame( | ||
data = values, | ||
clazz = kClass, | ||
type = returnType, | ||
roots = emptyList(), | ||
excludes = excludes, | ||
preserveClasses = preserveClasses, | ||
preserveProperties = preserveProperties, | ||
excludeClasses = excludeClasses, | ||
maxDepth = maxDepth - 1, | ||
) | ||
DataColumn.createColumnGroup(name = it.columnName, df = df) | ||
|
@@ -403,3 +410,15 @@ internal fun convertToDataFrame( | |
dataFrameOf(columns) | ||
} | ||
} | ||
|
||
private fun KType.classifierOrAny(): KClass<*> = classifier as? KClass<*> ?: Any::class | ||
|
||
private fun KClass<*>.properties(): List<KCallable<*>> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd either rewrite it declaratively/functionally, like: private fun KClass<*>.properties(): List<KCallable<*>> =
memberProperties
.filter { it.visibility == KVisibility.PUBLIC }
// fall back to getter functions for pojo-like classes if no member properties were found
.ifEmpty {
memberFunctions
.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
} or imperatively: private fun KClass<*>.properties(): List<KCallable<*>> {
val publicMembers = memberProperties.filter { it.visibility == KVisibility.PUBLIC }
return if (publicMembers.isEmpty()) {
// fall back to getter functions for pojo-like classes if no member properties were found
memberFunctions.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() }
} else {
publicMembers
}
} (or a variant of either). In either case the linter won't complain while sticking to one code paradigm. |
||
return memberProperties | ||
.filter { it.visibility == KVisibility.PUBLIC } | ||
// fall back to getter functions for pojo-like classes if no member properties were found | ||
.ifEmpty { | ||
memberFunctions | ||
.filter { it.visibility == KVisibility.PUBLIC && it.isGetterLike() } | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,8 +156,11 @@ class CreateDataFrameTests { | |
df.a[0].v shouldBe 7 | ||
|
||
val df2 = data.toDataFrame { | ||
preserve(B::row) | ||
properties { preserve(DataFrame::class) } | ||
preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - works | ||
properties { | ||
preserve(DataFrame::class) // this@properties: TraversePropertiesDsl - always works | ||
} | ||
preserve(B::row) // this@toDataFrame: TraversePropertiesDsl - doesn't | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we deprecate this function here? since it doesn't always work |
||
} | ||
df2.frame.kind shouldBe ColumnKind.Value | ||
df2.frame.type shouldBe typeOf<DataFrame<A>>() | ||
|
@@ -186,6 +189,24 @@ class CreateDataFrameTests { | |
res.schema() shouldBe data.toDataFrame(maxDepth = 0).schema() | ||
} | ||
|
||
class NestedExcludeClasses(val s: String, val list1: List<String>) | ||
|
||
class ExcludeClasses(val i: Int, val list: List<Int>, val nested: NestedExcludeClasses) | ||
|
||
@Test | ||
fun `exclude classes`() { | ||
val list = listOf( | ||
ExcludeClasses(1, listOf(1, 2, 3), NestedExcludeClasses("str", listOf("foo", "bar"))), | ||
) | ||
val df = list.toDataFrame { | ||
properties(maxDepth = 2) { | ||
exclude(List::class) | ||
} | ||
} | ||
|
||
df shouldBe list.toDataFrame(maxDepth = 2).remove { "list" and "nested"["list1"] } | ||
} | ||
|
||
enum class DummyEnum { A } | ||
|
||
@Test | ||
|
@@ -213,8 +234,7 @@ class CreateDataFrameTests { | |
df.rowsCount() shouldBe 1 | ||
|
||
val childCol = df[Entry::child] | ||
childCol.kind() shouldBe ColumnKind.Group | ||
childCol.asColumnGroup().columnsCount() shouldBe 0 | ||
childCol.kind() shouldBe ColumnKind.Value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, that was weird, didn't know! |
||
} | ||
|
||
@Test | ||
|
@@ -632,4 +652,43 @@ class CreateDataFrameTests { | |
val df = files.toDataFrame(columnName = "files") | ||
df["files"][0] shouldBe File("data.csv") | ||
} | ||
|
||
class MyEmptyDeclaration | ||
|
||
class TestItem(val name: String, val containingDeclaration: MyEmptyDeclaration, val test: Int) | ||
|
||
@Test | ||
fun `preserve empty interface consistency`() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does it work as well for interfaces, data classes, and objects? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No for data classes - they require at least one property. But in general anything T that has no member properties / getter like functions, here we rely on Kotlin reflection |
||
val df = listOf(MyEmptyDeclaration(), MyEmptyDeclaration()).toDataFrame() | ||
df["value"].type() shouldBe typeOf<MyEmptyDeclaration>() | ||
} | ||
|
||
@Test | ||
fun `preserve nested empty interface consistency`() { | ||
val df = List(10) { | ||
TestItem( | ||
"Test1", | ||
MyEmptyDeclaration(), | ||
123, | ||
) | ||
}.toDataFrame(maxDepth = 2) | ||
|
||
df["containingDeclaration"].type() shouldBe typeOf<MyEmptyDeclaration>() | ||
} | ||
|
||
@Test | ||
fun `preserve value type consistency`() { | ||
val list = listOf(mapOf("a" to 1)) | ||
val df = list.toDataFrame(maxDepth = 1) | ||
df["value"].type() shouldBe typeOf<Map<String, Int>>() | ||
} | ||
|
||
class MapContainer(val map: Map<String, Int>) | ||
|
||
@Test | ||
fun `preserve nested value type consistency`() { | ||
val list = listOf(MapContainer(mapOf("a" to 1))) | ||
val df = list.toDataFrame(maxDepth = 2) | ||
df["map"].type() shouldBe typeOf<Map<String, Int>>() | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Turning this off project-wide disables the quick-fix too. I use this quite often :( I'd rather recommend suppressing it on a class/file/function basis if you really need it for clarity. The KtLint plugin has a quick-fix option for suppressing.