Skip to content

Commit 839fbef

Browse files
committed
Add DataFrame.emptyOf<T>(). Remove nrow argument from emptyDataFrame. Preserve properties order in empty DataFrame created from data class schema.
1 parent 4a2a9c9 commit 839fbef

File tree

11 files changed

+125
-9
lines changed

11 files changed

+125
-9
lines changed

src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
1414
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1515
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
1616
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
17+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
1718
import kotlin.reflect.KType
1819

1920
/**
@@ -28,6 +29,8 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
2829
public companion object {
    /** Shared [DataFrame] instance created with an empty column list and a row count of `0`. */
    public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)

    /**
     * Returns a [DataFrame] without columns, passing [nrow] as the row count.
     *
     * The shared [Empty] instance is returned when [nrow] is `0`; otherwise a new instance is created.
     *
     * @param nrow row count handed to the created [DataFrame], `0` by default
     */
    public fun empty(nrow: Int = 0): AnyFrame {
        if (nrow == 0) return Empty
        return DataFrameImpl<Unit>(emptyList(), nrow)
    }

    /**
     * Returns an empty [DataFrame] built by [createEmptyDataFrameOf] for the class of [T]
     * and cast to `DataFrame<T>`.
     *
     * @param T class the resulting [DataFrame] is created for
     */
    public inline fun <reified T> emptyOf(): DataFrame<T> {
        val untyped = createEmptyDataFrameOf(T::class)
        return untyped.cast()
    }
}
3235

3336
// region columns

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@ public fun <T> Iterable<DataColumn<T>>.concat(): DataColumn<T> {
4848
}
4949

5050
@JvmName("concatRows")
51-
public fun <T> Iterable<DataRow<T>?>.concat(): DataFrame<T> = concatImpl(map { it?.toDataFrame() ?: emptyDataFrame(1) })
51+
public fun <T> Iterable<DataRow<T>?>.concat(): DataFrame<T> =
    concatImpl(
        // A `null` row is replaced by the column-less frame produced by `DataFrame.empty(1)`.
        map { row -> row?.toDataFrame() ?: DataFrame.empty(1).cast() }
    )
5252

5353
// endregion

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,14 @@ public class DataFrameBuilder(private val header: List<String>) {
271271
public fun randomBoolean(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextBoolean() }
272272
}
273273

274-
public fun <T> emptyDataFrame(nrow: Int = 0): DataFrame<T> = DataFrame.empty(nrow).cast()
274+
/**
 * Creates a [DataFrame] that has neither rows nor columns.
 *
 * For a [DataFrame] without columns but with a given number of rows see [DataFrame.empty];
 * for an empty [DataFrame] created from a schema class see [DataFrame.emptyOf].
 *
 * @param T schema marker for the returned [DataFrame]
 */
public fun <T> emptyDataFrame(): DataFrame<T> {
    // `DataFrame.empty()` with its default `nrow = 0` returns this same shared instance.
    return DataFrame.Empty.cast()
}
275282

276283
// endregion
277284

src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/Marker.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public open class Marker(
6464
fields.forEach {
6565
fieldsMap[it.fieldName.quotedIfNeeded] = it
6666
}
67-
fieldsMap.values.sortedBy { it.fieldName.quotedIfNeeded }
67+
fieldsMap.values.toList()
6868
}
6969

7070
public val allFieldsByColumn: Map<String, GeneratedField> by lazy {

src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
44
import org.jetbrains.kotlinx.dataframe.DataRow
55
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
66
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
7+
import org.jetbrains.kotlinx.dataframe.impl.schema.getPropertiesOrder
78
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
89
import kotlin.reflect.KClass
910
import kotlin.reflect.full.declaredMemberProperties
@@ -33,8 +34,9 @@ internal object MarkersExtractor {
3334
)
3435
}
3536

36-
private fun getFields(markerClass: KClass<*>): List<GeneratedField> =
37-
markerClass.declaredMemberProperties.mapIndexed { index, it ->
37+
private fun getFields(markerClass: KClass<*>): List<GeneratedField> {
38+
val order = getPropertiesOrder(markerClass)
39+
return markerClass.declaredMemberProperties.sortedBy { order[it.name] ?: Int.MAX_VALUE }.mapIndexed { index, it ->
3840
val fieldName = ValidFieldName.of(it.name)
3941
val columnName = it.findAnnotation<ColumnName>()?.name ?: fieldName.unquoted
4042
val type = it.returnType
@@ -61,4 +63,5 @@ internal object MarkersExtractor {
6163

6264
GeneratedField(fieldName, columnName, false, columnSchema, fieldType)
6365
}
66+
}
6467
}

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>?>, columnS
2828

2929
if (columns.all { it == null || it.isColumnGroup() }) {
3030
val frames = columns.mapIndexed { index, col ->
31-
col?.asColumnGroup() ?: emptyDataFrame(columnSizes[index])
31+
col?.asColumnGroup() ?: DataFrame.empty(columnSizes[index])
3232
}
3333
val merged = concatImpl(frames)
3434
return DataColumn.createColumnGroup(name, merged).asDataColumn().cast()

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1111
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
1212
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
1313
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
14+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1415
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
1516
import org.jetbrains.kotlinx.dataframe.kind
17+
import org.jetbrains.kotlinx.dataframe.ncol
1618
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
1719
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
1820
import kotlin.reflect.KType
@@ -23,6 +25,7 @@ public enum class ExtraColumns { Remove, Keep, Fail }
2325
@PublishedApi
2426
internal fun <T> AnyFrame.convertToImpl(type: KType, allowConversion: Boolean, extraColumns: ExtraColumns): DataFrame<T> {
2527
fun AnyFrame.convertToSchema(schema: DataFrameSchema): AnyFrame {
28+
if (ncol == 0) return schema.createEmptyDataFrame()
2629
var visited = 0
2730
val newColumns = columns().mapNotNull {
2831
val targetColumn = schema.columns[it.name()]

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ import org.jetbrains.kotlinx.dataframe.impl.columnName
1414
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
1515
import org.jetbrains.kotlinx.dataframe.impl.getListType
1616
import org.jetbrains.kotlinx.dataframe.impl.projectUpTo
17+
import org.jetbrains.kotlinx.dataframe.impl.schema.getPropertiesOrder
1718
import java.lang.reflect.InvocationTargetException
1819
import java.time.temporal.Temporal
1920
import kotlin.reflect.KClass
2021
import kotlin.reflect.KProperty
2122
import kotlin.reflect.KVisibility
2223
import kotlin.reflect.full.isSubclassOf
2324
import kotlin.reflect.full.memberProperties
24-
import kotlin.reflect.full.primaryConstructor
2525
import kotlin.reflect.full.withNullability
2626
import kotlin.reflect.jvm.javaField
2727

@@ -92,12 +92,12 @@ internal fun convertToDataFrame(
9292
preserves: Set<KClass<*>>,
9393
depth: Int
9494
): AnyFrame {
95-
val constructorParameters = clazz.primaryConstructor?.parameters?.mapNotNull { it.name }?.mapIndexed { i, v -> v to i }?.toMap() ?: emptyMap()
95+
val order = getPropertiesOrder(clazz)
9696

9797
val properties = roots.ifEmpty {
9898
clazz.memberProperties
9999
.filter { it.visibility == KVisibility.PUBLIC && it.parameters.toList().size == 1 }
100-
}.sortedBy { constructorParameters[it.name] ?: Int.MAX_VALUE }
100+
}.sortedBy { order[it.name] ?: Int.MAX_VALUE }
101101

102102
val columns = properties.mapNotNull {
103103
val property = it

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package org.jetbrains.kotlinx.dataframe.impl.schema
22

33
import org.jetbrains.kotlinx.dataframe.AnyCol
44
import org.jetbrains.kotlinx.dataframe.AnyFrame
5+
import org.jetbrains.kotlinx.dataframe.DataColumn
56
import org.jetbrains.kotlinx.dataframe.api.schema
7+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
8+
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
69
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
710
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
811
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
@@ -12,6 +15,8 @@ import org.jetbrains.kotlinx.dataframe.impl.baseType
1215
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
1316
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
1417
import org.jetbrains.kotlinx.dataframe.type
18+
import kotlin.reflect.KClass
19+
import kotlin.reflect.full.primaryConstructor
1520
import kotlin.reflect.full.withNullability
1621
import kotlin.reflect.typeOf
1722

@@ -72,3 +77,17 @@ internal fun AnyCol.extractSchema(): ColumnSchema = when (this) {
7277
)
7378
else -> throw RuntimeException()
7479
}
80+
81+
/**
 * Creates a column called [name] that holds no values and matches the kind described by this [ColumnSchema].
 *
 * - [ColumnSchema.Value]: a value column with an empty value list and the schema's `type`.
 * - [ColumnSchema.Group]: a column group wrapping the empty frame created from the nested schema.
 * - [ColumnSchema.Frame]: a frame column with an empty list of frames and the nested schema supplied via `lazyOf`.
 *
 * Any other [ColumnSchema] implementation fails with an "Unexpected ColumnSchema" error.
 */
internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol {
    return when (this) {
        is ColumnSchema.Value ->
            DataColumn.createValueColumn<Any?>(name, emptyList(), type)

        is ColumnSchema.Group ->
            DataColumn.createColumnGroup(name, schema.createEmptyDataFrame()) as AnyCol

        is ColumnSchema.Frame ->
            DataColumn.createFrameColumn<Any?>(name, emptyList(), lazyOf(schema))

        else ->
            error("Unexpected ColumnSchema: $this")
    }
}
87+
/**
 * Creates a [AnyFrame] that contains one empty column (see [createEmptyColumn]) for every entry of
 * this schema's `columns`, in the iteration order of that map.
 */
internal fun DataFrameSchema.createEmptyDataFrame(): AnyFrame {
    val emptyColumns = columns.map { entry -> entry.value.createEmptyColumn(entry.key) }
    return emptyColumns.toDataFrame()
}
88+
89+
/**
 * Creates an empty [AnyFrame] from the schema of the marker that [MarkersExtractor] returns for [clazz].
 *
 * Marked [PublishedApi] so that the public inline `DataFrame.emptyOf` can call it.
 */
@PublishedApi
internal fun createEmptyDataFrameOf(clazz: KClass<*>): AnyFrame {
    val marker = MarkersExtractor[clazz]
    return marker.schema.createEmptyDataFrame()
}
91+
92+
/**
 * Maps the name of every named primary-constructor parameter of [clazz] to its position among
 * those named parameters.
 *
 * Returns an empty map when [clazz] has no primary constructor.
 */
internal fun getPropertiesOrder(clazz: KClass<*>): Map<String, Int> {
    val parameterNames = clazz.primaryConstructor?.parameters?.mapNotNull { it.name }
    return parameterNames
        ?.withIndex()
        ?.associate { (position, parameterName) -> parameterName to position }
        .orEmpty()
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
6+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
7+
import org.junit.Test
8+
import kotlin.reflect.typeOf
9+
10+
/**
 * Tests for `convertTo`.
 */
class ConvertToTests {

    /**
     * Converts a frame column holding one frame with a `String` column "a" and one `DataFrame.empty()`
     * to a schema whose nested frames declare `a: Int`, then checks that column "a" has type `Int`
     * in every nested frame of the result.
     */
    @Test
    fun `convert frame column with empty frames`() {
        // The delegated property name ("groups") is the column name and must match `Outer.groups`.
        val groups by columnOf(dataFrameOf("a")("1"), DataFrame.empty())
        val source = dataFrameOf(groups)

        @DataSchema
        data class Inner(val a: Int)

        // Named `Outer` to avoid clashing with the imported `schema.DataFrameSchema`.
        @DataSchema
        data class Outer(val groups: DataFrame<Inner>)

        val result = source.convertTo<Outer>()
        val expectedType = typeOf<Int>()

        result[groups].forEach { nested ->
            nested["a"].type() shouldBe expectedType
        }
    }
}

0 commit comments

Comments
 (0)