Skip to content

Commit 72354c5

Browse files
authored
Merge pull request #990 from Kotlin/compileTimeSchemaOrder
Sort df.compileTimeSchema() columns according to df.schema() so they're easier to compare
2 parents 651cbae + 791dfb7 commit 72354c5

File tree

4 files changed

+88
-3
lines changed

4 files changed

+88
-3
lines changed

core/api/core.api

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9799,6 +9799,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/MapKt {
97999799
public static final fun mapNotNullValues (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
98009800
}
98019801

9802+
public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
9803+
public static final fun compileTimeSchemaImpl (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
9804+
}
9805+
98029806
public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
98039807
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
98049808
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.AnyFrame
44
import org.jetbrains.kotlinx.dataframe.AnyRow
55
import org.jetbrains.kotlinx.dataframe.DataFrame
6+
import org.jetbrains.kotlinx.dataframe.impl.api.compileTimeSchemaImpl
67
import org.jetbrains.kotlinx.dataframe.impl.owner
78
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
8-
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
99
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
1010

1111
// region DataRow
@@ -26,5 +26,5 @@ public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema()
2626

2727
// endregion
2828

29-
@Suppress("UnusedReceiverParameter")
30-
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema = getSchema(T::class)
29+
/**
 * Returns the schema described by the compile-time type [T] of this data frame.
 *
 * The runtime [schema] of the receiver is handed to the implementation together with `T::class`
 * so that the resulting columns can be arranged in the runtime column order, which makes the two
 * schemas easier to compare side by side.
 */
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema =
    compileTimeSchemaImpl(runtimeSchema = schema(), klass = T::class)
core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/schema.kt

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.api
2+
3+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
4+
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
5+
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
6+
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
7+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
8+
import kotlin.reflect.KClass
9+
10+
/**
 * Builds the schema declared by [klass] and arranges its columns, at every nesting level,
 * by the position the same column path has in [runtimeSchema].
 *
 * @param runtimeSchema schema whose column order is used as the reference ordering.
 * @param klass class from which the compile-time schema is extracted via `getSchema`.
 * @return the compile-time schema with columns reordered by `sortedBy`.
 */
@PublishedApi
internal fun compileTimeSchemaImpl(runtimeSchema: DataFrameSchema, klass: KClass<*>): DataFrameSchema {
    val declared = getSchema(klass)
    val rootPath = ColumnPath(emptyList())
    // Position of every column path (nested ones included) within its parent in the runtime schema.
    val positions: Map<ColumnPath, Int> = buildMap { putColumnsOrder(runtimeSchema, path = rootPath) }
    return declared.sortedBy(order = positions, path = rootPath)
}
19+
20+
/**
 * Records, for every column of [schema], its index among its siblings under the key `path + name`,
 * then descends into the nested schema of frame and group columns.
 *
 * @param schema schema whose columns are being indexed.
 * @param path path of the column that owns [schema]; an empty path is used for the top level.
 */
internal fun MutableMap<ColumnPath, Int>.putColumnsOrder(schema: DataFrameSchema, path: ColumnPath) {
    for ((index, entry) in schema.columns.entries.withIndex()) {
        val childPath = path + entry.key
        put(childPath, index)
        // Only frame and group columns carry a nested schema to recurse into.
        val nested = when (val column = entry.value) {
            is ColumnSchema.Frame -> column.schema
            is ColumnSchema.Group -> column.schema
            else -> null
        }
        if (nested != null) {
            putColumnsOrder(nested, childPath)
        }
    }
}
35+
36+
/**
 * Returns a copy of this schema whose columns, at every nesting level, are sorted by the
 * index stored in [order] for their full column path.
 *
 * A path that is missing from [order] yields a `null` sort key; the standard library's
 * `sortedBy` orders `null` keys before non-null ones, and the sort is stable.
 *
 * @param order index of each column path among its siblings.
 * @param path path of the column that owns this schema; an empty path is used for the top level.
 */
internal fun DataFrameSchema.sortedBy(order: Map<ColumnPath, Int>, path: ColumnPath): DataFrameSchema {
    // Sorts the nested schema of the frame/group column called `name`.
    fun sortNested(nested: DataFrameSchema, name: String): DataFrameSchema = nested.sortedBy(order, path + name)

    val rebuilt = columns.entries.map { entry ->
        val name = entry.key
        val reorderedColumn = when (val column = entry.value) {
            is ColumnSchema.Frame ->
                ColumnSchema.Frame(sortNested(column.schema, name), column.nullable, column.contentType)

            is ColumnSchema.Group ->
                ColumnSchema.Group(sortNested(column.schema, name), column.contentType)

            is ColumnSchema.Value -> column

            else -> TODO("unexpected ColumnSchema class ${column::class}")
        }
        name to reorderedColumn
    }
    val ordered = rebuilt.sortedBy { (name, _) -> order[path + name] }
    return DataFrameSchemaImpl(ordered.toMap())
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.DataRow
5+
import org.junit.Test
6+
7+
/**
 * Tests for the schema API of data frames.
 */
class SchemaTests {
    /**
     * The data frame is created with columns in the order `abc, a, a123, nested` (and `c, b` inside
     * the nested row), which differs from the declaration order of the `Schema` and `Nested`
     * interfaces. The compile-time schema is expected to render identically to the runtime schema.
     */
    @Test
    fun `columns order test`() {
        val row = dataFrameOf("c", "b")(4, 5).first()
        val df = dataFrameOf("abc", "a", "a123", "nested")(1, 2, 3, row).cast<Schema>()
        // Kotest reads `actual shouldBe expected`: the compile-time schema is the value under test,
        // the runtime schema is the reference, so failure messages report them the right way round.
        df.compileTimeSchema().toString() shouldBe df.schema().toString()
    }
}
15+
16+
// Compile-time schema used by `columns order test` via `cast<Schema>()`.
// The properties are declared as a, abc, a123, nested, whereas the test builds its data frame
// with the columns abc, a, a123, nested; keep this declaration order as it is.
private interface Schema {
17+
val a: Int
18+
val abc: Int
19+
val a123: Int
20+
val nested: DataRow<Nested>
21+
}
22+
23+
// Schema of the row stored in `Schema.nested`.
// The properties are declared as b, c, whereas the test builds the nested row with the
// columns c, b; keep this declaration order as it is.
private interface Nested {
24+
val b: Int
25+
val c: Int
26+
}

0 commit comments

Comments
 (0)