Skip to content

Commit 993b5ad

Browse files
committed
[Compiler plugin] fix explode multiple columns
1 parent 656a07e commit 993b5ad

File tree

2 files changed

+28
-19
lines changed
  • plugins/kotlin-dataframe

2 files changed

+28
-19
lines changed

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,13 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.Arguments
66
import org.jetbrains.kotlinx.dataframe.plugin.impl.PluginDataFrameSchema
77
import org.jetbrains.kotlinx.dataframe.plugin.impl.Present
88
import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleCol
9-
import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleDataColumn
109
import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup
10+
import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleDataColumn
1111
import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleFrameColumn
1212
import org.jetbrains.kotlinx.dataframe.plugin.impl.data.ColumnPathApproximation
1313
import org.jetbrains.kotlinx.dataframe.plugin.impl.data.ColumnWithPathApproximation
1414
import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame
15+
import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
1516

1617
internal class Explode0 : AbstractInterpreter<PluginDataFrameSchema>() {
1718
val Arguments.dropEmpty: Boolean by arg(defaultValue = Present(true))
@@ -20,14 +21,21 @@ internal class Explode0 : AbstractInterpreter<PluginDataFrameSchema>() {
2021
override val Arguments.startingSchema get() = receiver
2122

2223
override fun Arguments.interpret(): PluginDataFrameSchema {
23-
val columns = selector ?: TODO()
24+
val columns = selector ?: object : ColumnsResolver {
25+
override fun resolve(df: PluginDataFrameSchema): List<ColumnWithPathApproximation> {
26+
return df.flatten(includeFrames = false).filter {
27+
val column = it.column
28+
column is SimpleFrameColumn || column is SimpleDataColumn && column.type.isList()
29+
}
30+
}
31+
}
2432
return receiver.explodeImpl(dropEmpty, columns.resolve(receiver).map { ColumnPathApproximation(it.path.path) })
2533
}
2634
}
2735

28-
val KotlinTypeFacade.explodeImpl: PluginDataFrameSchema.(dropEmpty: Boolean, selector: List<ColumnPathApproximation>?) -> PluginDataFrameSchema
36+
val KotlinTypeFacade.explodeImpl: PluginDataFrameSchema.(dropEmpty: Boolean, selector: List<ColumnPathApproximation>) -> PluginDataFrameSchema
2937
get() = { dropEmpty, selector ->
30-
val columns = selector ?: TODO()
38+
val columns = selector
3139

3240
val selected: Set<List<String>> = columns.map { it.path }.toSet()
3341

@@ -36,9 +44,7 @@ val KotlinTypeFacade.explodeImpl: PluginDataFrameSchema.(dropEmpty: Boolean, sel
3644
is SimpleColumnGroup -> SimpleColumnGroup(column.name, column.columns().map { makeNullable(it) })
3745
is SimpleFrameColumn -> column
3846
is SimpleDataColumn -> {
39-
// val nullable = if (dropEmpty) (column.type as TypeApproximationImpl).nullable else true
40-
41-
column.changeType(type = column.type.changeNullability { nullable -> if (dropEmpty) nullable else true })
47+
column.changeType(type = column.type.changeNullability { nullable -> selector.size > 1 || !dropEmpty || nullable })
4248
}
4349
}
4450
}
@@ -61,7 +67,7 @@ val KotlinTypeFacade.explodeImpl: PluginDataFrameSchema.(dropEmpty: Boolean, sel
6167
column.type.isList() -> column.type.typeArgument()
6268
else -> column.type
6369
}
64-
SimpleDataColumn(column.name, newType)
70+
makeNullable(simpleColumnOf(column.name, newType.type))
6571
} else {
6672
column
6773
}

plugins/kotlin-dataframe/testData/box/explode.kt

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,23 @@ import org.jetbrains.kotlinx.dataframe.annotations.*
33
import org.jetbrains.kotlinx.dataframe.api.*
44
import org.jetbrains.kotlinx.dataframe.io.*
55

6-
@DataSchema
7-
interface ExplodeSchema {
8-
val timestamps: List<Int>
9-
}
10-
11-
fun explode(df: DataFrame<ExplodeSchema>) {
12-
val res = df.explode { timestamps }
13-
val col: DataColumn<Int> = res.timestamps
14-
}
15-
166
fun box(): String {
17-
val df = dataFrameOf("timestamps")(listOf(100, 113, 140), listOf(400, 410, 453)).cast<ExplodeSchema>()
7+
val df = dataFrameOf("timestamps")(listOf(100, 113, 140), listOf(400, 410, 453))
188
val df1 = df.explode { timestamps }
199
val timestamps: DataColumn<Int> = df1.timestamps
2010
timestamps.print()
11+
12+
13+
val df2 = dataFrameOf("a", "b")(listOf(100, 113, 140), listOf(400, 410))
14+
val df3 = df2.explode { a and b }
15+
// exploding multiple columns will introduce nulls
16+
df3.print()
17+
// compiler needs to play safe and make both selected columns nullable
18+
df3.compileTimeSchema().columns.let {
19+
assert(it["a"]!!.nullable)
20+
assert(it["b"]!!.nullable)
21+
}
22+
// compile time schema is still compatible with runtime
23+
df3.compareSchemas()
2124
return "OK"
2225
}

0 commit comments

Comments
 (0)