Skip to content

Commit 1fc7197

Browse files
committed
Fixed one failing test for #713: convertTo can create a temporary non-nullable column with nulls in it. This is now avoided and the behavior has been simplified/stabilized
1 parent ba1f343 commit 1fc7197

File tree

2 files changed

+39
-27
lines changed

2 files changed

+39
-27
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ import org.jetbrains.kotlinx.dataframe.api.ConvertSchemaDsl
1111
import org.jetbrains.kotlinx.dataframe.api.ConverterScope
1212
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
1313
import org.jetbrains.kotlinx.dataframe.api.Infer
14+
import org.jetbrains.kotlinx.dataframe.api.add
1415
import org.jetbrains.kotlinx.dataframe.api.all
1516
import org.jetbrains.kotlinx.dataframe.api.allNulls
1617
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
1718
import org.jetbrains.kotlinx.dataframe.api.concat
1819
import org.jetbrains.kotlinx.dataframe.api.convertTo
1920
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
20-
import org.jetbrains.kotlinx.dataframe.api.getColumnPaths
2121
import org.jetbrains.kotlinx.dataframe.api.isEmpty
2222
import org.jetbrains.kotlinx.dataframe.api.map
2323
import org.jetbrains.kotlinx.dataframe.api.name
@@ -29,12 +29,14 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
2929
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
3030
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
3131
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
32+
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
3233
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
3334
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
3435
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3536
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
36-
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn
37+
import org.jetbrains.kotlinx.dataframe.impl.getColumnPaths
3738
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
39+
import org.jetbrains.kotlinx.dataframe.impl.schema.createNullFilledColumn
3840
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
3941
import org.jetbrains.kotlinx.dataframe.impl.schema.render
4042
import org.jetbrains.kotlinx.dataframe.kind
@@ -252,22 +254,15 @@ internal fun AnyFrame.convertToImpl(
252254
}
253255
}.toMutableList()
254256

255-
// when the target is nullable but the source does not contain a column, fill it in with nulls / empty dataframes
257+
// when the target is nullable but the source does not contain a column,
258+
// fill it in with nulls / empty dataframes
256259
val size = this.size.nrow
257260
schema.columns.forEach { (name, targetColumn) ->
258-
val isNullable =
259-
// like value column of type Int?
260-
targetColumn.nullable ||
261-
// like value column of type Int? (backup check)
262-
targetColumn.type.isMarkedNullable ||
263-
// like DataRow<Something?> for a group column (all columns in the group will be nullable)
264-
targetColumn.contentType?.isMarkedNullable == true ||
265-
// frame column can be filled with empty dataframes
266-
targetColumn.kind == ColumnKind.Frame
267-
268261
if (name !in visited) {
269-
newColumns += targetColumn.createEmptyColumn(name, size)
270-
if (!isNullable) {
262+
try {
263+
newColumns += targetColumn.createNullFilledColumn(name, size)
264+
} catch (e: IllegalStateException) {
265+
// if this could not be done automatically, they need to be filled manually
271266
missingPaths.add(path + name)
272267
}
273268
}
@@ -280,10 +275,16 @@ internal fun AnyFrame.convertToImpl(
280275
var result = convertToSchema(marker.schema, emptyPath())
281276

282277
dsl.fillers.forEach { filler ->
283-
val paths = result.getColumnPaths(filler.columns)
284-
missingPaths.removeAll(paths.toSet())
285-
result = result.update { paths.toColumnSet() }.with {
286-
filler.expr(this, this)
278+
val paths = result.getColumnPaths(UnresolvedColumnsPolicy.Create, filler.columns).toSet()
279+
missingPaths -= paths
280+
val (newPaths, existingPaths) = paths.partition { it in missingPaths }
281+
282+
// first fill cols that are already in the df
283+
result = result.update { existingPaths.toColumnSet() }.with { filler.expr(this, this) }
284+
285+
// then create any missing ones by filling
286+
result = newPaths.fold(result) { df, newPath ->
287+
df.add(newPath, Infer.Type) { filler.expr(this, this) }
287288
}
288289
}
289290

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ internal fun AnyCol.extractSchema(): ColumnSchema =
102102
@PublishedApi
103103
internal fun getSchema(kClass: KClass<*>): DataFrameSchema = MarkersExtractor.get(kClass).schema
104104

105+
/**
106+
* Create "empty" column based on the toplevel of [this] [ColumnSchema].
107+
*/
105108
internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
106109
when (this) {
107110
is ColumnSchema.Value -> DataColumn.createValueColumn<Any?>(name, emptyList(), type)
@@ -110,14 +113,22 @@ internal fun ColumnSchema.createEmptyColumn(name: String): AnyCol =
110113
else -> error("Unexpected ColumnSchema: $this")
111114
}
112115

113-
/** Create "empty" column, filled with either null or empty dataframes. */
114-
internal fun ColumnSchema.createEmptyColumn(name: String, numberOfRows: Int): AnyCol =
116+
/**
117+
* Creates a column based on [this] [ColumnSchema] filled with `null` or empty dataframes.
118+
* @throws IllegalStateException if the column is not nullable and [numberOfRows]` > 0`.
119+
*/
120+
internal fun ColumnSchema.createNullFilledColumn(name: String, numberOfRows: Int): AnyCol =
115121
when (this) {
116-
is ColumnSchema.Value -> DataColumn.createValueColumn(
117-
name = name,
118-
values = List(numberOfRows) { null },
119-
type = type,
120-
)
122+
is ColumnSchema.Value -> {
123+
if (!type.isMarkedNullable && numberOfRows > 0) {
124+
error("Cannot create a null-filled value column of type $type as it's not nullable.")
125+
}
126+
DataColumn.createValueColumn(
127+
name = name,
128+
values = List(numberOfRows) { null },
129+
type = type,
130+
)
131+
}
121132

122133
is ColumnSchema.Group -> DataColumn.createColumnGroup(
123134
name = name,
@@ -143,7 +154,7 @@ internal fun DataFrameSchema.createEmptyDataFrame(numberOfRows: Int): AnyFrame =
143154
DataFrame.empty(numberOfRows)
144155
} else {
145156
columns.map { (name, schema) ->
146-
schema.createEmptyColumn(name, numberOfRows)
157+
schema.createNullFilledColumn(name, numberOfRows)
147158
}.toDataFrame()
148159
}
149160

0 commit comments

Comments
 (0)