Skip to content

Commit b805c5c

Browse files
authored
Merge pull request #213 from nikitinas/framecolumn-conversion-fixes
Restore additional conversions from ValueColumn/ColumnGroup to FrameColumn
2 parents acf9d09 + 8d032d8 commit b805c5c

File tree

2 files changed

+102
-30
lines changed
  • core/src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api
    • test/kotlin/org/jetbrains/kotlinx/dataframe/api

2 files changed

+102
-30
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt

Lines changed: 43 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.impl.api
22

3-
import org.jetbrains.kotlinx.dataframe.AnyCol
43
import org.jetbrains.kotlinx.dataframe.AnyFrame
4+
import org.jetbrains.kotlinx.dataframe.AnyRow
55
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
66
import org.jetbrains.kotlinx.dataframe.DataColumn
77
import org.jetbrains.kotlinx.dataframe.DataFrame
@@ -14,6 +14,7 @@ import org.jetbrains.kotlinx.dataframe.api.Infer
1414
import org.jetbrains.kotlinx.dataframe.api.all
1515
import org.jetbrains.kotlinx.dataframe.api.allNulls
1616
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
17+
import org.jetbrains.kotlinx.dataframe.api.concat
1718
import org.jetbrains.kotlinx.dataframe.api.convertTo
1819
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
1920
import org.jetbrains.kotlinx.dataframe.api.getColumnPaths
@@ -24,11 +25,12 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame
2425
import org.jetbrains.kotlinx.dataframe.api.update
2526
import org.jetbrains.kotlinx.dataframe.api.with
2627
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
28+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
2729
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
2830
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
31+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
2932
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException
3033
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
31-
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
3234
import org.jetbrains.kotlinx.dataframe.impl.emptyPath
3335
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn
3436
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
@@ -107,8 +109,8 @@ internal fun AnyFrame.convertToImpl(
107109

108110
val visited = mutableSetOf<String>()
109111
val newColumns = columns().mapNotNull { originalColumn ->
110-
val targetColumn = schema.columns[originalColumn.name()]
111-
if (targetColumn == null) {
112+
val targetSchema = schema.columns[originalColumn.name()]
113+
if (targetSchema == null) {
112114
when (excessiveColumns) {
113115
ExcessiveColumns.Fail -> throw ExcessiveColumnsException(listOf(originalColumn.name))
114116
ExcessiveColumns.Keep -> originalColumn
@@ -118,13 +120,13 @@ internal fun AnyFrame.convertToImpl(
118120
visited.add(originalColumn.name())
119121
val currentSchema = originalColumn.extractSchema()
120122
when {
121-
targetColumn == currentSchema -> originalColumn
123+
targetSchema == currentSchema -> originalColumn
122124

123125
!allowConversion -> {
124126
val originalSchema = mapOf(originalColumn.name to currentSchema)
125127
.render(0, StringBuilder(), "\t")
126128

127-
val targetSchema = mapOf(originalColumn.name to targetColumn)
129+
val targetSchema = mapOf(originalColumn.name to targetSchema)
128130
.render(0, StringBuilder(), "\t")
129131

130132
throw IllegalArgumentException("Column has schema:\n $originalSchema\n that differs from target schema:\n $targetSchema")
@@ -135,26 +137,31 @@ internal fun AnyFrame.convertToImpl(
135137

136138
// try to perform any user-specified conversions first
137139
val from = originalColumn.type()
138-
val to = targetColumn.type
139-
val converter = dsl.getConverter(from, targetColumn)
140+
val to = targetSchema.type
141+
val converter = dsl.getConverter(from, targetSchema)
140142

141143
val convertedColumn = if (converter != null) {
142144
val nullsAllowed = to.isMarkedNullable
143145
originalColumn.map(to, Infer.Nulls) {
144146
val result =
145147
if (it != null || !converter.skipNulls) {
146-
converter.transform(ConverterScope(from, targetColumn), it)
148+
converter.transform(ConverterScope(from, targetSchema), it)
147149
} else {
148150
it
149151
}
150152

151-
if (!nullsAllowed && result == null) throw TypeConversionException(it, from, to, originalColumn.path())
153+
if (!nullsAllowed && result == null) throw TypeConversionException(
154+
it,
155+
from,
156+
to,
157+
originalColumn.path()
158+
)
152159

153160
result
154161
}
155162
} else null
156163

157-
when (targetColumn.kind) {
164+
when (targetSchema.kind) {
158165
ColumnKind.Value ->
159166
convertedColumn ?: originalColumn.convertTo(to)
160167

@@ -187,37 +194,43 @@ internal fun AnyFrame.convertToImpl(
187194
DataColumn.createColumnGroup(
188195
name = column.name(),
189196
df = columnGroup.convertToSchema(
190-
schema = (targetColumn as ColumnSchema.Group).schema,
197+
schema = (targetSchema as ColumnSchema.Group).schema,
191198
path = columnPath,
192199
),
193200
)
194201
}
195202

196203
ColumnKind.Frame -> {
197204
val column = convertedColumn ?: originalColumn
198-
199-
// perform any patches if needed to be able to convert a column to a frame column
200-
val patchedOriginalColumn: AnyCol = when {
201-
// a value column of AnyFrame? (or nulls) can be converted to a frame column by making nulls empty dataframes
202-
column.kind == ColumnKind.Value && column.all { it is AnyFrame? } -> {
203-
column
204-
.map { (it ?: emptyDataFrame<Any?>()) as AnyFrame }
205-
.convertTo<AnyFrame>()
205+
val frameSchema = (targetSchema as ColumnSchema.Frame).schema
206+
207+
val frames = when (column.kind) {
208+
ColumnKind.Frame ->
209+
(column as FrameColumn<*>).values()
210+
211+
ColumnKind.Value -> {
212+
require(column.all { it == null || it is AnyFrame || (it is List<*> && it.all { it is AnyRow? }) }) {
213+
"Column `${column.name}` is ValueColumn and contains objects that can not be converted into `DataFrame`"
214+
}
215+
column.values().map {
216+
when (it) {
217+
null -> emptyDataFrame()
218+
is AnyFrame -> it
219+
else -> (it as List<AnyRow?>).concat()
220+
}
221+
}
206222
}
207223

208-
else -> column
224+
ColumnKind.Group -> {
225+
(column as ColumnGroup<*>).values().map { it.toDataFrame() }
226+
}
209227
}
210228

211-
require(patchedOriginalColumn.kind == ColumnKind.Frame) {
212-
"Column `${patchedOriginalColumn.name}` is ${patchedOriginalColumn.kind}Column and can not be converted to `FrameColumn`"
213-
}
214-
val frameColumn = patchedOriginalColumn.asAnyFrameColumn()
215-
val frameSchema = (targetColumn as ColumnSchema.Frame).schema
216-
val frames = frameColumn.values().map { it.convertToSchema(frameSchema, columnPath) }
229+
val convertedFrames = frames.map { it.convertToSchema(frameSchema, columnPath)}
217230

218231
DataColumn.createFrameColumn(
219-
name = patchedOriginalColumn.name(),
220-
groups = frames,
232+
name = column.name(),
233+
groups = convertedFrames,
221234
schema = lazy { frameSchema },
222235
)
223236
}
@@ -259,7 +272,7 @@ internal fun AnyFrame.convertToImpl(
259272
}
260273

261274
if (missingPaths.isNotEmpty()) {
262-
throw IllegalArgumentException("The following columns were not found in DataFrame: ${missingPaths.map { it.joinToString()}}, and their type was not nullable. Use `fill` to initialize these columns")
275+
throw IllegalArgumentException("The following columns were not found in DataFrame: ${missingPaths.map { it.joinToString() }}, and their type was not nullable. Use `fill` to initialize these columns")
263276
}
264277

265278
return result

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,14 @@ package org.jetbrains.kotlinx.dataframe.api
33
import io.kotest.assertions.throwables.shouldThrow
44
import io.kotest.matchers.shouldBe
55
import org.jetbrains.kotlinx.dataframe.AnyFrame
6+
import org.jetbrains.kotlinx.dataframe.AnyRow
7+
import org.jetbrains.kotlinx.dataframe.DataColumn
68
import org.jetbrains.kotlinx.dataframe.DataFrame
79
import org.jetbrains.kotlinx.dataframe.alsoDebug
810
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
11+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
912
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
13+
import org.jetbrains.kotlinx.dataframe.kind
1014
import org.junit.Test
1115
import kotlin.reflect.typeOf
1216

@@ -282,4 +286,59 @@ class ConvertToTests {
282286

283287
converted shouldBe locations.update { gps.longitude }.with { gps.latitude }
284288
}
289+
290+
// Checks that a column "d" whose cells are empty lists is converted by `convertTo<Result>()`
// into a column of empty `DataFrame<Entry>` values.
@Test
291+
fun `convert column of empty lists into FrameColumn`() {
292+
@DataSchema
293+
data class Entry(val v: Int)
294+
295+
@DataSchema
296+
data class Result(val d: DataFrame<Entry>)
297+
298+
// Source frame: one column "d" holding two empty lists.
dataFrameOf("d")(emptyList<Any>(), emptyList<Any>())
299+
.convertTo<Result>() shouldBe
300+
// Expected: each empty list is replaced by an empty frame created with the `Entry` schema.
dataFrameOf("d")(DataFrame.emptyOf<Entry>(), DataFrame.emptyOf<Entry>())
301+
}
302+
303+
// Checks that a column group "d" is converted by `convertTo<Result>()` into a frame column
// in which every row of the group becomes its own single-row frame.
@Test
304+
fun `convert ColumnGroup into FrameColumn`() {
305+
@DataSchema
306+
data class Entry(val v: Int)
307+
308+
@DataSchema
309+
data class Result(val d: DataFrame<Entry>)
310+
311+
// Column group "d" wrapping a frame with a single column "v" and the values 1 and 2.
val columnGroup = DataColumn.createColumnGroup("d", dataFrameOf("v")(1, 2))
312+
// Sanity check on the input: it really is a group before conversion.
columnGroup.kind() shouldBe ColumnKind.Group
313+
val res = dataFrameOf(columnGroup).convertTo<Result>()
314+
val frameColumn = res.getFrameColumn("d")
315+
frameColumn.kind shouldBe ColumnKind.Frame
316+
// One frame per original row: v=1 and v=2 end up in separate frames.
frameColumn.values() shouldBe listOf(dataFrameOf("v")(1), dataFrameOf("v")(2))
317+
}
318+
319+
// Checks that a value column "d" mixing an empty list, a list of rows, a frame and a null
// is converted by `convertTo<Result>()` into a frame column.
@Test
320+
fun `convert ValueColumn of lists, nulls and frames into FrameColumn`(){
321+
@DataSchema
322+
data class Entry(val v: Int)
323+
324+
@DataSchema
325+
data class Result(val d: DataFrame<Entry>)
326+
327+
// The three non-null cell shapes exercised by this test.
val emptyList: List<Any?> = emptyList()
328+
val listOfRows: List<AnyRow> = dataFrameOf("v")(1, 2).rows().toList()
329+
val frame: DataFrame<Entry> = listOf(Entry(3), Entry(4)).toDataFrame()
330+
331+
// Source frame: a single value column "d" with the cells [emptyList, listOfRows, frame, null].
val src = DataColumn.createValueColumn("d", listOf(emptyList, listOfRows, frame, null)).toDataFrame()
332+
// Sanity check on the input: the column is a value column before conversion.
src["d"].kind shouldBe ColumnKind.Value
333+
334+
val df = src.convertTo<Result>()
335+
val frameColumn = df.getFrameColumn("d")
336+
frameColumn.kind shouldBe ColumnKind.Frame
337+
// Expected, in cell order: empty list -> empty frame, list of rows -> frame with v=1,2,
// frame -> frame with v=3,4, null -> empty frame.
frameColumn.toList() shouldBe listOf(
338+
DataFrame.emptyOf<Entry>(),
339+
dataFrameOf("v")(1, 2),
340+
dataFrameOf("v")(3, 4),
341+
DataFrame.emptyOf<Entry>(),
342+
)
343+
}
285344
}

0 commit comments

Comments
 (0)