Skip to content

Commit 7eac71f

Browse files
committed
Convert StructVector into ColumnGroup instead of DataColumn<Map<String, Any?>>
1 parent 4fd5602 commit 7eac71f

File tree

2 files changed

+18
-14
lines changed

2 files changed

+18
-14
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import org.apache.arrow.vector.DateMilliVector
1919
import org.apache.arrow.vector.Decimal256Vector
2020
import org.apache.arrow.vector.DecimalVector
2121
import org.apache.arrow.vector.DurationVector
22+
import org.apache.arrow.vector.FieldVector
2223
import org.apache.arrow.vector.Float4Vector
2324
import org.apache.arrow.vector.Float8Vector
2425
import org.apache.arrow.vector.IntVector
@@ -293,10 +294,16 @@ private fun List<Nothing?>.withTypeNullable(
293294
return this to nothingType(nullable)
294295
}
295296

296-
private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol {
297+
private fun readField(vector: FieldVector, field: Field, nullability: NullabilityOptions): AnyBaseCol {
297298
try {
298-
val range = 0 until root.rowCount
299-
val (list, type) = when (val vector = root.getVector(field)) {
299+
val range = 0 until vector.valueCount
300+
if (vector is StructVector) {
301+
val columns = field.children.map { childField ->
302+
readField(vector.getChild(childField.name), childField, nullability)
303+
}
304+
return DataColumn.createColumnGroup(field.name, columns.toDataFrame())
305+
}
306+
val (list, type) = when (vector) {
300307
is VarCharVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
301308

302309
is LargeVarCharVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
@@ -357,8 +364,6 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
357364

358365
is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
359366

360-
is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
361-
362367
is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
363368

364369
else -> {
@@ -371,6 +376,9 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
371376
}
372377
}
373378

379+
private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol =
380+
readField(root.getVector(field), field, nullability)
381+
374382
/**
375383
* Read [Arrow interprocess streaming format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-streaming-format) data from existing [channel]
376384
*/

dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import org.apache.arrow.vector.types.pojo.Field
2525
import org.apache.arrow.vector.types.pojo.FieldType
2626
import org.apache.arrow.vector.types.pojo.Schema
2727
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel
28-
import org.apache.arrow.vector.util.Text
2928
import org.duckdb.DuckDBConnection
3029
import org.duckdb.DuckDBResultSet
3130
import org.jetbrains.kotlinx.dataframe.AnyFrame
@@ -39,7 +38,6 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
3938
import org.jetbrains.kotlinx.dataframe.api.map
4039
import org.jetbrains.kotlinx.dataframe.api.pathOf
4140
import org.jetbrains.kotlinx.dataframe.api.remove
42-
import org.jetbrains.kotlinx.dataframe.api.toColumn
4341
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
4442
import org.junit.Assert
4543
import org.junit.Test
@@ -68,13 +66,11 @@ internal class ArrowKtTest {
6866
val df = DataFrame.readArrowFeather(feather)
6967
val a by columnOf("one")
7068
val b by columnOf(2.0)
71-
val c by listOf(
72-
mapOf(
73-
"c1" to Text("inner"),
74-
"c2" to 4.0,
75-
"c3" to 50.0,
76-
) as Map<String, Any?>,
77-
).toColumn()
69+
val c by columnOf(
70+
"c1" to columnOf("inner"),
71+
"c2" to columnOf(4.0),
72+
"c3" to columnOf(50.0),
73+
)
7874
val d by columnOf("four")
7975
val expected = dataFrameOf(a, b, c, d)
8076
df shouldBe expected

0 commit comments

Comments
 (0)