Skip to content

Commit 0af2d25

Browse files
committed
Pad records with nulls if they have fewer columns than the header
It seems that sometimes a CSV file doesn't have trailing commas at the end of a record (#101)
1 parent 308e456 commit 0af2d25

File tree

2 files changed

+24
-1
lines changed
  • src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/io
    • test/kotlin/org/jetbrains/kotlinx/dataframe/io

2 files changed

+24
-1
lines changed

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,12 @@ public fun DataFrame.Companion.readDelim(
274274
val colType = colTypes[colName] ?: defaultColType
275275
var hasNulls = false
276276
val values = records.map {
277-
it[colIndex].ifEmpty {
277+
if (it.isSet(colIndex)) {
278+
it[colIndex].ifEmpty {
279+
hasNulls = true
280+
null
281+
}
282+
} else {
278283
hasNulls = true
279284
null
280285
}

src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.jetbrains.kotlinx.dataframe.io
22

3+
import io.kotest.assertions.throwables.shouldNotThrowAny
34
import io.kotest.matchers.nulls.shouldNotBeNull
45
import io.kotest.matchers.shouldBe
56
import kotlinx.datetime.LocalDateTime
@@ -137,6 +138,23 @@ class CsvTests {
137138
df["floatDuration"].type() shouldBe typeOf<String>()
138139
}
139140

141+
@Test
142+
fun `if record has fewer columns than header then pad it with nulls`() {
143+
val csvContent = """col1,col2,col3
144+
568,801,587
145+
780,588
146+
""".trimIndent()
147+
148+
val df = shouldNotThrowAny {
149+
DataFrame.readDelimStr(csvContent)
150+
}
151+
152+
df shouldBe dataFrameOf("col1", "col2", "col3")(
153+
568, 801, 587,
154+
780, 588, null
155+
)
156+
}
157+
140158
@Test
141159
fun `write and read frame column`() {
142160
val df = dataFrameOf("a", "b", "c")(

0 commit comments

Comments
 (0)