Skip to content

Commit 64d1f99

Browse files
committed
Changed parsing to use createColumnGuessingType() to fix issue #593 for CSV and other parsing usages
1 parent 32bee07 commit 64d1f99

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3232
import org.jetbrains.kotlinx.dataframe.hasNulls
3333
import org.jetbrains.kotlinx.dataframe.impl.canParse
3434
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
35+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
3536
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
3637
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3738
import org.jetbrains.kotlinx.dataframe.io.isURL
@@ -525,7 +526,10 @@ internal fun DataColumn<String?>.tryParseImpl(options: ParserOptions?): DataColu
525526
if (type.jvmErasure == String::class && !nullStringParsed) {
526527
return this // nothing parsed
527528
}
528-
return DataColumn.createUnsafe(name(), parsedValues, type)
529+
530+
// Create a new column with the parsed values,
531+
// createColumnGuessingType is used to handle unifying values if needed
532+
return createColumnGuessingType(name(), parsedValues, type)
529533
}
530534

531535
internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: ParserOptions?): DataColumn<T?> {
@@ -535,7 +539,7 @@ internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: Par
535539
handler(it.trim()) ?: throw IllegalStateException("Couldn't parse '$it' into type ${parser.type}")
536540
}
537541
}
538-
return DataColumn.createValueColumn(name(), parsedValues, parser.type.withNullability(hasNulls)) as DataColumn<T?>
542+
return createColumnGuessingType(name(), parsedValues, parser.type.withNullability(hasNulls))
539543
}
540544

541545
internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: ColumnsSelector<T, Any?>): DataFrame<T> {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,9 @@ internal fun <T> createColumnGuessingType(
249249
val df = dataFrameOf(values as Iterable<AnyCol>)
250250
DataColumn.createColumnGroup(name, df)
251251
} else {
252-
val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat()
252+
val df = values.map {
253+
(it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1)
254+
}.concat()
253255
DataColumn.createColumnGroup(name, df)
254256
}.asDataColumn().cast()
255257
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import kotlinx.datetime.LocalTime
77
import kotlinx.datetime.TimeZone
88
import kotlinx.datetime.toKotlinLocalDate
99
import kotlinx.datetime.toKotlinLocalDateTime
10+
import org.jetbrains.kotlinx.dataframe.AnyFrame
1011
import org.jetbrains.kotlinx.dataframe.DataColumn
1112
import org.jetbrains.kotlinx.dataframe.DataFrame
1213
import org.jetbrains.kotlinx.dataframe.api.cast
@@ -16,11 +17,15 @@ import org.jetbrains.kotlinx.dataframe.api.convertToDouble
1617
import org.jetbrains.kotlinx.dataframe.api.convertToLocalDate
1718
import org.jetbrains.kotlinx.dataframe.api.convertToLocalDateTime
1819
import org.jetbrains.kotlinx.dataframe.api.convertToLocalTime
20+
import org.jetbrains.kotlinx.dataframe.api.first
21+
import org.jetbrains.kotlinx.dataframe.api.isEmpty
22+
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
1923
import org.jetbrains.kotlinx.dataframe.api.parse
2024
import org.jetbrains.kotlinx.dataframe.api.parser
2125
import org.jetbrains.kotlinx.dataframe.api.plus
2226
import org.jetbrains.kotlinx.dataframe.api.times
2327
import org.jetbrains.kotlinx.dataframe.api.tryParse
28+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
2429
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
2530
import org.junit.Test
2631
import java.math.BigDecimal
@@ -215,4 +220,17 @@ class ParserTests {
215220
Locale.setDefault(currentLocale)
216221
}
217222
}
223+
224+
@Test
225+
fun `Issue #593, mixing null and json`() { // Regression test for issue #593: parsing a String column that mixes JSON arrays with a "null" string.
226+
val col by columnOf("[\"str\"]", "[]", "null") // three raw strings: a one-element JSON array, an empty JSON array, and the literal text "null"
227+
val parsed = col.parse() // no explicit parser options are passed
228+
parsed.type() shouldBe typeOf<AnyFrame>() // every cell is expected to be a (non-null) DataFrame
229+
parsed.kind() shouldBe ColumnKind.Frame // ...and the column itself must be reported as a frame column
230+
require(parsed.isFrameColumn()) // NOTE(review): presumably lets the compiler treat parsed[i] as a frame below — confirm isFrameColumn declares a contract
231+
232+
parsed[0]["value"].first() shouldBe "str" // the one-element JSON array becomes a frame whose "value" column starts with "str"
233+
parsed[1].isEmpty() shouldBe true // the empty JSON array becomes an empty frame
234+
parsed[2].isEmpty() shouldBe true // the "null" string is expected to become an empty frame as well, not a null cell
235+
}
218236
}

0 commit comments

Comments
 (0)