Skip to content

Commit ecf051c

Browse files
authored
Merge pull request #853 from Kopilov/convert-comparable-to-string
Avoid 'expected: range(0, 32768)' exception when saving mixed data to Arrow
2 parents 76e948f + 3ce5428 commit ecf051c

File tree

3 files changed

+48
-2
lines changed

3 files changed

+48
-2
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowWriterImpl.kt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,16 @@ internal class ArrowWriterImpl(
145145
}
146146
}
147147

148+
/**
 * Pairs [column] with the Arrow field derived from the column's own data, converting the column
 * to that field's type when possible.
 *
 * The field is obtained from `column.toArrowField(mismatchSubscriber)`. The column is then passed
 * to `convertColumnToTarget` with that field's type. If the conversion yields `null` or throws,
 * the original column is returned unchanged, so this helper never fails because of a conversion.
 *
 * @param column the column to be written.
 * @return the converted column (or the original one as a fallback) together with the field
 *   derived from the original column.
 */
private fun convertColumnToCompatible(column: AnyCol): Pair<AnyCol, Field> {
    val actualField = column.toArrowField(mismatchSubscriber)
    val compatibleColumn = try {
        // A null result means "nothing converted": fall back directly instead of forcing an NPE
        // with `!!` only to catch it below.
        convertColumnToTarget(column, actualField.type) ?: column
    } catch (e: Exception) {
        // Deliberate best effort: a failed conversion keeps the original data.
        column
    }
    return compatibleColumn to actualField
}
157+
148158
private fun infillVector(vector: FieldVector, column: AnyCol) {
149159
when (vector) {
150160
is VarCharVector ->
@@ -306,7 +316,7 @@ internal class ArrowWriterImpl(
306316
cause = e,
307317
),
308318
)
309-
column to column!!.toArrowField(mismatchSubscriber)
319+
convertColumnToCompatible(column!!)
310320
}
311321
} catch (e: TypeConverterNotFoundException) {
312322
if (strictType) {
@@ -317,7 +327,7 @@ internal class ArrowWriterImpl(
317327
} else {
318328
// If strictType is not enabled, use original data with its type. Target nullable is saved at this step.
319329
mismatchSubscriber(ConvertingMismatch.TypeConversionNotFound.ConversionNotFoundIgnored(field.name, e))
320-
column to column!!.toArrowField(mismatchSubscriber)
330+
convertColumnToCompatible(column!!)
321331
}
322332
}
323333

dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,30 @@ internal class ArrowKtTest {
498498
DataFrame.readArrowFeather(data) shouldBe dataFrame
499499
}
500500

501+
/**
 * Writes [bigMixedColumn] (Int values plus one String) against a target schema that declares the
 * column as a nullable signed 64-bit integer, in LOYAL mode, and checks that:
 *  - exactly one `ConversionFailIgnored` and exactly one `SavedAsString` mismatch are reported;
 *  - reading the written bytes back yields the column values rendered with `toString()`.
 */
@Test
fun testBigMixedColumn() {
    val dataFrame = dataFrameOf(bigMixedColumn)
    val warnings = mutableListOf<ConvertingMismatch>()
    val writer = dataFrame.arrowWriter(
        targetSchema = Schema(
            listOf(
                Field("bigMixedColumn", FieldType.nullable(ArrowType.Int(64, true)), emptyList()),
            ),
        ),
        mode = ArrowWriter.Mode.LOYAL,
    ) {
        warnings.add(it)
    }
    val stream = ByteArrayOutputStream()
    writer.writeArrowFeather(stream)
    val data = stream.toByteArray()

    // `shouldBe` instead of `assert(...)`: Kotlin's `assert` is only evaluated when JVM assertions
    // are enabled (-ea), so the checks could otherwise pass without being run.
    warnings.filterIsInstance<ConvertingMismatch.TypeConversionFail.ConversionFailIgnored>().size shouldBe 1
    warnings.filterIsInstance<ConvertingMismatch.SavedAsString>().size shouldBe 1

    DataFrame.readArrowFeather(data)["bigMixedColumn"] shouldBe dataFrame[bigMixedColumn].map { it.toString() }
}
524+
501525
@Test
502526
fun testTimeStamp() {
503527
val dates = listOf(

dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/examplesToWrite.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,3 +192,15 @@ val bigStringColumn = run {
192192
}
193193
DataColumn.createValueColumn("bigStringColumn", list)
194194
}
195+
196+
/**
 * Value column named "bigMixedColumn" holding the squares of 0..32768 in ascending order,
 * then the single string "Dirty data", then the same squares in descending order
 * (65539 values in total, typed as `Any`).
 */
val bigMixedColumn = run {
    val upperBound = 32768
    val values = buildList<Any> {
        (0..upperBound).forEach { n -> add(n * n) }
        // The one non-numeric entry that makes the column mixed.
        add("Dirty data")
        (upperBound downTo 0).forEach { n -> add(n * n) }
    }
    DataColumn.createValueColumn("bigMixedColumn", values)
}

0 commit comments

Comments
 (0)