Skip to content

Commit 4200370

Browse files
committed
Update comments and korro, clarify TODO
1 parent 6eea98d commit 4200370

File tree

3 files changed

+5
-16
lines changed

3 files changed

+5
-16
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowWriterImpl.kt

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,21 +89,17 @@ internal class ArrowWriterImpl(
8989
return when (targetFieldType) {
9090
ArrowType.Utf8() -> column.map { it?.toString() }
9191
ArrowType.LargeUtf8() -> column.map { it?.toString() }
92-
ArrowType.Binary(), ArrowType.LargeBinary() -> throw NotImplementedError("Saving var binary is currently not implemented")
9392
ArrowType.Bool() -> column.convertToBoolean()
9493
ArrowType.Int(8, true) -> column.convertToByte()
9594
ArrowType.Int(16, true) -> column.convertToShort()
9695
ArrowType.Int(32, true) -> column.convertToInt()
9796
ArrowType.Int(64, true) -> column.convertToLong()
98-
// ArrowType.Int(8, false), ArrowType.Int(16, false), ArrowType.Int(32, false), ArrowType.Int(64, false) -> todo
9997
is ArrowType.Decimal -> column.convertToBigDecimal()
10098
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) -> column.convertToDouble().convertToFloat() // Use [convertToDouble] as locale logic step
10199
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) -> column.convertToDouble()
102100
ArrowType.Date(DateUnit.DAY) -> column.convertToLocalDate()
103101
ArrowType.Date(DateUnit.MILLISECOND) -> column.convertToLocalDateTime()
104102
is ArrowType.Time -> column.convertToLocalTime()
105-
// is ArrowType.Duration -> todo
106-
// is ArrowType.Struct -> todo
107103
else -> {
108104
throw NotImplementedError("Saving ${targetFieldType.javaClass.canonicalName} is currently not implemented")
109105
}
@@ -114,17 +110,11 @@ internal class ArrowWriterImpl(
114110
when (vector) {
115111
is VarCharVector -> column.convertToString().forEachIndexed { i, value -> value?.let { vector.set(i, Text(value)); value } ?: vector.setNull(i) }
116112
is LargeVarCharVector -> column.convertToString().forEachIndexed { i, value -> value?.let { vector.set(i, Text(value)); value } ?: vector.setNull(i) }
117-
// is VarBinaryVector -> todo
118-
// is LargeVarBinaryVector -> todo
119113
is BitVector -> column.convertToBoolean().forEachIndexed { i, value -> value?.let { vector.set(i, value.compareTo(false)); value } ?: vector.setNull(i) }
120114
is TinyIntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
121115
is SmallIntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
122116
is IntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
123117
is BigIntVector -> column.convertToLong().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
124-
// is UInt1Vector -> todo
125-
// is UInt2Vector -> todo
126-
// is UInt4Vector -> todo
127-
// is UInt8Vector -> todo
128118
is DecimalVector -> column.convertToBigDecimal().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
129119
is Decimal256Vector -> column.convertToBigDecimal().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
130120
is Float8Vector -> column.convertToDouble().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
@@ -133,13 +123,12 @@ internal class ArrowWriterImpl(
133123
is DateDayVector -> column.convertToLocalDate().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toJavaLocalDate().toEpochDay()).toInt()); value } ?: vector.setNull(i) }
134124
is DateMilliVector -> column.convertToLocalDateTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toInstant(
135125
TimeZone.UTC).toEpochMilliseconds()); value } ?: vector.setNull(i) }
136-
// is DurationVector -> todo
137126
is TimeNanoVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toNanoOfDay()); value } ?: vector.setNull(i) }
138127
is TimeMicroVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toNanoOfDay() / 1000); value } ?: vector.setNull(i) }
139128
is TimeMilliVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toNanoOfDay() / 1000 / 1000).toInt()); value } ?: vector.setNull(i) }
140129
is TimeSecVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toNanoOfDay() / 1000 / 1000 / 1000).toInt()); value } ?: vector.setNull(i) }
141-
// is StructVector -> todo
142130
else -> {
131+
// TODO: implement the other vector types from [readField] (VarBinaryVector, UIntVector, DurationVector, StructVector), and maybe others (ListVector, FixedSizeListVector, etc.)
143132
throw NotImplementedError("Saving to ${vector.javaClass.canonicalName} is currently not implemented")
144133
}
145134
}

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ private inline fun <reified T> List<T?>.withTypeNullable(
174174
return this to typeOf<T>().withNullability(nullable)
175175
}
176176

177-
internal fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol {
177+
private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol {
178178
try {
179179
val range = 0 until root.rowCount
180180
val (list, type) = when (val vector = root.getVector(field)) {
@@ -216,7 +216,7 @@ internal fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullab
216216
/**
217217
* Read [Arrow interprocess streaming format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-streaming-format) data from an existing [channel].
218218
*/
219-
public fun DataFrame.Companion.readArrowIPCImpl(
219+
internal fun DataFrame.Companion.readArrowIPCImpl(
220220
channel: ReadableByteChannel,
221221
allocator: RootAllocator = Allocator.ROOT,
222222
nullability: NullabilityOptions = NullabilityOptions.Infer,
@@ -237,7 +237,7 @@ public fun DataFrame.Companion.readArrowIPCImpl(
237237
/**
238238
* Read [Arrow random access format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-random-access-files) data from an existing [channel].
239239
*/
240-
public fun DataFrame.Companion.readArrowFeatherImpl(
240+
internal fun DataFrame.Companion.readArrowFeatherImpl(
241241
channel: SeekableByteChannel,
242242
allocator: RootAllocator = Allocator.ROOT,
243243
nullability: NullabilityOptions = NullabilityOptions.Infer,

tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ class Write : TestBase() {
204204
strictType = true,
205205
strictNullable = false
206206
),
207-
// Specify warning subscriber
207+
// Specify mismatch subscriber
208208
writeMismatchMessage
209209
).use { writer ->
210210
// Save to any format and sink, like in previous example

0 commit comments

Comments
 (0)