Skip to content

Commit d50002b

Browse files
committed
Linter fixes and slight refactorings in some places for all non-:core modules (except jdbc)
1 parent e2a131b commit d50002b

File tree

25 files changed

+884
-485
lines changed

25 files changed

+884
-485
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowWriterImpl.kt

Lines changed: 139 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
5454
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
5555
import org.jetbrains.kotlinx.dataframe.name
5656
import org.jetbrains.kotlinx.dataframe.values
57-
import java.nio.charset.Charset
5857
import kotlin.reflect.full.isSubtypeOf
5958
import kotlin.reflect.typeOf
6059

@@ -85,15 +84,21 @@ internal class ArrowWriterImpl(
8584
private fun countTotalBytes(column: AnyCol): Long? {
8685
val columnType = column.type()
8786
return when {
88-
columnType.isSubtypeOf(typeOf<String?>()) -> column.values.fold(0L) {totalBytes, value -> totalBytes + value.toString().length * 4}
87+
columnType.isSubtypeOf(typeOf<String?>()) -> column.values.fold(0L) { totalBytes, value -> totalBytes + value.toString().length * 4 }
8988
else -> null
9089
}
9190
}
9291

9392
private fun infillWithNulls(vector: FieldVector, size: Int) {
9493
when (vector) {
95-
is BaseFixedWidthVector -> for (i in 0 until size) { vector.setNull(i) }
96-
is BaseVariableWidthVector -> for (i in 0 until size) { vector.setNull(i) }
94+
is BaseFixedWidthVector -> for (i in 0 until size) {
95+
vector.setNull(i)
96+
}
97+
98+
is BaseVariableWidthVector -> for (i in 0 until size) {
99+
vector.setNull(i)
100+
}
101+
97102
else -> throw IllegalArgumentException("Can not infill ${vector.javaClass.canonicalName}")
98103
}
99104
vector.valueCount = size
@@ -110,7 +115,8 @@ internal class ArrowWriterImpl(
110115
ArrowType.Int(32, true) -> column.convertToInt()
111116
ArrowType.Int(64, true) -> column.convertToLong()
112117
is ArrowType.Decimal -> column.convertToBigDecimal()
113-
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) -> column.convertToDouble().convertToFloat() // Use [convertToDouble] as locale logic step
118+
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) -> column.convertToDouble()
119+
.convertToFloat() // Use [convertToDouble] as locale logic step
114120
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) -> column.convertToDouble()
115121
ArrowType.Date(DateUnit.DAY) -> column.convertToLocalDate()
116122
ArrowType.Date(DateUnit.MILLISECOND) -> column.convertToLocalDateTime()
@@ -123,25 +129,108 @@ internal class ArrowWriterImpl(
123129

124130
private fun infillVector(vector: FieldVector, column: AnyCol) {
125131
when (vector) {
126-
is VarCharVector -> column.convertToString().forEachIndexed { i, value -> value?.let { vector.set(i, Text(value)); value } ?: vector.setNull(i) }
127-
is LargeVarCharVector -> column.convertToString().forEachIndexed { i, value -> value?.let { vector.set(i, Text(value)); value } ?: vector.setNull(i) }
128-
is BitVector -> column.convertToBoolean().forEachIndexed { i, value -> value?.let { vector.set(i, value.compareTo(false)); value } ?: vector.setNull(i) }
129-
is TinyIntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
130-
is SmallIntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
131-
is IntVector -> column.convertToInt().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
132-
is BigIntVector -> column.convertToLong().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
133-
is DecimalVector -> column.convertToBigDecimal().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
134-
is Decimal256Vector -> column.convertToBigDecimal().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
135-
is Float8Vector -> column.convertToDouble().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
136-
is Float4Vector -> column.convertToFloat().forEachIndexed { i, value -> value?.let { vector.set(i, value); value } ?: vector.setNull(i) }
137-
138-
is DateDayVector -> column.convertToLocalDate().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toJavaLocalDate().toEpochDay()).toInt()); value } ?: vector.setNull(i) }
139-
is DateMilliVector -> column.convertToLocalDateTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toInstant(
140-
TimeZone.UTC).toEpochMilliseconds()); value } ?: vector.setNull(i) }
141-
is TimeNanoVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toNanoOfDay()); value } ?: vector.setNull(i) }
142-
is TimeMicroVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, value.toNanoOfDay() / 1000); value } ?: vector.setNull(i) }
143-
is TimeMilliVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toNanoOfDay() / 1000 / 1000).toInt()); value } ?: vector.setNull(i) }
144-
is TimeSecVector -> column.convertToLocalTime().forEachIndexed { i, value -> value?.let { vector.set(i, (value.toNanoOfDay() / 1000 / 1000 / 1000).toInt()); value } ?: vector.setNull(i) }
132+
is VarCharVector -> column.convertToString()
133+
.forEachIndexed { i, value ->
134+
value?.also { vector.set(i, Text(value)) }
135+
?: vector.setNull(i)
136+
}
137+
138+
is LargeVarCharVector -> column.convertToString()
139+
.forEachIndexed { i, value ->
140+
value?.also { vector.set(i, Text(value)) }
141+
?: vector.setNull(i)
142+
}
143+
144+
is BitVector -> column.convertToBoolean()
145+
.forEachIndexed { i, value ->
146+
value?.also { vector.set(i, value.compareTo(false)) }
147+
?: vector.setNull(i)
148+
}
149+
150+
is TinyIntVector -> column.convertToInt()
151+
.forEachIndexed { i, value ->
152+
value?.also { vector.set(i, value) }
153+
?: vector.setNull(i)
154+
}
155+
156+
is SmallIntVector -> column.convertToInt()
157+
.forEachIndexed { i, value ->
158+
value?.also { vector.set(i, value) }
159+
?: vector.setNull(i)
160+
}
161+
162+
is IntVector -> column.convertToInt()
163+
.forEachIndexed { i, value ->
164+
value?.also { vector.set(i, value) }
165+
?: vector.setNull(i)
166+
}
167+
168+
is BigIntVector -> column.convertToLong()
169+
.forEachIndexed { i, value ->
170+
value?.also { vector.set(i, value) }
171+
?: vector.setNull(i)
172+
}
173+
174+
is DecimalVector -> column.convertToBigDecimal()
175+
.forEachIndexed { i, value ->
176+
value?.also { vector.set(i, value) }
177+
?: vector.setNull(i)
178+
}
179+
180+
is Decimal256Vector -> column.convertToBigDecimal()
181+
.forEachIndexed { i, value ->
182+
value?.also { vector.set(i, value) }
183+
?: vector.setNull(i)
184+
}
185+
186+
is Float8Vector -> column.convertToDouble()
187+
.forEachIndexed { i, value ->
188+
value?.also { vector.set(i, value) }
189+
?: vector.setNull(i)
190+
}
191+
192+
is Float4Vector -> column.convertToFloat()
193+
.forEachIndexed { i, value ->
194+
value?.also { vector.set(i, value) }
195+
?: vector.setNull(i)
196+
}
197+
198+
is DateDayVector -> column.convertToLocalDate()
199+
.forEachIndexed { i, value ->
200+
value?.also { vector.set(i, value.toJavaLocalDate().toEpochDay().toInt()) }
201+
?: vector.setNull(i)
202+
}
203+
204+
is DateMilliVector -> column.convertToLocalDateTime()
205+
.forEachIndexed { i, value ->
206+
value?.also { vector.set(i, value.toInstant(TimeZone.UTC).toEpochMilliseconds()) }
207+
?: vector.setNull(i)
208+
}
209+
210+
is TimeNanoVector -> column.convertToLocalTime()
211+
.forEachIndexed { i, value ->
212+
value?.also { vector.set(i, value.toNanoOfDay()) }
213+
?: vector.setNull(i)
214+
}
215+
216+
is TimeMicroVector -> column.convertToLocalTime()
217+
.forEachIndexed { i, value ->
218+
value?.also { vector.set(i, value.toNanoOfDay() / 1000) }
219+
?: vector.setNull(i)
220+
}
221+
222+
is TimeMilliVector -> column.convertToLocalTime()
223+
.forEachIndexed { i, value ->
224+
value?.also { vector.set(i, (value.toNanoOfDay() / 1000 / 1000).toInt()) }
225+
?: vector.setNull(i)
226+
}
227+
228+
is TimeSecVector -> column.convertToLocalTime()
229+
.forEachIndexed { i, value ->
230+
value?.also { vector.set(i, (value.toNanoOfDay() / 1000 / 1000 / 1000).toInt()) }
231+
?: vector.setNull(i)
232+
}
233+
145234
else -> {
146235
// TODO implement other vector types from [readField] (VarBinaryVector, UIntVector, DurationVector, StructVector) and maybe others (ListVector, FixedSizeListVector, etc.)
147236
throw NotImplementedError("Saving to ${vector.javaClass.canonicalName} is currently not implemented")
@@ -154,7 +243,12 @@ internal class ArrowWriterImpl(
154243
/**
155244
* Create Arrow FieldVector with [column] content cast to [field] type according to [strictType] and [strictNullable] settings.
156245
*/
157-
private fun allocateVectorAndInfill(field: Field, column: AnyCol?, strictType: Boolean, strictNullable: Boolean): FieldVector {
246+
private fun allocateVectorAndInfill(
247+
field: Field,
248+
column: AnyCol?,
249+
strictType: Boolean,
250+
strictNullable: Boolean,
251+
): FieldVector {
158252
val containNulls = (column == null || column.hasNulls())
159253
// Convert the column to type specified in field. (If we already have target type, convertTo will do nothing)
160254

@@ -163,12 +257,19 @@ internal class ArrowWriterImpl(
163257
} catch (e: CellConversionException) {
164258
if (strictType) {
165259
// If conversion failed but strictType is enabled, throw the exception
166-
val mismatch = ConvertingMismatch.TypeConversionFail.ConversionFailError(e.column?.name() ?: "", e.row, e)
260+
val mismatch =
261+
ConvertingMismatch.TypeConversionFail.ConversionFailError(e.column?.name() ?: "", e.row, e)
167262
mismatchSubscriber(mismatch)
168263
throw ConvertingException(mismatch)
169264
} else {
170265
// If strictType is not enabled, use original data with its type. Target nullable is saved at this step.
171-
mismatchSubscriber(ConvertingMismatch.TypeConversionFail.ConversionFailIgnored(e.column?.name() ?: "", e.row, e))
266+
mismatchSubscriber(
267+
ConvertingMismatch.TypeConversionFail.ConversionFailIgnored(
268+
e.column?.name() ?: "",
269+
e.row,
270+
e
271+
)
272+
)
172273
column to column!!.toArrowField(mismatchSubscriber)
173274
}
174275
} catch (e: TypeConverterNotFoundException) {
@@ -197,8 +298,17 @@ internal class ArrowWriterImpl(
197298
mismatchSubscriber(mismatch)
198299
throw ConvertingException(mismatch)
199300
} else {
200-
mismatchSubscriber(ConvertingMismatch.NullableMismatch.NullValueIgnored(actualField.name, firstNullValue))
201-
Field(actualField.name, FieldType(true, actualField.fieldType.type, actualField.fieldType.dictionary), actualField.children).createVector(allocator)!!
301+
mismatchSubscriber(
302+
ConvertingMismatch.NullableMismatch.NullValueIgnored(
303+
actualField.name,
304+
firstNullValue
305+
)
306+
)
307+
Field(
308+
actualField.name,
309+
FieldType(true, actualField.fieldType.type, actualField.fieldType.dictionary),
310+
actualField.children
311+
).createVector(allocator)!!
202312
}
203313
} else {
204314
actualField.createVector(allocator)!!

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ConvertingMismatch.kt

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,74 +8,107 @@ import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
88
* Can be sent to a callback, written to a log, or encapsulated in an exception
99
*/
1010
public sealed class ConvertingMismatch(
11-
/**Name of the column with mismatch*/
11+
/** Name of the column with mismatch */
1212
public open val column: String,
13-
/**Number of first row with mismatch (0-based) if defined*/
13+
/** Number of first row with mismatch (0-based) if defined */
1414
public open val row: Int?,
15-
/**Original exception if exist*/
16-
public open val cause: Exception?
15+
/** Original exception if exist */
16+
public open val cause: Exception?,
1717
) {
1818

1919
public sealed class WideningMismatch(column: String) : ConvertingMismatch(column, null, null) {
2020
public data class AddedColumn(override val column: String) : WideningMismatch(column) {
2121
override fun toString(): String = "Added column \"$column\" not described in target schema"
2222
}
23+
2324
public data class RejectedColumn(override val column: String) : WideningMismatch(column) {
2425
override fun toString(): String = "Column \"$column\" is not described in target schema and was ignored"
2526
}
2627
}
2728

2829
public sealed class NarrowingMismatch(column: String) : ConvertingMismatch(column, null, null) {
2930
public data class NotPresentedColumnIgnored(override val column: String) : NarrowingMismatch(column) {
30-
override fun toString(): String = "Not nullable column \"$column\" is not presented in actual data, saving as is"
31+
override fun toString(): String =
32+
"Not nullable column \"$column\" is not presented in actual data, saving as is"
3133
}
34+
3235
public data class NotPresentedColumnError(override val column: String) : NarrowingMismatch(column) {
33-
override fun toString(): String = "Not nullable column \"$column\" is not presented in actual data, can not save"
36+
override fun toString(): String =
37+
"Not nullable column \"$column\" is not presented in actual data, can not save"
3438
}
3539
}
3640

3741
public sealed class TypeConversionNotFound(
3842
column: String,
39-
cause: TypeConverterNotFoundException
43+
cause: TypeConverterNotFoundException,
4044
) : ConvertingMismatch(column, null, cause) {
41-
public data class ConversionNotFoundIgnored(override val column: String, override val cause: TypeConverterNotFoundException) : TypeConversionNotFound(column, cause) {
45+
46+
public data class ConversionNotFoundIgnored(
47+
override val column: String,
48+
override val cause: TypeConverterNotFoundException,
49+
) : TypeConversionNotFound(column, cause) {
4250
override fun toString(): String = "${cause.message} for column \"$column\", saving as is"
4351
}
44-
public data class ConversionNotFoundError(override val column: String, val e: TypeConverterNotFoundException) : TypeConversionNotFound(column, e) {
52+
53+
public data class ConversionNotFoundError(
54+
override val column: String,
55+
val e: TypeConverterNotFoundException,
56+
) : TypeConversionNotFound(column, e) {
4557
override fun toString(): String = "${e.message} for column \"$column\", can not save"
4658
}
4759
}
4860

4961
public sealed class TypeConversionFail(
50-
column: String, row: Int?,
51-
public override val cause: CellConversionException
62+
column: String,
63+
row: Int?,
64+
public override val cause: CellConversionException,
5265
) : ConvertingMismatch(column, row, cause) {
53-
public data class ConversionFailIgnored(override val column: String, override val row: Int?, override val cause: CellConversionException) : TypeConversionFail(column, row, cause) {
66+
67+
public data class ConversionFailIgnored(
68+
override val column: String,
69+
override val row: Int?,
70+
override val cause: CellConversionException,
71+
) : TypeConversionFail(column, row, cause) {
5472
override fun toString(): String = "${cause.message}, saving as is"
5573
}
56-
public data class ConversionFailError(override val column: String, override val row: Int?, override val cause: CellConversionException) : TypeConversionFail(column, row, cause) {
74+
75+
public data class ConversionFailError(
76+
override val column: String,
77+
override val row: Int?,
78+
override val cause: CellConversionException,
79+
) : TypeConversionFail(column, row, cause) {
5780
override fun toString(): String = "${cause.message}, can not save"
5881
}
5982
}
6083

6184
public data class SavedAsString(
6285
override val column: String,
63-
val type: Class<*>
86+
val type: Class<*>,
6487
) : ConvertingMismatch(column, null, null) {
6588
override fun toString(): String = "Column \"$column\" has type ${type.canonicalName}, will be saved as String\""
6689
}
6790

6891
public sealed class NullableMismatch(
6992
column: String,
70-
row: Int?
93+
row: Int?,
7194
) : ConvertingMismatch(column, row, null) {
72-
public data class NullValueIgnored(override val column: String, override val row: Int?) : NullableMismatch(column, row) {
73-
override fun toString(): String = "Column \"$column\" contains nulls in row $row but expected not nullable, saving as is"
95+
public data class NullValueIgnored(
96+
override val column: String,
97+
override val row: Int?,
98+
) : NullableMismatch(column, row) {
99+
override fun toString(): String =
100+
"Column \"$column\" contains nulls in row $row but expected not nullable, saving as is"
74101
}
75-
public data class NullValueError(override val column: String, override val row: Int?) : NullableMismatch(column, row) {
76-
override fun toString(): String = "Column \"$column\" contains nulls in row $row but expected not nullable, can not save"
102+
103+
public data class NullValueError(
104+
override val column: String,
105+
override val row: Int?,
106+
) : NullableMismatch(column, row) {
107+
override fun toString(): String =
108+
"Column \"$column\" contains nulls in row $row but expected not nullable, can not save"
77109
}
78110
}
79111
}
80112

81-
public class ConvertingException(public val mismatchCase: ConvertingMismatch) : IllegalArgumentException(mismatchCase.toString(), mismatchCase.cause)
113+
public class ConvertingException(public val mismatchCase: ConvertingMismatch) :
114+
IllegalArgumentException(mismatchCase.toString(), mismatchCase.cause)

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public fun DataFrame.Companion.readArrowIPC(
5252
allocator: RootAllocator = Allocator.ROOT,
5353
nullability: NullabilityOptions = NullabilityOptions.Infer,
5454
): AnyFrame = readArrowIPCImpl(channel, allocator, nullability)
55+
5556
/**
5657
* Read [Arrow random access format](https://arrow.apache.org/docs/java/ipc.html#writing-and-reading-random-access-files) data from existing [channel]
5758
*/

0 commit comments

Comments
 (0)