Skip to content

Commit c0dc1b1

Browse files
Automated commit of generated code
1 parent 9870305 commit c0dc1b1

File tree

2 files changed

+23
-15
lines changed
  • core/generated-sources/src
    • main/kotlin/org/jetbrains/kotlinx/dataframe/io
    • test/kotlin/org/jetbrains/kotlinx/dataframe/io

2 files changed

+23
-15
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.io
22

33
import org.apache.commons.csv.CSVFormat
44
import org.apache.commons.csv.CSVRecord
5+
import org.apache.commons.io.input.BOMInputStream
56
import org.jetbrains.kotlinx.dataframe.AnyFrame
67
import org.jetbrains.kotlinx.dataframe.AnyRow
78
import org.jetbrains.kotlinx.dataframe.DataColumn
@@ -19,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
1920
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
2021
import org.jetbrains.kotlinx.dataframe.impl.api.parse
2122
import org.jetbrains.kotlinx.dataframe.values
23+
import java.io.BufferedInputStream
2224
import java.io.BufferedReader
2325
import java.io.File
2426
import java.io.FileInputStream
@@ -272,21 +274,20 @@ public fun DataFrame.Companion.readDelim(
272274
duplicate: Boolean = true,
273275
charset: Charset = defaultCharset,
274276
parserOptions: ParserOptions? = null,
275-
): AnyFrame =
276-
if (isCompressed) {
277-
InputStreamReader(GZIPInputStream(inStream), charset)
278-
} else {
279-
BufferedReader(InputStreamReader(inStream, charset))
280-
}.run {
281-
readDelim(
282-
this,
283-
getFormat(csvType, delimiter, header, duplicate),
284-
colTypes,
285-
skipLines,
286-
readLines,
287-
parserOptions,
288-
)
289-
}
277+
): AnyFrame {
278+
val bufferedInStream = BufferedInputStream(if (isCompressed) GZIPInputStream(inStream) else inStream)
279+
val bomIn = BOMInputStream.builder().setInputStream(bufferedInStream).get()
280+
val bufferedReader = BufferedReader(InputStreamReader(bomIn, charset))
281+
282+
return readDelim(
283+
reader = bufferedReader,
284+
format = getFormat(csvType, delimiter, header, duplicate),
285+
colTypes = colTypes,
286+
skipLines = skipLines,
287+
readLines = readLines,
288+
parserOptions = parserOptions,
289+
)
290+
}
290291

291292
public enum class ColType {
292293
Int,

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,10 +276,17 @@ class CsvTests {
276276
df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
277277
}
278278

279+
@Test
280+
fun `file with BOM`() {
281+
val df = DataFrame.readCSV(withBomCsv, delimiter = ';')
282+
df.columnNames() shouldBe listOf("Column1", "Column2")
283+
}
284+
279285
companion object {
280286
private val simpleCsv = testCsv("testCSV")
281287
private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale")
282288
private val wineCsv = testCsv("wine")
283289
private val durationCsv = testCsv("duration")
290+
private val withBomCsv = testCsv("with-bom")
284291
}
285292
}

0 commit comments

Comments
 (0)