@@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.io
2
2
3
3
import org.apache.commons.csv.CSVFormat
4
4
import org.apache.commons.csv.CSVRecord
5
+ import org.apache.commons.io.input.BOMInputStream
5
6
import org.jetbrains.kotlinx.dataframe.AnyFrame
6
7
import org.jetbrains.kotlinx.dataframe.AnyRow
7
8
import org.jetbrains.kotlinx.dataframe.DataColumn
@@ -19,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
19
20
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
20
21
import org.jetbrains.kotlinx.dataframe.impl.api.parse
21
22
import org.jetbrains.kotlinx.dataframe.values
23
+ import java.io.BufferedInputStream
22
24
import java.io.BufferedReader
23
25
import java.io.File
24
26
import java.io.FileInputStream
@@ -272,21 +274,20 @@ public fun DataFrame.Companion.readDelim(
272
274
duplicate : Boolean = true,
273
275
charset : Charset = defaultCharset,
274
276
parserOptions : ParserOptions ? = null,
275
- ): AnyFrame =
276
- if (isCompressed) {
277
- InputStreamReader (GZIPInputStream (inStream), charset)
278
- } else {
279
- BufferedReader (InputStreamReader (inStream, charset))
280
- }.run {
281
- readDelim(
282
- this ,
283
- getFormat(csvType, delimiter, header, duplicate),
284
- colTypes,
285
- skipLines,
286
- readLines,
287
- parserOptions,
288
- )
289
- }
277
+ ): AnyFrame {
278
+ val bufferedInStream = BufferedInputStream (if (isCompressed) GZIPInputStream (inStream) else inStream)
279
+ val bomIn = BOMInputStream .builder().setInputStream(bufferedInStream).get()
280
+ val bufferedReader = BufferedReader (InputStreamReader (bomIn, charset))
281
+
282
+ return readDelim(
283
+ reader = bufferedReader,
284
+ format = getFormat(csvType, delimiter, header, duplicate),
285
+ colTypes = colTypes,
286
+ skipLines = skipLines,
287
+ readLines = readLines,
288
+ parserOptions = parserOptions,
289
+ )
290
+ }
290
291
291
292
public enum class ColType {
292
293
Int ,
0 commit comments