
Commit 308e456

DataFrame reading refactored.
* Remove duplicate code.
* Add read functions to `SupportedFormats` enum class.
* Rename `headers` argument to `header`.
* Add `header` argument for JSON readers.
* Add `DataColumn<Iterable<*>>.splitInto(columnNames)`.
1 parent 9a309bf commit 308e456

11 files changed: +279 -177 lines changed

docs/StardustDocs/topics/read.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -25,15 +25,15 @@ DataFrame.readCSV(URL("https://raw.githubusercontent.com/Kotlin/dataframe/master
 
 All `readCSV` overloads support different options.
 For example, you can specify custom delimiter if it differs from `,`, charset
-and headers names if your CSV is missing them
+and column names if your CSV is missing them
 
 <!---FUN readCsvCustom-->
 
 ```kotlin
 val df = DataFrame.readCSV(
     file,
     delimiter = '|',
-    headers = listOf("A", "B", "C", "D"),
+    header = listOf("A", "B", "C", "D"),
     parserOptions = ParserOptions(nullStrings = setOf("not assigned"))
 )
 ```
````
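Callers that passed this argument by name need a one-word change after the rename. A minimal before/after sketch (the file and column names are illustrative):

```kotlin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.readCSV
import java.io.File

fun main() {
    // Before this commit the named argument was `headers`:
    // val df = DataFrame.readCSV(File("report.csv"), delimiter = '|', headers = listOf("A", "B", "C", "D"))

    // After this commit the argument is called `header`:
    val df = DataFrame.readCSV(
        File("report.csv"), // illustrative file
        delimiter = '|',
        header = listOf("A", "B", "C", "D")
    )
    println(df)
}
```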

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt

Lines changed: 7 additions & 0 deletions
```diff
@@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.AnyFrame
 import org.jetbrains.kotlinx.dataframe.ColumnsSelector
+import org.jetbrains.kotlinx.dataframe.DataColumn
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.DataRow
 import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
@@ -291,3 +292,9 @@ public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.inplace(): DataFra
 public fun <T, C, R> SplitWithTransform<T, C, R>.inplace(): DataFrame<T> = df.convert(columns).splitInplace(tartypeOf, transform)
 
 // endregion
+
+// region DataColumn
+
+public fun DataColumn<Iterable<*>>.splitInto(vararg names: String): AnyFrame = toDataFrame().split { this@splitInto }.into(*names)
+
+// endregion
```
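A minimal sketch of the new extension in use; the column name and values are illustrative, and the call assumes the `splitInto` overload added in this commit:

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    // A column where every row holds an Iterable of values.
    val measurements = columnOf(listOf(1, 2), listOf(3, 4)) named "measurements"

    // Each element of the Iterable becomes its own column in the resulting frame.
    val df = measurements.splitInto("first", "second")
    println(df)
}
```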

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrow.kt

Lines changed: 7 additions & 13 deletions
```diff
@@ -35,6 +35,7 @@ import org.jetbrains.kotlinx.dataframe.api.Infer
 import org.jetbrains.kotlinx.dataframe.api.concat
 import org.jetbrains.kotlinx.dataframe.api.toDataFrame
 import java.io.File
+import java.io.InputStream
 import java.math.BigDecimal
 import java.math.BigInteger
 import java.net.URL
@@ -174,23 +175,16 @@ public fun DataFrame.Companion.readArrow(file: File): AnyFrame {
     return Files.newByteChannel(file.toPath()).use { readArrow(it) }
 }
 
-public fun DataFrame.Companion.readArrow(url: URL): AnyFrame {
+public fun DataFrame.Companion.readArrow(stream: InputStream): AnyFrame = Channels.newChannel(stream).use { readArrow(it) }
+
+public fun DataFrame.Companion.readArrow(url: URL): AnyFrame =
     when {
-        setOf("http", "https", "ftp").any { url.protocol == it } -> {
-            url.openStream().use { stream ->
-                Channels.newChannel(stream).use { channel ->
-                    return readArrow(channel)
-                }
-            }
-        }
-        setOf("file").any { url.protocol == it } -> {
-            return readArrow(File(url.path))
-        }
+        url.isFile() -> readArrow(url.asFile())
+        url.isProtocolSupported() -> url.openStream().use { readArrow(it) }
         else -> {
-            throw IllegalArgumentException("invalid protocol for url $url")
+            throw IllegalArgumentException("Invalid protocol for url $url")
        }
    }
-}
 
 public fun DataFrame.Companion.readArrow(path: String): AnyFrame = if (path.isURL()) {
     readArrow(URL(path))
```
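A short sketch of the new `InputStream` overload; the file name is illustrative, and it assumes an Arrow file that the existing `readArrow` channel-based reader can parse:

```kotlin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.readArrow
import java.io.File

fun main() {
    // The overload wraps the stream in a channel internally, so any InputStream works.
    File("measurements.arrow").inputStream().use { stream ->
        val df = DataFrame.readArrow(stream)
        println(df.rowsCount())
    }
}
```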

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt

Lines changed: 10 additions & 2 deletions
```diff
@@ -6,14 +6,14 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
 import org.jetbrains.kotlinx.dataframe.api.toDataFrame
 import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
+import java.io.File
 import java.io.IOException
 import java.io.InputStream
 import java.net.URL
 
 internal fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFrame {
     try {
-        val stream = url.openStream()
-        return body(stream)
+        return url.openStream().use(body)
     } catch (e: IOException) {
         if (e.message?.startsWith("Server returned HTTP response code") == true) {
            val (_, response, _) = url.toString().httpGet().responseString()
@@ -51,3 +51,11 @@ public fun <T> List<List<T>>.toDataFrame(containsColumns: Boolean = false): AnyF
 }
 
 internal fun String.isURL(): Boolean = listOf("http:", "https:", "ftp:").any { startsWith(it) }
+
+internal fun URL.isFile(): Boolean = protocol == "file"
+
+internal fun URL.asFileOrNull(): File? = if (isFile()) File(path) else null
+
+internal fun URL.asFile(): File = asFileOrNull()!!
+
+internal fun URL.isProtocolSupported(): Boolean = protocol in setOf("http", "https", "ftp")
```
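These helpers stay `internal`, so library users never call them directly; the standalone sketch below re-creates them only to illustrate how the readers dispatch on the URL protocol (the URLs are illustrative):

```kotlin
import java.io.File
import java.net.URL

// Standalone re-creation of the internal helpers added in common.kt.
fun URL.isFile(): Boolean = protocol == "file"
fun URL.asFileOrNull(): File? = if (isFile()) File(path) else null
fun URL.isProtocolSupported(): Boolean = protocol in setOf("http", "https", "ftp")

fun main() {
    val local = URL("file:///tmp/data.csv")
    val remote = URL("https://example.org/data.csv")

    println(local.isFile())               // true -> read straight from the file system
    println(remote.isProtocolSupported()) // true -> open a stream over the network
}
```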

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 74 additions & 48 deletions
```diff
@@ -53,12 +53,12 @@ public fun DataFrame.Companion.readDelimStr(
     colTypes: Map<String, ColType> = mapOf(),
     skipLines: Int = 0,
     readLines: Int? = null
-): DataFrame<*> = readDelim(StringReader(text), CSVType.DEFAULT.format.withHeader(), colTypes, skipLines, readLines)
+): DataFrame<*> = StringReader(text).use { readDelim(it, CSVType.DEFAULT.format.withHeader(), colTypes, skipLines, readLines) }
 
 public fun DataFrame.Companion.read(
     fileOrUrl: String,
     delimiter: Char,
-    headers: List<String> = listOf(),
+    header: List<String> = listOf(),
     colTypes: Map<String, ColType> = mapOf(),
     skipLines: Int = 0,
     readLines: Int? = null,
@@ -68,7 +68,7 @@ public fun DataFrame.Companion.read(
     catchHttpResponse(asURL(fileOrUrl)) {
         readDelim(
             it, delimiter,
-            headers, isCompressed(fileOrUrl),
+            header, isCompressed(fileOrUrl),
             getCSVType(fileOrUrl), colTypes,
             skipLines, readLines,
             duplicate, charset
@@ -78,7 +78,7 @@ public fun DataFrame.Companion.readCSV(
 public fun DataFrame.Companion.readCSV(
     fileOrUrl: String,
     delimiter: Char = ',',
-    headers: List<String> = listOf(),
+    header: List<String> = listOf(),
     colTypes: Map<String, ColType> = mapOf(),
     skipLines: Int = 0,
     readLines: Int? = null,
@@ -89,7 +89,7 @@ public fun DataFrame.Companion.readCSV(
     catchHttpResponse(asURL(fileOrUrl)) {
         readDelim(
             it, delimiter,
-            headers, isCompressed(fileOrUrl),
+            header, isCompressed(fileOrUrl),
             CSVType.DEFAULT, colTypes,
             skipLines, readLines,
             duplicate, charset,
@@ -100,7 +100,7 @@ public fun DataFrame.Companion.readCSV(
 public fun DataFrame.Companion.readCSV(
     file: File,
     delimiter: Char = ',',
-    headers: List<String> = listOf(),
+    header: List<String> = listOf(),
     colTypes: Map<String, ColType> = mapOf(),
     skipLines: Int = 0,
     readLines: Int? = null,
@@ -110,7 +110,7 @@ public fun DataFrame.Companion.readCSV(
 ): DataFrame<*> =
     readDelim(
         FileInputStream(file), delimiter,
-        headers, isCompressed(file),
+        header, isCompressed(file),
         CSVType.DEFAULT, colTypes,
         skipLines, readLines,
         duplicate, charset,
@@ -120,23 +120,43 @@ public fun DataFrame.Companion.readCSV(
 public fun DataFrame.Companion.readCSV(
     url: URL,
     delimiter: Char = ',',
-    headers: List<String> = listOf(),
+    header: List<String> = listOf(),
     colTypes: Map<String, ColType> = mapOf(),
     skipLines: Int = 0,
     readLines: Int? = null,
     duplicate: Boolean = true,
     charset: Charset = Charsets.UTF_8,
     parserOptions: ParserOptions? = null
 ): DataFrame<*> =
-    readDelim(
+    readCSV(
         url.openStream(), delimiter,
-        headers, isCompressed(url),
-        CSVType.DEFAULT, colTypes,
+        header, isCompressed(url),
+        colTypes,
         skipLines, readLines,
         duplicate, charset,
         parserOptions
     )
 
+public fun DataFrame.Companion.readCSV(
+    stream: InputStream,
+    delimiter: Char = ',',
+    header: List<String> = listOf(),
+    isCompressed: Boolean = false,
+    colTypes: Map<String, ColType> = mapOf(),
+    skipLines: Int = 0,
+    readLines: Int? = null,
+    duplicate: Boolean = true,
+    charset: Charset = Charsets.UTF_8,
+    parserOptions: ParserOptions? = null
+): DataFrame<*> = readDelim(
+    stream, delimiter,
+    header, isCompressed,
+    CSVType.DEFAULT, colTypes,
+    skipLines, readLines,
+    duplicate, charset,
+    parserOptions
+)
+
 private fun getCSVType(path: String): CSVType =
     when (path.substringAfterLast('.').toLowerCase()) {
         "csv" -> CSVType.DEFAULT
@@ -160,13 +180,13 @@ internal fun asURL(fileOrUrl: String): URL = (
     }
 ).toURL()
 
-private fun getFormat(type: CSVType, delimiter: Char, headers: List<String>, duplicate: Boolean): CSVFormat =
-    type.format.withDelimiter(delimiter).withHeader(*headers.toTypedArray()).withAllowDuplicateHeaderNames(duplicate)
+private fun getFormat(type: CSVType, delimiter: Char, header: List<String>, duplicate: Boolean): CSVFormat =
+    type.format.withDelimiter(delimiter).withHeader(*header.toTypedArray()).withAllowDuplicateHeaderNames(duplicate)
 
 public fun DataFrame.Companion.readDelim(
     inStream: InputStream,
     delimiter: Char = ',',
-    headers: List<String> = listOf(),
+    header: List<String> = listOf(),
     isCompressed: Boolean = false,
     csvType: CSVType,
     colTypes: Map<String, ColType> = mapOf(),
@@ -181,7 +201,14 @@ public fun DataFrame.Companion.readDelim(
     } else {
         BufferedReader(InputStreamReader(inStream, charset))
     }.run {
-        readDelim(this, getFormat(csvType, delimiter, headers, duplicate), colTypes, skipLines, readLines, parserOptions)
+        readDelim(
+            this,
+            getFormat(csvType, delimiter, header, duplicate),
+            colTypes,
+            skipLines,
+            readLines,
+            parserOptions
+        )
     }
 
 public enum class ColType {
@@ -222,47 +249,46 @@ public fun DataFrame.Companion.readDelim(
         repeat(skipLines) { reader.readLine() }
     }
 
-    format.parse(reader).use { csvParser ->
-        val records = if (readLines == null) {
-            csvParser.records
-        } else {
-            require(readLines >= 0) { "`readLines` must not be negative" }
-            val records = ArrayList<CSVRecord>(readLines)
-            val iter = csvParser.iterator()
-            var count = readLines ?: 0
-            while (iter.hasNext() && 0 < count--) {
-                records.add(iter.next())
-            }
-            records
+    val csvParser = format.parse(reader)
+    val records = if (readLines == null) {
+        csvParser.records
+    } else {
+        require(readLines >= 0) { "`readLines` must not be negative" }
+        val records = ArrayList<CSVRecord>(readLines)
+        val iter = csvParser.iterator()
+        var count = readLines ?: 0
+        while (iter.hasNext() && 0 < count--) {
+            records.add(iter.next())
        }
+        records
+    }
 
-        val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
-            ?: (1..records[0].count()).map { index -> "X$index" }
+    val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
+        ?: (1..records[0].count()).map { index -> "X$index" }
 
-        val generator = ColumnNameGenerator()
-        val uniqueNames = columnNames.map { generator.addUnique(it) }
+    val generator = ColumnNameGenerator()
+    val uniqueNames = columnNames.map { generator.addUnique(it) }
 
-        val cols = uniqueNames.mapIndexed { colIndex, colName ->
-            val defaultColType = colTypes[".default"]
-            val colType = colTypes[colName] ?: defaultColType
-            var hasNulls = false
-            val values = records.map {
-                it[colIndex].ifEmpty {
-                    hasNulls = true
-                    null
-                }
+    val cols = uniqueNames.mapIndexed { colIndex, colName ->
+        val defaultColType = colTypes[".default"]
+        val colType = colTypes[colName] ?: defaultColType
+        var hasNulls = false
+        val values = records.map {
+            it[colIndex].ifEmpty {
+                hasNulls = true
+                null
            }
-            val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
-            when (colType) {
-                null -> column.tryParse(parserOptions)
-                else -> {
-                    val parser = Parsers[colType.toType()]!!
-                    column.parse(parser, parserOptions)
-                }
+        }
+        val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
+        when (colType) {
+            null -> column.tryParse(parserOptions)
+            else -> {
+                val parser = Parsers[colType.toType()]!!
+                column.parse(parser, parserOptions)
            }
        }
-        return cols.toDataFrame()
    }
+    return cols.toDataFrame()
 }
 
 public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT.withHeader()): Unit =
```
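A short sketch of the new stream-based `readCSV` overload, for example reading a headerless CSV from an arbitrary `InputStream`; the byte source and column names are illustrative:

```kotlin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.readCSV

fun main() {
    // Any InputStream works: a network response body, a classpath resource, or an in-memory buffer.
    val csvBytes = "1|Alice\n2|Bob\n".byteInputStream()

    val df = DataFrame.readCSV(
        csvBytes,
        delimiter = '|',
        header = listOf("id", "name") // supply names because the stream has no header row
    )
    println(df)
}
```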
