Skip to content

Commit 09def22

Browse files
nikitinas authored and koperagen committed
Extract dataframe-arrow subproject
1 parent 562073b commit 09def22

File tree

11 files changed: +53 -61 lines changed

build.gradle.kts

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,6 @@ dependencies {
4545

4646
implementation(libs.kotlin.datetimeJvm)
4747

48-
implementation(libs.arrow.vector)
49-
implementation(libs.arrow.format)
50-
implementation(libs.arrow.memory)
51-
5248
testImplementation(libs.junit)
5349
testImplementation(libs.kotestAssertions) {
5450
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")

dataframe-arrow/build.gradle.kts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
plugins {
2+
kotlin("jvm") apply true
3+
}
4+
5+
repositories {
6+
mavenCentral()
7+
}
8+
9+
dependencies {
10+
api(project(":"))
11+
12+
implementation(libs.arrow.vector)
13+
implementation(libs.arrow.format)
14+
implementation(libs.arrow.memory)
15+
implementation(libs.commonsCompress)
16+
17+
testApi(project(":"))
18+
testImplementation(libs.junit)
19+
testImplementation(libs.kotestAssertions) {
20+
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
21+
}
22+
}

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrow.kt renamed to dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrow.kt

Lines changed: 10 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -196,13 +196,13 @@ private fun readField(root: VectorSchemaRoot, field: Field): AnyBaseColumn {
196196

197197
// IPC reading block
198198

199-
private fun DataFrame.Companion.readArrowIPC(file: File): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowIPC(it) }
199+
public fun DataFrame.Companion.readArrowIPC(file: File): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowIPC(it) }
200200

201-
private fun DataFrame.Companion.readArrowIPC(byteArray: ByteArray): AnyFrame = SeekableInMemoryByteChannel(byteArray).use { readArrowIPC(it) }
201+
public fun DataFrame.Companion.readArrowIPC(byteArray: ByteArray): AnyFrame = SeekableInMemoryByteChannel(byteArray).use { readArrowIPC(it) }
202202

203-
private fun DataFrame.Companion.readArrowIPC(stream: InputStream): AnyFrame = Channels.newChannel(stream).use { readArrowIPC(it) }
203+
public fun DataFrame.Companion.readArrowIPC(stream: InputStream): AnyFrame = Channels.newChannel(stream).use { readArrowIPC(it) }
204204

205-
private fun DataFrame.Companion.readArrowIPC(url: URL): AnyFrame =
205+
public fun DataFrame.Companion.readArrowIPC(url: URL): AnyFrame =
206206
when {
207207
isFile(url) -> readArrowIPC(urlAsFile(url))
208208
isProtocolSupported(url) -> url.openStream().use { readArrowIPC(it) }
@@ -211,21 +211,21 @@ private fun DataFrame.Companion.readArrowIPC(url: URL): AnyFrame =
211211
}
212212
}
213213

214-
private fun DataFrame.Companion.readArrowIPC(path: String): AnyFrame = if (isURL(path)) {
214+
public fun DataFrame.Companion.readArrowIPC(path: String): AnyFrame = if (isURL(path)) {
215215
readArrowIPC(URL(path))
216216
} else {
217217
readArrowIPC(File(path))
218218
}
219219

220220
// Feather reading block
221221

222-
private fun DataFrame.Companion.readArrowFeather(file: File): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowFeather(it) }
222+
public fun DataFrame.Companion.readArrowFeather(file: File): AnyFrame = Files.newByteChannel(file.toPath()).use { readArrowFeather(it) }
223223

224-
private fun DataFrame.Companion.readArrowFeather(byteArray: ByteArray): AnyFrame = SeekableInMemoryByteChannel(byteArray).use { readArrowFeather(it) }
224+
public fun DataFrame.Companion.readArrowFeather(byteArray: ByteArray): AnyFrame = SeekableInMemoryByteChannel(byteArray).use { readArrowFeather(it) }
225225

226-
private fun DataFrame.Companion.readArrowFeather(stream: InputStream): AnyFrame = readArrowFeather(stream.readAllBytes())
226+
public fun DataFrame.Companion.readArrowFeather(stream: InputStream): AnyFrame = readArrowFeather(stream.readAllBytes())
227227

228-
private fun DataFrame.Companion.readArrowFeather(url: URL): AnyFrame =
228+
public fun DataFrame.Companion.readArrowFeather(url: URL): AnyFrame =
229229
when {
230230
isFile(url) -> readArrowFeather(urlAsFile(url))
231231
isProtocolSupported(url) -> readArrowFeather(url.readBytes())
@@ -234,40 +234,8 @@ private fun DataFrame.Companion.readArrowFeather(url: URL): AnyFrame =
234234
}
235235
}
236236

237-
private fun DataFrame.Companion.readArrowFeather(path: String): AnyFrame = if (isURL(path)) {
237+
public fun DataFrame.Companion.readArrowFeather(path: String): AnyFrame = if (isURL(path)) {
238238
readArrowFeather(URL(path))
239239
} else {
240240
readArrowFeather(File(path))
241241
}
242-
243-
// Common reading block
244-
245-
public fun DataFrame.Companion.readArrow(file: File, format: ArrowFormat = ArrowFormat.FEATHER): AnyFrame =
246-
when (format) {
247-
ArrowFormat.IPC -> readArrowIPC(file)
248-
ArrowFormat.FEATHER -> readArrowFeather(file)
249-
}
250-
251-
public fun DataFrame.Companion.readArrow(byteArray: ByteArray, format: ArrowFormat = ArrowFormat.FEATHER): AnyFrame =
252-
when (format) {
253-
ArrowFormat.IPC -> readArrowIPC(byteArray)
254-
ArrowFormat.FEATHER -> readArrowFeather(byteArray)
255-
}
256-
257-
public fun DataFrame.Companion.readArrow(stream: InputStream, format: ArrowFormat = ArrowFormat.IPC): AnyFrame =
258-
when (format) {
259-
ArrowFormat.IPC -> readArrowIPC(stream)
260-
ArrowFormat.FEATHER -> readArrowFeather(stream)
261-
}
262-
263-
public fun DataFrame.Companion.readArrow(url: URL, format: ArrowFormat = ArrowFormat.IPC): AnyFrame =
264-
when (format) {
265-
ArrowFormat.IPC -> readArrowIPC(url)
266-
ArrowFormat.FEATHER -> readArrowFeather(url)
267-
}
268-
269-
public fun DataFrame.Companion.readArrow(path: String, format: ArrowFormat = ArrowFormat.IPC): AnyFrame =
270-
when (format) {
271-
ArrowFormat.IPC -> readArrowIPC(path)
272-
ArrowFormat.FEATHER -> readArrowFeather(path)
273-
}

src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt renamed to dataframe-arrow/src/test/kotlin/ArrowKtTest.kt

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,23 @@
1-
package org.jetbrains.kotlinx.dataframe.io
2-
31
import io.kotest.matchers.shouldBe
42
import org.apache.arrow.vector.util.Text
3+
import org.jetbrains.kotlinx.dataframe.DataFrame
54
import org.jetbrains.kotlinx.dataframe.api.columnOf
65
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
76
import org.jetbrains.kotlinx.dataframe.api.toColumn
8-
import org.jetbrains.kotlinx.dataframe.testArrowFeather
7+
import org.jetbrains.kotlinx.dataframe.io.readArrowFeather
98
import org.junit.Test
9+
import java.net.URL
1010

1111
internal class ArrowKtTest {
12+
13+
fun testResource(resourcePath: String): URL = ArrowKtTest::class.java.classLoader.getResource(resourcePath)!!
14+
15+
fun testArrowFeather(name: String) = testResource("$name.feather")
16+
1217
@Test
1318
fun testReadingFromFile() {
1419
val feather = testArrowFeather("data-arrow_2.0.0_uncompressed")
15-
val df = feather.readDataFrame()
20+
val df = DataFrame.readArrowFeather(feather)
1621
val a by columnOf("one")
1722
val b by columnOf(2.0)
1823
val c by listOf(

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ dataframe = "0.8.0-dev-939"
99
korro = "0.1.1-dev-29"
1010

1111
commonsCsv = "1.8"
12+
commonsCompress = "1.21"
1213
klaxon = "5.5"
1314
fuel = "2.3.1"
1415
poi = "5.2.0"
@@ -30,6 +31,7 @@ kotlin-stdlib-jdk8 = { group = "org.jetbrains.kotlin", name = "kotlin-stdlib-jdk
3031
kotlin-reflect = { group = "org.jetbrains.kotlin", name = "kotlin-reflect", version.ref = "kotlin" }
3132
kotlin-scriptingJvm = { group = "org.jetbrains.kotlin", name = "kotlin-scripting-jvm", version.ref = "kotlin" }
3233
commonsCsv = { module = "org.apache.commons:commons-csv", version.ref = "commonsCsv" }
34+
commonsCompress = { module = "org.apache.commons:commons-compress", version.ref = "commonsCompress" }
3335
klaxon = { module = "com.beust:klaxon", version.ref = "klaxon" }
3436
fuel = { module = "com.github.kittinunf.fuel:fuel", version.ref = "fuel" }
3537
poi = { module = "org.apache.poi:poi", version.ref = "poi" }

settings.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ includeBuild("generator")
77
include("plugins:dataframe-gradle-plugin")
88
include("plugins:symbol-processor")
99
include("tests")
10+
include("dataframe-arrow")
1011

1112
//include("examples:idea-examples:titanic")
1213
//include("examples:idea-examples:movies")

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/CodeGeneratorImpl.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -302,6 +302,6 @@ private fun collectAdditionalImports(format: SupportedFormats): List<String> {
302302
SupportedFormats.TSV -> listOf("import org.jetbrains.kotlinx.dataframe.io.readTSV")
303303
SupportedFormats.JSON -> listOf("import org.jetbrains.kotlinx.dataframe.io.readJson")
304304
SupportedFormats.EXCEL -> listOf("import org.jetbrains.kotlinx.dataframe.io.readExcel")
305-
SupportedFormats.ARROW -> listOf("import org.jetbrains.kotlinx.dataframe.io.readArrow")
305+
// SupportedFormats.ARROW -> listOf("import org.jetbrains.kotlinx.dataframe.io.readArrow")
306306
}
307307
}

src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/SchemaReader.kt

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,13 @@ import org.jetbrains.kotlinx.dataframe.AnyFrame
55
import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.api.schema
77
import org.jetbrains.kotlinx.dataframe.codeGen.CsvOptions
8-
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadArrowMethod
98
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod
109
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
1110
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadExcelMethod
1211
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod
1312
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadTsvMethod
1413
import org.jetbrains.kotlinx.dataframe.io.SupportedFormats
1514
import org.jetbrains.kotlinx.dataframe.io.guessFormat
16-
import org.jetbrains.kotlinx.dataframe.io.readArrow
1715
import org.jetbrains.kotlinx.dataframe.io.readCSV
1816
import org.jetbrains.kotlinx.dataframe.io.readExcel
1917
import org.jetbrains.kotlinx.dataframe.io.readJson
@@ -29,9 +27,9 @@ public val CodeGenerator.Companion.urlReader: (url: URL, csvOptions: CsvOptions)
2927
DfReadResult.Success(DataFrame.readCSV(url, delimiter = delimiter), SupportedFormats.CSV, csvOptions)
3028
}
3129

32-
fun readArrow(url: URL) = run {
30+
/* fun readArrow(url: URL) = run {
3331
DfReadResult.Success(DataFrame.readArrow(url), SupportedFormats.ARROW, csvOptions)
34-
}
32+
} */
3533

3634
fun readTSV(url: URL) = DfReadResult.Success(DataFrame.readTSV(url), SupportedFormats.TSV, csvOptions)
3735

@@ -43,7 +41,7 @@ public val CodeGenerator.Companion.urlReader: (url: URL, csvOptions: CsvOptions)
4341
SupportedFormats.CSV -> readCSV(url)
4442
SupportedFormats.TSV -> readTSV(url)
4543
SupportedFormats.JSON -> readJson(url)
46-
SupportedFormats.ARROW -> readArrow(url)
44+
// SupportedFormats.ARROW -> readArrow(url)
4745
SupportedFormats.EXCEL -> readExcel(url)
4846
null -> try {
4947
readExcel(url)
@@ -77,7 +75,7 @@ public sealed interface DfReadResult {
7775
SupportedFormats.CSV -> DefaultReadCsvMethod(pathRepresentation, csvOptions)
7876
SupportedFormats.JSON -> DefaultReadJsonMethod(pathRepresentation)
7977
SupportedFormats.TSV -> DefaultReadTsvMethod(pathRepresentation)
80-
SupportedFormats.ARROW -> DefaultReadArrowMethod(pathRepresentation)
78+
// SupportedFormats.ARROW -> DefaultReadArrowMethod(pathRepresentation)
8179
SupportedFormats.EXCEL -> DefaultReadExcelMethod(pathRepresentation)
8280
}
8381
}

src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,13 +33,14 @@ public enum class SupportedFormats {
3333

3434
override fun acceptsExtension(ext: String): Boolean = ext == "json"
3535
},
36-
ARROW {
36+
37+
/* ARROW {
3738
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame = DataFrame.readArrow(stream)
3839
3940
override fun readDataFrame(file: File, header: List<String>): AnyFrame = DataFrame.readArrow(file)
4041
4142
override fun acceptsExtension(ext: String): Boolean = ext == "feather"
42-
},
43+
}, */
4344
EXCEL {
4445
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame = DataFrame.readExcel(stream)
4546

@@ -60,7 +61,7 @@ private val testOrder get() = listOf(
6061
SupportedFormats.CSV,
6162
SupportedFormats.TSV,
6263
SupportedFormats.EXCEL,
63-
SupportedFormats.ARROW,
64+
// SupportedFormats.ARROW,
6465
)
6566

6667
internal fun guessFormatForExtension(ext: String) = SupportedFormats.values().firstOrNull { it.acceptsExtension(ext) }

0 commit comments

Comments
 (0)