Skip to content

Commit 299adb8

Browse files
[Junie] make Path implementation default
1 parent a1f4c60 commit 299adb8

File tree

18 files changed

+143
-117
lines changed

18 files changed

+143
-117
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ import java.math.BigInteger
5858
import java.net.URL
5959
import java.nio.charset.Charset
6060
import java.util.zip.GZIPInputStream
61+
import java.nio.file.Path
6162
import kotlin.reflect.KClass
6263
import kotlin.reflect.KType
6364
import kotlin.reflect.typeOf
@@ -73,6 +74,10 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
7374
override fun readDataFrame(file: File, header: List<String>): AnyFrame =
7475
DataFrame.readCSV(file = file, delimiter = delimiter, header = header)
7576

77+
override fun readDataFrame(path: Path, header: List<String>): AnyFrame =
78+
// core CSV impl is deprecated, delegate via File to preserve module boundaries
79+
DataFrame.readCSV(file = path.toFile(), delimiter = delimiter, header = header)
80+
7681
override fun acceptsExtension(ext: String): Boolean = ext == "csv"
7782

7883
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,9 @@ public interface SupportedDataFrameFormat : SupportedFormat {
5858
public fun readDataFrame(stream: InputStream, header: List<String> = emptyList()): DataFrame<*>
5959

6060
public fun readDataFrame(file: File, header: List<String> = emptyList()): DataFrame<*>
61+
= readDataFrame(file.toPath(), header)
6162

62-
/**
63-
* Path overload for reading a DataFrame. Default implementation delegates to the [File] overload.
64-
* Implementors are not required to override this method.
65-
*/
66-
public fun readDataFrame(path: Path, header: List<String> = emptyList()): DataFrame<*> =
67-
readDataFrame(path.toFile(), header)
63+
public fun readDataFrame(path: Path, header: List<String> = emptyList()): DataFrame<*>
6864
}
6965

7066
/**
@@ -144,12 +140,6 @@ internal fun guessFormatForExtension(
144140
sample: SupportedFormatSample? = null,
145141
): SupportedFormat? = formats.firstOrNull { it.acceptsExtension(ext) && (sample == null || it.acceptsSample(sample)) }
146142

147-
internal fun guessFormat(
148-
file: File,
149-
formats: List<SupportedFormat> = supportedFormats,
150-
sample: SupportedFormatSample.DataFile? = SupportedFormatSample.DataFile(file),
151-
): SupportedFormat? = guessFormatForExtension(file.extension.lowercase(), formats, sample = sample)
152-
153143
internal fun guessFormat(
154144
url: URL,
155145
formats: List<SupportedFormat> = supportedFormats,
@@ -231,15 +221,15 @@ internal fun DataFrame.Companion.read(
231221
}
232222

233223
internal fun DataFrame.Companion.read(
234-
file: File,
224+
path: Path,
235225
format: SupportedDataFrameFormat? = null,
236226
header: List<String> = emptyList(),
237227
formats: List<SupportedDataFrameFormat> = supportedFormats.filterIsInstance<SupportedDataFrameFormat>(),
238228
): ReadAnyFrame {
239-
if (format != null) return format to format.readDataFrame(file, header = header)
229+
if (format != null) return format to format.readDataFrame(path, header = header)
240230
formats.sortedBy { it.testOrder }.forEach {
241231
try {
242-
return it to it.readDataFrame(file, header = header)
232+
return it to it.readDataFrame(path, header = header)
243233
} catch (e: FileNotFoundException) {
244234
throw e
245235
} catch (e: Exception) {
@@ -257,16 +247,10 @@ internal data class GeneratedCode(val format: SupportedCodeGenerationFormat, val
257247
internal infix fun SupportedCodeGenerationFormat.to(code: Code) = GeneratedCode(this, code)
258248

259249
public fun DataFrame.Companion.read(file: File, header: List<String> = emptyList()): AnyFrame =
260-
read(
261-
file = file,
262-
format = guessFormat(file)?.also {
263-
if (it !is SupportedDataFrameFormat) error("Format $it does not support reading dataframes")
264-
} as SupportedDataFrameFormat?,
265-
header = header,
266-
).df
250+
read(file.toPath(), header)
267251

268252
public fun DataRow.Companion.read(file: File, header: List<String> = emptyList()): AnyRow =
269-
DataFrame.read(file, header).single()
253+
DataFrame.read(file.toPath(), header).single()
270254

271255
public fun DataFrame.Companion.read(url: URL, header: List<String> = emptyList()): AnyFrame =
272256
when {
@@ -304,7 +288,13 @@ public fun File.readDataRow(header: List<String> = emptyList()): AnyRow = DataRo
304288

305289
// Path-based overloads and extensions
306290
public fun DataFrame.Companion.read(path: Path, header: List<String> = emptyList()): AnyFrame =
307-
read(path.toFile(), header)
291+
read(
292+
path = path,
293+
format = guessFormat(path.toString())?.also {
294+
if (it !is SupportedDataFrameFormat) error("Format $it does not support reading dataframes")
295+
} as SupportedDataFrameFormat?,
296+
header = header,
297+
).df
308298

309299
public fun DataRow.Companion.read(path: Path, header: List<String> = emptyList()): AnyRow =
310300
DataFrame.read(path, header).single()

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ public class DataFrameHtmlData(
761761
)
762762

763763
public fun writeHtml(destination: File) {
764-
destination.writeText(toString())
764+
writeHtml(destination.toPath())
765765
}
766766

767767
public fun writeHtml(destination: String) {
@@ -774,7 +774,7 @@ public class DataFrameHtmlData(
774774

775775
@Deprecated(WRITE_HTML, ReplaceWith(WRITE_HTML_REPLACE), DeprecationLevel.ERROR)
776776
public fun writeHTML(destination: File) {
777-
destination.writeText(toString())
777+
writeHtml(destination.toPath())
778778
}
779779

780780
@Deprecated(WRITE_HTML, ReplaceWith(WRITE_HTML_REPLACE), DeprecationLevel.ERROR)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import java.io.FileInputStream
1717
import java.io.InputStream
1818
import java.net.URL
1919
import java.nio.charset.Charset
20+
import java.nio.file.Path
2021

2122
@Deprecated(
2223
message = APACHE_CSV,
@@ -28,6 +29,10 @@ public class TSV : SupportedDataFrameFormat {
2829

2930
override fun readDataFrame(file: File, header: List<String>): AnyFrame = DataFrame.readTSV(file, header = header)
3031

32+
override fun readDataFrame(path: Path, header: List<String>): AnyFrame =
33+
// legacy TSV implementation lives in this module; delegate via File to keep behavior
34+
DataFrame.readTSV(path.toFile(), header = header)
35+
3136
override fun acceptsExtension(ext: String): Boolean = ext == "tsv"
3237

3338
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/importDataSchema.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,20 @@ import org.intellij.lang.annotations.Language
44
import java.io.File
55
import java.net.URI
66
import java.net.URL
7+
import java.nio.file.Path
78

89
public class ImportDataSchema(public val url: URL) {
910
public constructor(path: String) : this(URI(path).toURL())
11+
public constructor(path: Path) : this(path.toUri().toURL())
1012
public constructor(file: File) : this(file.toURI().toURL())
1113
}
1214

1315
public fun importDataSchema(url: URL): ImportDataSchema = ImportDataSchema(url)
1416

1517
public fun importDataSchema(path: String): ImportDataSchema = ImportDataSchema(path)
1618

19+
public fun importDataSchema(path: Path): ImportDataSchema = ImportDataSchema(path)
20+
1721
public fun importDataSchema(file: File): ImportDataSchema = ImportDataSchema(file)
1822

1923
@Language("kts")
@@ -50,6 +54,9 @@ internal val importDataSchema =
5054
/** Import the type-only data schema from [path]. */
5155
fun importDataSchema(path: String, name: String): Unit = importDataSchema(URI(path).toURL(), name)
5256
57+
/** Import the type-only data schema from [path]. */
58+
fun importDataSchema(path: Path, name: String): Unit = importDataSchema(path.toUri().toURL(), name)
59+
5360
/** Import the type-only data schema from [file]. */
5461
fun importDataSchema(file: File, name: String): Unit = importDataSchema(file.toURI().toURL(), name)
5562
""".trimIndent()

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowWriter.kt

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ import java.io.FileOutputStream
1313
import java.io.OutputStream
1414
import java.nio.channels.Channels
1515
import java.nio.channels.WritableByteChannel
16+
import java.nio.file.Files
1617
import java.nio.file.Path
18+
import java.nio.file.StandardOpenOption
1719

1820
public val ignoreMismatchMessage: (ConvertingMismatch) -> Unit = { message: ConvertingMismatch -> }
1921
public val writeMismatchMessage: (ConvertingMismatch) -> Unit = { message: ConvertingMismatch ->
@@ -94,12 +96,16 @@ public interface ArrowWriter : AutoCloseable {
9496
* If the file exists, it is appended to when [append] is `true` and overwritten otherwise.
9597
*/
9698
public fun writeArrowIPC(file: File, append: Boolean = true) {
97-
writeArrowIPC(FileOutputStream(file, append))
99+
writeArrowIPC(file.toPath(), append)
98100
}
99101

100102
/** Path overload for Arrow IPC writing. */
101103
public fun writeArrowIPC(path: Path, append: Boolean = true) {
102-
writeArrowIPC(path.toFile(), append)
104+
val options = if (append) arrayOf(StandardOpenOption.CREATE, StandardOpenOption.APPEND)
105+
else arrayOf(StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)
106+
Files.newOutputStream(path, *options).use { os ->
107+
writeArrowIPC(os)
108+
}
103109
}
104110

105111
/**
@@ -136,12 +142,16 @@ public interface ArrowWriter : AutoCloseable {
136142
* If the file exists, it is overwritten.
137143
*/
138144
public fun writeArrowFeather(file: File) {
139-
writeArrowFeather(FileOutputStream(file))
145+
writeArrowFeather(file.toPath())
140146
}
141147

142148
/** Path overload for Arrow Feather writing. */
143149
public fun writeArrowFeather(path: Path) {
144-
writeArrowFeather(path.toFile())
150+
Files.newOutputStream(
151+
path, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING,
152+
).use { os ->
153+
writeArrowFeather(os)
154+
}
145155
}
146156

147157
/**

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ public class ArrowFeather : SupportedDataFrameFormat {
2626
override fun readDataFrame(file: File, header: List<String>): AnyFrame =
2727
DataFrame.readArrowFeather(file, NullabilityOptions.Widening)
2828

29+
override fun readDataFrame(path: Path, header: List<String>): AnyFrame =
30+
DataFrame.readArrowFeather(path, NullabilityOptions.Widening)
31+
2932
override fun acceptsExtension(ext: String): Boolean = ext == "feather"
3033

3134
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowWriting.kt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ public fun AnyFrame.writeArrowIPC(stream: OutputStream) {
4747
* If the file exists, it is appended to when [append] is `true` and overwritten otherwise.
4848
*/
4949
public fun AnyFrame.writeArrowIPC(file: File, append: Boolean = true) {
50-
this.arrowWriter().use { writer ->
51-
writer.writeArrowIPC(file, append)
52-
}
50+
writeArrowIPC(file.toPath(), append)
5351
}
5452

5553
/** Path overload for IPC writing. */
5654
public fun AnyFrame.writeArrowIPC(path: Path, append: Boolean = true) {
57-
writeArrowIPC(path.toFile(), append)
55+
this.arrowWriter().use { writer ->
56+
writer.writeArrowIPC(path, append)
57+
}
5858
}
5959

6060
/**
@@ -90,14 +90,14 @@ public fun AnyFrame.writeArrowFeather(stream: OutputStream) {
9090
* If file exists, it would be recreated.
9191
*/
9292
public fun AnyFrame.writeArrowFeather(file: File) {
93-
this.arrowWriter().use { writer ->
94-
writer.writeArrowFeather(file)
95-
}
93+
writeArrowFeather(file.toPath())
9694
}
9795

9896
/** Path overload for Feather writing. */
9997
public fun AnyFrame.writeArrowFeather(path: Path) {
100-
writeArrowFeather(path.toFile())
98+
this.arrowWriter().use { writer ->
99+
writer.writeArrowFeather(path)
100+
}
101101
}
102102

103103
/**

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
88
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
99
import java.io.File
1010
import java.io.InputStream
11+
import java.nio.file.Path
1112
import kotlin.reflect.typeOf
1213

1314
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {
@@ -17,6 +18,9 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE
1718
override fun readDataFrame(file: File, header: List<String>): DataFrame<*> =
1819
DataFrame.readCsv(file = file, header = header, delimiter = delimiter)
1920

21+
override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> =
22+
DataFrame.readCsv(path = path, header = header, delimiter = delimiter)
23+
2024
override fun acceptsExtension(ext: String): Boolean = ext == "csv"
2125

2226
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readCsv.kt

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -114,28 +114,25 @@ public fun DataFrame.Companion.readCsv(
114114
parseParallel: Boolean = PARSE_PARALLEL,
115115
compression: Compression<*> = Compression.of(file),
116116
): DataFrame<*> =
117-
FileInputStream(file).use {
118-
readDelimImpl(
119-
inputStream = it,
120-
delimiter = delimiter,
121-
header = header,
122-
hasFixedWidthColumns = hasFixedWidthColumns,
123-
fixedColumnWidths = fixedColumnWidths,
124-
colTypes = colTypes,
125-
skipLines = skipLines,
126-
readLines = readLines,
127-
parserOptions = parserOptions,
128-
ignoreEmptyLines = ignoreEmptyLines,
129-
allowMissingColumns = allowMissingColumns,
130-
ignoreExcessColumns = ignoreExcessColumns,
131-
quote = quote,
132-
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
133-
trimInsideQuoted = trimInsideQuoted,
134-
parseParallel = parseParallel,
135-
compression = compression,
136-
adjustCsvSpecs = ADJUST_CSV_SPECS,
137-
)
138-
}
117+
readCsv(
118+
path = file.toPath(),
119+
delimiter = delimiter,
120+
header = header,
121+
hasFixedWidthColumns = hasFixedWidthColumns,
122+
fixedColumnWidths = fixedColumnWidths,
123+
colTypes = colTypes,
124+
skipLines = skipLines,
125+
readLines = readLines,
126+
parserOptions = parserOptions,
127+
ignoreEmptyLines = ignoreEmptyLines,
128+
allowMissingColumns = allowMissingColumns,
129+
ignoreExcessColumns = ignoreExcessColumns,
130+
quote = quote,
131+
ignoreSurroundingSpaces = ignoreSurroundingSpaces,
132+
trimInsideQuoted = trimInsideQuoted,
133+
parseParallel = parseParallel,
134+
// Forward the caller-supplied compression. Its default is already derived from the file,
// so re-deriving it here would silently discard an explicit `compression` argument.
compression = compression,
135+
)
139136

140137
/**
141138
* @include [CommonReadDelimDocs.CsvDocs]

0 commit comments

Comments
 (0)