Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -6310,23 +6310,31 @@ public final class org/jetbrains/kotlinx/dataframe/io/GuessKt {
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun read$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/nio/file/Path;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Ljava/net/URL;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/io/File;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/net/URL;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/nio/file/Path;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataRow (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun readDataRow (Ljava/net/URL;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static final fun readDataRow (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun readDataRow$default (Ljava/io/File;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun readDataRow$default (Ljava/net/URL;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
public static synthetic fun readDataRow$default (Ljava/nio/file/Path;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
}

public final class org/jetbrains/kotlinx/dataframe/io/HtmlKt {
Expand Down Expand Up @@ -6394,8 +6402,10 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/io/SupportedCode
public abstract interface class org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat : org/jetbrains/kotlinx/dataframe/io/SupportedFormat {
public abstract fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public abstract fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat;Ljava/io/File;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat;Ljava/io/InputStream;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat;Ljava/nio/file/Path;Ljava/util/List;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public abstract interface class org/jetbrains/kotlinx/dataframe/io/SupportedFormat {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import java.math.BigInteger
import java.net.URL
import java.nio.charset.Charset
import java.util.zip.GZIPInputStream
import java.nio.file.Path
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.typeOf
Expand All @@ -73,6 +74,10 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
/**
 * Reads [file] by forwarding to `DataFrame.readCSV`, passing this format's
 * configured `delimiter` together with the supplied [header].
 */
override fun readDataFrame(file: File, header: List<String>): AnyFrame {
    return DataFrame.readCSV(file = file, delimiter = delimiter, header = header)
}

/**
 * Reads the file at [path] by converting it to a [java.io.File] and forwarding to
 * `DataFrame.readCSV` with this format's `delimiter` and the supplied [header].
 *
 * NOTE(review): `Path.toFile()` throws `UnsupportedOperationException` for paths that are
 * not on the default file system (e.g. entries of a zip file system) - confirm that
 * callers never pass such paths.
 */
override fun readDataFrame(path: Path, header: List<String>): AnyFrame {
    // The core CSV implementation is deprecated, so go through File to preserve module boundaries.
    val file = path.toFile()
    return DataFrame.readCSV(file = file, delimiter = delimiter, header = header)
}

/** Accepts [ext] only when it is exactly the string `csv` (the comparison is case-sensitive). */
override fun acceptsExtension(ext: String): Boolean {
    return "csv" == ext
}

/** Accepts every [sample] without inspecting it. */
override fun acceptsSample(sample: SupportedFormatSample): Boolean {
    // Extension is enough
    return true
}
Expand Down
45 changes: 30 additions & 15 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/guess.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import java.io.File
import java.io.FileNotFoundException
import java.io.InputStream
import java.net.URL
import java.nio.file.Path
import java.util.ServiceLoader
import kotlin.io.path.extension
import kotlin.reflect.KType

public sealed interface SupportedFormat {
Expand All @@ -35,7 +37,7 @@ public sealed interface SupportedFormat {
public sealed interface SupportedFormatSample {

@JvmInline
public value class DataFile(public val sampleFile: File) : SupportedFormatSample
public value class DataFile(public val sampleFilePath: Path) : SupportedFormatSample

/** A [SupportedFormatSample] that wraps a single [URL], exposed as [sampleUrl]. */
@JvmInline
public value class DataUrl(public val sampleUrl: URL) : SupportedFormatSample
Expand All @@ -57,6 +59,9 @@ public interface SupportedDataFrameFormat : SupportedFormat {
/**
 * Reads a [DataFrame] from [stream].
 *
 * @param header list of strings, empty by default; presumably column names for input
 *   that has no header row - TODO confirm against the implementations.
 */
public fun readDataFrame(stream: InputStream, header: List<String> = emptyList()): DataFrame<*>

/**
 * Reads a [DataFrame] from [file].
 *
 * The default implementation converts [file] with [File.toPath] and calls the [Path]
 * overload with the same [header].
 *
 * NOTE(review): an implementation whose [Path] overload delegates back to this [File]
 * overload must override this one as well, otherwise the two calls recurse forever.
 */
public fun readDataFrame(file: File, header: List<String> = emptyList()): DataFrame<*> {
    val path = file.toPath()
    return readDataFrame(path, header)
}

/**
 * Reads a [DataFrame] from the file at [path].
 *
 * @param header list of strings, empty by default; presumably column names for input
 *   that has no header row - TODO confirm against the implementations.
 */
public fun readDataFrame(path: Path, header: List<String> = emptyList()): DataFrame<*>
}

/**
Expand Down Expand Up @@ -137,10 +142,10 @@ internal fun guessFormatForExtension(
): SupportedFormat? = formats.firstOrNull { it.acceptsExtension(ext) && (sample == null || it.acceptsSample(sample)) }

internal fun guessFormat(
file: File,
path: Path,
formats: List<SupportedFormat> = supportedFormats,
sample: SupportedFormatSample.DataFile? = SupportedFormatSample.DataFile(file),
): SupportedFormat? = guessFormatForExtension(file.extension.lowercase(), formats, sample = sample)
sample: SupportedFormatSample.DataFile? = SupportedFormatSample.DataFile(path),
): SupportedFormat? = guessFormatForExtension(path.extension.lowercase(), formats, sample = sample)

internal fun guessFormat(
url: URL,
Expand Down Expand Up @@ -223,15 +228,15 @@ internal fun DataFrame.Companion.read(
}

internal fun DataFrame.Companion.read(
file: File,
path: Path,
format: SupportedDataFrameFormat? = null,
header: List<String> = emptyList(),
formats: List<SupportedDataFrameFormat> = supportedFormats.filterIsInstance<SupportedDataFrameFormat>(),
): ReadAnyFrame {
if (format != null) return format to format.readDataFrame(file, header = header)
if (format != null) return format to format.readDataFrame(path, header = header)
formats.sortedBy { it.testOrder }.forEach {
try {
return it to it.readDataFrame(file, header = header)
return it to it.readDataFrame(path, header = header)
} catch (e: FileNotFoundException) {
throw e
} catch (e: Exception) {
Expand All @@ -249,16 +254,10 @@ internal data class GeneratedCode(val format: SupportedCodeGenerationFormat, val
/** Combines this code-generation format with [code] into a [GeneratedCode]. */
internal infix fun SupportedCodeGenerationFormat.to(code: Code): GeneratedCode {
    return GeneratedCode(this, code)
}

public fun DataFrame.Companion.read(file: File, header: List<String> = emptyList()): AnyFrame =
read(
file = file,
format = guessFormat(file)?.also {
if (it !is SupportedDataFrameFormat) error("Format $it does not support reading dataframes")
} as SupportedDataFrameFormat?,
header = header,
).df
read(file.toPath(), header)

public fun DataRow.Companion.read(file: File, header: List<String> = emptyList()): AnyRow =
DataFrame.read(file, header).single()
DataFrame.read(file.toPath(), header).single()

public fun DataFrame.Companion.read(url: URL, header: List<String> = emptyList()): AnyFrame =
when {
Expand Down Expand Up @@ -293,3 +292,19 @@ public fun URL.readDataRow(header: List<String> = emptyList()): AnyRow = DataRow
/** Reads this file into a dataframe by calling `DataFrame.read` with this [File] and [header]. */
public fun File.readDataFrame(header: List<String> = emptyList()): AnyFrame {
    val source = this
    return DataFrame.read(source, header)
}

/** Reads this file into a single row by calling `DataRow.read` with this [File] and [header]. */
public fun File.readDataRow(header: List<String> = emptyList()): AnyRow {
    val source = this
    return DataRow.read(source, header)
}

/**
 * Reads the file at [path] into a dataframe, choosing the format from the path itself.
 *
 * The format is guessed with the [Path] overload of `guessFormat`, which derives it from
 * the path's extension through `guessFormatForExtension` using a
 * `SupportedFormatSample.DataFile` sample. When nothing is guessed, `null` is passed on
 * and the internal `read` overload tries the registered dataframe formats in `testOrder`.
 *
 * @param path location of the file to read
 * @param header forwarded unchanged to the selected format reader
 * @return the dataframe produced by the selected format
 * @throws IllegalStateException if the guessed format is not a [SupportedDataFrameFormat]
 */
public fun DataFrame.Companion.read(path: Path, header: List<String> = emptyList()): AnyFrame {
    // Guess from the Path itself rather than from path.toString(): the String overload
    // bypasses the Path-based guessFormat (and its DataFile sample), which silently changes
    // the detection logic compared with the File-based entry point this one replaces.
    val format = when (val guessed = guessFormat(path)) {
        null -> null
        is SupportedDataFrameFormat -> guessed
        else -> error("Format $guessed does not support reading dataframes")
    }
    return read(path = path, format = format, header = header).df
}

/**
 * Reads the file at [path] with `DataFrame.read` and returns the result of calling
 * `single()` on the resulting dataframe.
 */
public fun DataRow.Companion.read(path: Path, header: List<String> = emptyList()): AnyRow {
    val frame = DataFrame.read(path, header)
    return frame.single()
}

/** Reads the file at this path into a dataframe by calling `DataFrame.read` with this [Path] and [header]. */
public fun Path.readDataFrame(header: List<String> = emptyList()): AnyFrame {
    val source = this
    return DataFrame.read(source, header)
}

/** Reads the file at this path into a single row by calling `DataRow.read` with this [Path] and [header]. */
public fun Path.readDataRow(header: List<String> = emptyList()): AnyRow {
    val source = this
    return DataRow.read(source, header)
}
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ public class DataFrameHtmlData(
)

public fun writeHtml(destination: File) {
destination.writeText(toString())
writeHtml(destination.toPath())
}

public fun writeHtml(destination: String) {
Expand All @@ -774,7 +774,7 @@ public class DataFrameHtmlData(

@Deprecated(WRITE_HTML, ReplaceWith(WRITE_HTML_REPLACE), DeprecationLevel.ERROR)
public fun writeHTML(destination: File) {
destination.writeText(toString())
writeHtml(destination.toPath())
}

@Deprecated(WRITE_HTML, ReplaceWith(WRITE_HTML_REPLACE), DeprecationLevel.ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import java.io.FileInputStream
import java.io.InputStream
import java.net.URL
import java.nio.charset.Charset
import java.nio.file.Path

@Deprecated(
message = APACHE_CSV,
Expand All @@ -28,6 +29,10 @@ public class TSV : SupportedDataFrameFormat {

/** Reads [file] by forwarding it and [header] to `DataFrame.readTSV`. */
override fun readDataFrame(file: File, header: List<String>): AnyFrame {
    return DataFrame.readTSV(file, header = header)
}

/**
 * Reads the file at [path] by converting it to a [java.io.File] and forwarding it,
 * together with [header], to `DataFrame.readTSV`.
 *
 * NOTE(review): `Path.toFile()` throws `UnsupportedOperationException` for paths that are
 * not on the default file system - confirm that callers never pass such paths.
 */
override fun readDataFrame(path: Path, header: List<String>): AnyFrame {
    // The legacy TSV implementation lives in this module; go through File to keep its behavior.
    val file = path.toFile()
    return DataFrame.readTSV(file, header = header)
}

/** Accepts [ext] only when it is exactly the string `tsv` (the comparison is case-sensitive). */
override fun acceptsExtension(ext: String): Boolean {
    return "tsv" == ext
}

/** Accepts every [sample] without inspecting it. */
override fun acceptsSample(sample: SupportedFormatSample): Boolean {
    // Extension is enough
    return true
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@ import org.intellij.lang.annotations.Language
import java.io.File
import java.net.URI
import java.net.URL
import java.nio.file.Path

/**
 * Carries the [url] of a data schema to import.
 *
 * Besides the primary [URL] constructor, the location can be given as a [File], a [Path]
 * or a URI string; each secondary constructor converts its argument to a [URL] first.
 */
public class ImportDataSchema(public val url: URL) {

    /** Converts [file] with `File.toURI().toURL()`. */
    public constructor(file: File) : this(url = file.toURI().toURL())

    /** Converts [path] with `Path.toUri().toURL()`. */
    public constructor(path: Path) : this(url = path.toUri().toURL())

    /**
     * Parses [path] with `URI(String)` and converts the result with `toURL()`.
     * The string therefore has to be an absolute URI (one with a scheme), because
     * `URI.toURL()` rejects non-absolute URIs.
     */
    public constructor(path: String) : this(url = URI(path).toURL())
}

/** Wraps [url] in an [ImportDataSchema]. */
public fun importDataSchema(url: URL): ImportDataSchema {
    return ImportDataSchema(url)
}

/** Creates an [ImportDataSchema] from the URI string [path] via the `String` constructor. */
public fun importDataSchema(path: String): ImportDataSchema {
    return ImportDataSchema(path)
}

/** Creates an [ImportDataSchema] from [path] via the [Path] constructor. */
public fun importDataSchema(path: Path): ImportDataSchema {
    return ImportDataSchema(path)
}

/** Creates an [ImportDataSchema] from [file] via the [File] constructor. */
public fun importDataSchema(file: File): ImportDataSchema {
    return ImportDataSchema(file)
}

@Language("kts")
Expand Down Expand Up @@ -50,6 +54,9 @@ internal val importDataSchema =
/** Import the type-only data schema from [path]. */
fun importDataSchema(path: String, name: String): Unit = importDataSchema(URI(path).toURL(), name)

/** Import the type-only data schema from [path]. */
fun importDataSchema(path: Path, name: String): Unit = importDataSchema(path.toUri().toURL(), name)

/** Import the type-only data schema from [file]. */
fun importDataSchema(file: File, name: String): Unit = importDataSchema(file.toURI().toURL(), name)
""".trimIndent()
Loading
Loading