Skip to content

Commit 0089ed3

Browse files
authored
Merge pull request #173 from Kotlin/new-open-api
OpenAPI/Swagger JSON type schema support + many small fixes I came across
2 parents 48a3594 + 4673b9c commit 0089ed3

File tree

119 files changed

+14197
-899
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+14197
-899
lines changed

.editorconfig

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@ indent_size=4
77
max_line_length=120
88

99
[*.json]
10-
indent_size=2
10+
indent_size=2
11+
12+
[*.yaml]
13+
indent_size=2

build.gradle.kts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ plugins {
99
kotlin("libs.publisher") version libs.versions.libsPublisher
1010
kotlin("plugin.serialization") version libs.versions.kotlin
1111
id("org.jetbrains.kotlinx.dataframe") version libs.versions.dataframe apply false
12+
kotlin("jupyter.api") version libs.versions.kotlinJupyter apply false
1213

1314
id("org.jetbrains.dokka") version libs.versions.dokka
1415
id("org.jetbrains.kotlinx.kover") version libs.versions.kover
@@ -73,14 +74,13 @@ group = "org.jetbrains.kotlinx"
7374
fun detectVersion(): String {
7475
val buildNumber = rootProject.findProperty("build.number") as String?
7576
val versionProp = property("version") as String
76-
return if(buildNumber != null) {
77+
return if (buildNumber != null) {
7778
if (rootProject.findProperty("build.number.detection") == "true") {
7879
"$versionProp-dev-$buildNumber"
7980
} else {
8081
buildNumber
8182
}
82-
}
83-
else if(hasProperty("release")) {
83+
} else if (hasProperty("release")) {
8484
versionProp
8585
} else {
8686
"$versionProp-dev"
@@ -104,15 +104,15 @@ kotlinPublications {
104104
fairDokkaJars.set(false)
105105

106106
sonatypeSettings(
107-
project.findProperty("kds.sonatype.user") as String?,
108-
project.findProperty("kds.sonatype.password") as String?,
109-
"dataframe project, v. ${project.version}"
107+
project.findProperty("kds.sonatype.user") as String?,
108+
project.findProperty("kds.sonatype.password") as String?,
109+
"dataframe project, v. ${project.version}"
110110
)
111111

112112
signingCredentials(
113-
project.findProperty("kds.sign.key.id") as String?,
114-
project.findProperty("kds.sign.key.private") as String?,
115-
project.findProperty("kds.sign.key.passphrase") as String?
113+
project.findProperty("kds.sign.key.id") as String?,
114+
project.findProperty("kds.sign.key.private") as String?,
115+
project.findProperty("kds.sign.key.passphrase") as String?
116116
)
117117

118118
pom {

core/build.gradle.kts

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
21
@Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage")
32
plugins {
43
kotlin("jvm")
54
kotlin("libs.publisher")
65
kotlin("plugin.serialization")
7-
kotlin("jupyter.api") version libs.versions.kotlinJupyter
6+
kotlin("jupyter.api")
87

98
id("io.github.devcrocod.korro") version libs.versions.korro
109
id("org.jetbrains.dataframe.generator")
@@ -25,16 +24,16 @@ repositories {
2524
}
2625

2726
dependencies {
27+
api(libs.kotlin.reflect)
2828
implementation(libs.kotlin.stdlib)
2929
implementation(libs.kotlin.stdlib.jdk8)
30-
implementation(libs.kotlin.reflect)
3130

3231
api(libs.commonsCsv)
3332
implementation(libs.klaxon)
3433
implementation(libs.fuel)
3534

36-
implementation(libs.kotlin.datetimeJvm)
37-
implementation("com.squareup:kotlinpoet:1.11.0")
35+
api(libs.kotlin.datetimeJvm)
36+
implementation(libs.kotlinpoet)
3837

3938
testImplementation(libs.junit)
4039
testImplementation(libs.kotestAssertions) {
@@ -114,7 +113,8 @@ kotlinter {
114113
"experimental:annotation",
115114
"max-line-length",
116115
"filename",
117-
"comment-spacing"
116+
"comment-spacing",
117+
"curly-spacing",
118118
)
119119
}
120120

@@ -137,10 +137,12 @@ tasks.withType<org.jetbrains.kotlin.gradle.tasks.KotlinCompile> {
137137
tasks.test {
138138
maxHeapSize = "2048m"
139139
extensions.configure(kotlinx.kover.api.KoverTaskExtension::class) {
140-
excludes.set(listOf(
141-
"org.jetbrains.kotlinx.dataframe.jupyter.*",
142-
"org.jetbrains.kotlinx.dataframe.jupyter.SampleNotebooksTests"
143-
))
140+
excludes.set(
141+
listOf(
142+
"org.jetbrains.kotlinx.dataframe.jupyter.*",
143+
"org.jetbrains.kotlinx.dataframe.jupyter.SampleNotebooksTests"
144+
)
145+
)
144146
}
145147
}
146148

@@ -168,6 +170,7 @@ artifacts {
168170
}
169171
}
170172

173+
// Disable and then re-enable this block if updating the plugin breaks the build
171174
dataframes {
172175
schema {
173176
sourceSet = "test"

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
1313
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
1414
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1515
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
16+
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
1617
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
1718
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
1819
import kotlin.reflect.KType
@@ -53,10 +54,13 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
5354

5455
// region get columns
5556

56-
override operator fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> = getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns)
57+
override operator fun <C> get(columns: ColumnsSelector<T, C>): List<DataColumn<C>> =
58+
getColumnsImpl(UnresolvedColumnsPolicy.Fail, columns)
59+
5760
public operator fun get(first: Column, vararg other: Column): DataFrame<T> = select(listOf(first) + other)
5861
public operator fun get(first: String, vararg other: String): DataFrame<T> = select(listOf(first) + other)
59-
public operator fun get(columnRange: ClosedRange<String>): DataFrame<T> = select { columnRange.start..columnRange.endInclusive }
62+
public operator fun get(columnRange: ClosedRange<String>): DataFrame<T> =
63+
select { columnRange.start..columnRange.endInclusive }
6064

6165
// endregion
6266

@@ -65,8 +69,11 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
6569
public operator fun get(index: Int): DataRow<T>
6670
public operator fun get(indices: Iterable<Int>): DataFrame<T> = getRows(indices)
6771
public operator fun get(range: IntRange): DataFrame<T> = getRows(range)
68-
public operator fun get(vararg ranges: IntRange): DataFrame<T> = getRows(ranges.asSequence().flatMap { it.asSequence() }.asIterable())
69-
public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame<T> = get(headPlusIterable(firstIndex, otherIndices.asIterable()))
72+
public operator fun get(first: IntRange, vararg ranges: IntRange): DataFrame<T> =
73+
getRows(headPlusArray(first, ranges).asSequence().flatMap { it.asSequence() }.asIterable())
74+
75+
public operator fun get(firstIndex: Int, vararg otherIndices: Int): DataFrame<T> =
76+
get(headPlusIterable(firstIndex, otherIndices.asIterable()))
7077

7178
// endregion
7279

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
package org.jetbrains.kotlinx.dataframe.annotations
22

3+
import org.jetbrains.kotlinx.dataframe.api.JsonPath
4+
import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
5+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
6+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
7+
import org.jetbrains.kotlinx.dataframe.io.JSON
8+
39
/**
410
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
5-
* Data must be of supported format: CSV, JSON, Apache Arrow, Excel.
11+
* Data must be of supported format: CSV, JSON, Apache Arrow, Excel, OpenAPI (Swagger) in YAML/JSON.
612
* Generated data schema has properties inferred from data and a companion object with `read method`.
713
* `read method` is either `readCSV` or `readJson` that returns `DataFrame<name>`
814
*
@@ -15,7 +21,8 @@ package org.jetbrains.kotlinx.dataframe.annotations
1521
* @param normalizationDelimiters if not empty, split property names by delimiters,
1622
* lowercase parts and join to camel case. Set empty list to disable normalization
1723
* @param withDefaultPath if `true`, generate `defaultPath` property to the data schema's companion object and make it default argument for a `read method`
18-
* @param csvOptions options to parse CSV data. Not used when data is JSON
24+
* @param csvOptions options to parse CSV data. Not used when data is not Csv
25+
* @param jsonOptions options to parse JSON data. Not used when data is not Json
1926
*/
2027
@Retention(AnnotationRetention.SOURCE)
2128
@Target(AnnotationTarget.FILE)
@@ -26,13 +33,29 @@ public annotation class ImportDataSchema(
2633
val visibility: DataSchemaVisibility = DataSchemaVisibility.IMPLICIT_PUBLIC,
2734
val normalizationDelimiters: CharArray = ['\t', ' ', '_'],
2835
val withDefaultPath: Boolean = true,
29-
val csvOptions: CsvOptions = CsvOptions(',')
36+
val csvOptions: CsvOptions = CsvOptions(','),
37+
val jsonOptions: JsonOptions = JsonOptions(),
3038
)
3139

3240
public enum class DataSchemaVisibility {
3341
INTERNAL, IMPLICIT_PUBLIC, EXPLICIT_PUBLIC
3442
}
3543

3644
public annotation class CsvOptions(
37-
val delimiter: Char
45+
public val delimiter: Char,
46+
)
47+
48+
public annotation class JsonOptions(
49+
50+
/** Allows the choice of how to handle type clashes when reading a JSON file. */
51+
public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS,
52+
53+
/**
54+
* List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]>
55+
* will be created.
56+
*
57+
* Example:
58+
* `["""$["store"]["book"][*]["author"]"""]`
59+
*/
60+
public val keyValuePaths: Array<String> = [],
3861
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataRowApi.kt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,17 @@ public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey
6767

6868
@OptIn(ExperimentalTypeInference::class)
6969
@OverloadResolutionByLambdaReturnType
70-
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? = prev()?.let { p -> expression(this, this) - expression(p, p) }
70+
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? =
71+
prev()?.let { p -> expression(this, this) - expression(p, p) }
7172

72-
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? = prev()?.let { p -> expression(this, this) - expression(p, p) }
73+
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? =
74+
prev()?.let { p -> expression(this, this) - expression(p, p) }
7375

74-
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? = prev()?.let { p -> expression(this, this) - expression(p, p) }
76+
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? =
77+
prev()?.let { p -> expression(this, this) - expression(p, p) }
7578

76-
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? = prev()?.let { p -> expression(this, this) - expression(p, p) }
79+
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? =
80+
prev()?.let { p -> expression(this, this) - expression(p, p) }
7781

7882
public fun AnyRow.columnsCount(): Int = df().ncol
7983
public fun AnyRow.columnNames(): List<String> = df().columnNames()
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.intellij.lang.annotations.Language
4+
import java.io.Serializable
5+
6+
/**
7+
* Simplistic JSON path implementation.
8+
* Supports just keys (in bracket notation), double quotes, arrays and wildcards.
9+
*
10+
* Examples:
11+
* `$["store"]["book"][*]["author"]`
12+
*
13+
* `$[1]` will match `$[*]`
14+
*/
15+
@JvmInline
16+
public value class JsonPath(@Language("jsonpath") public val path: String = "$") : Serializable {
17+
18+
public fun append(name: String): JsonPath = JsonPath("$path[\"$name\"]")
19+
20+
public fun appendWildcard(): JsonPath = JsonPath("$path[*]")
21+
22+
public fun appendArrayWithIndex(index: Int): JsonPath = JsonPath("$path[$index]")
23+
24+
public fun appendArrayWithWildcard(): JsonPath = JsonPath("$path[*]")
25+
26+
public fun replaceLastWildcardWithIndex(index: Int): JsonPath = JsonPath(
27+
path.toCharArray().let { chars ->
28+
val lastStarIndex = chars.lastIndexOf('*')
29+
chars.flatMapIndexed { i, c ->
30+
if (i == lastStarIndex) index.toString().toCharArray().toList()
31+
else listOf(c)
32+
}.joinToString("")
33+
}
34+
)
35+
36+
public fun prepend(name: String): JsonPath = JsonPath(
37+
"\$[\"$name\"]" + path.removePrefix("$")
38+
)
39+
40+
public fun prependWildcard(): JsonPath = JsonPath(
41+
"\$[*]" + path.removePrefix("$")
42+
)
43+
44+
public fun prependArrayWithIndex(index: Int): JsonPath = JsonPath(
45+
"\$[$index]" + path.removePrefix("$")
46+
)
47+
48+
public fun prependArrayWithWildcard(): JsonPath = JsonPath(
49+
"\$[*]" + path.removePrefix("$")
50+
)
51+
52+
public fun erasedIndices(): JsonPath = JsonPath(
53+
path.replace("""\[[0-9]+]""".toRegex(), "[*]")
54+
)
55+
56+
private fun splitPath() = path.split("[", "]").filter { it.isNotBlank() }
57+
58+
public fun matches(other: JsonPath): Boolean =
59+
path == other.path ||
60+
run {
61+
val path = splitPath()
62+
val otherPath = other.splitPath()
63+
64+
if (path.size != otherPath.size) false
65+
else path.zip(otherPath).all { (p, o) ->
66+
p == o || p == "*" || o == "*"
67+
}
68+
}
69+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
4+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
5+
6+
/** A [DataSchema] interface / class can implement this if it represents a map-like data schema (i.e. key: value pairs). */
7+
@DataSchema
8+
public interface KeyValueProperty<T> {
9+
// needs to be explicitly overridden in a @DataSchema interface, otherwise extension functions won't be generated (TODO)
10+
public val key: String
11+
12+
// needs to be explicitly overridden in a @DataSchema interface, otherwise the type will be read as `T` and extensions won't be generated (TODO)
13+
@ColumnName("value")
14+
public val `value`: T
15+
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ public fun <T, G> DataFrame<T>.asGroupBy(selector: ColumnSelector<T, DataFrame<G
298298

299299
public fun <T> DataRow<T>.toDataFrame(): DataFrame<T> = owner[index..index]
300300

301-
public fun AnyRow.toMap(): Map<String, Any?> = df().columns().map { it.name() to it[index] }.toMap()
301+
public fun AnyRow.toMap(): Map<String, Any?> = df().columns().associate { it.name() to it[index] }
302302

303303
// endregion
304304

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/all.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ import org.jetbrains.kotlinx.dataframe.index
1212

1313
// region DataColumn
1414

15+
/** Returns `true` if all [values] match the given [predicate] or [values] is empty. */
1516
public fun <T> DataColumn<T>.all(predicate: Predicate<T>): Boolean = values.all(predicate)
1617

18+
/** Returns `true` if all [values] are `null` or [values] is empty. */
1719
public fun <C> DataColumn<C>.allNulls(): Boolean = size == 0 || all { it == null }
1820

1921
// endregion
@@ -26,6 +28,7 @@ public fun AnyRow.allNA(): Boolean = owner.columns().all { it[index].isNA }
2628

2729
// region DataFrame
2830

31+
/** Returns `true` if all [rows] match the given [predicate] or [rows] is empty. */
2932
public fun <T> DataFrame<T>.all(predicate: RowFilter<T>): Boolean = rows().all { predicate(it, it) }
3033

3134
// endregion

0 commit comments

Comments
 (0)