Merge branch 'master' into jpc-kdoc-reuse

Jolanrensen · Jolanrensen · commit b87085d32c8f · 2023-03-20T12:34:42.000+01:00
# Conflicts:
#	core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt
@@ -108,4 +108,4 @@ public infix fun <T> DataColumn<T>.neq(value: T): DataColumn<Boolean> = isMatchi
 public infix fun <T : Comparable<T>> DataColumn<T>.gt(value: T): DataColumn<Boolean> = isMatching { it > value }
 public infix fun <T : Comparable<T>> DataColumn<T>.lt(value: T): DataColumn<Boolean> = isMatching { it < value }
 
-internal infix fun <T> DataColumn<T>.isMatching(predicate: Predicate<T>): DataColumn<Boolean> = map { predicate(it) }
+internal fun <T> DataColumn<T>.isMatching(predicate: Predicate<T>): DataColumn<Boolean> = map { predicate(it) }
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt
@@ -398,3 +398,35 @@ public fun <T> DataColumn<T?>.dropNA(): DataColumn<T> =
     }
 
 // endregion
+
+// region dropNaNs
+
+public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector<T, *>): DataFrame<T> {
+    val cols = this[selector] // resolve the selected columns once, before filtering rows
+
+    return if (whereAllNaN) drop { cols.all { this[it].isNaN } } // drop a row only if every selected column is NaN
+    else drop { cols.any { this[it].isNaN } } // default: drop a row if any selected column is NaN
+}
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(cols: Iterable<AnyColumnReference>, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumnSet() }
+
+public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { all() }
+
+public fun <T> DataColumn<T>.dropNaNs(): DataColumn<T> =
+    when (typeClass) {
+        Double::class, Float::class -> filter { !it.isNaN }.cast() // keep only the non-NaN values
+        else -> this // any other typeClass: the column is returned as-is
+    }
+
+// endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt
@@ -2,14 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.ColumnsSelector
 import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
 import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
+import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
+import kotlin.reflect.KProperty
 
 // region DataFrame
 
 public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
 
-public fun <T, C> DataFrame<T>.flatten(
-    columns: ColumnsSelector<T, C>
-): DataFrame<T> = flattenImpl(columns)
+public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
+
+public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumns() }
+
+public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> = flattenImpl { columns.toColumns() }
+
+public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
+    flattenImpl { columns.toColumns() }
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt
@@ -22,12 +22,12 @@ public fun <T> DataFrame<T>.group(vararg columns: KProperty<*>): GroupClause<T,
 @JvmName("intoString")
 @OverloadResolutionByLambdaReturnType
 @OptIn(ExperimentalTypeInference::class)
-public infix fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> String): DataFrame<T> = df.move(columns).under { column(it).toColumnAccessor() }
+public fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> String): DataFrame<T> = df.move(columns).under { column(it).toColumnAccessor() }
 
 @JvmName("intoColumn")
-public infix fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> AnyColumnReference): DataFrame<T> = df.move(columns).under(column)
-public infix fun <T, C> GroupClause<T, C>.into(column: String): DataFrame<T> = into(columnGroup().named(column))
-public infix fun <T, C> GroupClause<T, C>.into(column: AnyColumnGroupAccessor): DataFrame<T> = df.move(columns).under(column)
-public infix fun <T, C> GroupClause<T, C>.into(column: KProperty<*>): DataFrame<T> = into(column.columnName)
+public fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> AnyColumnReference): DataFrame<T> = df.move(columns).under(column)
+public fun <T, C> GroupClause<T, C>.into(column: String): DataFrame<T> = into(columnGroup().named(column))
+public fun <T, C> GroupClause<T, C>.into(column: AnyColumnGroupAccessor): DataFrame<T> = df.move(columns).under(column)
+public fun <T, C> GroupClause<T, C>.into(column: KProperty<*>): DataFrame<T> = into(column.columnName)
 
 // endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
@@ -199,7 +199,7 @@ public fun <T, C> Update<T, C>.at(rowRange: IntRange): Update<T, C> = where { in
  *  - {@include [SeeAlsoPerCol]}
  * @param expression The {@include [ExpressionsGivenRowAndColumn.RowColumnExpressionLink]} to provide a new value for every selected cell giving its row and column.
  */
-public infix fun <T, C> Update<T, C>.perRowCol(expression: RowColumnExpression<T, C, C>): DataFrame<T> =
+public fun <T, C> Update<T, C>.perRowCol(expression: RowColumnExpression<T, C, C>): DataFrame<T> =
     updateImpl { row, column, _ -> expression(row, column) }
 
 /** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */
@@ -222,7 +222,7 @@ public typealias UpdateExpression<T, C, R> = AddDataRow<T>.(C) -> R
  * - {@include [SeeAlsoPerRowCol]}
  * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with.
  */
-public infix fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
+public fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
     updateImpl { row, _, value ->
         expression(row, value)
     }
@@ -238,7 +238,7 @@ private interface SeeAlsoWith
  * {@arg [ExpressionsGivenDataFrame.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]}
  * @param expression The {@include [ExpressionsGivenDataFrame.DataFrameExpressionLink]} to replace the selected column group with.
  */
-public infix fun <T, C, R> Update<T, DataRow<C>>.asFrame(expression: DataFrameExpression<C, DataFrame<R>>): DataFrame<T> =
+public fun <T, C, R> Update<T, DataRow<C>>.asFrame(expression: DataFrameExpression<C, DataFrame<R>>): DataFrame<T> =
     asFrameImpl(expression)
 
 @Deprecated(
@@ -447,4 +447,4 @@ public fun <T, C> Update<T, C>.withZero(): DataFrame<T> = updateWithValuePerColu
  *
  * @param value The value to set the selected rows to. In contrast to [with][Update.with], this must be the same exact type.
  */
-public infix fun <T, C> Update<T, C>.withValue(value: C): DataFrame<T> = with { value }
+public fun <T, C> Update<T, C>.withValue(value: C): DataFrame<T> = with { value }
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt
@@ -261,7 +261,7 @@ internal fun String.escapeHTML(): String {
     val str = this
     return buildString {
         for (c in str) {
-            if (c.code > 127 || c == '"' || c == '\'' || c == '<' || c == '>' || c == '&') {
+            if (c.code > 127 || c == '"' || c == '\'' || c == '<' || c == '>' || c == '&' || c == '\\') {
                 append("&#")
                 append(c.code)
                 append(';')
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt
@@ -1,6 +1,8 @@
 package org.jetbrains.kotlinx.dataframe.api
 
 import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
 import org.junit.Test
 
 class FlattenTests {
@@ -13,6 +15,41 @@ class FlattenTests {
         grouped.add("a") { 0 }.flatten().columnNames() shouldBe listOf("a1", "b", "c", "a")
     }
 
+    @DataSchema
+    interface TestRow {
+        val a: String
+        val b: String
+        val c: String
+    }
+
+    @DataSchema
+    interface Grouped {
+        val d: DataRow<TestRow>
+    }
+
+    @Test
+    fun `flatten access APIs`() {
+        val df = dataFrameOf("a", "b", "c")(1, 2, 3)
+        val grouped = df.group("a", "b").into("d") // only "a" and "b" are grouped under "d"; "c" stays top-level
+
+        // String API
+        grouped.flatten("d") shouldBe df
+        val castedGroupedDF = grouped.cast<Grouped>()
+
+        // KProperties API
+        castedGroupedDF.flatten(Grouped::d) shouldBe df
+
+        // Extension properties API
+        castedGroupedDF.flatten { d } shouldBe df
+
+        // Column accessors API
+        val d by columnGroup()
+        val a by d.column<String>() // NOTE(review): a, b and c are never referenced below
+        val b by d.column<String>()
+        val c by d.column<String>() // NOTE(review): declared under d, but "c" was not grouped into "d" above
+        grouped.flatten(d) shouldBe df
+    }
+
     @Test
     fun `flatten nested`() {
         val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4)
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt
@@ -56,6 +56,13 @@ class RenderingTests {
         html shouldContain "&#60;Air France&#62;"
     }
 
+    @Test
+    fun unicodeEscapeSequencesAreEscaped() {
+        val df = dataFrameOf("content")("""Hello\nfrom \x and \y""") // raw string: the backslashes stay literal
+        val html = df.toHTML().toString()
+        html shouldContain "Hello&#92;nfrom &#92;x and &#92;y" // each backslash (char code 92) is emitted as &#92;
+    }
+
     @Test
     fun `long text is trimmed without escaping`() {
         val df = dataFrameOf("text")("asdfkjasdlkjfhasljkddasdasdasdasdasdasdhf")
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt
@@ -409,14 +409,25 @@ class Access : TestBase() {
         // SampleEnd
     }
 
+    @Test
+    fun dropNaNs() {
+        // SampleStart
+        df.dropNaNs() // remove rows containing NaN in any column
+        df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
+        df.dropNaNs { weight } // remove rows where 'weight' is NaN
+        df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
+        df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
+        // SampleEnd
+    }
+
     @Test
     fun dropNA() {
         // SampleStart
-        df.dropNA() // remove rows containing null or Double.NaN in any column
-        df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
-        df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
-        df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
-        df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
+        df.dropNA() // remove rows containing null or NaN in any column
+        df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
+        df.dropNA { weight } // remove rows where 'weight' is null or NaN
+        df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
+        df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
         // SampleEnd
     }
 
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt
@@ -1052,14 +1052,44 @@ class Modify : TestBase() {
     }
 
     @Test
-    fun flatten() {
+    fun flatten_properties() {
         // SampleStart
         // name.firstName -> firstName
         // name.lastName -> lastName
         df.flatten { name }
         // SampleEnd
     }
 
+    @Test
+    fun flatten_strings() {
+        // SampleStart
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten("name")
+        // SampleEnd
+    }
+
+    @Test
+    fun flatten_accessors() {
+        // SampleStart
+        val name by columnGroup()
+        val firstName by name.column<String>()
+        val lastName by name.column<String>()
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten(name)
+        // SampleEnd
+    }
+
+    @Test
+    fun flatten_KProperties() {
+        // SampleStart
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten(df::name)
+        // SampleEnd
+    }
+
     @Test
     fun flattenAll() {
         // SampleStart
diff --git a/docs/StardustDocs/d.tree b/docs/StardustDocs/d.tree
@@ -3,7 +3,7 @@
         SYSTEM "https://helpserver.labs.jb.gg/help/schemas/mvp/product-profile.dtd">
 
 <instance-profile id="d"
-                 name="Dataframe"
+                 name="DataFrame"
                  status="release"
                  start-page="overview.md">
     <toc-element topic="gettingStarted.md"/>
diff --git a/docs/StardustDocs/r.list b/docs/StardustDocs/r.list
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE resources SYSTEM "https://helpserver.labs.jb.gg/help/schemas/mvp/resources.dtd">
+<resources>
+    <resource id="movie-sample-data" file="movies.csv"/>
+</resources>
diff --git a/docs/StardustDocs/resources/movies.csv b/docs/StardustDocs/resources/movies.csv
@@ -0,0 +1,21 @@
+movieId,title,genres
+9b30aff7943f44579e92c261f3adc193,Women in Black (1997),Fantasy|Suspenseful|Comedy
+2a1ba1fc5caf492a80188e032995843e,Bumblebee Movie (2007),Comedy|Jazz|Family|Animation
+f44ceb4771504342bb856d76c112d5a6,Magical School Boy and the Rock of Wise Men (2001),Fantasy|Growing up|Magic
+43d02fb064514ff3bd30d1e3a7398357,Master of the Jewlery: The Company of the Jewel (2001),Fantasy|Magic|Suspenseful
+6aa0d26a483148998c250b9c80ddf550,Sun Conflicts: Part IV: A Novel Espair (1977),Fantasy
+eace16e59ce24eff90bf8924eb6a926c,The Outstanding Bulk (2008),Fantasy|Superhero|Family
+ae916bc4844a4bb7b42b70d9573d05cd,In Automata (2014),Horror|Existential
+c1f0a868aeb44c5ea8d154ec3ca295ac,Interplanetary (2014),Sci-fi|Futuristic
+9595b771f87f42a3b8dd07d91e7cb328,Woods Run (1994),Family|Drama
+aa9fc400e068443488b259ea0802a975,Anthropod-Dude (2002),Superhero|Fantasy|Family|Growing up
+22d20c2ba11d44cab83aceea39dc00bd,The Chamber (2003),Comedy|Drama
+8cf4d0c1bd7b41fab6af9d92c892141f,That Thing About an Iceberg (1997),Drama|History|Family|Romance
+c2f3e7588da84684a7d78d6bd8d8e1f4,Vehicles (2006),Animation|Family
+ce06175106af4105945f245161eac3c7,Playthings Tale (1995),Animation|Family
+ee28d7e69103485c83e10b8055ef15fb,Metal Man 2 (2010),Fantasy|Superhero|Family
+c32bdeed466f4ec09de828bb4b6fc649,Surgeon Odd in the Omniverse of Crazy (2022),Fantasy|Superhero|Family|Horror
+d4a325ab648a42c4a2d6f35dfabb387f,Bad Dream on Pine Street (1984),Horror
+60ebe74947234ddcab49dea1a958faed,The Shimmering (1980),Horror
+f24327f2b05147b197ca34bf13ae3524,Krubit: Societal Teachings for Do Many Good Amazing Country of Uzbekistan (2006),Comedy
+2bb29b3a245e434fa80542e711fd2cee,This is No Movie (1950),(no genres listed)
diff --git a/docs/StardustDocs/topics/convert.md b/docs/StardustDocs/topics/convert.md
@@ -8,7 +8,7 @@ convert { columnsSelector }
     .with { rowExpression } | .perRowCol { rowColExpression } | .withValue(value)  | to<Type>() | to { colExpression }
 
 rowExpression = DataRow.(OldValue) -> NewValue
-rowColExpression = DataRow.(DataColumn) -> NewValue
+rowColExpression = (DataRow, DataColumn) -> NewValue
 colExpression = DataFrame.(DataColumn) -> DataColumn
 ```
 
diff --git a/docs/StardustDocs/topics/drop.md b/docs/StardustDocs/topics/drop.md
@@ -51,18 +51,34 @@ df.dropNulls(whereAllNull = true) { city and weight } // remove rows with null v
 
 <!---END-->
 
+## dropNaNs
+
+Remove rows with `Double.NaN` or `Float.NaN` values
+
+<!---FUN dropNaNs-->
+
+```kotlin
+df.dropNaNs() // remove rows containing NaN in any column
+df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
+df.dropNaNs { weight } // remove rows where 'weight' is NaN
+df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
+df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
+```
+
+<!---END-->
+
 ## dropNA
 
 Remove rows with `null`, `Double.NaN` or `Float.NaN` values
 
 <!---FUN dropNA-->
 
 ```kotlin
-df.dropNA() // remove rows containing null or Double.NaN in any column
-df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
-df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
-df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
-df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
+df.dropNA() // remove rows containing null or NaN in any column
+df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
+df.dropNA { weight } // remove rows where 'weight' is null or NaN
+df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
+df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
 ```
 
 <!---END-->
diff --git a/docs/StardustDocs/topics/flatten.md b/docs/StardustDocs/topics/flatten.md
@@ -11,13 +11,36 @@ flatten  [ { columns } ]
 Columns after flattening will keep their original names. Potential column name clashes are resolved by adding minimal possible name prefix from ancestor columns.
 
 <!---FUN flatten-->
+<tabs>
+<tab title="Properties">
 
 ```kotlin
 // name.firstName -> firstName
 // name.lastName -> lastName
 df.flatten { name }
 ```
 
+</tab>
+<tab title="Accessors">
+
+```kotlin
+val name by columnGroup()
+val firstName by name.column<String>()
+val lastName by name.column<String>()
+
+// name.firstName -> firstName
+// name.lastName -> lastName
+df.flatten(name)
+```
+
+</tab>
+<tab title="Strings">
+
+```kotlin
+df.flatten("name")
+```
+
+</tab></tabs>
 <!---END-->
 
 To remove all column groupings in [`DataFrame`](DataFrame.md), invoke `flatten` without parameters:
diff --git a/docs/StardustDocs/topics/gettingStarted.md b/docs/StardustDocs/topics/gettingStarted.md
diff --git a/docs/StardustDocs/topics/operations.md b/docs/StardustDocs/topics/operations.md
diff --git a/docs/StardustDocs/topics/overview.md b/docs/StardustDocs/topics/overview.md
diff --git a/docs/StardustDocs/topics/update.md b/docs/StardustDocs/topics/update.md
diff --git a/docs/StardustDocs/v.list b/docs/StardustDocs/v.list