Merge branch 'master' into enable-kdocs-plugin

Jolanrensen · Jolanrensen · commit dece7a42ac6f · 2023-03-22T15:15:13.000+01:00
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt
@@ -151,3 +151,35 @@ public fun <T> DataColumn<T?>.dropNA(): DataColumn<T> =
     }
 
 // endregion
+
+// region dropNaNs
+
+public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector<T, *>): DataFrame<T> {
+    val cols = this[selector]
+
+    return if (whereAllNaN) drop { cols.all { this[it].isNaN } }
+    else drop { cols.any { this[it].isNaN } }
+}
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumns() }
+
+public fun <T> DataFrame<T>.dropNaNs(cols: Iterable<AnyColumnReference>, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { cols.toColumnSet() }
+
+public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { all() }
+
+public fun <T> DataColumn<T>.dropNaNs(): DataColumn<T> =
+    when (typeClass) {
+        Double::class, Float::class -> filter { !it.isNaN }.cast()
+        else -> this
+    }
+
+// endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt
@@ -2,14 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api
 
 import org.jetbrains.kotlinx.dataframe.ColumnsSelector
 import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
 import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
+import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
+import kotlin.reflect.KProperty
 
 // region DataFrame
 
 public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
 
-public fun <T, C> DataFrame<T>.flatten(
-    columns: ColumnsSelector<T, C>
-): DataFrame<T> = flattenImpl(columns)
+public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
+
+public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumns() }
+
+public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> = flattenImpl { columns.toColumns() }
+
+public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
+    flattenImpl { columns.toColumns() }
 
 // endregion
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt
@@ -1,6 +1,8 @@
 package org.jetbrains.kotlinx.dataframe.api
 
 import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
 import org.junit.Test
 
 class FlattenTests {
@@ -13,6 +15,41 @@ class FlattenTests {
         grouped.add("a") { 0 }.flatten().columnNames() shouldBe listOf("a1", "b", "c", "a")
     }
 
+    @DataSchema
+    interface TestRow {
+        val a: String
+        val b: String
+        val c: String
+    }
+
+    @DataSchema
+    interface Grouped {
+        val d: DataRow<TestRow>
+    }
+
+    @Test
+    fun `flatten access APIs`() {
+        val df = dataFrameOf("a", "b", "c")(1, 2, 3)
+        val grouped = df.group("a", "b").into("d")
+
+        // String API
+        grouped.flatten("d") shouldBe df
+        val castedGroupedDF = grouped.cast<Grouped>()
+
+        // KProperties API
+        castedGroupedDF.flatten(Grouped::d) shouldBe df
+
+        // Extension properties API
+        castedGroupedDF.flatten { d } shouldBe df
+
+        // Column accessors API
+        val d by columnGroup()
+        val a by d.column<String>()
+        val b by d.column<String>()
+        val c by d.column<String>()
+        grouped.flatten(d) shouldBe df
+    }
+
     @Test
     fun `flatten nested`() {
         val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4)
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt
@@ -409,14 +409,25 @@ class Access : TestBase() {
         // SampleEnd
     }
 
+    @Test
+    fun dropNaNs() {
+        // SampleStart
+        df.dropNaNs() // remove rows containing NaN in any column
+        df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
+        df.dropNaNs { weight } // remove rows where 'weight' is NaN
+        df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
+        df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
+        // SampleEnd
+    }
+
     @Test
     fun dropNA() {
         // SampleStart
-        df.dropNA() // remove rows containing null or Double.NaN in any column
-        df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
-        df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
-        df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
-        df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
+        df.dropNA() // remove rows containing null or NaN in any column
+        df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
+        df.dropNA { weight } // remove rows where 'weight' is null or NaN
+        df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
+        df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
         // SampleEnd
     }
 
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt
@@ -1052,14 +1052,44 @@ class Modify : TestBase() {
     }
 
     @Test
-    fun flatten() {
+    fun flatten_properties() {
         // SampleStart
         // name.firstName -> firstName
         // name.lastName -> lastName
         df.flatten { name }
         // SampleEnd
     }
 
+    @Test
+    fun flatten_strings() {
+        // SampleStart
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten("name")
+        // SampleEnd
+    }
+
+    @Test
+    fun flatten_accessors() {
+        // SampleStart
+        val name by columnGroup()
+        val firstName by name.column<String>()
+        val lastName by name.column<String>()
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten(name)
+        // SampleEnd
+    }
+
+    @Test
+    fun flatten_KProperties() {
+        // SampleStart
+        // name.firstName -> firstName
+        // name.lastName -> lastName
+        df.flatten(df::name)
+        // SampleEnd
+    }
+
     @Test
     fun flattenAll() {
         // SampleStart
diff --git a/docs/StardustDocs/topics/convert.md b/docs/StardustDocs/topics/convert.md
@@ -8,7 +8,7 @@ convert { columnsSelector }
     .with { rowExpression } | .perRowCol { rowColExpression } | .withValue(value)  | to<Type>() | to { colExpression }
 
 rowExpression = DataRow.(OldValue) -> NewValue
-rowColExpression = DataRow.(DataColumn) -> NewValue
+rowColExpression = (DataRow, DataColumn) -> NewValue
 colExpression = DataFrame.(DataColumn) -> DataColumn
 ```
 
diff --git a/docs/StardustDocs/topics/drop.md b/docs/StardustDocs/topics/drop.md
@@ -51,18 +51,34 @@ df.dropNulls(whereAllNull = true) { city and weight } // remove rows with null v
 
 <!---END-->
 
+## dropNaNs
+
+Remove rows with `Double.NaN` or `Float.NaN` values
+
+<!---FUN dropNaNs-->
+
+```kotlin
+df.dropNaNs() // remove rows containing NaN in any column
+df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
+df.dropNaNs { weight } // remove rows where 'weight' is NaN
+df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
+df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
+```
+
+<!---END-->
+
 ## dropNA
 
 Remove rows with `null`, `Double.NaN` or `Float.NaN` values
 
 <!---FUN dropNA-->
 
 ```kotlin
-df.dropNA() // remove rows containing null or Double.NaN in any column
-df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
-df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
-df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
-df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
+df.dropNA() // remove rows containing null or NaN in any column
+df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
+df.dropNA { weight } // remove rows where 'weight' is null or NaN
+df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
+df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
 ```
 
 <!---END-->
diff --git a/docs/StardustDocs/topics/flatten.md b/docs/StardustDocs/topics/flatten.md
@@ -11,13 +11,36 @@ flatten  [ { columns } ]
 Columns after flattening will keep their original names. Potential column name clashes are resolved by adding minimal possible name prefix from ancestor columns.
 
 <!---FUN flatten-->
+<tabs>
+<tab title="Properties">
 
 ```kotlin
 // name.firstName -> firstName
 // name.lastName -> lastName
 df.flatten { name }
 ```
 
+</tab>
+<tab title="Accessors">
+
+```kotlin
+val name by columnGroup()
+val firstName by name.column<String>()
+val lastName by name.column<String>()
+
+// name.firstName -> firstName
+// name.lastName -> lastName
+df.flatten(name)
+```
+
+</tab>
+<tab title="Strings">
+
+```kotlin
+df.flatten("name")
+```
+
+</tab></tabs>
 <!---END-->
 
 To remove all column groupings in [`DataFrame`](DataFrame.md), invoke `flatten` without parameters:
diff --git a/docs/StardustDocs/topics/operations.md b/docs/StardustDocs/topics/operations.md
@@ -52,7 +52,7 @@ Most multiplex operations end with `into` or `with` function. The following nami
 * [cumSum](cumSum.md) — cumulative sum of column values
 * [describe](describe.md) — basic column statistics
 * [distinct](distinct.md) / [distinctBy](distinct.md#distinctby) — remove duplicated rows
-* [drop](drop.md) / [dropLast](sliceRows.md#droplast) / [dropWhile](sliceRows.md#dropwhile) / [dropNulls](drop.md#dropnulls) / [dropNA](drop.md#dropna) — remove rows by condition
+* [drop](drop.md) / [dropLast](sliceRows.md#droplast) / [dropWhile](sliceRows.md#dropwhile) / [dropNulls](drop.md#dropnulls) / [dropNA](drop.md#dropna) / [dropNaNs](drop.md#dropnans) — remove rows by condition
 * [duplicate](duplicate.md) — duplicate rows 
 * [explode](explode.md) — spread lists and [`DataFrames`](DataFrame.md) vertically into new rows
 * [fillNulls](fill.md#fillnulls) / [fillNaNs](fill.md#fillnans) / [fillNA](fill.md#fillna) — replace missing values
@@ -98,6 +98,7 @@ Most multiplex operations end with `into` or `with` function. The following nami
 * [take](sliceRows.md#take) / [takeLast](sliceRows.md#takelast) / [takeWhile](sliceRows.md#takewhile) — get first/last rows
 * [toList](toList.md) / [toListOf](toList.md#tolistof) — export [`DataFrame`](DataFrame.md) into a list of data classes
 * [toMap](toMap.md) — export [`DataFrame`](DataFrame.md) into a map from column names to column values
+* [unfold](unfold.md) — unfold objects (normal class instances) in columns according to their properties
 * [ungroup](ungroup.md) — remove column groupings
 * [update](update.md) — update column values preserving column types
 * [values](values.md) — [`Sequence`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.sequences/-sequence/) of values traversed by row or by column 
diff --git a/docs/StardustDocs/topics/update.md b/docs/StardustDocs/topics/update.md
@@ -13,7 +13,7 @@ update { columns }
 rowCondition: DataRow.(OldValue) -> Boolean
 rowExpression: DataRow.(OldValue) -> NewValue
 colExpression: DataColumn.(DataColumn) -> NewValue
-rowColExpression: DataRow.(DataColumn) -> NewValue
+rowColExpression: (DataRow, DataColumn) -> NewValue
 frameExpression: DataFrame.(DataFrame) -> DataFrame
 ```
 
diff --git a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPlugin.kt b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorPlugin.kt
@@ -4,6 +4,8 @@ import org.gradle.api.Plugin
 import org.gradle.api.Project
 import org.gradle.api.Task
 import org.gradle.api.file.FileCollection
+import org.gradle.api.logging.LogLevel
+import org.gradle.internal.logging.services.DefaultLoggingManager
 import org.gradle.kotlin.dsl.create
 import org.gradle.kotlin.dsl.withType
 import org.jetbrains.kotlin.gradle.dsl.ExplicitApiMode
@@ -103,6 +105,7 @@ class SchemaGeneratorPlugin : Plugin<Project> {
         val delimiters = schema.withNormalizationBy ?: extension.withNormalizationBy ?: setOf('\t', ' ', '_')
 
         return target.tasks.create("generateDataFrame${interfaceName}", GenerateDataSchemaTask::class.java) {
+            (logging as? DefaultLoggingManager)?.setLevelInternal(LogLevel.QUIET)
             group = GROUP
             data.set(schema.data)
             this.interfaceName.set(interfaceName)