Skip to content

Commit dece7a4

Browse files
committed
Merge branch 'master' into enable-kdocs-plugin
2 parents 5f54638 + 62ab8a3 commit dece7a4

File tree

11 files changed

+178
-17
lines changed

11 files changed

+178
-17
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,35 @@ public fun <T> DataColumn<T?>.dropNA(): DataColumn<T> =
151151
}
152152

153153
// endregion
154+
155+
// region dropNaNs
156+
157+
public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector<T, *>): DataFrame<T> {
158+
val cols = this[selector]
159+
160+
return if (whereAllNaN) drop { cols.all { this[it].isNaN } }
161+
else drop { cols.any { this[it].isNaN } }
162+
}
163+
164+
public fun <T> DataFrame<T>.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame<T> =
165+
dropNaNs(whereAllNaN) { cols.toColumns() }
166+
167+
public fun <T> DataFrame<T>.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame<T> =
168+
dropNaNs(whereAllNaN) { cols.toColumns() }
169+
170+
public fun <T> DataFrame<T>.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> =
171+
dropNaNs(whereAllNaN) { cols.toColumns() }
172+
173+
public fun <T> DataFrame<T>.dropNaNs(cols: Iterable<AnyColumnReference>, whereAllNaN: Boolean = false): DataFrame<T> =
174+
dropNaNs(whereAllNaN) { cols.toColumnSet() }
175+
176+
public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false): DataFrame<T> =
177+
dropNaNs(whereAllNaN) { all() }
178+
179+
public fun <T> DataColumn<T>.dropNaNs(): DataColumn<T> =
180+
when (typeClass) {
181+
Double::class, Float::class -> filter { !it.isNaN }.cast()
182+
else -> this
183+
}
184+
185+
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
44
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
56
import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
7+
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
8+
import kotlin.reflect.KProperty
69

710
// region DataFrame
811

912
public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
1013

11-
public fun <T, C> DataFrame<T>.flatten(
12-
columns: ColumnsSelector<T, C>
13-
): DataFrame<T> = flattenImpl(columns)
14+
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
15+
16+
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumns() }
17+
18+
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> = flattenImpl { columns.toColumns() }
19+
20+
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
21+
flattenImpl { columns.toColumns() }
1422

1523
// endregion

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.DataRow
5+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
46
import org.junit.Test
57

68
class FlattenTests {
@@ -13,6 +15,41 @@ class FlattenTests {
1315
grouped.add("a") { 0 }.flatten().columnNames() shouldBe listOf("a1", "b", "c", "a")
1416
}
1517

18+
@DataSchema
19+
interface TestRow {
20+
val a: String
21+
val b: String
22+
val c: String
23+
}
24+
25+
@DataSchema
26+
interface Grouped {
27+
val d: DataRow<TestRow>
28+
}
29+
30+
@Test
31+
fun `flatten access APIs`() {
32+
val df = dataFrameOf("a", "b", "c")(1, 2, 3)
33+
val grouped = df.group("a", "b").into("d")
34+
35+
// String API
36+
grouped.flatten("d") shouldBe df
37+
val castedGroupedDF = grouped.cast<Grouped>()
38+
39+
// KProperties API
40+
castedGroupedDF.flatten(Grouped::d) shouldBe df
41+
42+
// Extension properties API
43+
castedGroupedDF.flatten { d } shouldBe df
44+
45+
// Column accessors API
46+
val d by columnGroup()
47+
val a by d.column<String>()
48+
val b by d.column<String>()
49+
val c by d.column<String>()
50+
grouped.flatten(d) shouldBe df
51+
}
52+
1653
@Test
1754
fun `flatten nested`() {
1855
val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4)

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,14 +409,25 @@ class Access : TestBase() {
409409
// SampleEnd
410410
}
411411

412+
@Test
413+
fun dropNaNs() {
414+
// SampleStart
415+
df.dropNaNs() // remove rows containing NaN in any column
416+
df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
417+
df.dropNaNs { weight } // remove rows where 'weight' is NaN
418+
df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
419+
df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
420+
// SampleEnd
421+
}
422+
412423
@Test
413424
fun dropNA() {
414425
// SampleStart
415-
df.dropNA() // remove rows containing null or Double.NaN in any column
416-
df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
417-
df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
418-
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
419-
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
426+
df.dropNA() // remove rows containing null or NaN in any column
427+
df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
428+
df.dropNA { weight } // remove rows where 'weight' is null or NaN
429+
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
430+
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
420431
// SampleEnd
421432
}
422433

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,14 +1052,44 @@ class Modify : TestBase() {
10521052
}
10531053

10541054
@Test
1055-
fun flatten() {
1055+
fun flatten_properties() {
10561056
// SampleStart
10571057
// name.firstName -> firstName
10581058
// name.lastName -> lastName
10591059
df.flatten { name }
10601060
// SampleEnd
10611061
}
10621062

1063+
@Test
1064+
fun flatten_strings() {
1065+
// SampleStart
1066+
// name.firstName -> firstName
1067+
// name.lastName -> lastName
1068+
df.flatten("name")
1069+
// SampleEnd
1070+
}
1071+
1072+
@Test
1073+
fun flatten_accessors() {
1074+
// SampleStart
1075+
val name by columnGroup()
1076+
val firstName by name.column<String>()
1077+
val lastName by name.column<String>()
1078+
// name.firstName -> firstName
1079+
// name.lastName -> lastName
1080+
df.flatten(name)
1081+
// SampleEnd
1082+
}
1083+
1084+
@Test
1085+
fun flatten_KProperties() {
1086+
// SampleStart
1087+
// name.firstName -> firstName
1088+
// name.lastName -> lastName
1089+
df.flatten(df::name)
1090+
// SampleEnd
1091+
}
1092+
10631093
@Test
10641094
fun flattenAll() {
10651095
// SampleStart

docs/StardustDocs/topics/convert.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ convert { columnsSelector }
88
.with { rowExpression } | .perRowCol { rowColExpression } | .withValue(value) | to<Type>() | to { colExpression }
99
1010
rowExpression = DataRow.(OldValue) -> NewValue
11-
rowColExpression = DataRow.(DataColumn) -> NewValue
11+
rowColExpression = (DataRow, DataColumn) -> NewValue
1212
colExpression = DataFrame.(DataColumn) -> DataColumn
1313
```
1414

docs/StardustDocs/topics/drop.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,34 @@ df.dropNulls(whereAllNull = true) { city and weight } // remove rows with null v
5151

5252
<!---END-->
5353

54+
## dropNaNs
55+
56+
Remove rows with `Double.NaN` or `Float.NaN` values
57+
58+
<!---FUN dropNaNs-->
59+
60+
```kotlin
61+
df.dropNaNs() // remove rows containing NaN in any column
62+
df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
63+
df.dropNaNs { weight } // remove rows where 'weight' is NaN
64+
df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
65+
df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
66+
```
67+
68+
<!---END-->
69+
5470
## dropNA
5571

5672
Remove rows with `null`, `Double.NaN` or `Float.NaN` values
5773

5874
<!---FUN dropNA-->
5975

6076
```kotlin
61-
df.dropNA() // remove rows containing null or Double.NaN in any column
62-
df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
63-
df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
64-
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
65-
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
77+
df.dropNA() // remove rows containing null or NaN in any column
78+
df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
79+
df.dropNA { weight } // remove rows where 'weight' is null or NaN
80+
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
81+
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
6682
```
6783

6884
<!---END-->

docs/StardustDocs/topics/flatten.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,36 @@ flatten [ { columns } ]
1111
Columns after flattening will keep their original names. Potential column name clashes are resolved by adding minimal possible name prefix from ancestor columns.
1212

1313
<!---FUN flatten-->
14+
<tabs>
15+
<tab title="Properties">
1416

1517
```kotlin
1618
// name.firstName -> firstName
1719
// name.lastName -> lastName
1820
df.flatten { name }
1921
```
2022

23+
</tab>
24+
<tab title="Accessors">
25+
26+
```kotlin
27+
val name by columnGroup()
28+
val firstName by name.column<String>()
29+
val lastName by name.column<String>()
30+
31+
// name.firstName -> firstName
32+
// name.lastName -> lastName
33+
df.flatten(name)
34+
```
35+
36+
</tab>
37+
<tab title="Strings">
38+
39+
```kotlin
40+
df.flatten("name")
41+
```
42+
43+
</tab></tabs>
2144
<!---END-->
2245

2346
To remove all column groupings in [`DataFrame`](DataFrame.md), invoke `flatten` without parameters:

docs/StardustDocs/topics/operations.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Most multiplex operations end with `into` or `with` function. The following nami
5252
* [cumSum](cumSum.md) — cumulative sum of column values
5353
* [describe](describe.md) — basic column statistics
5454
* [distinct](distinct.md) / [distinctBy](distinct.md#distinctby) — remove duplicated rows
55-
* [drop](drop.md) / [dropLast](sliceRows.md#droplast) / [dropWhile](sliceRows.md#dropwhile) / [dropNulls](drop.md#dropnulls) / [dropNA](drop.md#dropna) — remove rows by condition
55+
* [drop](drop.md) / [dropLast](sliceRows.md#droplast) / [dropWhile](sliceRows.md#dropwhile) / [dropNulls](drop.md#dropnulls) / [dropNA](drop.md#dropna) / [dropNaNs](drop.md#dropnans) — remove rows by condition
5656
* [duplicate](duplicate.md) — duplicate rows
5757
* [explode](explode.md) — spread lists and [`DataFrames`](DataFrame.md) vertically into new rows
5858
* [fillNulls](fill.md#fillnulls) / [fillNaNs](fill.md#fillnans) / [fillNA](fill.md#fillna) — replace missing values
@@ -98,6 +98,7 @@ Most multiplex operations end with `into` or `with` function. The following nami
9898
* [take](sliceRows.md#take) / [takeLast](sliceRows.md#takelast) / [takeWhile](sliceRows.md#takewhile) — get first/last rows
9999
* [toList](toList.md) / [toListOf](toList.md#tolistof) — export [`DataFrame`](DataFrame.md) into a list of data classes
100100
* [toMap](toMap.md) — export [`DataFrame`](DataFrame.md) into a map from column names to column values
101+
* [unfold](unfold.md) — unfold objects (normal class instances) in columns according to their properties
101102
* [ungroup](ungroup.md) — remove column groupings
102103
* [update](update.md) — update column values preserving column types
103104
* [values](values.md) — [`Sequence`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.sequences/-sequence/) of values traversed by row or by column

docs/StardustDocs/topics/update.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ update { columns }
1313
rowCondition: DataRow.(OldValue) -> Boolean
1414
rowExpression: DataRow.(OldValue) -> NewValue
1515
colExpression: DataColumn.(DataColumn) -> NewValue
16-
rowColExpression: DataRow.(DataColumn) -> NewValue
16+
rowColExpression: (DataRow, DataColumn) -> NewValue
1717
frameExpression: DataFrame.(DataFrame) -> DataFrame
1818
```
1919

0 commit comments

Comments
 (0)