Skip to content

Commit b87085d

Browse files
committed
Merge branch 'master' into jpc-kdoc-reuse
# Conflicts: # core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
2 parents 490ecdd + a1ef553 commit b87085d

File tree

21 files changed

+384
-44
lines changed

21 files changed

+384
-44
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnArithmetics.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,4 @@ public infix fun <T> DataColumn<T>.neq(value: T): DataColumn<Boolean> = isMatchi
108108
/** Builds a Boolean column that is `true` wherever a value of this column is strictly greater than [value]. */
public infix fun <T : Comparable<T>> DataColumn<T>.gt(value: T): DataColumn<Boolean> {
    return isMatching { element -> element > value }
}
109109
/** Builds a Boolean column that is `true` wherever a value of this column is strictly less than [value]. */
public infix fun <T : Comparable<T>> DataColumn<T>.lt(value: T): DataColumn<Boolean> {
    return isMatching { element -> element < value }
}
110110

111-
internal infix fun <T> DataColumn<T>.isMatching(predicate: Predicate<T>): DataColumn<Boolean> = map { predicate(it) }
111+
/** Maps every value of this column through [predicate], yielding a column of the Boolean results. */
internal fun <T> DataColumn<T>.isMatching(predicate: Predicate<T>): DataColumn<Boolean> {
    return map { value -> predicate(value) }
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,35 @@ public fun <T> DataColumn<T?>.dropNA(): DataColumn<T> =
398398
}
399399

400400
// endregion
401+
402+
// region dropNaNs
403+
404+
/**
 * Drops rows of this [DataFrame] depending on NaN values in the columns picked by [selector].
 *
 * @param whereAllNaN when `true`, a row is dropped only if all selected columns are NaN in that row;
 *   when `false` (the default), a row is dropped if any selected column is NaN in that row.
 * @param selector chooses the columns that are inspected.
 * @return the result of [drop] with the corresponding row filter.
 */
public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector<T, *>): DataFrame<T> {
    // Resolve the selector once, outside the per-row filter.
    val inspected = this[selector]

    return when {
        whereAllNaN -> drop { inspected.all { column -> this[column].isNaN } }
        else -> drop { inspected.any { column -> this[column].isNaN } }
    }
}
410+
411+
/**
 * Variant of [dropNaNs] that receives the columns to inspect as [KProperty] references.
 *
 * @param cols properties identifying the columns to check for NaN.
 * @param whereAllNaN passed through unchanged to the selector-based overload.
 */
public fun <T> DataFrame<T>.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame<T> {
    return dropNaNs(whereAllNaN) { cols.toColumns() }
}
413+
414+
/**
 * Variant of [dropNaNs] that receives the columns to inspect by name.
 *
 * @param cols names of the columns to check for NaN.
 * @param whereAllNaN passed through unchanged to the selector-based overload.
 */
public fun <T> DataFrame<T>.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame<T> {
    return dropNaNs(whereAllNaN) { cols.toColumns() }
}
416+
417+
/**
 * Variant of [dropNaNs] that receives the columns to inspect as column references.
 *
 * @param cols references to the columns to check for NaN.
 * @param whereAllNaN passed through unchanged to the selector-based overload.
 */
public fun <T> DataFrame<T>.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> {
    return dropNaNs(whereAllNaN) { cols.toColumns() }
}
419+
420+
/**
 * Variant of [dropNaNs] that receives the columns to inspect as an [Iterable] of column references.
 *
 * @param cols references to the columns to check for NaN; converted with `toColumnSet()`.
 * @param whereAllNaN passed through unchanged to the selector-based overload.
 */
public fun <T> DataFrame<T>.dropNaNs(cols: Iterable<AnyColumnReference>, whereAllNaN: Boolean = false): DataFrame<T> {
    return dropNaNs(whereAllNaN) { cols.toColumnSet() }
}
422+
423+
/**
 * Variant of [dropNaNs] that inspects the columns chosen by the `all()` selector.
 *
 * @param whereAllNaN passed through unchanged to the selector-based overload.
 */
public fun <T> DataFrame<T>.dropNaNs(whereAllNaN: Boolean = false): DataFrame<T> {
    return dropNaNs(whereAllNaN) { all() }
}
425+
426+
/**
 * Removes NaN values from this column.
 *
 * Only columns whose `typeClass` is [Double] or [Float] are filtered; any other column is returned as is.
 */
public fun <T> DataColumn<T>.dropNaNs(): DataColumn<T> {
    val valueClass = typeClass
    val isFloatingPoint = valueClass == Double::class || valueClass == Float::class
    if (!isFloatingPoint) return this

    return filter { value -> !value.isNaN }.cast()
}
431+
432+
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
44
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
56
import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
7+
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns
8+
import kotlin.reflect.KProperty
69

710
// region DataFrame
811

912
/** Flattens this [DataFrame] using the `all()` column selector. */
public fun <T> DataFrame<T>.flatten(): DataFrame<T> {
    return flatten { all() }
}
1013

11-
public fun <T, C> DataFrame<T>.flatten(
12-
columns: ColumnsSelector<T, C>
13-
): DataFrame<T> = flattenImpl(columns)
14+
/**
 * Flattens the columns picked by [columns]; the work is delegated to `flattenImpl`.
 *
 * @param columns selector for the columns to flatten.
 */
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> {
    return flattenImpl(columns)
}
15+
16+
/**
 * Flattens the columns identified by name; the work is delegated to `flattenImpl`.
 *
 * @param columns names of the columns to flatten.
 */
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> {
    return flattenImpl { columns.toColumns() }
}
17+
18+
/**
 * Flattens the columns identified by [KProperty] references; the work is delegated to `flattenImpl`.
 *
 * @param columns properties identifying the columns to flatten.
 */
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> {
    return flattenImpl { columns.toColumns() }
}
19+
20+
/**
 * Flattens the columns identified by column references; the work is delegated to `flattenImpl`.
 *
 * @param columns references to the columns to flatten.
 */
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> {
    return flattenImpl { columns.toColumns() }
}
1422

1523
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/group.kt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ public fun <T> DataFrame<T>.group(vararg columns: KProperty<*>): GroupClause<T,
2222
@JvmName("intoString")
2323
@OverloadResolutionByLambdaReturnType
2424
@OptIn(ExperimentalTypeInference::class)
25-
public infix fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> String): DataFrame<T> = df.move(columns).under { column(it).toColumnAccessor() }
25+
public fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> String): DataFrame<T> {
    // For each moved column, `column` yields a name; that name is converted to a column accessor
    // which is then handed to `under` as the destination.
    return df.move(columns).under { movedColumn -> column(movedColumn).toColumnAccessor() }
}
2626

2727
@JvmName("intoColumn")
28-
public infix fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> AnyColumnReference): DataFrame<T> = df.move(columns).under(column)
29-
public infix fun <T, C> GroupClause<T, C>.into(column: String): DataFrame<T> = into(columnGroup().named(column))
30-
public infix fun <T, C> GroupClause<T, C>.into(column: AnyColumnGroupAccessor): DataFrame<T> = df.move(columns).under(column)
31-
public infix fun <T, C> GroupClause<T, C>.into(column: KProperty<*>): DataFrame<T> = into(column.columnName)
28+
public fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnWithPath<C>) -> AnyColumnReference): DataFrame<T> {
    // Move the grouped columns under the reference that `column` computes for each of them.
    val moveClause = df.move(columns)
    return moveClause.under(column)
}
29+
/**
 * Groups the selected columns into a column group called [column].
 *
 * Builds a column group accessor named [column] and forwards to the accessor-based `into` overload.
 */
public fun <T, C> GroupClause<T, C>.into(column: String): DataFrame<T> {
    return into(columnGroup().named(column))
}
30+
/**
 * Moves the selected columns under the column group referenced by [column].
 *
 * @param column accessor of the destination column group.
 */
public fun <T, C> GroupClause<T, C>.into(column: AnyColumnGroupAccessor): DataFrame<T> {
    val moveClause = df.move(columns)
    return moveClause.under(column)
}
31+
/**
 * Groups the selected columns into a column group named after the property [column].
 *
 * Uses the property's `columnName` and forwards to the `String`-based `into` overload.
 */
public fun <T, C> GroupClause<T, C>.into(column: KProperty<*>): DataFrame<T> {
    return into(column.columnName)
}
3232

3333
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ public fun <T, C> Update<T, C>.at(rowRange: IntRange): Update<T, C> = where { in
199199
* - {@include [SeeAlsoPerCol]}
200200
* @param expression The {@include [ExpressionsGivenRowAndColumn.RowColumnExpressionLink]} to provide a new value for every selected cell giving its row and column.
201201
*/
202-
public infix fun <T, C> Update<T, C>.perRowCol(expression: RowColumnExpression<T, C, C>): DataFrame<T> =
202+
public fun <T, C> Update<T, C>.perRowCol(expression: RowColumnExpression<T, C, C>): DataFrame<T> =
203203
updateImpl { row, column, _ -> expression(row, column) }
204204

205205
/** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */
@@ -222,7 +222,7 @@ public typealias UpdateExpression<T, C, R> = AddDataRow<T>.(C) -> R
222222
* - {@include [SeeAlsoPerRowCol]}
223223
* @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with.
224224
*/
225-
public infix fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
225+
public fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
226226
updateImpl { row, _, value ->
227227
expression(row, value)
228228
}
@@ -238,7 +238,7 @@ private interface SeeAlsoWith
238238
* {@arg [ExpressionsGivenDataFrame.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]}
239239
* @param expression The {@include [ExpressionsGivenDataFrame.DataFrameExpressionLink]} to replace the selected column group with.
240240
*/
241-
public infix fun <T, C, R> Update<T, DataRow<C>>.asFrame(expression: DataFrameExpression<C, DataFrame<R>>): DataFrame<T> =
241+
public fun <T, C, R> Update<T, DataRow<C>>.asFrame(expression: DataFrameExpression<C, DataFrame<R>>): DataFrame<T> =
242242
asFrameImpl(expression)
243243

244244
@Deprecated(
@@ -447,4 +447,4 @@ public fun <T, C> Update<T, C>.withZero(): DataFrame<T> = updateWithValuePerColu
447447
*
448448
* @param value The value to set the selected rows to. In contrast to [with][Update.with], this must be the same exact type.
449449
*/
450-
public infix fun <T, C> Update<T, C>.withValue(value: C): DataFrame<T> = with { value }
450+
public fun <T, C> Update<T, C>.withValue(value: C): DataFrame<T> = with { value }

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ internal fun String.escapeHTML(): String {
261261
val str = this
262262
return buildString {
263263
for (c in str) {
264-
if (c.code > 127 || c == '"' || c == '\'' || c == '<' || c == '>' || c == '&') {
264+
if (c.code > 127 || c == '"' || c == '\'' || c == '<' || c == '>' || c == '&' || c == '\\') {
265265
append("&#")
266266
append(c.code)
267267
append(';')

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.DataRow
5+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
46
import org.junit.Test
57

68
class FlattenTests {
@@ -13,6 +15,41 @@ class FlattenTests {
1315
grouped.add("a") { 0 }.flatten().columnNames() shouldBe listOf("a1", "b", "c", "a")
1416
}
1517

18+
/**
 * Data schema with three `String` properties `a`, `b` and `c`; used as the row type of [Grouped.d].
 *
 * NOTE(review): the `flatten access APIs` test builds its frame from the Int values 1, 2, 3 and only
 * groups `a` and `b` — confirm that `String` and the extra `c` property are intended here.
 */
@DataSchema
19+
interface TestRow {
20+
val a: String
21+
val b: String
22+
val c: String
23+
}
24+
25+
/** Data schema with a single column group `d` whose rows are typed as [TestRow]. */
@DataSchema
26+
interface Grouped {
27+
val d: DataRow<TestRow>
28+
}
29+
30+
/**
 * Checks that every `flatten` access API (column name, KProperty, extension property and
 * column accessor) turns the grouped frame back into the original flat frame.
 */
@Test
fun `flatten access APIs`() {
    val df = dataFrameOf("a", "b", "c")(1, 2, 3)
    val grouped = df.group("a", "b").into("d")

    // String API
    grouped.flatten("d") shouldBe df
    val castedGroupedDF = grouped.cast<Grouped>()

    // KProperties API
    castedGroupedDF.flatten(Grouped::d) shouldBe df

    // Extension properties API
    castedGroupedDF.flatten { d } shouldBe df

    // Column accessors API
    // Only the group accessor is needed; the nested `a`/`b`/`c` accessors were never read
    // (and were declared as String columns although the data is Int), so they are dropped.
    val d by columnGroup()
    grouped.flatten(d) shouldBe df
}
52+
1653
@Test
1754
fun `flatten nested`() {
1855
val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4)

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ class RenderingTests {
5656
html shouldContain "&#60;Air France&#62;"
5757
}
5858

59+
@Test
fun unicodeEscapeSequencesAreEscaped() {
    // Raw string literal: every backslash below is a literal character, not an escape sequence.
    val rendered = dataFrameOf("content")("""Hello\nfrom \x and \y""").toHTML().toString()

    // 92 is the character code of '\', so each backslash has to show up as the entity "&#92;".
    rendered shouldContain "Hello&#92;nfrom &#92;x and &#92;y"
}
65+
5966
@Test
6067
fun `long text is trimmed without escaping`() {
6168
val df = dataFrameOf("text")("asdfkjasdlkjfhasljkddasdasdasdasdasdasdhf")

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,14 +409,25 @@ class Access : TestBase() {
409409
// SampleEnd
410410
}
411411

412+
@Test
413+
fun dropNaNs() {
414+
// SampleStart
415+
df.dropNaNs() // remove rows containing NaN in any column
416+
df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns
417+
df.dropNaNs { weight } // remove rows where 'weight' is NaN
418+
df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN
419+
df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN
420+
// SampleEnd
421+
}
422+
412423
@Test
413424
fun dropNA() {
414425
// SampleStart
415-
df.dropNA() // remove rows containing null or Double.NaN in any column
416-
df.dropNA(whereAllNA = true) // remove rows with null or Double.NaN in all columns
417-
df.dropNA { weight } // remove rows where 'weight' is null or Double.NaN
418-
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or Double.NaN
419-
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or Double.NaN
426+
df.dropNA() // remove rows containing null or NaN in any column
427+
df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns
428+
df.dropNA { weight } // remove rows where 'weight' is null or NaN
429+
df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN
430+
df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN
420431
// SampleEnd
421432
}
422433

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,14 +1052,44 @@ class Modify : TestBase() {
10521052
}
10531053

10541054
@Test
1055-
fun flatten() {
1055+
fun flatten_properties() {
10561056
// SampleStart
10571057
// name.firstName -> firstName
10581058
// name.lastName -> lastName
10591059
df.flatten { name }
10601060
// SampleEnd
10611061
}
10621062

1063+
@Test
1064+
fun flatten_strings() {
1065+
// SampleStart
1066+
// name.firstName -> firstName
1067+
// name.lastName -> lastName
1068+
df.flatten("name")
1069+
// SampleEnd
1070+
}
1071+
1072+
@Test
1073+
fun flatten_accessors() {
1074+
// SampleStart
1075+
val name by columnGroup()
1076+
val firstName by name.column<String>()
1077+
val lastName by name.column<String>()
1078+
// name.firstName -> firstName
1079+
// name.lastName -> lastName
1080+
df.flatten(name)
1081+
// SampleEnd
1082+
}
1083+
1084+
@Test
1085+
fun flatten_KProperties() {
1086+
// SampleStart
1087+
// name.firstName -> firstName
1088+
// name.lastName -> lastName
1089+
df.flatten(df::name)
1090+
// SampleEnd
1091+
}
1092+
10631093
@Test
10641094
fun flattenAll() {
10651095
// SampleStart

0 commit comments

Comments
 (0)