Skip to content

Commit 5bf8463

Browse files
committed
Merge branch 'master' into dfs-rename3
# Conflicts:
#   core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt
#   core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt
#   core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt
#   core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt
#   core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt
#   core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt
2 parents 9f40e89 + ea7569e commit 5bf8463

File tree

13 files changed

+127
-23
lines changed

13 files changed

+127
-23
lines changed

build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ fun detectVersion(): String {
8080
if (rootProject.findProperty("build.number.detection") == "true") {
8181
"$versionProp-dev-$buildNumber"
8282
} else {
83-
buildNumber
83+
error("use build.number + build.number.detection = true or release build")
8484
}
8585
} else if (hasProperty("release")) {
8686
versionProp

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
1313

14-
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
14+
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
1515

16-
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumnSet() }
16+
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
1717

18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
19-
flattenImpl { columns.toColumnSet() }
18+
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2020

21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> =
22-
flattenImpl { columns.toColumnSet() }
21+
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2323

2424
// endregion

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,29 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4343
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4444

4545
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
46+
// recursively rename all column groups to camel case
4647
.rename {
47-
// rename groups first, because groups and their children cannot be renamed/moved at the same time
4848
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
4949
}.toCamelCase()
5050

51+
// recursively rename all other columns to camel case
5152
.rename {
5253
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
5354
}.toCamelCase()
5455

56+
// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
5557
.update {
5658
colsOf<AnyFrame>().recursively()
5759
}.with { it.renameToCamelCase() }
5860

61+
// convert the first character of each selected column name to lowercase
62+
.rename {
63+
allDfs()
64+
}.into {
65+
it.name.replaceFirstChar { it.lowercaseChar() }
66+
}
67+
68+
5969
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
6070
into(*newColumns.map { it.name() }.toTypedArray())
6171

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
1616
columns: ColumnsSelector<T, C>,
17+
keepParentNameForColumns: Boolean = false
1718
): DataFrame<T> {
1819
val rootColumns = getColumnsWithPaths {
1920
columns.toColumnSet().filter { it.isColumnGroup() }.roots()
@@ -31,7 +32,8 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
3132
.into {
3233
val targetPath = getRootPrefix(it.path).dropLast(1)
3334
val nameGen = nameGenerators[targetPath]!!
34-
val name = nameGen.addUnique(it.name())
35+
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
36+
val name = nameGen.addUnique(preferredName)
3537
targetPath + name
3638
}
3739
return result

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,37 @@ class FlattenTests {
7474
flattened.getColumnGroup("f").columnNames() shouldBe listOf("a", "b", "c")
7575
flattened.ungroup("f") shouldBe df
7676
}
77+
78+
@Test
79+
fun `flatten the aggregation and check column names`() {
80+
val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")(
81+
"Alice", "Cooper", 15, "London", 54, true,
82+
"Bob", "Dylan", 45, "Dubai", 87, true,
83+
"Charlie", "Daniels", 20, "Moscow", 35, false,
84+
"Charlie", "Chaplin", 40, "Milan", 41, true,
85+
"Bob", "Marley", 30, "Tokyo", 68, true,
86+
"Alice", "Wolf", 20, "Milan", 55, false,
87+
"Charlie", "Byrd", 30, "Moscow", 90, true
88+
).cast<Person>()
89+
90+
val aggregate = df.groupBy("city")
91+
.aggregate {
92+
mean() into "mean"
93+
std() into "std"
94+
}
95+
96+
aggregate
97+
.flatten(keepParentNameForColumns = true)
98+
.columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std")
99+
}
100+
101+
@DataSchema
102+
interface Person {
103+
val age: Int
104+
val city: String?
105+
val firstName: String
106+
val lastName: String
107+
val weight: Int?
108+
val isHappy: Boolean
109+
}
77110
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class MoveTests {
1313
val grouped = df.move { cols { it.name.contains(".") } }.into { it.name.split(".").toPath() }
1414

1515
@Test
16-
fun batchGrouping() {
16+
fun `batch grouping`() {
1717
grouped.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
1818
grouped["a"].asColumnGroup().columnNames() shouldBe listOf("b", "c")
1919
grouped["a"]["c"].asColumnGroup().columnNames() shouldBe listOf("d")
@@ -35,7 +35,7 @@ class MoveTests {
3535
}
3636

3737
@Test
38-
fun batchUngrouping() {
38+
fun `batch ungrouping`() {
3939
val ungrouped = grouped.move {
4040
cols { it.depth() > 0 && !it.isColumnGroup() }.rec()
4141
}.into { pathOf(it.path.joinToString(".")) }

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,11 @@ class RenameTests {
5555
}
5656
}
5757
}
58+
59+
@Test
60+
fun `rename to camelCase`() {
61+
val dfWithUpperCaseColumnNames = dataFrameOf("First_Column", "second_column", "ThirdColumn")(1, 2, 3)
62+
val df = dfWithUpperCaseColumnNames.renameToCamelCase()
63+
df.columnNames() shouldBe listOf("firstColumn", "secondColumn", "thirdColumn")
64+
}
5865
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
1313

14-
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
14+
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
1515

16-
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumnSet() }
16+
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
1717

18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
19-
flattenImpl { columns.toColumnSet() }
18+
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2020

21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> =
22-
flattenImpl { columns.toColumnSet() }
21+
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2323

2424
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,29 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4343
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4444

4545
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
46+
// recursively rename all column groups to camel case
4647
.rename {
47-
// rename groups first, because groups and their children cannot be renamed/moved at the same time
4848
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
4949
}.toCamelCase()
5050

51+
// recursively rename all other columns to camel case
5152
.rename {
5253
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
5354
}.toCamelCase()
5455

56+
// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
5557
.update {
5658
colsOf<AnyFrame>().recursively()
5759
}.with { it.renameToCamelCase() }
5860

61+
// convert the first character of each non-group column name (at any depth) to lowercase
62+
.rename {
63+
cols { !it.isColumnGroup() }.recursively()
64+
}.into {
65+
it.name.replaceFirstChar { it.lowercaseChar() }
66+
}
67+
68+
5969
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
6070
into(*newColumns.map { it.name() }.toTypedArray())
6171

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
1616
columns: ColumnsSelector<T, C>,
17+
keepParentNameForColumns: Boolean = false
1718
): DataFrame<T> {
1819
val rootColumns = getColumnsWithPaths {
1920
columns.toColumnSet().filter { it.isColumnGroup() }.roots()
@@ -31,7 +32,8 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
3132
.into {
3233
val targetPath = getRootPrefix(it.path).dropLast(1)
3334
val nameGen = nameGenerators[targetPath]!!
34-
val name = nameGen.addUnique(it.name())
35+
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
36+
val name = nameGen.addUnique(preferredName)
3537
targetPath + name
3638
}
3739
return result

0 commit comments

Comments
 (0)