Skip to content

Commit 7d6c91e

Browse files
committed
fixed rename behavior such that groups and their children can be renamed at once.
added issue link for replace.with; fixed renameToCamelCase using new replace behavior
1 parent 7b8d454 commit 7d6c91e

File tree

10 files changed

+314
-48
lines changed

10 files changed

+314
-48
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
9+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
910
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1011
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
1112
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
13+
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
1214
import org.jetbrains.kotlinx.dataframe.impl.columnName
1315
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
1416
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
@@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4244

4345
/**
 * Intermediate clause of a rename operation.
 *
 * Holds the [DataFrame] being renamed ([df]) together with the selector of the columns
 * to rename ([columns]). The `into` and `toCamelCase` extensions on this clause produce
 * the renamed [DataFrame].
 */
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4446

47+
/**
 * ## Rename to camelCase
 *
 * This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
 * and converting the first char to lowercase.
 * Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
 *
 * A column is selected for renaming when its name matches [DELIMITED_STRING_REGEX] or
 * starts with an uppercase character. An empty column name does not make the selection throw.
 *
 * @return a [DataFrame] with the selected columns (at any depth) renamed.
 */
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
    // recursively rename all columns written with delimiters or starting with a capital to camel case
    .rename {
        cols {
            // firstOrNull() instead of [0]: indexing an empty name would throw
            it.name() matches DELIMITED_STRING_REGEX || it.name.firstOrNull()?.isUpperCase() == true
        }.recursively()
    }.toCamelCase()

    // take all frame columns recursively and call renameToCamelCase() on all dataframes inside
    .update {
        colsOf<AnyFrame>().recursively()
    }.with { it.renameToCamelCase() }
67-
6865
/**
 * Renames the selected columns using the names of the given column references.
 *
 * Only the name of each reference in [newColumns] is used; the call is forwarded to the
 * `String`-based `into` overload.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> {
    val names = Array(newColumns.size) { index -> newColumns[index].name() }
    return into(*names)
}
7067

7168
/**
 * Renames the selected columns using the supplied [newNames].
 *
 * The actual work is delegated to [renameImpl].
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> {
    return renameImpl(newNames)
}
7570

7671
/**
 * Renames the selected columns using the names of the given properties.
 *
 * Only the `name` of each property in [newNames] is used; the call is forwarded to the
 * `String`-based `into` overload.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> {
    val names = Array(newNames.size) { index -> newNames[index].name }
    return into(*names)
}
7873

7974
/**
 * Renames each selected column to the name produced by [transform].
 *
 * The actual work is delegated to [renameImpl].
 *
 * @param transform computes the new name for a selected column.
 */
public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
    return renameImpl(transform)
}
76+
77+
/**
 * ## Rename to camelCase
 *
 * Gives every selected column a `camelCase` name: [delimiters][DELIMITERS_REGEX] are
 * replaced first, then the first character of the result is converted to lowercase.
 */
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
    into { column ->
        val withoutDelimiters = column.name().toCamelCaseByDelimiters(DELIMITERS_REGEX)
        withoutDelimiters.replaceFirstChar { first -> first.lowercaseChar() }
    }
8688

8789
// endregion
8890

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
5656
}
5757
}
5858

59+
/* TODO: Issue #418: breaks if running on ColumnGroup and its child */
5960
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
6061
val removeResult = df.removeImpl(columns = columns)
6162
val toInsert = removeResult.removedColumns.map {
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.api
2+
3+
import org.jetbrains.kotlinx.dataframe.DataFrame
4+
import org.jetbrains.kotlinx.dataframe.api.RenameClause
5+
import org.jetbrains.kotlinx.dataframe.api.cast
6+
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
7+
import org.jetbrains.kotlinx.dataframe.api.insert
8+
import org.jetbrains.kotlinx.dataframe.api.under
9+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
10+
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
11+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.allChildrenNotNull
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree
13+
import org.jetbrains.kotlinx.dataframe.kind
14+
15+
16+
/**
 * Renames the selected columns to [newNames], pairing the n-th selected column with the
 * n-th name.
 *
 * The pairing is fixed up front by column path. A running counter inside the transform
 * would instead hand out names in the order the transform-based [renameImpl] visits the
 * columns, which need not be the order in which the columns were selected.
 *
 * Surplus names are ignored; too few names fail on the array access.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(newNames: Array<out String>): DataFrame<T> {
    val newNameByPath = df.getColumnsWithPaths(columns)
        .mapIndexed { index, column -> column.path to newNames[index] }
        .toMap()

    return renameImpl { column -> newNameByPath.getValue(column.path) }
}
20+
21+
22+
/**
 * Renames the selected columns by rewriting node names in a tree of all columns and then
 * rebuilding the [DataFrame] from that tree.
 *
 * Working on the tree lets a group and its children be renamed in the same call.
 *
 * @param transform computes the new name for a selected column.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
    val selected = df.getColumnsWithPaths(columns)
    val root = df.getColumnsWithPaths { all().rec() }.collectTree()

    // first pass: give every node that corresponds to a selected column its new name
    for (node in root.allChildrenNotNull()) {
        // nodes are matched to selected columns by equality of the column data
        val match = selected.find { it.data == node.data } ?: continue
        node.name = transform(match)
    }

    // second pass: rebuild the DataFrame from the (possibly renamed) nodes
    var result = DataFrame.empty(df.rowsCount()).cast<T>()
    for (node in root.allChildrenNotNull()) {
        val renamed = node.data.rename(node.name)

        // groups are not inserted themselves; only value and frame columns are,
        // placed under their parent path
        if (renamed.kind == ColumnKind.Group) continue

        val parentPath = node.pathFromRoot().dropLast(1)
        result = result.insert(renamed).under(parentPath)
    }

    return result
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.impl.columns.tree
33
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
44

55
internal class TreeNode<T>(
6-
override val name: String,
6+
override var name: String,
77
override val depth: Int,
88
override var data: T,
99
override val parent: TreeNode<T>? = null,

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,83 @@ package org.jetbrains.kotlinx.dataframe.api
33
import io.kotest.assertions.asClue
44
import io.kotest.assertions.throwables.shouldNotThrowAny
55
import io.kotest.matchers.shouldBe
6+
import org.jetbrains.kotlinx.dataframe.AnyRow
7+
import org.jetbrains.kotlinx.dataframe.alsoDebug
68
import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
79
import org.junit.Test
810

11+
912
/** Tests for `rename { ... }.into(...)` on flat, grouped and doubly grouped data frames. */
class RenameTests {

    @Test
    fun `simple rename`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        )

        // both the transform-based and the name-based overload must give the same result
        simpleDf.rename { all() }.into { it.name + "_renamed" } shouldBe expected
        simpleDf.rename { all() }.into("a_renamed", "b_renamed", "c_renamed") shouldBe expected
    }

    @Test
    fun `partial grouped rename`() {
        val expected = dataFrameOf("a_renamed", "b", "c")(
            1, 2, 3,
            4, 5, 6,
        )
            .group { "a_renamed" and "b" }
            .into("group_renamed")

        // a group and one of its children are renamed in the same call
        val actual = groupedDf
            .rename { "group" and "group"["a"] }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }

    @Test
    fun `grouped rename`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        )
            .group { "a_renamed" and "b_renamed" }
            .into("group_renamed")

        val actual = groupedDf
            .rename { all().recursively() }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }

    @Test
    fun `double grouped rename in 3 steps`() {
        val expected = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
            1, 2, 3,
            4, 5, 6,
        )
            .group { "a_renamed" and "b_renamed" }
            .into("group_renamed")
            .group { "group_renamed"["a_renamed"] }
            .into { "group_renamed"["aGroup_renamed"] }

        val actual = doubleGroupedDf
            .rename { all().recursively() }
            .into { it.name + "_renamed" }

        actual shouldBe expected
    }

    companion object {
        val simpleDf = dataFrameOf("a", "b", "c")(
            1, 2, 3,
            4, 5, 6,
        )
        val groupedDf = simpleDf.group { "a" and "b" }.into("group")

        val doubleGroupedDf = groupedDf.group { "group"["a"] }.into { "group"["aGroup"] }
    }
}
71+
72+
class RenameToCamelCaseTests {
1073
companion object {
1174
val nestedDf = dataFrameOf("test_name")(dataFrameOf("another_name")(1))
1275
val nestedColumnGroup = dataFrameOf("test_name")(
1376
dataFrameOf("another_name")(1).first()
1477
)
78+
val doublyNestedColumnGroup = dataFrameOf("test_name")(
79+
dataFrameOf("another_name")(
80+
dataFrameOf("third_name")(1).first()
81+
).first()
82+
)
1583
val deeplyNestedDf = kotlin.run {
1684
val df = dataFrameOf("another_name")(1)
1785
val rowWithDf = dataFrameOf("group_name")(df).first()
@@ -36,6 +104,20 @@ class RenameTests {
36104
df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName")
37105
}
38106

107+
@Test
108+
fun `doubly nested row`() {
109+
val doublyNestedColumnGroup = dataFrameOf("test_name")(
110+
dataFrameOf("another_name")(
111+
dataFrameOf("third_name")(1).first()
112+
).first()
113+
)
114+
115+
val df = doublyNestedColumnGroup.renameToCamelCase()//.alsoDebug()
116+
df.columnNames() shouldBe listOf("testName")
117+
df["testName"].asColumnGroup().columnNames() shouldBe listOf("anotherName")
118+
df["testName"]["anotherName"].asColumnGroup().columnNames() shouldBe listOf("thirdName")
119+
}
120+
39121
@Test
40122
fun `deeply nested df`() {
41123
val df = deeplyNestedDf.renameToCamelCase()

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
9+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
910
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1011
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
1112
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
13+
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
1214
import org.jetbrains.kotlinx.dataframe.impl.columnName
1315
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
1416
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
@@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4244

4345
/**
 * Intermediate clause of a rename operation.
 *
 * Holds the [DataFrame] being renamed ([df]) together with the selector of the columns
 * to rename ([columns]). The `into` and `toCamelCase` extensions on this clause produce
 * the renamed [DataFrame].
 */
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4446

47+
/**
 * ## Rename to camelCase
 *
 * This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
 * and converting the first char to lowercase.
 * Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
 *
 * A column is selected for renaming when its name matches [DELIMITED_STRING_REGEX] or
 * starts with an uppercase character. An empty column name does not make the selection throw.
 *
 * @return a [DataFrame] with the selected columns (at any depth) renamed.
 */
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
    // recursively rename all columns written with delimiters or starting with a capital to camel case
    .rename {
        cols {
            // firstOrNull() instead of [0]: indexing an empty name would throw
            it.name() matches DELIMITED_STRING_REGEX || it.name.firstOrNull()?.isUpperCase() == true
        }.recursively()
    }.toCamelCase()

    // take all frame columns recursively and call renameToCamelCase() on all dataframes inside
    .update {
        colsOf<AnyFrame>().recursively()
    }.with { it.renameToCamelCase() }
67-
6865
/**
 * Renames the selected columns using the names of the given column references.
 *
 * Only the name of each reference in [newColumns] is used; the call is forwarded to the
 * `String`-based `into` overload.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> {
    val names = Array(newColumns.size) { index -> newColumns[index].name() }
    return into(*names)
}
7067

7168
/**
 * Renames the selected columns using the supplied [newNames].
 *
 * The actual work is delegated to [renameImpl].
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> {
    return renameImpl(newNames)
}
7570

7671
/**
 * Renames the selected columns using the names of the given properties.
 *
 * Only the `name` of each property in [newNames] is used; the call is forwarded to the
 * `String`-based `into` overload.
 */
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> {
    val names = Array(newNames.size) { index -> newNames[index].name }
    return into(*names)
}
7873

7974
/**
 * Renames each selected column to the name produced by [transform].
 *
 * The actual work is delegated to [renameImpl].
 *
 * @param transform computes the new name for a selected column.
 */
public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
    return renameImpl(transform)
}
76+
77+
/**
 * ## Rename to camelCase
 *
 * Gives every selected column a `camelCase` name: [delimiters][DELIMITERS_REGEX] are
 * replaced first, then the first character of the result is converted to lowercase.
 */
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
    into { column ->
        val withoutDelimiters = column.name().toCamelCaseByDelimiters(DELIMITERS_REGEX)
        withoutDelimiters.replaceFirstChar { first -> first.lowercaseChar() }
    }
8688

8789
// endregion
8890

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
5656
}
5757
}
5858

59+
/* TODO: Issue #418: breaks if running on ColumnGroup and its child */
5960
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
6061
val removeResult = df.removeImpl(columns = columns)
6162
val toInsert = removeResult.removedColumns.map {
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.api
2+
3+
import org.jetbrains.kotlinx.dataframe.DataFrame
4+
import org.jetbrains.kotlinx.dataframe.api.RenameClause
5+
import org.jetbrains.kotlinx.dataframe.api.cast
6+
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
7+
import org.jetbrains.kotlinx.dataframe.api.insert
8+
import org.jetbrains.kotlinx.dataframe.api.under
9+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
10+
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
11+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.allChildrenNotNull
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree
13+
import org.jetbrains.kotlinx.dataframe.kind
14+
15+
16+
/**
 * Renames the selected columns to [newNames], pairing the n-th selected column with the
 * n-th name.
 *
 * The pairing is fixed up front by column path. A running counter inside the transform
 * would instead hand out names in the order the transform-based [renameImpl] visits the
 * columns, which need not be the order in which the columns were selected.
 *
 * Surplus names are ignored; too few names fail on the array access.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(newNames: Array<out String>): DataFrame<T> {
    val newNameByPath = df.getColumnsWithPaths(columns)
        .mapIndexed { index, column -> column.path to newNames[index] }
        .toMap()

    return renameImpl { column -> newNameByPath.getValue(column.path) }
}
20+
21+
22+
/**
 * Renames the selected columns by rewriting node names in a tree of all columns and then
 * rebuilding the [DataFrame] from that tree.
 *
 * Working on the tree lets a group and its children be renamed in the same call.
 *
 * @param transform computes the new name for a selected column.
 */
internal fun <T, C> RenameClause<T, C>.renameImpl(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
    val selected = df.getColumnsWithPaths(columns)
    val root = df.getColumnsWithPaths { all().rec() }.collectTree()

    // first pass: give every node that corresponds to a selected column its new name
    for (node in root.allChildrenNotNull()) {
        // nodes are matched to selected columns by equality of the column data
        val match = selected.find { it.data == node.data } ?: continue
        node.name = transform(match)
    }

    // second pass: rebuild the DataFrame from the (possibly renamed) nodes
    var result = DataFrame.empty(df.rowsCount()).cast<T>()
    for (node in root.allChildrenNotNull()) {
        val renamed = node.data.rename(node.name)

        // groups are not inserted themselves; only value and frame columns are,
        // placed under their parent path
        if (renamed.kind == ColumnKind.Group) continue

        val parentPath = node.pathFromRoot().dropLast(1)
        result = result.insert(renamed).under(parentPath)
    }

    return result
}

0 commit comments

Comments
 (0)