Skip to content

Commit 2eac6b6

Browse files
authored
Merge pull request #419 from Kotlin/rename
Fixed rename behavior
2 parents c59802d + 53c6531 commit 2eac6b6

File tree

12 files changed

+352
-48
lines changed

12 files changed

+352
-48
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
9+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
910
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1011
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
1112
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
13+
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
1214
import org.jetbrains.kotlinx.dataframe.impl.columnName
1315
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
1416
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
@@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4244

4345
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4446

47+
/**
48+
* ## Rename to camelCase
49+
*
50+
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
51+
* and converting the first char to lowercase.
52+
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
53+
*/
4554
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
46-
// recursively rename all column groups to camel case
55+
// recursively rename all columns whose names contain delimiters or start with a capital letter to camel case
4756
.rename {
48-
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
49-
}.toCamelCase()
50-
51-
// recursively rename all other columns to camel case
52-
.rename {
53-
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
57+
cols { it.name() matches DELIMITED_STRING_REGEX || it.name[0].isUpperCase() }.recursively()
5458
}.toCamelCase()
5559

5660
// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
5761
.update {
5862
colsOf<AnyFrame>().recursively()
5963
}.with { it.renameToCamelCase() }
6064

61-
// convert all first chars of all columns to the lowercase
62-
.rename {
63-
cols { !it.isColumnGroup() }.recursively()
64-
}.into {
65-
it.name.replaceFirstChar { it.lowercaseChar() }
66-
}
67-
6865
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
6966
into(*newColumns.map { it.name() }.toTypedArray())
7067

7168
public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> =
72-
df.move(columns).intoIndexed { col, index ->
73-
col.path.dropLast(1) + newNames[index]
74-
}
69+
renameImpl(newNames)
7570

7671
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> =
7772
into(*newNames.map { it.name }.toTypedArray())
7873

7974
public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> =
80-
df.move(columns).into {
81-
it.path.dropLast(1) + transform(it)
82-
}
83-
84-
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
85-
into { it.name().toCamelCaseByDelimiters(DELIMITERS_REGEX) }
75+
renameImpl(transform)
76+
77+
/**
78+
* ## Rename to camelCase
79+
*
80+
* Renames the selected columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
81+
* and converting the first char to lowercase.
82+
*/
83+
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into {
84+
it.name()
85+
.toCamelCaseByDelimiters(DELIMITERS_REGEX)
86+
.replaceFirstChar { it.lowercaseChar() }
87+
}
8688

8789
// endregion
8890

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
5656
}
5757
}
5858

59+
/* TODO: Issue #418: breaks when the selection contains both a ColumnGroup and one of its children */
5960
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
6061
val removeResult = df.removeImpl(columns = columns)
6162
val toInsert = removeResult.removedColumns.map {
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.api
2+
3+
import org.jetbrains.kotlinx.dataframe.DataFrame
4+
import org.jetbrains.kotlinx.dataframe.api.RenameClause
5+
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
6+
import org.jetbrains.kotlinx.dataframe.api.cast
7+
import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
8+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
9+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
10+
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
11+
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.allChildrenNotNull
13+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree
14+
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.map
15+
import org.jetbrains.kotlinx.dataframe.kind
16+
17+
internal fun <T, C> RenameClause<T, C>.renameImpl(newNames: Array<out String>): DataFrame<T> {
18+
var i = 0
19+
return renameImpl { newNames[i++] }
20+
}
21+
22+
internal fun <T, C> RenameClause<T, C>.renameImpl(transform: (ColumnWithPath<C>) -> String): DataFrame<T> {
23+
// get all selected columns and their paths
24+
val selectedColumnsWithPath = df.getColumnsWithPaths(columns)
25+
.associateBy { it.data }
26+
// gather a tree of all columns where the nodes will be renamed
27+
val tree = df.getColumnsWithPaths { all().rec() }.collectTree()
28+
29+
// perform rename in nodes
30+
tree.allChildrenNotNull().forEach { node ->
31+
// Check if the current node/column is a selected column and, if so, get its ColumnWithPath
32+
val column = selectedColumnsWithPath[node.data] ?: return@forEach
33+
// Use the found selected ColumnWithPath to query for the new name
34+
val newColumnName = transform(column)
35+
node.name = newColumnName
36+
}
37+
38+
// use the mapping function to convert the tree to a ColumnGroup/ValueColumn structure
39+
// The result will be a ColumnGroup, since the root node's data is null
40+
val renamedDfAsColumnGroup = tree.map { node, children ->
41+
val col = node.data
42+
when (col?.kind) {
43+
// if the column is a value column or a frame column, rename it using the node's (new) name
44+
ColumnKind.Value, ColumnKind.Frame ->
45+
col.rename(node.name)
46+
47+
// if the column is a group column, create a new column group using the node's (new) name and children
48+
// if the column is null, node is the root, so we'll create a column group as well
49+
ColumnKind.Group, null ->
50+
children
51+
.toDataFrame()
52+
.asColumnGroup(node.name)
53+
}
54+
} as ColumnGroup<*>
55+
56+
// convert the created ColumnGroup to a DataFrame
57+
val renamedDf = renamedDfAsColumnGroup.columns().toDataFrame()
58+
return renamedDf.cast()
59+
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dataframe.impl.columns.tree
33
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
44

55
internal class TreeNode<T>(
6-
override val name: String,
6+
override var name: String,
77
override val depth: Int,
88
override var data: T,
99
override val parent: TreeNode<T>? = null,

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.impl.columns.tree
22

33
import org.jetbrains.kotlinx.dataframe.AnyCol
4+
import org.jetbrains.kotlinx.dataframe.DataFrame
45
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
56
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
67
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
@@ -61,6 +62,16 @@ internal fun <T> TreeNode<T>.topmostChildrenExcluding(excludeRoot: TreeNode<*>):
6162
return result
6263
}
6364

65+
/**
66+
* Mapping function for [ReadonlyTreeNodes][ReadonlyTreeNode] (like [TreeNode])
67+
* which can convert the tree-structure (depth-first) to any other tree-type structure (e.g. [DataFrame]).
68+
*/
69+
@Suppress("UNCHECKED_CAST")
70+
internal fun <T : ReadonlyTreeNode<*>, R> T.map(operation: (node: T, children: List<R>) -> R): R {
71+
val children = children.map { (it as T).map(operation) }
72+
return operation(this, children)
73+
}
74+
6475
internal fun <T> TreeNode<T?>.allChildrenNotNull(): List<TreeNode<T>> =
6576
allChildren { it.data != null } as List<TreeNode<T>>
6677

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,76 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
77
import org.junit.Test
88

99
class RenameTests {
10+
companion object {
11+
val simpleDf = dataFrameOf("a", "b", "c")(
12+
1, 2, 3,
13+
4, 5, 6,
14+
)
15+
val groupedDf = simpleDf.group { "a" and "b" }.into("group")
16+
17+
val doubleGroupedDf = groupedDf.group { "group"["a"] }.into { "group"["aGroup"] }
18+
}
19+
20+
@Test
21+
fun `simple rename`() {
22+
val renamedDf = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
23+
1, 2, 3,
24+
4, 5, 6,
25+
)
26+
27+
simpleDf.rename { all() }.into { it.name + "_renamed" } shouldBe renamedDf
28+
simpleDf.rename { all() }.into("a_renamed", "b_renamed", "c_renamed") shouldBe renamedDf
29+
}
30+
31+
@Test
32+
fun `partial grouped rename`() {
33+
val renamedDf = dataFrameOf("a_renamed", "b", "c")(
34+
1, 2, 3,
35+
4, 5, 6,
36+
).group { "a_renamed" and "b" }.into("group_renamed")
37+
38+
groupedDf
39+
.rename { "group" and "group"["a"] }
40+
.into { it.name + "_renamed" } shouldBe renamedDf
41+
}
42+
43+
@Test
44+
fun `grouped rename`() {
45+
val renamedDf = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
46+
1, 2, 3,
47+
4, 5, 6,
48+
).group { "a_renamed" and "b_renamed" }.into("group_renamed")
49+
50+
groupedDf
51+
.rename { all().recursively() }
52+
.into { it.name + "_renamed" } shouldBe renamedDf
53+
}
54+
55+
@Test
56+
fun `double grouped rename in 3 steps`() {
57+
val renamedDf = dataFrameOf("a_renamed", "b_renamed", "c_renamed")(
58+
1, 2, 3,
59+
4, 5, 6,
60+
).group { "a_renamed" and "b_renamed" }.into("group_renamed")
61+
.group { "group_renamed"["a_renamed"] }.into { "group_renamed"["aGroup_renamed"] }
62+
63+
doubleGroupedDf
64+
.rename { all().recursively() }
65+
.into { it.name + "_renamed" } shouldBe renamedDf
66+
}
67+
}
68+
69+
class RenameToCamelCaseTests {
1070
companion object {
1171
val nestedDf = dataFrameOf("test_name")(dataFrameOf("another_name")(1))
1272
val nestedColumnGroup = dataFrameOf("test_name")(
1373
dataFrameOf("another_name")(1).first()
1474
)
75+
val doublyNestedColumnGroup = dataFrameOf("test_name")(
76+
dataFrameOf("another_name")(
77+
dataFrameOf("third_name")(1).first()
78+
).first()
79+
)
1580
val deeplyNestedDf = kotlin.run {
1681
val df = dataFrameOf("another_name")(1)
1782
val rowWithDf = dataFrameOf("group_name")(df).first()
@@ -36,6 +101,20 @@ class RenameTests {
36101
df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName")
37102
}
38103

104+
@Test
105+
fun `doubly nested row`() {
106+
val doublyNestedColumnGroup = dataFrameOf("test_name")(
107+
dataFrameOf("another_name")(
108+
dataFrameOf("third_name")(1).first()
109+
).first()
110+
)
111+
112+
val df = doublyNestedColumnGroup.renameToCamelCase()
113+
df.columnNames() shouldBe listOf("testName")
114+
df["testName"].asColumnGroup().columnNames() shouldBe listOf("anotherName")
115+
df["testName"]["anotherName"].asColumnGroup().columnNames() shouldBe listOf("thirdName")
116+
}
117+
39118
@Test
40119
fun `deeply nested df`() {
41120
val df = deeplyNestedDf.renameToCamelCase()

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
9+
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
910
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1011
import org.jetbrains.kotlinx.dataframe.impl.DELIMITED_STRING_REGEX
1112
import org.jetbrains.kotlinx.dataframe.impl.DELIMITERS_REGEX
13+
import org.jetbrains.kotlinx.dataframe.impl.api.renameImpl
1214
import org.jetbrains.kotlinx.dataframe.impl.columnName
1315
import org.jetbrains.kotlinx.dataframe.impl.toCamelCaseByDelimiters
1416
import org.jetbrains.kotlinx.dataframe.util.ITERABLE_COLUMNS_DEPRECATION_MESSAGE
@@ -42,47 +44,47 @@ public fun <T, C> DataFrame<T>.rename(cols: Iterable<ColumnReference<C>>): Renam
4244

4345
public data class RenameClause<T, C>(val df: DataFrame<T>, val columns: ColumnsSelector<T, C>)
4446

47+
/**
48+
* ## Rename to camelCase
49+
*
50+
* This function renames all columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
51+
* and converting the first char to lowercase.
52+
* Even [DataFrames][DataFrame] inside [FrameColumns][FrameColumn] are traversed recursively.
53+
*/
4554
public fun <T> DataFrame<T>.renameToCamelCase(): DataFrame<T> = this
46-
// recursively rename all column groups to camel case
55+
// recursively rename all columns whose names contain delimiters or start with a capital letter to camel case
4756
.rename {
48-
groups { it.name() matches DELIMITED_STRING_REGEX }.recursively()
49-
}.toCamelCase()
50-
51-
// recursively rename all other columns to camel case
52-
.rename {
53-
cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively()
57+
cols { it.name() matches DELIMITED_STRING_REGEX || it.name[0].isUpperCase() }.recursively()
5458
}.toCamelCase()
5559

5660
// take all frame columns recursively and call renameToCamelCase() on all dataframes inside
5761
.update {
5862
colsOf<AnyFrame>().recursively()
5963
}.with { it.renameToCamelCase() }
6064

61-
// convert all first chars of all columns to the lowercase
62-
.rename {
63-
cols { !it.isColumnGroup() }.recursively()
64-
}.into {
65-
it.name.replaceFirstChar { it.lowercaseChar() }
66-
}
67-
6865
public fun <T, C> RenameClause<T, C>.into(vararg newColumns: ColumnReference<*>): DataFrame<T> =
6966
into(*newColumns.map { it.name() }.toTypedArray())
7067

7168
public fun <T, C> RenameClause<T, C>.into(vararg newNames: String): DataFrame<T> =
72-
df.move(columns).intoIndexed { col, index ->
73-
col.path.dropLast(1) + newNames[index]
74-
}
69+
renameImpl(newNames)
7570

7671
public fun <T, C> RenameClause<T, C>.into(vararg newNames: KProperty<*>): DataFrame<T> =
7772
into(*newNames.map { it.name }.toTypedArray())
7873

7974
public fun <T, C> RenameClause<T, C>.into(transform: (ColumnWithPath<C>) -> String): DataFrame<T> =
80-
df.move(columns).into {
81-
it.path.dropLast(1) + transform(it)
82-
}
83-
84-
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> =
85-
into { it.name().toCamelCaseByDelimiters(DELIMITERS_REGEX) }
75+
renameImpl(transform)
76+
77+
/**
78+
* ## Rename to camelCase
79+
*
80+
* Renames the selected columns to `camelCase` by replacing all [delimiters][DELIMITERS_REGEX]
81+
* and converting the first char to lowercase.
82+
*/
83+
public fun <T, C> RenameClause<T, C>.toCamelCase(): DataFrame<T> = into {
84+
it.name()
85+
.toCamelCaseByDelimiters(DELIMITERS_REGEX)
86+
.replaceFirstChar { it.lowercaseChar() }
87+
}
8688

8789
// endregion
8890

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
5656
}
5757
}
5858

59+
/* TODO: Issue #418: breaks when the selection contains both a ColumnGroup and one of its children */
5960
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
6061
val removeResult = df.removeImpl(columns = columns)
6162
val toInsert = removeResult.removedColumns.map {

0 commit comments

Comments
 (0)