Skip to content

Commit 3101982

Browse files
authored
Adds the parent name in the flatten operation (#378)
* Added the test from the ticket
* Fixed the missing parent name
* Fixed the formatting and moved the test
* Removed unused imports
* Regenerated docs + fixed tests
1 parent 7db77c7 commit 3101982

File tree

8 files changed

+94
-24
lines changed

8 files changed

+94
-24
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,16 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
1313

14-
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
14+
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
1515

16-
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumnSet() }
16+
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
1717

18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
19-
flattenImpl { columns.toColumnSet() }
18+
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2020

21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> =
22-
flattenImpl { columns.toColumnSet() }
21+
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2323

2424
// endregion

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
1313
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
16-
columns: ColumnsSelector<T, C>
16+
columns: ColumnsSelector<T, C>,
17+
keepParentNameForColumns: Boolean = false
1718
): DataFrame<T> {
1819
val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.top() }
1920
val rootPrefixes = rootColumns.map { it.path }.toSet()
@@ -29,7 +30,8 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
2930
.into {
3031
val targetPath = getRootPrefix(it.path).dropLast(1)
3132
val nameGen = nameGenerators[targetPath]!!
32-
val name = nameGen.addUnique(it.name())
33+
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
34+
val name = nameGen.addUnique(preferredName)
3335
targetPath + name
3436
}
3537
return result

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,4 +74,37 @@ class FlattenTests {
7474
flattened.getColumnGroup("f").columnNames() shouldBe listOf("a", "b", "c")
7575
flattened.ungroup("f") shouldBe df
7676
}
77+
78+
@Test
79+
fun `flatten the aggregation and check column names`() {
80+
val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")(
81+
"Alice", "Cooper", 15, "London", 54, true,
82+
"Bob", "Dylan", 45, "Dubai", 87, true,
83+
"Charlie", "Daniels", 20, "Moscow", 35, false,
84+
"Charlie", "Chaplin", 40, "Milan", 41, true,
85+
"Bob", "Marley", 30, "Tokyo", 68, true,
86+
"Alice", "Wolf", 20, "Milan", 55, false,
87+
"Charlie", "Byrd", 30, "Moscow", 90, true
88+
).cast<Person>()
89+
90+
val aggregate = df.groupBy("city")
91+
.aggregate {
92+
mean() into "mean"
93+
std() into "std"
94+
}
95+
96+
aggregate
97+
.flatten(keepParentNameForColumns = true)
98+
.columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std")
99+
}
100+
101+
@DataSchema
102+
interface Person {
103+
val age: Int
104+
val city: String?
105+
val firstName: String
106+
val lastName: String
107+
val weight: Int?
108+
val isHappy: Boolean
109+
}
77110
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ class MoveTests {
1313
val grouped = df.move { cols { it.name.contains(".") } }.into { it.name.split(".").toPath() }
1414

1515
@Test
16-
fun batchGrouping() {
16+
fun `batch grouping`() {
1717
grouped.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
1818
grouped["a"].asColumnGroup().columnNames() shouldBe listOf("b", "c")
1919
grouped["a"]["c"].asColumnGroup().columnNames() shouldBe listOf("d")
@@ -33,7 +33,7 @@ class MoveTests {
3333
}
3434

3535
@Test
36-
fun batchUngrouping() {
36+
fun `batch ungrouping`() {
3737
val ungrouped = grouped.move { dfs { it.depth() > 0 && !it.isColumnGroup() } }.into { pathOf(it.path.joinToString(".")) }
3838
ungrouped.columnNames() shouldBe listOf("q", "a.b", "a.c.d", "b.c", "b.d", "w", "e.f", "r")
3939
}
@@ -64,7 +64,7 @@ class MoveTests {
6464
}
6565

6666
@Test
67-
fun `selectDfs`() {
67+
fun `select Dfs`() {
6868
val selected = grouped.select { it["a"].dfs { !it.isColumnGroup() } }
6969
selected.columnNames() shouldBe listOf("b", "d")
7070
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,16 @@ import kotlin.reflect.KProperty
99

1010
// region DataFrame
1111

12-
public fun <T> DataFrame<T>.flatten(): DataFrame<T> = flatten { all() }
12+
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { all() }
1313

14-
public fun <T, C> DataFrame<T>.flatten(columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns)
14+
public fun <T, C> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, columns: ColumnsSelector<T, C>): DataFrame<T> = flattenImpl(columns, keepParentNameForColumns)
1515

16-
public fun <T> DataFrame<T>.flatten(vararg columns: String): DataFrame<T> = flattenImpl { columns.toColumnSet() }
16+
public fun <T> DataFrame<T>.flatten(vararg columns: String, keepParentNameForColumns: Boolean = false): DataFrame<T> = flatten(keepParentNameForColumns) { columns.toColumnSet() }
1717

18-
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>): DataFrame<T> =
19-
flattenImpl { columns.toColumnSet() }
18+
public fun <T, C> DataFrame<T>.flatten(vararg columns: ColumnReference<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
19+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2020

21-
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>): DataFrame<T> =
22-
flattenImpl { columns.toColumnSet() }
21+
public fun <T, C> DataFrame<T>.flatten(vararg columns: KProperty<C>, keepParentNameForColumns: Boolean = false): DataFrame<T> =
22+
flatten(keepParentNameForColumns) { columns.toColumnSet() }
2323

2424
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
1313
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
1414

1515
internal fun <T, C> DataFrame<T>.flattenImpl(
16-
columns: ColumnsSelector<T, C>
16+
columns: ColumnsSelector<T, C>,
17+
keepParentNameForColumns: Boolean = false
1718
): DataFrame<T> {
1819
val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.top() }
1920
val rootPrefixes = rootColumns.map { it.path }.toSet()
@@ -29,7 +30,8 @@ internal fun <T, C> DataFrame<T>.flattenImpl(
2930
.into {
3031
val targetPath = getRootPrefix(it.path).dropLast(1)
3132
val nameGen = nameGenerators[targetPath]!!
32-
val name = nameGen.addUnique(it.name())
33+
val preferredName = if (keepParentNameForColumns) "${it.name()}.${it.parentName}" else it.name()
34+
val name = nameGen.addUnique(preferredName)
3335
targetPath + name
3436
}
3537
return result

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,4 +74,37 @@ class FlattenTests {
7474
flattened.getColumnGroup("f").columnNames() shouldBe listOf("a", "b", "c")
7575
flattened.ungroup("f") shouldBe df
7676
}
77+
78+
@Test
79+
fun `flatten the aggregation and check column names`() {
80+
val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")(
81+
"Alice", "Cooper", 15, "London", 54, true,
82+
"Bob", "Dylan", 45, "Dubai", 87, true,
83+
"Charlie", "Daniels", 20, "Moscow", 35, false,
84+
"Charlie", "Chaplin", 40, "Milan", 41, true,
85+
"Bob", "Marley", 30, "Tokyo", 68, true,
86+
"Alice", "Wolf", 20, "Milan", 55, false,
87+
"Charlie", "Byrd", 30, "Moscow", 90, true
88+
).cast<Person>()
89+
90+
val aggregate = df.groupBy("city")
91+
.aggregate {
92+
mean() into "mean"
93+
std() into "std"
94+
}
95+
96+
aggregate
97+
.flatten(keepParentNameForColumns = true)
98+
.columnNames() shouldBe listOf("city", "age.mean", "weight.mean", "age.std", "weight.std")
99+
}
100+
101+
@DataSchema
102+
interface Person {
103+
val age: Int
104+
val city: String?
105+
val firstName: String
106+
val lastName: String
107+
val weight: Int?
108+
val isHappy: Boolean
109+
}
77110
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ class MoveTests {
1313
val grouped = df.move { cols { it.name.contains(".") } }.into { it.name.split(".").toPath() }
1414

1515
@Test
16-
fun batchGrouping() {
16+
fun `batch grouping`() {
1717
grouped.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
1818
grouped["a"].asColumnGroup().columnNames() shouldBe listOf("b", "c")
1919
grouped["a"]["c"].asColumnGroup().columnNames() shouldBe listOf("d")
@@ -33,7 +33,7 @@ class MoveTests {
3333
}
3434

3535
@Test
36-
fun batchUngrouping() {
36+
fun `batch ungrouping`() {
3737
val ungrouped = grouped.move { dfs { it.depth() > 0 && !it.isColumnGroup() } }.into { pathOf(it.path.joinToString(".")) }
3838
ungrouped.columnNames() shouldBe listOf("q", "a.b", "a.c.d", "b.c", "b.d", "w", "e.f", "r")
3939
}
@@ -64,7 +64,7 @@ class MoveTests {
6464
}
6565

6666
@Test
67-
fun `selectDfs`() {
67+
fun `select Dfs`() {
6868
val selected = grouped.select { it["a"].dfs { !it.isColumnGroup() } }
6969
selected.columnNames() shouldBe listOf("b", "d")
7070
}

0 commit comments

Comments
 (0)