Skip to content

Commit 8fffbd1

Browse files
committed
fixed duplicate columns, needs more tests
1 parent e9dfab9 commit 8fffbd1

File tree

12 files changed

+140
-82
lines changed

12 files changed

+140
-82
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4430,17 +4430,14 @@ internal fun <T, C> ColumnsSelector<T, C>.filter(predicate: (ColumnWithPath<C>)
44304430
* match the given [predicate].
44314431
*/
44324432
internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>): ColumnSet<*> =
4433-
when (this) {
4434-
is SingleColumn<*> -> transformSingle {
4435-
it.children().filter { predicate(it) }
4436-
}
4437-
4438-
else -> transform {
4439-
it.filter { predicate(it) }
4440-
}
4433+
transform {
4434+
if (this is SingleColumn<*> && it.singleOrNull()?.isColumnGroup() == true) {
4435+
it.single().children()
4436+
} else {
4437+
it
4438+
}.filter(predicate)
44414439
}
44424440

4443-
44444441
@Deprecated("Replaced with recursively()")
44454442
internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) =
44464443
transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } }

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ public interface ColumnSetTransformer {
3434

3535
public fun transformRemainingSingle(singleColumn: SingleColumn<*>): SingleColumn<*>
3636

37+
public fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*>
38+
3739
public fun transform(columnSet: ColumnSet<*>): ColumnSet<*>
3840
}
3941

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import org.jetbrains.kotlinx.dataframe.api.*
1010
import org.jetbrains.kotlinx.dataframe.api.name
1111
import org.jetbrains.kotlinx.dataframe.columns.*
1212
import org.jetbrains.kotlinx.dataframe.columns.values
13-
import org.jetbrains.kotlinx.dataframe.impl.*
1413
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
1514
import org.jetbrains.kotlinx.dataframe.impl.asNullable
1615
import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn
@@ -134,15 +133,18 @@ internal fun <C> ColumnSet<C>.recursivelyImpl(
134133

135134
val flattenTransformer = object : ColumnSetTransformer {
136135

137-
private fun flattenColumnWithPaths(list: List<ColumnWithPath<*>>): List<ColumnWithPath<*>> =
138-
if (includeTopLevel) {
139-
list.dfs()
136+
private fun flattenColumnWithPaths(list: List<ColumnWithPath<*>>): List<ColumnWithPath<*>> {
137+
val cols = if (list.singleOrNull()?.isColumnGroup() == true) list.single().children() else list
138+
return if (includeTopLevel) {
139+
cols.dfs()
140140
} else {
141-
list
141+
cols
142142
.filter { it.isColumnGroup() }
143143
.flatMap { it.children().dfs() }
144144
}.filter { includeGroups || !it.isColumnGroup() }
145+
}
145146

147+
// TODO remove/clean
146148
override fun transformRemainingSingle(singleColumn: SingleColumn<*>): SingleColumn<*> =
147149
singleColumn.transformRemainingSingle {
148150
if (!it.isColumnGroup()) return@transformRemainingSingle it
@@ -153,6 +155,14 @@ internal fun <C> ColumnSet<C>.recursivelyImpl(
153155
DataColumn.createColumnGroup(it.name, res.toDataFrame()).addPath(it.path())
154156
}
155157

158+
// TODO remove/clean
159+
override fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> = singleColumn.transformSingle {
160+
if (!it.isColumnGroup()) return@transformSingle listOf(it)
161+
162+
val list = it.asColumnGroup().getColumnsWithPaths { all() }
163+
flattenColumnWithPaths(list)
164+
}
165+
156166
override fun transform(columnSet: ColumnSet<*>): ColumnSet<*> = columnSet.transform(::flattenColumnWithPaths)
157167
}
158168

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,21 +141,21 @@ open class ColumnsSelectionDslTests : TestBase() {
141141
).shouldAllBeEqual()
142142

143143
listOf(
144-
dfGroup.select { name.firstNames },
144+
dfGroup.select { name.firstName },
145145

146-
dfGroup.select { colGroup("name").colGroup("firstNames") },
147-
dfGroup.select { colGroup("name").colGroup<FirstNames>("firstNames") },
146+
dfGroup.select { colGroup("name").colGroup("firstName") },
147+
dfGroup.select { colGroup("name").colGroup<FirstNames>("firstName") },
148148

149-
dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) },
150-
dfGroup.select { colGroup("name").colGroup<FirstNames>(pathOf("firstNames")) },
149+
dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) },
150+
dfGroup.select { colGroup("name").colGroup<FirstNames>(pathOf("firstName")) },
151151

152-
dfGroup.select { colGroup("name").colGroup(Name2::firstNames) },
152+
dfGroup.select { colGroup("name").colGroup(Name2::firstName) },
153153
).shouldAllBeEqual()
154154

155155
dfGroup.select {
156-
"name"["firstNames"]["firstName", "secondName"]
156+
"name"["firstName"]["firstName", "secondName"]
157157
} shouldBe dfGroup.select {
158-
name.firstNames["firstName"] and name.firstNames["secondName"]
158+
name.firstName["firstName"] and name.firstName["secondName"]
159159
}
160160
}
161161

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,33 @@ package org.jetbrains.kotlinx.dataframe.api
22

33
import io.kotest.matchers.shouldBe
44
import org.jetbrains.kotlinx.dataframe.alsoDebug
5+
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
56
import org.jetbrains.kotlinx.dataframe.samples.api.TestBase
67
import org.junit.Test
78

89
class Recursively : TestBase() {
9-
private val recursivelyGoal = dfGroup.select { dfs { true } }.reorderColumnsBy { name }.alsoDebug() // correct
10-
private val recursivelyNoGroups = dfGroup.select { allDfs(false) }.reorderColumnsBy { name }.alsoDebug() // correct
11-
private val recursivelyString = dfGroup.select { dfsOf<String?>() }.reorderColumnsBy { name }.alsoDebug()
10+
11+
fun List<ColumnWithPath<*>>.print() {
12+
forEach {
13+
if (it.isValueColumn()) println("${it.name}: ${it.type()}")
14+
else it.print()
15+
}
16+
println()
17+
}
18+
19+
infix fun List<ColumnWithPath<*>>.shouldBe(other: List<ColumnWithPath<*>>) {
20+
this.map { it.name to it.path } shouldBe other.map { it.name to it.path }
21+
}
22+
23+
private val recursivelyGoal = dfGroup.getColumnsWithPaths { dfs { true } }
24+
.sortedBy { it.name }
25+
.also { it.print() }
26+
private val recursivelyNoGroups = dfGroup.getColumnsWithPaths { allDfs(false) }
27+
.sortedBy { it.name }
28+
.also { it.print() }
29+
private val recursivelyString = dfGroup.getColumnsWithPaths { dfsOf<String?>() }
30+
.sortedBy { it.name }
31+
.also { it.print() }
1232

1333
@Test
1434
fun `recursively all`() {
@@ -30,43 +50,43 @@ class Recursively : TestBase() {
3050

3151
@Test
3252
fun recursively() {
33-
dfGroup.select { recursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
34-
dfGroup.select { rec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
53+
dfGroup.getColumnsWithPaths { recursively() }.sortedBy { it.name } shouldBe recursivelyGoal
54+
dfGroup.getColumnsWithPaths { rec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
3555
}
3656

3757
@Test
3858
fun `all recursively`() {
39-
dfGroup.select { all().recursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
40-
dfGroup.select { all().rec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
59+
dfGroup.getColumnsWithPaths { all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal
60+
dfGroup.getColumnsWithPaths { all().rec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
4161
}
4262

4363
@Test
4464
fun `cols recursively`() {
45-
dfGroup.select { cols().recursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
46-
dfGroup.select { cols().rec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
65+
dfGroup.getColumnsWithPaths { cols().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal
66+
dfGroup.getColumnsWithPaths { cols().rec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
4767
}
4868

4969
@Test
5070
fun `colsOf recursively`() {
51-
dfGroup.select { colsOf<String?>().recursively() }.reorderColumnsBy { name } shouldBe recursivelyString
52-
dfGroup.select { colsOf<String?>().rec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyString
71+
dfGroup.getColumnsWithPaths { colsOf<String?>().recursively() }.sortedBy { it.name } shouldBe recursivelyString
72+
dfGroup.getColumnsWithPaths { colsOf<String?>().rec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyString
5373
}
5474

5575
@Test
5676
fun `allRecursively`() {
57-
dfGroup.select { allRecursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
58-
dfGroup.select { allRec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
77+
dfGroup.getColumnsWithPaths { allRecursively() }.sortedBy { it.name } shouldBe recursivelyGoal
78+
dfGroup.getColumnsWithPaths { allRec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
5979
}
6080

6181
@Test
6282
fun `all allRecursively`() {
63-
dfGroup.select { all().allRecursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
64-
dfGroup.select { all().allRec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
83+
dfGroup.getColumnsWithPaths { all().allRecursively() }.sortedBy { it.name } shouldBe recursivelyGoal
84+
dfGroup.getColumnsWithPaths { all().allRec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
6585
}
6686

6787
@Test
6888
fun `cols allRecursively`() {
69-
dfGroup.select { cols().allRecursively() }.reorderColumnsBy { name } shouldBe recursivelyGoal
70-
dfGroup.select { cols().allRec(includeGroups = false) }.reorderColumnsBy { name } shouldBe recursivelyNoGroups
89+
dfGroup.getColumnsWithPaths { cols().allRecursively() }.sortedBy { it.name } shouldBe recursivelyGoal
90+
dfGroup.getColumnsWithPaths { cols().allRec(includeGroups = false) }.sortedBy { it.name } shouldBe recursivelyNoGroups
7191
}
7292
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public open class TestBase {
2626

2727
dataFrameOf(firstName, secondName, thirdName)
2828
.cast<FirstNames>(verify = true)
29-
.asColumnGroup("firstNames")
29+
.asColumnGroup("firstName")
3030
}.cast<Person2>(verify = true)
3131

3232
@DataSchema
@@ -53,7 +53,7 @@ public open class TestBase {
5353

5454
@DataSchema
5555
interface Name2 {
56-
val firstNames: DataRow<FirstNames>
56+
val firstName: DataRow<FirstNames>
5757
val lastName: String
5858
}
5959

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2046,17 +2046,14 @@ internal fun <T, C> ColumnsSelector<T, C>.filter(predicate: (ColumnWithPath<C>)
20462046
* match the given [predicate].
20472047
*/
20482048
internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>): ColumnSet<*> =
2049-
when (this) {
2050-
is SingleColumn<*> -> transformSingle {
2051-
it.children().filter { predicate(it) }
2052-
}
2053-
2054-
else -> transform {
2055-
it.filter { predicate(it) }
2056-
}
2049+
transform {
2050+
if (this is SingleColumn<*> && it.singleOrNull()?.isColumnGroup() == true) {
2051+
it.single().children()
2052+
} else {
2053+
it
2054+
}.filter(predicate)
20572055
}
20582056

2059-
20602057
@Deprecated("Replaced with recursively()")
20612058
internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) =
20622059
transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } }

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ public interface ColumnSetTransformer {
3434

3535
public fun transformRemainingSingle(singleColumn: SingleColumn<*>): SingleColumn<*>
3636

37+
public fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*>
38+
3739
public fun transform(columnSet: ColumnSet<*>): ColumnSet<*>
3840
}
3941

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import org.jetbrains.kotlinx.dataframe.api.*
1010
import org.jetbrains.kotlinx.dataframe.api.name
1111
import org.jetbrains.kotlinx.dataframe.columns.*
1212
import org.jetbrains.kotlinx.dataframe.columns.values
13-
import org.jetbrains.kotlinx.dataframe.impl.*
1413
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
1514
import org.jetbrains.kotlinx.dataframe.impl.asNullable
1615
import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn
@@ -134,15 +133,18 @@ internal fun <C> ColumnSet<C>.recursivelyImpl(
134133

135134
val flattenTransformer = object : ColumnSetTransformer {
136135

137-
private fun flattenColumnWithPaths(list: List<ColumnWithPath<*>>): List<ColumnWithPath<*>> =
138-
if (includeTopLevel) {
139-
list.dfs()
136+
private fun flattenColumnWithPaths(list: List<ColumnWithPath<*>>): List<ColumnWithPath<*>> {
137+
val cols = if (list.singleOrNull()?.isColumnGroup() == true) list.single().children() else list
138+
return if (includeTopLevel) {
139+
cols.dfs()
140140
} else {
141-
list
141+
cols
142142
.filter { it.isColumnGroup() }
143143
.flatMap { it.children().dfs() }
144144
}.filter { includeGroups || !it.isColumnGroup() }
145+
}
145146

147+
// TODO remove/clean
146148
override fun transformRemainingSingle(singleColumn: SingleColumn<*>): SingleColumn<*> =
147149
singleColumn.transformRemainingSingle {
148150
if (!it.isColumnGroup()) return@transformRemainingSingle it
@@ -153,6 +155,14 @@ internal fun <C> ColumnSet<C>.recursivelyImpl(
153155
DataColumn.createColumnGroup(it.name, res.toDataFrame()).addPath(it.path())
154156
}
155157

158+
// TODO remove/clean
159+
override fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> = singleColumn.transformSingle {
160+
if (!it.isColumnGroup()) return@transformSingle listOf(it)
161+
162+
val list = it.asColumnGroup().getColumnsWithPaths { all() }
163+
flattenColumnWithPaths(list)
164+
}
165+
156166
override fun transform(columnSet: ColumnSet<*>): ColumnSet<*> = columnSet.transform(::flattenColumnWithPaths)
157167
}
158168

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,21 +141,21 @@ open class ColumnsSelectionDslTests : TestBase() {
141141
).shouldAllBeEqual()
142142

143143
listOf(
144-
dfGroup.select { name.firstNames },
144+
dfGroup.select { name.firstName },
145145

146-
dfGroup.select { colGroup("name").colGroup("firstNames") },
147-
dfGroup.select { colGroup("name").colGroup<FirstNames>("firstNames") },
146+
dfGroup.select { colGroup("name").colGroup("firstName") },
147+
dfGroup.select { colGroup("name").colGroup<FirstNames>("firstName") },
148148

149-
dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) },
150-
dfGroup.select { colGroup("name").colGroup<FirstNames>(pathOf("firstNames")) },
149+
dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) },
150+
dfGroup.select { colGroup("name").colGroup<FirstNames>(pathOf("firstName")) },
151151

152-
dfGroup.select { colGroup("name").colGroup(Name2::firstNames) },
152+
dfGroup.select { colGroup("name").colGroup(Name2::firstName) },
153153
).shouldAllBeEqual()
154154

155155
dfGroup.select {
156-
"name"["firstNames"]["firstName", "secondName"]
156+
"name"["firstName"]["firstName", "secondName"]
157157
} shouldBe dfGroup.select {
158-
name.firstNames["firstName"] and name.firstNames["secondName"]
158+
name.firstName["firstName"] and name.firstName["secondName"]
159159
}
160160
}
161161

0 commit comments

Comments
 (0)