Skip to content

Commit 022e7b1

Browse files
committed
wip allColsExcept and fixing errors which show ColumnPaths
1 parent 04c0e56 commit 022e7b1

File tree

10 files changed

+395
-97
lines changed

10 files changed

+395
-97
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/allExcept.kt

Lines changed: 89 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
88
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
99
import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
1010
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
11+
import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy
1112
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1213
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
1314
import org.jetbrains.kotlinx.dataframe.documentation.UsageTemplateColumnsSelectionDsl.UsageTemplate
1415
import org.jetbrains.kotlinx.dataframe.impl.aggregation.toColumns
15-
import org.jetbrains.kotlinx.dataframe.impl.columns.addParentPath
1616
import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExceptAndUnpack
1717
import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExceptKeepingStructure
1818
import org.jetbrains.kotlinx.dataframe.impl.columns.changePath
1919
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet
20+
import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn
2021
import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle
2122
import org.jetbrains.kotlinx.dataframe.impl.getColumnsWithPaths
2223
import kotlin.reflect.KProperty
@@ -215,10 +216,10 @@ public interface AllExceptColumnsSelectionDsl<out T> {
215216
// region SingleColumn
216217

217218
public infix fun <C> SingleColumn<DataRow<C>>.allColsExcept(selector: ColumnsSelector<C, *>): ColumnSet<*> =
218-
allColsExceptInternal(selector.toColumns(), false)
219+
allColsExceptInternal(selector.toColumns())
219220

220221
public infix fun SingleColumn<DataRow<*>>.allColsExcept(other: ColumnsResolver<*>): ColumnSet<*> =
221-
allColsExceptInternal(other, true)
222+
allColsExceptInternal(other)
222223

223224
public fun SingleColumn<DataRow<*>>.allColsExcept(vararg other: ColumnsResolver<*>): ColumnSet<*> =
224225
allColsExcept(other.toColumnSet())
@@ -235,11 +236,12 @@ public interface AllExceptColumnsSelectionDsl<out T> {
235236
public fun SingleColumn<DataRow<*>>.allColsExcept(vararg others: KProperty<*>): ColumnSet<*> =
236237
allColsExcept(others.toColumnSet())
237238

238-
public infix fun SingleColumn<DataRow<*>>.allColsExcept(other: ColumnPath): ColumnSet<*> =
239-
allColsExcept(column<Any?>(other))
239+
// reference and path
240+
public infix fun SingleColumn<DataRow<*>>.allColsExcept(other: ColumnReference<*>): ColumnSet<*> =
241+
allColsExceptInternal(other)
240242

241-
public fun SingleColumn<DataRow<*>>.allColsExcept(vararg others: ColumnPath): ColumnSet<*> =
242-
allColsExcept(others.toColumnSet())
243+
public fun SingleColumn<DataRow<*>>.allColsExcept(vararg other: ColumnReference<*>): ColumnSet<*> =
244+
allColsExceptInternal(*other)
243245

244246
// endregion
245247

@@ -266,11 +268,11 @@ public interface AllExceptColumnsSelectionDsl<out T> {
266268
public fun String.allColsExcept(vararg others: KProperty<*>): ColumnSet<*> =
267269
allColsExcept(others.toColumnSet())
268270

269-
public fun String.allColsExcept(other: ColumnPath): ColumnSet<*> =
271+
public fun String.allColsExcept(other: ColumnReference<*>): ColumnSet<*> =
270272
columnGroup(this).allColsExcept(other)
271273

272-
public fun String.allColsExcept(vararg others: ColumnPath): ColumnSet<*> =
273-
allColsExcept(others.toColumnSet())
274+
public fun String.allColsExcept(vararg others: ColumnReference<*>): ColumnSet<*> =
275+
columnGroup(this).allColsExcept(*others)
274276

275277
// endregion
276278

@@ -297,11 +299,11 @@ public interface AllExceptColumnsSelectionDsl<out T> {
297299
public fun KProperty<*>.allColsExcept(vararg others: KProperty<*>): ColumnSet<*> =
298300
allColsExcept(others.toColumnSet())
299301

300-
public infix fun KProperty<*>.allColsExcept(other: ColumnPath): ColumnSet<*> =
302+
public infix fun KProperty<*>.allColsExcept(other: ColumnReference<*>): ColumnSet<*> =
301303
columnGroup(this).allColsExcept(other)
302304

303-
public fun KProperty<*>.allColsExcept(vararg others: ColumnPath): ColumnSet<*> =
304-
allColsExcept(others.toColumnSet())
305+
public fun KProperty<*>.allColsExcept(vararg others: ColumnReference<*>): ColumnSet<*> =
306+
columnGroup(this).allColsExcept(*others)
305307

306308
// endregion
307309

@@ -328,44 +330,91 @@ public interface AllExceptColumnsSelectionDsl<out T> {
328330
public fun ColumnPath.allColsExcept(vararg others: KProperty<*>): ColumnSet<*> =
329331
allColsExcept(others.toColumnSet())
330332

331-
public infix fun ColumnPath.allColsExcept(other: ColumnPath): ColumnSet<*> =
333+
public infix fun ColumnPath.allColsExcept(other: ColumnReference<*>): ColumnSet<*> =
332334
columnGroup(this).allColsExcept(other)
333335

334-
public fun ColumnPath.allColsExcept(vararg others: ColumnPath): ColumnSet<*> =
335-
allColsExcept(others.toColumnSet())
336+
public fun ColumnPath.allColsExcept(vararg others: ColumnReference<*>): ColumnSet<*> =
337+
columnGroup(this).allColsExcept(*others)
336338

337339
// endregion
338340

339341
// endregion
340342

341-
private fun SingleColumn<DataRow<*>>.allColsExceptInternal(other: ColumnsResolver<*>, allowFullPaths: Boolean) =
343+
/**
344+
* Streamlines column references so that both relative and absolute paths can be used.
345+
*/
346+
// TODO remove this overload again
347+
private fun SingleColumn<DataRow<*>>.allColsExceptInternal(vararg others: ColumnReference<*>): ColumnSet<*> =
348+
allColsExceptInternal(others.toColumnSet())
349+
// transformSingleWithContext { col ->
350+
// val correctedOthers = others.map {
351+
// it.path().dropStartWrt(col.path)
352+
// }
353+
// allColsExceptInternal(correctedOthers.toColumnSet()).resolve(this)
354+
// }
355+
356+
private fun SingleColumn<DataRow<*>>.allColsExceptInternal(other: ColumnsResolver<*>) =
342357
createColumnSet { context ->
343-
this.ensureIsColumnGroup().resolveSingle(context)?.let { col ->
344-
require(col.isColumnGroup()) {
345-
"Column ${col.path} is not a ColumnGroup and can thus not be excepted from."
358+
val col = this.ensureIsColumnGroup().resolveSingle(context)
359+
?: return@createColumnSet emptyList()
360+
val colGroup = col.asColumnGroup()
361+
val colPath = col.path
362+
363+
val parentScope = (this@AllExceptColumnsSelectionDsl as ColumnsSelectionDsl<T>)
364+
.asSingleColumn()
365+
val parentCol = parentScope.ensureIsColumnGroup().resolveSingle(context)
366+
?: return@createColumnSet emptyList()
367+
val parentColGroup = parentCol.asColumnGroup()
368+
val parentPath = parentCol.path
369+
370+
val allCols = colGroup.getColumnsWithPaths { all() }
371+
372+
val colsToExceptRelativeToParent = parentColGroup
373+
.getColumnsWithPaths(UnresolvedColumnsPolicy.Skip) { other }
374+
375+
376+
val colsToExceptRelativeToCol = colGroup
377+
.getColumnsWithPaths(UnresolvedColumnsPolicy.Skip) { other }
378+
379+
// throw exceptions for columns that weren't in this or parent scope
380+
(colsToExceptRelativeToParent + colsToExceptRelativeToCol).groupBy { it.path }
381+
.forEach { (path, cols) ->
382+
if (cols.all { it.data.isMissingColumn() }) {
383+
throw IllegalArgumentException(
384+
"Column ${(colPath + path).joinToString()} and ${(parentPath + path).joinToString()} not found."
385+
)
386+
}
346387
}
347388

348-
val allCols = col.asColumnGroup()
349-
.getColumnsWithPaths { all() }
350-
351-
// try to resolve all columns to except relative to the current column
352-
try {
353-
val columnsToExcept = col.asColumnGroup()
354-
.getColumnsWithPaths(context.unresolvedColumnsPolicy) { other }
355-
356-
allCols.allColumnsExceptKeepingStructure(columnsToExcept)
357-
.map { it.changePath(col.path + it.path) }
358-
} catch (e: IllegalStateException) {
359-
// if allowed, attempt to resolve all columns to except absolutely too if relative failed
360-
if (allowFullPaths) {
361-
val allColsAbsolute = allCols.map { it.addParentPath(col.path) }
362-
val columnsToExcept = other.resolve(context)
363-
allColsAbsolute.allColumnsExceptKeepingStructure(columnsToExcept)
364-
} else {
365-
throw e
366-
}
389+
val colsToExcept = colsToExceptRelativeToCol +
390+
colsToExceptRelativeToParent.map { // adjust the path to be relative to the current column
391+
it.changePath(it.path.dropFirst(colPath.size - parentPath.size))
367392
}
368-
} ?: emptyList()
393+
394+
allCols.allColumnsExceptKeepingStructure(
395+
colsToExcept
396+
.distinctBy { it.path }
397+
.filterNot { it.data.isMissingColumn() }
398+
).map { it.changePath(col.path + it.path) }
399+
400+
// try to resolve all columns to except relative to the current column
401+
// try {
402+
// val columnsToExcept = colGroup
403+
// .getColumnsWithPaths(context.unresolvedColumnsPolicy) { other }
404+
//
405+
// allCols.allColumnsExceptKeepingStructure(columnsToExcept)
406+
// .map { it.changePath(col.path + it.path) }
407+
// } catch (e: IllegalStateException) {
408+
// // if allowed, attempt to resolve all columns to except absolutely too if relative failed
409+
// if (allowFullPaths) {
410+
// val allColsAbsolute = allCols.map { it.addParentPath(col.path) }
411+
// val columnsToExcept = other.resolve(context)
412+
// allColsAbsolute.allColumnsExceptKeepingStructure(columnsToExcept)
413+
// } else {
414+
// throw e
415+
// }
416+
// }
417+
369418
}
370419
}
371420

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnPath.kt

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ public data class ColumnPath(val path: List<String>) : List<String> by path, Col
5050
*/
5151
public fun take(first: Int): ColumnPath = ColumnPath(path.take(first))
5252

53-
public fun replaceLast(name: String): ColumnPath = ColumnPath(if (path.size < 2) listOf(name) else path.dropLast(1) + name)
53+
public fun replaceLast(name: String): ColumnPath =
54+
ColumnPath(if (path.size < 2) listOf(name) else path.dropLast(1) + name)
5455

5556
/**
5657
* Returns a shortened [ColumnPath] containing just the last [last] elements.
@@ -80,3 +81,34 @@ public data class ColumnPath(val path: List<String>) : List<String> by path, Col
8081

8182
override fun <C> get(column: ColumnReference<C>): ColumnAccessor<C> = ColumnAccessorImpl(this + column.path())
8283
}
84+
85+
/**
86+
* Drops the longest start of this (child) path that matches the end of the parent path [otherPath], and returns the remaining ColumnPath.
87+
*
88+
* For example:
89+
* ```kt
90+
* val parentPath = pathOf("a", "b", "c")
91+
* val childPath = pathOf("a", "b", "c", "d", "e")
92+
*
93+
* childPath.dropStartWrt(parentPath) // returns pathOf("d", "e")
94+
* ```
95+
*
96+
* @param otherPath The parent path to compare against.
97+
* @return The resulting ColumnPath after dropping the overlapping start.
98+
*/
99+
internal fun ColumnPath.dropStartWrt(otherPath: ColumnPath): ColumnPath {
100+
val first = dropOverlappingStartOfChild(parent = otherPath, child = this)
101+
return ColumnPath(first)
102+
}
103+
104+
internal fun <T> dropOverlappingStartOfChild(parent: List<T>, child: List<T>): List<T> {
105+
var indexToRemoveTill = 0
106+
for (i in child.indices) {
107+
val subFirst = child.subList(0, i + 1)
108+
val sufficientSubSecond = parent.subList(maxOf(0, parent.size - (i + 1)), parent.size)
109+
if (subFirst == sufficientSubSecond) {
110+
indexToRemoveTill = i + 1
111+
}
112+
}
113+
return child.subList(indexToRemoveTill, child.size)
114+
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ internal open class DataFrameReceiver<T>(
4646
host = this@DataFrameReceiver,
4747
).asDataColumn().cast()
4848

49-
UnresolvedColumnsPolicy.Fail -> error("Column $path not found among ${df.columnNames()}.")
49+
UnresolvedColumnsPolicy.Fail -> error("Column ${path.joinToString()} not found among ${df.columnNames()}.")
5050
}
5151

5252
is MissingDataColumn -> this

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ internal fun <C> ColumnsContainer<*>.getColumn(path: ColumnPath, policy: Unresol
361361
getColumnOrNull(path)?.cast()
362362
?: when (policy) {
363363
UnresolvedColumnsPolicy.Fail ->
364-
error("Column not found: $path")
364+
error("Column not found: ${path.joinToString()}")
365365

366366
UnresolvedColumnsPolicy.Skip -> null
367367
UnresolvedColumnsPolicy.Create -> DataColumn.empty().cast<C>()
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.columns.dropOverlappingStartOfChild
5+
import org.jetbrains.kotlinx.dataframe.columns.dropStartWrt
6+
import org.jetbrains.kotlinx.dataframe.samples.api.TestBase
7+
import org.junit.Test
8+
9+
class ColumnPathTests : TestBase() {
10+
11+
@Test
12+
fun `should trim overlapping start of first list from the end of second list`() {
13+
val parent = pathOf("something", "name", "firstName")
14+
val child = pathOf("name", "firstName", "secondName")
15+
16+
dropOverlappingStartOfChild(parent, child) shouldBe listOf("secondName")
17+
child.dropStartWrt(parent) shouldBe pathOf("secondName")
18+
}
19+
20+
@Test
21+
fun `should return first list as is when there is no overlap`() {
22+
val parent = pathOf("city", "country")
23+
val child = pathOf("name", "firstName", "secondName")
24+
25+
dropOverlappingStartOfChild(parent, child) shouldBe listOf("name", "firstName", "secondName")
26+
child.dropStartWrt(parent) shouldBe pathOf("name", "firstName", "secondName")
27+
}
28+
29+
@Test
30+
fun `should return empty list when first list is completely overlapped`() {
31+
val parent = pathOf("city", "name", "firstName")
32+
val child = pathOf("name", "firstName")
33+
34+
dropOverlappingStartOfChild(parent, child) shouldBe emptyList()
35+
child.dropStartWrt(parent) shouldBe pathOf()
36+
}
37+
38+
@Test
39+
fun `if parent is empty`() {
40+
val parent = pathOf()
41+
val child = pathOf("name", "firstName")
42+
43+
dropOverlappingStartOfChild(parent, child) shouldBe listOf("name", "firstName")
44+
child.dropStartWrt(parent) shouldBe pathOf("name", "firstName")
45+
}
46+
}

0 commit comments

Comments
 (0)