Skip to content

Commit 81f68e6

Browse files
authored
Add support for interpreted distinct operation (#1137)
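Added compiler-plugin support for the `distinct` operation: the existing `distinct(columns)` overload is now annotated with `@Refine` and `@Interpretable("Distinct0")` so that its result schema can be refined and interpreted. Added a box test covering the new functionality, and registered `Distinct0` in the plugin's interpreter loader, where it reuses the `Select0` interpreter already used by `select`.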
1 parent c08f5cf commit 81f68e6

File tree

5 files changed

+39
-0
lines changed

5 files changed

+39
-0
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import org.jetbrains.kotlinx.dataframe.AnyColumnReference
44
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55
import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
8+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
79
import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
810
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
911
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
@@ -51,6 +53,8 @@ public fun <T> DataFrame<T>.distinct(): DataFrame<T> = distinctBy { all() }
5153
* {@include [DistinctDocs]}
5254
* {@set PHRASE_ENDING the specified columns}.
5355
*/
56+
@Refine
57+
@Interpretable("Distinct0")
5458
public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> = select(columns).distinct()
5559

5660
/**

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/select.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore
2424
import org.jetbrains.kotlinx.dataframe.plugin.impl.type
2525
import org.jetbrains.kotlinx.dataframe.plugin.utils.Names
2626

27+
/**
28+
* NOTE: This interpreter serves both the `select` and `distinct` operations.
29+
*/
2730
internal class Select0 : AbstractInterpreter<PluginDataFrameSchema>() {
2831
val Arguments.receiver: PluginDataFrameSchema by dataFrame()
2932
val Arguments.columns: ColumnsResolver by arg()

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ internal inline fun <reified T> String.load(): T {
312312
"Rename" -> Rename()
313313
"RenameMapping" -> RenameMapping()
314314
"Select0" -> Select0()
315+
"Distinct0" -> Select0()
315316
"Expr0" -> Expr0()
316317
"And0" -> And0()
317318
"Remove0" -> Remove0()
plugins/kotlin-dataframe/testData/box/distinct.kt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import org.jetbrains.kotlinx.dataframe.*
2+
import org.jetbrains.kotlinx.dataframe.annotations.*
3+
import org.jetbrains.kotlinx.dataframe.api.*
4+
import org.jetbrains.kotlinx.dataframe.io.*
5+
6+
fun box(): String {
7+
val personsDf = dataFrameOf("name", "age", "city", "weight", "height", "yearsToRetirement")(
8+
"Alice", 15, "London", 99.5, "1.85", 50,
9+
"Bob", 20, "Paris", 140.0, "1.35", 45,
10+
"Charlie", 100, "Dubai", 75.0, "1.95", 0,
11+
"Rose", 1, "Moscow", 45.33, "0.79", 64,
12+
"Dylan", 35, "London", 23.4, "1.83", 30,
13+
"Eve", 40, "Paris", 56.72, "1.85", 25,
14+
"Frank", 55, "Dubai", 78.9, "1.35", 10,
15+
"Grace", 29, "Moscow", 67.8, "1.65", 36,
16+
"Hank", 60, "Paris", 80.22, "1.75", 5,
17+
"Isla", 22, "London", 75.1, "1.85", 43,
18+
)
19+
20+
val res = personsDf.distinct { name and age }
21+
res.name
22+
res.age
23+
res.compareSchemas()
24+
return "OK"
25+
}

plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ public void testDiff() {
154154
runTest("testData/box/diff.kt");
155155
}
156156

157+
@Test
158+
@TestMetadata("distinct.kt")
159+
public void testDistinct() {
160+
runTest("testData/box/distinct.kt");
161+
}
162+
157163
@Test
158164
@TestMetadata("dropNA.kt")
159165
public void testDropNA() {

0 commit comments

Comments
 (0)