Skip to content

Commit 5449da2

Browse files
Merge pull request #1291 from Kotlin/explode_kdocs
`explode` docs and tests
2 parents 91551f6 + 2a063cb commit 5449da2

File tree

5 files changed

+259
-8
lines changed

5 files changed

+259
-8
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
1010
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
1111
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1212
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
13+
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
14+
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
15+
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
1316
import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl
1417
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
1518
import kotlin.reflect.KProperty
@@ -19,13 +22,82 @@ private val defaultExplodeColumns: ColumnsSelector<*, *> = {
1922
}
2023

2124
// region explode DataFrame
25+
26+
/**
27+
* Splits list-like values in the specified [\columns] and spreads them vertically —
28+
* that is, it adds a separate row for each element (one value per row).
29+
* Values in all other columns are duplicated to preserve row context.
30+
*
31+
* If no [\columns] are specified, all columns (at any depth) containing
32+
* [List] or [DataFrame] values will be exploded.
33+
*
34+
* If [dropEmpty] is `true`, rows with empty lists or [DataFrame]s will be removed.
35+
* If `false`, such rows will be exploded into `null` values.
36+
*
37+
* Returns a new [DataFrame] with exploded columns.
38+
*
39+
* Each exploded column will have a new type (`List<T>` → `T`).
40+
* When several columns are exploded in one operation, lists in different columns will be aligned.
41+
*
42+
* This operation is the reverse of [implode].
43+
*
44+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
45+
*
46+
* For more information, see: {@include [DocumentationUrls.Explode]}
47+
*
48+
* ### This `explode` overload
49+
*/
50+
@ExcludeFromSources
51+
internal interface ExplodeDocs
52+
53+
/**
54+
* {@include [ExplodeDocs]}
55+
* {@include [SelectingColumns.Dsl]}
56+
*
57+
* #### Examples
58+
*
59+
* ```kotlin
60+
* // Explodes all `List` and `DataFrame` columns at any depth
61+
* df.explode()
62+
*
63+
* // Explodes the "tags" column of type `List<String>`
64+
* df.explode { tags }
65+
*
66+
* // Explodes all columns of type `List<Double>`
67+
* df.explode { colsOf<List<Double>>() }
68+
* ```
69+
*
70+
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
71+
* If `false`, such rows will be exploded into `null` values.
72+
* @param columns The [ColumnsSelector] used to select columns to explode.
73+
* If not specified, all applicable columns will be exploded.
74+
* @return A new [DataFrame] with exploded columns.
75+
*/
2276
@Refine
2377
@Interpretable("Explode0")
2478
public fun <T> DataFrame<T>.explode(
2579
dropEmpty: Boolean = true,
26-
selector: ColumnsSelector<T, *> = defaultExplodeColumns,
27-
): DataFrame<T> = explodeImpl(dropEmpty, selector)
28-
80+
columns: ColumnsSelector<T, *> = defaultExplodeColumns,
81+
): DataFrame<T> = explodeImpl(dropEmpty, columns)
82+
83+
/**
84+
* {@include [ExplodeDocs]}
85+
* {@include [SelectingColumns.ColumnNames]}
86+
*
87+
* #### Example
88+
*
89+
* ```kotlin
90+
* // Explodes the "tags" and "scores" columns, where
91+
* // "tags" is a `List<String>` and "scores" is a `List<Int>`
92+
* val exploded = df.explode("tags", "scores")
93+
* ```
94+
*
95+
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
96+
* If `false`, such rows will be exploded into `null` values.
97+
* @param columns The [column names][String] used to select columns to explode.
98+
* If not specified, all applicable columns will be exploded.
99+
* @return A new [DataFrame] with exploded columns.
100+
*/
29101
public fun <T> DataFrame<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> =
30102
explode(dropEmpty) { columns.toColumnSet() }
31103

@@ -43,11 +115,73 @@ public fun <T, C> DataFrame<T>.explode(vararg columns: KProperty<C>, dropEmpty:
43115

44116
// region explode DataRow
45117

118+
/**
119+
* Splits list-like values in the specified [\columns] of this [DataRow] and spreads them vertically —
120+
* that is, it adds a separate row for each element (one value per row)
121+
* and combines them into a new [DataFrame].
122+
* Values in all other columns are duplicated to preserve row context.
123+
*
124+
* If no [\columns] are specified, all columns (at any depth) containing
125+
* [List] or [DataFrame] values will be exploded.
126+
*
127+
* If [dropEmpty] is `true`, the result will exclude rows with empty lists or DataFrames.
128+
* If `false`, such values will be exploded into `null`.
129+
*
130+
* Returns a new [DataFrame] expanded into multiple rows based on the exploded columns.
131+
*
132+
* Each exploded column will have a new type (`List<T>` → `T`).
133+
* When several columns are exploded in one operation, lists in different columns will be aligned.
134+
*
135+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
136+
*
137+
* For more information, see: {@include [DocumentationUrls.Explode]}
138+
*
139+
* ### This `explode` overload
140+
*/
141+
@ExcludeFromSources
142+
internal interface ExplodeDataRowDocs
143+
144+
/**
145+
* {@include [ExplodeDataRowDocs]}
146+
* {@include [SelectingColumns.Dsl]}
147+
*
148+
* #### Example
149+
*
150+
* ```kotlin
151+
* // Explodes the `hobbies` and `scores` values of the row,
152+
* // of types `List<String>` and `List<Int>`, respectively
153+
* row.explode { hobbies and scores }
154+
* ```
155+
*
156+
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
157+
* If `false`, such rows will be exploded into `null` values.
158+
* @param columns The [ColumnsSelector] used to select columns to explode.
159+
* If not specified, all applicable columns will be exploded.
160+
* @return A new [DataFrame] with exploded columns from this [DataRow].
161+
*/
46162
public fun <T> DataRow<T>.explode(
47163
dropEmpty: Boolean = true,
48-
selector: ColumnsSelector<T, *> = defaultExplodeColumns,
49-
): DataFrame<T> = toDataFrame().explode(dropEmpty, selector)
50-
164+
columns: ColumnsSelector<T, *> = defaultExplodeColumns,
165+
): DataFrame<T> = toDataFrame().explode(dropEmpty, columns)
166+
167+
/**
168+
* {@include [ExplodeDataRowDocs]}
169+
* {@include [SelectingColumns.ColumnNames]}
170+
*
171+
* #### Example
172+
*
173+
* ```kotlin
174+
* // Explodes the `hobbies` and `scores` values of the row,
175+
* // of types `List<String>` and `List<Int>`, respectively
176+
* row.explode("hobbies", "scores")
177+
* ```
178+
*
179+
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
180+
* If `false`, such rows will be exploded into `null` values.
181+
* @param columns The [column names][String] used to select columns to explode.
182+
* If not specified, all applicable columns will be exploded.
183+
* @return A new [DataFrame] with exploded columns from this [DataRow].
184+
*/
51185
public fun <T> DataRow<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> =
52186
explode(dropEmpty) { columns.toColumnSet() }
53187

@@ -65,9 +199,31 @@ public fun <T, C> DataRow<T>.explode(vararg columns: KProperty<C>, dropEmpty: Bo
65199

66200
// region explode DataColumn
67201

202+
/**
203+
* Splits list-like values in this [DataColumn] and spreads them vertically —
204+
* that is, it adds a separate row for each element (one value per row).
205+
*
206+
* Returns a new [DataColumn] with the exploded values.
207+
* The resulting column will have a new type (`List<T>` → `T`).
208+
*
209+
* For more information, see: {@include [DocumentationUrls.Explode]}
210+
*
211+
* @return A new [DataColumn] with exploded values.
212+
*/
68213
@JvmName("explodeList")
69214
public fun <T> DataColumn<Collection<T>>.explode(): DataColumn<T> = explodeImpl() as DataColumn<T>
70215

216+
/**
217+
* Explodes a [DataColumn] of [DataFrame] values into a single [ColumnGroup].
218+
*
219+
* Each nested [DataFrame] is unwrapped, and its columns are placed side by side
220+
* within a column group named after the original column.
221+
* The number of resulting rows equals the total number of rows across all nested DataFrames.
222+
*
223+
* For more information, see: {@include [DocumentationUrls.Explode]}
224+
*
225+
* @return A [ColumnGroup] containing the concatenated contents of all nested DataFrames.
226+
*/
71227
@JvmName("explodeFrames")
72228
public fun <T> DataColumn<DataFrame<T>>.explode(): ColumnGroup<T> = concat().asColumnGroup(name())
73229

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ public fun <T, C, K, R> Gather<T, C?, K, R>.notNull(): Gather<T, C, K, R> = wher
259259
* .mapValues { (it + 0.5).toFloat() }
260260
* .into("series", "value")
261261
* ```
262+
*
263+
* @see [explode]
262264
* @return A new [Gather] instance with exploded list values.
263265
*/
264266
@Interpretable("GatherExplodeLists")
@@ -296,6 +298,8 @@ public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
296298
* .mapValues { (it + 0.5).toFloat() }
297299
* .into("series", "value")
298300
* ```
301+
*
302+
* @see [explode]
299303
* @return A new [Gather] instance with exploded list values.
300304
*/
301305
@JvmName("explodeListsTyped")

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,4 +113,7 @@ internal interface DocumentationUrls {
113113

114114
/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
115115
interface Filter
116+
117+
/** [See `explode` on the documentation website.]({@include [Url]}/explode.html) */
118+
interface Explode
116119
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,92 @@ class ExplodeTests {
4444
val df = dataFrameOf("a", "b")(1, 2)
4545
df.explode() shouldBe df
4646
}
47+
48+
@Test
49+
fun `explode multiple aligned columns`() {
50+
val a by columnOf(listOf(1, 2), listOf(3, 4, 5))
51+
val b by columnOf(listOf(1, 2, 3), listOf(4, 5))
52+
53+
val df = dataFrameOf(a, b)
54+
val exploded = df.explode { a and b }
55+
56+
val expected = dataFrameOf("a", "b")(
57+
1, 1,
58+
2, 2,
59+
null, 3,
60+
3, 4,
61+
4, 5,
62+
5, null,
63+
)
64+
65+
exploded shouldBe expected
66+
}
67+
68+
@Test
69+
fun `explode with empty list and dropEmpty true`() {
70+
val df = dataFrameOf("a", "b")(
71+
1, listOf(1, 2),
72+
2, emptyList<Int>(),
73+
3, listOf(3),
74+
)
75+
76+
val exploded = df.explode(dropEmpty = true)
77+
78+
val expected = dataFrameOf("a", "b")(
79+
1, 1,
80+
1, 2,
81+
3, 3,
82+
)
83+
84+
exploded shouldBe expected
85+
}
86+
87+
@Test
88+
fun `explode with empty list and dropEmpty false`() {
89+
val df = dataFrameOf("a", "b")(
90+
1, listOf(1, 2),
91+
2, emptyList<Int>(),
92+
3, listOf(3),
93+
)
94+
95+
val exploded = df.explode(dropEmpty = false)
96+
97+
val expected = dataFrameOf("a", "b")(
98+
1, 1,
99+
1, 2,
100+
2, null,
101+
3, 3,
102+
)
103+
104+
exploded shouldBe expected
105+
}
106+
107+
@Test
108+
fun `explode DataColumn of lists`() {
109+
val col by columnOf(listOf(1, 2), listOf(3, 4))
110+
111+
val exploded = col.explode()
112+
val expected = columnOf(1, 2, 3, 4) named "col"
113+
114+
exploded shouldBe expected
115+
}
116+
117+
@Test
118+
fun `explode FrameColumn into ColumnGroup`() {
119+
val col by columnOf(
120+
dataFrameOf("x", "y")(1, 2, 3, 4),
121+
dataFrameOf("x", "y")(5, 6, 7, 8),
122+
)
123+
124+
val exploded = col.explode()
125+
126+
val expected = dataFrameOf("x", "y")(
127+
1, 2,
128+
3, 4,
129+
5, 6,
130+
7, 8,
131+
).asColumnGroup("col")
132+
133+
exploded shouldBe expected
134+
}
47135
}

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
1616
internal class Explode0 : AbstractInterpreter<PluginDataFrameSchema>() {
1717
val Arguments.dropEmpty: Boolean by arg(defaultValue = Present(true))
1818
val Arguments.receiver: PluginDataFrameSchema by dataFrame()
19-
val Arguments.selector: ColumnsResolver? by arg(defaultValue = Present(null))
19+
val Arguments.columns: ColumnsResolver? by arg(defaultValue = Present(null))
2020
override val Arguments.startingSchema get() = receiver
2121

2222
override fun Arguments.interpret(): PluginDataFrameSchema {
23-
val columns = selector ?: object : ColumnsResolver {
23+
val columns = columns ?: object : ColumnsResolver {
2424
override fun resolve(df: PluginDataFrameSchema): List<ColumnWithPathApproximation> {
2525
return df.flatten(includeFrames = false).filter {
2626
val column = it.column

0 commit comments

Comments
 (0)