Merge pull request #1291 from Kotlin/explode_kdocs

AndreiKingsley · web-flow · commit 5449da29298d · 2025-07-03T18:29:07.000+04:00
`explode` docs and tests
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt
@@ -10,6 +10,9 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
 import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
 import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
 import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
+import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
+import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
+import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
 import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl
 import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
 import kotlin.reflect.KProperty
@@ -19,13 +22,82 @@ private val defaultExplodeColumns: ColumnsSelector<*, *> = {
 }
 
 // region explode DataFrame
+
+/**
+ * Splits list-like values in the specified [\columns] and spreads them vertically —
+ * that is, it adds a separate row for each element (one value per row).
+ * Values in all other columns are duplicated to preserve row context.
+ *
+ * If no [\columns] are specified, all columns (at any depth) containing
+ * [List] or [DataFrame] values will be exploded.
+ *
+ * If [dropEmpty] is `true`, rows with empty lists or [DataFrame]s will be removed.
+ * If `false`, such rows will be exploded into `null` values.
+ *
+ * Returns a new [DataFrame] with exploded columns.
+ *
+ * Each exploded column will have a new type (`List<T>` → `T`).
+ * When several columns are exploded at once, their lists are aligned row by row; shorter lists are padded with `null`.
+ *
+ * This operation is the reverse of [implode].
+ *
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
+ *
+ * For more information, see: {@include [DocumentationUrls.Explode]}
+ *
+ * ### This `explode` overload
+ */
+@ExcludeFromSources
+internal interface ExplodeDocs
+
+/**
+ * {@include [ExplodeDocs]}
+ * {@include [SelectingColumns.Dsl]}
+ *
+ * #### Examples
+ *
+ * ```kotlin
+ * // Explodes all `List` and `DataFrame` columns at any depth
+ * df.explode()
+ *
+ * // Explodes the "tags" column of type `List<String>`
+ * df.explode { tags }
+ *
+ * // Explodes all columns of type `List<Double>`
+ * df.explode { colsOf<List<Double>>() }
+ * ```
+ *
+ * @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
+ *                  If `false`, such rows will be exploded into `null` values.
+ * @param columns The [ColumnsSelector] used to select columns to explode.
+ *                If not specified, all applicable columns will be exploded.
+ * @return A new [DataFrame] with exploded columns.
+ */
 @Refine
 @Interpretable("Explode0")
 public fun <T> DataFrame<T>.explode(
     dropEmpty: Boolean = true,
-    selector: ColumnsSelector<T, *> = defaultExplodeColumns,
-): DataFrame<T> = explodeImpl(dropEmpty, selector)
-
+    columns: ColumnsSelector<T, *> = defaultExplodeColumns,
+): DataFrame<T> = explodeImpl(dropEmpty, columns)
+
+/**
+ * {@include [ExplodeDocs]}
+ * {@include [SelectingColumns.ColumnNames]}
+ *
+ * #### Example
+ *
+ * ```kotlin
+ * // Explodes the "tags" and "scores" columns, where
+ * // "tags" is a `List<String>` and "scores" is a `List<Int>`
+ * val exploded = df.explode("tags", "scores")
+ * ```
+ *
+ * @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
+ *                  If `false`, such rows will be exploded into `null` values.
+ * @param columns The [column names][String] used to select columns to explode.
+ *                If not specified, all applicable columns will be exploded.
+ * @return A new [DataFrame] with exploded columns.
+ */
 public fun <T> DataFrame<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> =
     explode(dropEmpty) { columns.toColumnSet() }
 
@@ -43,11 +115,73 @@ public fun <T, C> DataFrame<T>.explode(vararg columns: KProperty<C>, dropEmpty:
 
 // region explode DataRow
 
+/**
+ * Splits list-like values in the specified [\columns] of this [DataRow] and spreads them vertically —
+ * that is, it adds a separate row for each element (one value per row)
+ * and combines them into a new [DataFrame].
+ * Values in all other columns are duplicated to preserve row context.
+ *
+ * If no [\columns] are specified, all columns (at any depth) containing
+ * [List] or [DataFrame] values will be exploded.
+ *
+ * If [dropEmpty] is `true`, the result will exclude rows with empty lists or DataFrames.
+ * If `false`, such values will be exploded into `null`.
+ *
+ * Returns a new [DataFrame] expanded into multiple rows based on the exploded columns.
+ *
+ * Each exploded column will have a new type (`List<T>` → `T`).
+ * When several columns are exploded at once, their lists are aligned row by row; shorter lists are padded with `null`.
+ *
+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
+ *
+ * For more information, see: {@include [DocumentationUrls.Explode]}
+ *
+ * ### This `explode` overload
+ */
+@ExcludeFromSources
+internal interface ExplodeDataRowDocs
+
+/**
+ * {@include [ExplodeDataRowDocs]}
+ * {@include [SelectingColumns.Dsl]}
+ *
+ * #### Example
+ *
+ * ```kotlin
+ * // Explodes the `hobbies` and `scores` values of the row,
+ * // of types `List<String>` and `List<Int>`, respectively
+ * row.explode { hobbies and scores }
+ * ```
+ *
+ * @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
+ *                  If `false`, such rows will be exploded into `null` values.
+ * @param columns The [ColumnsSelector] used to select columns to explode.
+ *                If not specified, all applicable columns will be exploded.
+ * @return A new [DataFrame] with exploded columns from this [DataRow].
+ */
 public fun <T> DataRow<T>.explode(
     dropEmpty: Boolean = true,
-    selector: ColumnsSelector<T, *> = defaultExplodeColumns,
-): DataFrame<T> = toDataFrame().explode(dropEmpty, selector)
-
+    columns: ColumnsSelector<T, *> = defaultExplodeColumns,
+): DataFrame<T> = toDataFrame().explode(dropEmpty, columns)
+
+/**
+ * {@include [ExplodeDataRowDocs]}
+ * {@include [SelectingColumns.ColumnNames]}
+ *
+ * #### Example
+ *
+ * ```kotlin
+ * // Explodes the `hobbies` and `scores` values of the row,
+ * // of types `List<String>` and `List<Int>`, respectively
+ * row.explode("hobbies", "scores")
+ * ```
+ *
+ * @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
+ *                  If `false`, such rows will be exploded into `null` values.
+ * @param columns The [column names][String] used to select columns to explode.
+ *                If not specified, all applicable columns will be exploded.
+ * @return A new [DataFrame] with exploded columns from this [DataRow].
+ */
 public fun <T> DataRow<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> =
     explode(dropEmpty) { columns.toColumnSet() }
 
@@ -65,9 +199,31 @@ public fun <T, C> DataRow<T>.explode(vararg columns: KProperty<C>, dropEmpty: Bo
 
 // region explode DataColumn
 
+/**
+ * Splits list-like values in this [DataColumn] and spreads them vertically —
+ * that is, it adds a separate row for each element (one value per row).
+ *
+ * Returns a new [DataColumn] with the exploded values.
+ * The resulting column will have a new type (`List<T>` → `T`).
+ *
+ * For more information, see: {@include [DocumentationUrls.Explode]}
+ *
+ * @return A new [DataColumn] with exploded values.
+ */
 @JvmName("explodeList")
 public fun <T> DataColumn<Collection<T>>.explode(): DataColumn<T> = explodeImpl() as DataColumn<T>
 
+/**
+ * Explodes a [DataColumn] of [DataFrame] values into a single [ColumnGroup].
+ *
+ * Each nested [DataFrame] is unwrapped, and its columns are placed side by side
+ * within a column group named after the original column.
+ * The number of resulting rows equals the total number of rows across all nested DataFrames.
+ *
+ * For more information, see: {@include [DocumentationUrls.Explode]}
+ *
+ * @return A [ColumnGroup] containing the concatenated contents of all nested DataFrames.
+ */
 @JvmName("explodeFrames")
 public fun <T> DataColumn<DataFrame<T>>.explode(): ColumnGroup<T> = concat().asColumnGroup(name())
 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt
@@ -259,6 +259,8 @@ public fun <T, C, K, R> Gather<T, C?, K, R>.notNull(): Gather<T, C, K, R> = wher
  *   .mapValues { (it + 0.5).toFloat() }
  *   .into("series", "value")
  * ```
+ *
+ * @see [explode]
  * @return A new [Gather] instance with exploded list values.
  */
 @Interpretable("GatherExplodeLists")
@@ -296,6 +298,8 @@ public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
  *   .mapValues { (it + 0.5).toFloat() }
  *   .into("series", "value")
  * ```
+ *
+ * @see [explode]
  * @return A new [Gather] instance with exploded list values.
  */
 @JvmName("explodeListsTyped")
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
@@ -113,4 +113,7 @@ internal interface DocumentationUrls {
 
     /** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
     interface Filter
+
+    /** [See `explode` on the documentation website.]({@include [Url]}/explode.html) */
+    interface Explode
 }
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt
@@ -44,4 +44,92 @@ class ExplodeTests {
         val df = dataFrameOf("a", "b")(1, 2)
         df.explode() shouldBe df
     }
+
+    @Test
+    fun `explode multiple aligned columns`() {
+        val a by columnOf(listOf(1, 2), listOf(3, 4, 5))
+        val b by columnOf(listOf(1, 2, 3), listOf(4, 5))
+
+        val df = dataFrameOf(a, b)
+        val exploded = df.explode { a and b }
+
+        val expected = dataFrameOf("a", "b")(
+            1, 1,
+            2, 2,
+            null, 3,
+            3, 4,
+            4, 5,
+            5, null,
+        )
+
+        exploded shouldBe expected
+    }
+
+    @Test
+    fun `explode with empty list and dropEmpty true`() {
+        val df = dataFrameOf("a", "b")(
+            1, listOf(1, 2),
+            2, emptyList<Int>(),
+            3, listOf(3),
+        )
+
+        val exploded = df.explode(dropEmpty = true)
+
+        val expected = dataFrameOf("a", "b")(
+            1, 1,
+            1, 2,
+            3, 3,
+        )
+
+        exploded shouldBe expected
+    }
+
+    @Test
+    fun `explode with empty list and dropEmpty false`() {
+        val df = dataFrameOf("a", "b")(
+            1, listOf(1, 2),
+            2, emptyList<Int>(),
+            3, listOf(3),
+        )
+
+        val exploded = df.explode(dropEmpty = false)
+
+        val expected = dataFrameOf("a", "b")(
+            1, 1,
+            1, 2,
+            2, null,
+            3, 3,
+        )
+
+        exploded shouldBe expected
+    }
+
+    @Test
+    fun `explode DataColumn of lists`() {
+        val col by columnOf(listOf(1, 2), listOf(3, 4))
+
+        val exploded = col.explode()
+        val expected = columnOf(1, 2, 3, 4) named "col"
+
+        exploded shouldBe expected
+    }
+
+    @Test
+    fun `explode FrameColumn into ColumnGroup`() {
+        val col by columnOf(
+            dataFrameOf("x", "y")(1, 2, 3, 4),
+            dataFrameOf("x", "y")(5, 6, 7, 8),
+        )
+
+        val exploded = col.explode()
+
+        val expected = dataFrameOf("x", "y")(
+            1, 2,
+            3, 4,
+            5, 6,
+            7, 8,
+        ).asColumnGroup("col")
+
+        exploded shouldBe expected
+    }
 }
diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt
@@ -16,11 +16,11 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
 internal class Explode0 : AbstractInterpreter<PluginDataFrameSchema>() {
     val Arguments.dropEmpty: Boolean by arg(defaultValue = Present(true))
     val Arguments.receiver: PluginDataFrameSchema by dataFrame()
-    val Arguments.selector: ColumnsResolver? by arg(defaultValue = Present(null))
+    val Arguments.columns: ColumnsResolver? by arg(defaultValue = Present(null))
     override val Arguments.startingSchema get() = receiver
 
     override fun Arguments.interpret(): PluginDataFrameSchema {
-        val columns = selector ?: object : ColumnsResolver {
+        val columns = columns ?: object : ColumnsResolver {
             override fun resolve(df: PluginDataFrameSchema): List<ColumnWithPathApproximation> {
                 return df.flatten(includeFrames = false).filter {
                     val column = it.column

Original file line number	Diff line number	Diff line change
`@@ -113,4 +113,7 @@ internal interface DocumentationUrls {`
`113`	`113`
`114`	`114`	/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
`115`	`115`	`interface Filter`
	`116`	`+`
	`117`	+ /** [See `explode` on the documentation website.]({@include [Url]}/explode.html) */
	`118`	`+ interface Explode`
`116`	`119`	`}`