@@ -15,69 +15,78 @@ import org.jetbrains.kotlinx.dataframe.Selector
15
15
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
16
16
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
17
17
import org.jetbrains.kotlinx.dataframe.annotations.Refine
18
+ import org.jetbrains.kotlinx.dataframe.api.add
18
19
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
19
20
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
20
21
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
21
22
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
23
+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
24
+ import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
22
25
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
23
26
import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
24
27
import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
25
28
import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle
26
29
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27
30
import kotlin.reflect.KProperty
28
31
29
- /*
30
- * `add` operation adds new columns to DataFrame.
31
- */
32
-
33
32
// region Add existing columns
34
33
35
34
/**
36
- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list .
35
+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
37
36
*
38
- * Original [DataFrame] is not modified .
37
+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
39
38
*
40
- * @param columns columns to add
41
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
42
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
43
- * @return new [DataFrame] with added columns
39
+ * For more information: {@include [DocumentationUrls.Add]}.
40
+ *
41
+ * @param columns columns to add.
42
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
43
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
44
+ * @return new [DataFrame] with added columns.
44
45
*/
45
46
public fun <T> DataFrame<T>.add(vararg columns: AnyBaseCol): DataFrame<T> {
    // Forward the vararg array to the Iterable-based `addAll` overload.
    val columnsToAppend = columns.asIterable()
    return addAll(columnsToAppend)
}
46
47
47
48
/**
48
- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list.
49
+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
50
+ *
51
+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
49
52
*
50
- * Original [DataFrame] is not modified .
53
+ * For more information: {@include [DocumentationUrls.Add]}.
51
54
*
52
- * @param columns columns to add
53
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
54
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
55
- * @return new [DataFrame] with added columns
55
+ * @param columns columns to add.
56
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
57
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
58
+ * @return new [DataFrame] with added columns.
56
59
*/
57
60
public fun <T> DataFrame<T>.addAll(columns: Iterable<AnyBaseCol>): DataFrame<T> {
    // The receiver's own columns come first, followed by the supplied ones.
    val combined = columns() + columns
    return dataFrameOf(combined).cast()
}
59
62
60
63
/**
61
- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list .
64
+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
62
65
*
63
- * Original [DataFrame] is not modified.
66
+ * Returns a new [DataFrame] with the columns from the specified
67
+ * [dataFrames] appended to the original list of [DataFrame.columns].
64
68
*
65
- * @param dataFrames dataFrames to get columns from
66
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
67
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
68
- * @return new [DataFrame] with added columns
69
+ * For more information: {@include [DocumentationUrls.Add]}.
70
+ *
71
+ * @param dataFrames dataFrames to get columns from.
72
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
73
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
74
+ * @return new [DataFrame] with added columns.
69
75
*/
70
76
public fun <T> DataFrame<T>.add(vararg dataFrames: AnyFrame): DataFrame<T> {
    // Forward the vararg array to the Iterable-based `addAll` overload.
    val framesToAppend = dataFrames.asIterable()
    return addAll(framesToAppend)
}
71
77
72
78
/**
73
- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list.
79
+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
80
+ *
81
+ * Returns a new [DataFrame] with the columns from the specified
82
+ * [dataFrames] appended to the original list of [DataFrame.columns].
74
83
*
75
- * Original [DataFrame] is not modified .
84
+ * For more information: {@include [DocumentationUrls.Add]}.
76
85
*
77
- * @param dataFrames dataFrames to get columns from
78
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
79
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
80
- * @return new [DataFrame] with added columns
86
+ * @param dataFrames dataFrames to get columns from.
87
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
88
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
89
+ * @return new [DataFrame] with added columns.
81
90
*/
82
91
@JvmName(" addAllFrames" )
83
92
public fun <T > DataFrame<T>.addAll (dataFrames : Iterable <AnyFrame >): DataFrame <T > =
@@ -115,14 +124,50 @@ public interface AddDataRow<out T> : DataRow<T> {
115
124
public typealias AddExpression <T , R > = Selector <AddDataRow <T >, R >
116
125
117
126
/**
118
- * Creates new column using row [expression] and adds it to the end of [DataFrame]
127
+ * With an [AddExpression], you define the value that each row in the new column should have.
128
+ * This can be based on values from the same row in the original [DataFrame].
119
129
*
120
- * Original [DataFrame] is not modified.
130
+ * You can also use functions like [prev] and [next] to access other rows, and combine them with
131
+ * [newValue][AddDataRow.newValue] to reference values already computed in the new column.
132
+ * For example, use `prev().newValue()` to access the new column value from the previous row.
133
+ */
134
+ @ExcludeFromSources
135
+ internal interface AddExpressionDocs
136
+
137
+ /**
138
+ * Creates a new column using an [AddExpression] and
139
+ * adds it to the end of this [DataFrame] (at the top level).
140
+ *
141
+ * {@include [AddExpressionDocs]}
142
+ *
143
+ * Returns a new [DataFrame] with the new column appended to the original list of [DataFrame.columns].
144
+ *
145
+ * ## Example
146
+ *
147
+ * ```kotlin
148
+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
149
+ * // and "secondValue" columns for each row.
150
+ * val dfWithSum = df.add("sum") { firstValue + secondValue }
151
+ *
152
+ * // Add a "fibonacci" column with the Fibonacci sequence:
153
+ * // for the first two rows, the value is 1;
154
+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
155
+ * val dfWithFibonacci = df.add("fibonacci") {
156
+ * if (index() < 2) 1
157
+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
158
+ * }
159
+ * ```
160
+ *
161
+ * For more information: {@include [DocumentationUrls.Add]}.
162
+ *
163
+ * @param name name for a new column.
164
+ * If it is empty, a unique column name will be generated.
165
+ * Otherwise, it should be unique for original [DataFrame].
166
+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
167
+ * Defaults to [Infer.Nulls].
168
+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
169
+ * @return new [DataFrame] with added column.
121
170
*
122
- * @param name name for a new column. If it is empty, a unique column name will be generated. Otherwise, it should be unique for original [DataFrame].
123
- * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column
124
- * @param expression [AddExpression] that computes column value for every [DataRow]
125
- * @return new [DataFrame] with added column
126
171
* @throws DuplicateColumnNamesException if [DataFrame] already contains a column with given [name]
127
172
*/
128
173
@Refine
@@ -149,6 +194,34 @@ public inline fun <reified R, T> DataFrame<T>.add(
149
194
noinline expression : AddExpression <T , R >,
150
195
): DataFrame <T > = add(column.path(), infer, expression)
151
196
197
+ /**
198
+ * Creates a new column using [AddExpression] and inserts it at the specified [ColumnPath].
199
+ *
200
+ * {@include [AddExpressionDocs]}
201
+ *
202
+ * For more information: {@include [DocumentationUrls.Add]}.
203
+ *
204
+ * Returns a new [DataFrame] with the new column inserted at the given [path].
205
+ * {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
206
+ *
207
+ * ## Example
208
+ *
209
+ * ```kotlin
210
+ * // Add a new column "sum" inside the "info" column group (which will be created if it doesn't exist).
211
+ * // The column contains the sum of values from the "firstValue" and "secondValue" columns for each row.
212
+ * val dfWithSum = df.add(pathOf("info", "sum")) { firstValue + secondValue }
213
+ * ```
214
+ *
215
+ * @param path Target [ColumnPath] for the new column.
216
+ * If it points to a nested location,
217
+ * intermediate columns will be created if necessary.
218
+ * @param infer A value of [Infer] that specifies how to compute the column [type][BaseColumn.type] for the new column.
219
+ * Defaults to [Infer.Nulls].
220
+ * @param expression An [AddExpression] that computes the column value for every [DataRow] of the new column.
221
+ * @return A new [DataFrame] with the added column.
222
+ *
223
+ * @throws DuplicateColumnNamesException If the [DataFrame] already contains a column at the specified [path].
224
+ */
152
225
public inline fun <reified R , T > DataFrame<T>.add (
153
226
path : ColumnPath ,
154
227
infer : Infer = Infer .Nulls ,
@@ -163,6 +236,10 @@ public inline fun <reified R, T> DataFrame<T>.add(
163
236
164
237
// region Create and add several columns
165
238
239
+ /**
240
+ * Receiver that is used by the [add] and [mapToFrame] operations
241
+ * for adding new columns and column groups based on [DataFrame] columns and row values.
242
+ */
166
243
public class AddDsl <T >(
167
244
@PublishedApi internal val df : DataFrame <T >,
168
245
) : ColumnsContainer<T> by df,
@@ -253,6 +330,43 @@ public class AddDsl<T>(
253
330
public infix fun AddGroup<T>.into(column: AnyColumnGroupAccessor): Unit {
    // Only the accessor's name is used; it is passed on to the `into` overload that accepts it.
    val groupName = column.name()
    into(groupName)
}
254
331
}
255
332
333
+ /**
334
+ * Creates new columns using the [AddDsl] builder.
335
+ *
336
+ * An [AddDsl] allows adding multiple new columns and column groups to a [DataFrame]
337
+ * using concise syntax based on `from`, `into` operations and [AddExpression]s.
338
+ *
339
+ * Returns a new [DataFrame] with the newly added columns.
340
+ *
341
+ * ## Example
342
+ *
343
+ * ```kotlin
344
+ * val dfWithAdded = df.add {
345
+ * // Add new column "yearOfBirth" computed as 2021 minus value in "age" column
346
+ * "yearOfBirth" from { 2021 - age }
347
+ *
348
+ * // Add column "is adult" with result of age > 18
349
+ * age > 18 into "is adult"
350
+ *
351
+ * // Add new column "role" using expression
352
+ * expr { if ( department == "IT") "developer" else "analyst" } into "role"
353
+ *
354
+ * // Add column group "details"
355
+ * group("details") {
356
+ * // Add column "last name length" with length of lastName
357
+ * name.lastName.length() into "last name length"
358
+ *
359
+ * // Add column "full name" by combining firstName and lastName
360
+ * "full name" from { name.firstName + " " + name.lastName }
361
+ * }
362
+ * }
363
+ * ```
364
+ *
365
+ * For more information: {@include [DocumentationUrls.Add]}.
366
+ *
367
+ * @param body An [AddDsl] expression used to define new columns and column groups.
368
+ * @return A new [DataFrame] with the added columns.
369
+ */
256
370
@Refine
257
371
@Interpretable(" AddWithDsl" )
258
372
public fun <T > DataFrame<T>.add (body : AddDsl <T >.() -> Unit ): DataFrame <T > {
@@ -261,6 +375,43 @@ public fun <T> DataFrame<T>.add(body: AddDsl<T>.() -> Unit): DataFrame<T> {
261
375
return dataFrameOf(this @add.columns() + dsl.columns).cast()
262
376
}
263
377
378
+ /**
379
+ * Creates a new column using [AddExpression] and
380
+ * adds it to the end of each group (i.e., [DataFrame]s) of this [GroupBy] (at the top level).
381
+ *
382
+ * {@include [AddExpressionDocs]}
383
+ *
384
+ * Returns a new [GroupBy] with the new column
385
+ * appended to the original list of [DataFrame.columns] of each group [DataFrame].
386
+ *
387
+ * ## Example
388
+ *
389
+ * ```kotlin
390
+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
391
+ * // and "secondValue" columns for each row.
392
+ * val gbWithSum = gb.add("sum") { firstValue + secondValue }
393
+ *
394
+ * // Add a "fibonacci" column with the Fibonacci sequence:
395
+ * // for the first two rows, the value is 1;
396
+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
397
+ * val gbWithFibonacci = gb.add("fibonacci") {
398
+ * if (index() < 2) 1
399
+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
400
+ * }
401
+ * ```
402
+ *
403
+ * For more information: {@include [DocumentationUrls.Add]}.
404
+ *
405
+ * @param name name for a new column.
406
+ * If it is empty, a unique column name will be generated.
407
+ * Otherwise, it should be unique for original group [DataFrame]s.
408
+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
409
+ * Defaults to [Infer.Nulls].
410
+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
411
+ * @return new [GroupBy] with added column.
412
+ *
413
+ * @throws DuplicateColumnNamesException if any group [DataFrame] already contains a column with the given [name].
414
+ */
264
415
@Refine
265
416
@Interpretable(" GroupByAdd" )
266
417
public inline fun <reified R , T , G > GroupBy <T , G >.add (
0 commit comments