@@ -15,69 +15,77 @@ import org.jetbrains.kotlinx.dataframe.Selector
15
15
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
16
16
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
17
17
import org.jetbrains.kotlinx.dataframe.annotations.Refine
18
+ import org.jetbrains.kotlinx.dataframe.api.add
18
19
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
19
20
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
20
21
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
21
22
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
23
+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
22
24
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
23
25
import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
24
26
import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
25
27
import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle
26
28
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27
29
import kotlin.reflect.KProperty
28
30
29
- /*
30
- * `add` operation adds new columns to DataFrame.
31
- */
32
-
33
31
// region Add existing columns
34
32
35
33
/**
36
- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list.
34
+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
35
+ *
36
+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
37
37
*
38
- * Original [DataFrame] is not modified .
38
+ * For more information: {@include [DocumentationUrls.Add]}.
39
39
*
40
- * @param columns columns to add
41
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
42
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
43
- * @return new [DataFrame] with added columns
40
+ * @param columns columns to add.
41
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
42
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
43
+ * @return new [DataFrame] with added columns.
44
44
*/
45
45
public fun <T> DataFrame<T>.add(vararg columns: AnyBaseCol): DataFrame<T> {
    // View the vararg array as an Iterable and forward it to addAll.
    return addAll(columns.asIterable())
}
46
46
47
47
/**
48
- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list .
48
+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
49
49
*
50
- * Original [DataFrame] is not modified .
50
+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
51
51
*
52
- * @param columns columns to add
53
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
54
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
55
- * @return new [DataFrame] with added columns
52
+ * For more information: {@include [DocumentationUrls.Add]}.
53
+ *
54
+ * @param columns columns to add.
55
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
56
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
57
+ * @return new [DataFrame] with added columns.
56
58
*/
57
59
public fun <T> DataFrame<T>.addAll(columns: Iterable<AnyBaseCol>): DataFrame<T> {
    // Existing columns come first, followed by the given ones; the combined
    // list is turned into a frame and cast to the declared return type.
    return dataFrameOf(columns() + columns).cast()
}
59
61
60
62
/**
61
- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list.
63
+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
64
+ *
65
+ * Returns a new [DataFrame] with the columns from the specified
66
+ * [dataFrames] appended to the original list of [DataFrame.columns].
62
67
*
63
- * Original [DataFrame] is not modified .
68
+ * For more information: {@include [DocumentationUrls.Add]}.
64
69
*
65
- * @param dataFrames dataFrames to get columns from
66
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
67
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
68
- * @return new [DataFrame] with added columns
70
+ * @param dataFrames dataFrames to get columns from.
71
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
72
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
73
+ * @return new [DataFrame] with added columns.
69
74
*/
70
75
public fun <T> DataFrame<T>.add(vararg dataFrames: AnyFrame): DataFrame<T> {
    // View the vararg array as an Iterable and forward it to addAll.
    return addAll(dataFrames.asIterable())
}
71
76
72
77
/**
73
- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list.
78
+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
79
+ *
80
+ * Returns a new [DataFrame] with the columns from the specified
81
+ * [dataFrames] appended to the original list of [DataFrame.columns].
74
82
*
75
- * Original [DataFrame] is not modified .
83
+ * For more information: {@include [DocumentationUrls.Add]}.
76
84
*
77
- * @param dataFrames dataFrames to get columns from
78
- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
79
- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
80
- * @return new [DataFrame] with added columns
85
+ * @param dataFrames dataFrames to get columns from.
86
+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
87
+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
88
+ * @return new [DataFrame] with added columns.
81
89
*/
82
90
@JvmName(" addAllFrames" )
83
91
public fun <T > DataFrame<T>.addAll (dataFrames : Iterable <AnyFrame >): DataFrame <T > =
@@ -115,14 +123,41 @@ public interface AddDataRow<out T> : DataRow<T> {
115
123
public typealias AddExpression <T , R > = Selector <AddDataRow <T >, R >
116
124
117
125
/**
118
- * Creates new column using row [expression] and adds it to the end of [DataFrame]
126
+ * Creates a new column using [AddExpression] and
127
+ * adds it to the end of this [DataFrame] (at the top level).
128
+ *
129
+ * An [AddExpression] allows you to compute a value for each row in the new column
130
+ * based on the values from that row in the original [DataFrame].
131
+ * Also, you can use other methods such as [prev] and [next] to access other rows,
132
+ * including [newValue][AddDataRow.newValue] to retrieve already computed values of this column
133
+ * in previous rows.
134
+ *
135
+ * Returns a new [DataFrame] with the new column appended to the original list of [DataFrame.columns].
136
+ *
137
+ * ## Example
138
+ *
139
+ * ```kotlin
140
+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
141
+ * // and "secondValue" columns for each row.
142
+ * val dfWithSum = df.add("sum") { firstValue + secondValue }
143
+ *
144
+ * // Add a "fibonacci" column with the Fibonacci sequence:
145
+ * // for the first two rows, the value is 1;
146
+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
147
+ * val dfWithFibonacci = df.add("fibonacci") {
148
+ * if (index() < 2) 1
149
+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
150
+ * }
151
+ * ```
119
152
*
120
- * Original [DataFrame] is not modified.
153
+ * @param name name for a new column.
154
+ * If it is empty, a unique column name will be generated.
155
+ * Otherwise, it should be unique for original [DataFrame].
156
+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
157
+ * Defaults to [Infer.Nulls].
158
+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
159
+ * @return new [DataFrame] with added column.
121
160
*
122
- * @param name name for a new column. If it is empty, a unique column name will be generated. Otherwise, it should be unique for original [DataFrame].
123
- * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column
124
- * @param expression [AddExpression] that computes column value for every [DataRow]
125
- * @return new [DataFrame] with added column
126
161
* @throws DuplicateColumnNamesException if [DataFrame] already contains a column with given [name]
127
162
*/
128
163
@Refine
@@ -149,6 +184,36 @@ public inline fun <reified R, T> DataFrame<T>.add(
149
184
noinline expression : AddExpression <T , R >,
150
185
): DataFrame <T > = add(column.path(), infer, expression)
151
186
187
+ /**
188
+ * Creates a new column using [AddExpression] and inserts it at the specified [ColumnPath].
189
+ *
190
+ * An [AddExpression] allows you to compute a value for each row in the new column
191
+ * based on the values from that row in the original [DataFrame].
192
+ * Also, you can use other methods such as [prev] and [next] to access other rows,
193
+ * including [newValue][AddDataRow.newValue] to retrieve already computed values of this column
194
+ * in previous rows.
195
+ *
196
+ * Returns a new [DataFrame] with the new column inserted at the given [path].
197
+ * {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
198
+ *
199
+ * ## Example
200
+ *
201
+ * ```kotlin
202
+ * // Add a new column "sum" inside the "info" column group (which will be created if it doesn't exist).
203
+ * // The column contains the sum of values from the "firstValue" and "secondValue" columns for each row.
204
+ * val dfWithSum = df.add(pathOf("info", "sum")) { firstValue + secondValue }
205
+ * ```
206
+ *
207
+ * @param path Target [ColumnPath] for the new column.
208
+ * If it points to a nested location,
209
+ * intermediate columns will be created if necessary.
210
+ * @param infer A value of [Infer] that specifies how to compute the column [type][BaseColumn.type] for the new column.
211
+ * Defaults to [Infer.Nulls].
212
+ * @param expression An [AddExpression] that computes the column value for every [DataRow] of the new column.
213
+ * @return A new [DataFrame] with the added column.
214
+ *
215
+ * @throws DuplicateColumnNamesException If the [DataFrame] already contains a column at the specified [path].
216
+ */
152
217
public inline fun <reified R , T > DataFrame<T>.add (
153
218
path : ColumnPath ,
154
219
infer : Infer = Infer .Nulls ,
@@ -163,6 +228,10 @@ public inline fun <reified R, T> DataFrame<T>.add(
163
228
164
229
// region Create and add several columns
165
230
231
+ /**
232
+ * Receiver that is used by the [add] and [mapToFrame] operations
233
+ * for adding new columns and column groups based on [DataFrame] columns and row values.
234
+ */
166
235
public class AddDsl <T >(
167
236
@PublishedApi internal val df : DataFrame <T >,
168
237
) : ColumnsContainer<T> by df,
@@ -253,6 +322,38 @@ public class AddDsl<T>(
253
322
public infix fun AddGroup<T>.into(column: AnyColumnGroupAccessor): Unit {
    // Take the accessor's name() and forward it to the other `into` overload.
    into(column.name())
}
254
323
}
255
324
325
+ /**
326
+ * Creates new columns using the [AddDsl] builder.
327
+ *
328
+ * An [AddDsl] allows you to add multiple new columns and column groups to a [DataFrame]
329
+ * using concise syntax based on `from`, `into` operations and [AddExpression]s.
330
+ *
331
+ * Returns a new [DataFrame] with the newly added columns.
332
+ *
333
+ * ## Example
334
+ *
335
+ * ```kotlin
336
+ * val dfWithAdded = df.add {
337
+ * // Add new column "yearOfBirth" computed as 2021 minus value in "age" column
338
+ * "yearOfBirth" from { 2021 - age }
339
+ *
340
+ * // Add column "is adult" with result of age > 18
341
+ * age > 18 into "is adult"
342
+ *
343
+ * // Add column group "details"
344
+ * group("details") {
345
+ * // Add column "last name length" with length of lastName
346
+ * name.lastName.length() into "last name length"
347
+ *
348
+ * // Add column "full name" by combining firstName and lastName
349
+ * "full name" from { name.firstName + " " + name.lastName }
350
+ * }
351
+ * }
352
+ * ```
353
+ *
354
+ * @param body An [AddDsl] expression used to define new columns and column groups.
355
+ * @return A new [DataFrame] with the added columns.
356
+ */
256
357
@Refine
257
358
@Interpretable(" AddWithDsl" )
258
359
public fun <T > DataFrame<T>.add (body : AddDsl <T >.() -> Unit ): DataFrame <T > {
@@ -261,6 +362,45 @@ public fun <T> DataFrame<T>.add(body: AddDsl<T>.() -> Unit): DataFrame<T> {
261
362
return dataFrameOf(this @add.columns() + dsl.columns).cast()
262
363
}
263
364
365
+ /**
366
+ * Creates a new column using [AddExpression] and
367
+ * adds it to the end of each group (i.e., each [DataFrame]) of this [GroupBy] (at the top level).
368
+ *
369
+ * An [AddExpression] allows you to compute a value for each row in the new column
370
+ * based on the values from that row in the original group [DataFrame]s.
371
+ * Also, you can use other methods such as [prev] and [next] to access other rows,
372
+ * including [newValue][AddDataRow.newValue] to retrieve already computed values of this column
373
+ * in previous rows.
374
+ *
375
+ * Returns a new [GroupBy] with the new column
376
+ * appended to the original list of [DataFrame.columns] of each group [DataFrame].
377
+ *
378
+ * ## Example
379
+ *
380
+ * ```kotlin
381
+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
382
+ * // and "secondValue" columns for each row.
383
+ * val gbWithSum = gb.add("sum") { firstValue + secondValue }
384
+ *
385
+ * // Add a "fibonacci" column with the Fibonacci sequence:
386
+ * // for the first two rows, the value is 1;
387
+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
388
+ * val gbWithFibonacci = gb.add("fibonacci") {
389
+ * if (index() < 2) 1
390
+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
391
+ * }
392
+ * ```
393
+ *
394
+ * @param name name for a new column.
395
+ * If it is empty, a unique column name will be generated.
396
+ * Otherwise, it should be unique for original group [DataFrame]s.
397
+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
398
+ * Defaults to [Infer.Nulls].
399
+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
400
+ * @return new [GroupBy] with added column.
401
+ *
402
+ * @throws DuplicateColumnNamesException if any group [DataFrame] already contains a column with the given [name].
403
+ */
264
404
@Refine
265
405
@Interpretable(" GroupByAdd" )
266
406
public inline fun <reified R , T , G > GroupBy <T , G >.add (
0 commit comments