@@ -8,10 +8,24 @@ import org.jetbrains.kotlinx.dataframe.Predicate
8
8
import org.jetbrains.kotlinx.dataframe.RowFilter
9
9
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
10
10
import org.jetbrains.kotlinx.dataframe.annotations.Refine
11
+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
12
+ import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription
11
13
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue
12
14
13
15
// region DataColumn
14
16
17
+ /**
18
+ * Counts the elements in this [DataColumn] that satisfy a given [predicate] or returns the total count
19
+ * if no predicate is provided.
20
+ *
21
+ * For more information: {@include [DocumentationUrls.Count]}
22
+ *
23
+ * @param predicate An optional predicate used to filter the elements.
24
+ * The predicate should return `true` for elements to be counted.
25
+ * If `null` (by default), all elements are counted.
26
+ * @return The number of elements that match the [predicate],
27
+ * or the total number of elements if no predicate is provided.
28
+ */
15
29
public fun <T > DataColumn<T>.count (predicate : Predicate <T >? = null): Int =
16
30
if (predicate == null ) {
17
31
size()
@@ -23,27 +37,126 @@ public fun <T> DataColumn<T>.count(predicate: Predicate<T>? = null): Int =
23
37
24
38
// region DataRow
25
39
40
/**
 * Gets how many columns this [DataRow] holds.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * @return the column count of this row, as reported by `columnsCount()`.
 */
public fun AnyRow.count(): Int {
    return columnsCount()
}
27
48
49
/**
 * Tallies the values of this row for which the given [predicate] holds.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * @param predicate A test applied to every value of the row;
 * a value is counted when the predicate returns `true` for it.
 * @return How many values of the row passed the [predicate].
 */
public inline fun AnyRow.count(predicate: Predicate<Any?>): Int {
    // Fetch the row's values once, then let the collection-level `count` apply the test.
    val cells = values()
    return cells.count(predicate)
}
29
59
30
60
// endregion
31
61
32
62
// region DataFrame
33
63
64
/**
 * Gets how many rows this [DataFrame] contains.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * @return The row count of the [DataFrame], as reported by `rowsCount()`.
 */
public fun <T> DataFrame<T>.count(): Int {
    return rowsCount()
}
35
72
73
/**
 * Tallies the rows of this [DataFrame] for which the given [predicate] holds.
 *
 * {@include [RowFilterDescription]}
 *
 * See also:
 * - [filter][DataFrame.filter] — filters rows using a [RowFilter] condition.
 * - [countDistinct][DataFrame.countDistinct] — counts distinct rows or values.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * ### Example
 * ```kotlin
 * // Count rows where the value in the "age" column is greater than 18
 * // and the "name/firstName" column starts with 'A'
 * df.count { age > 18 && name.firstName.startsWith("A") }
 * // Count rows whose preceding row has a "length" value of at least 50.0
 * // (the first row has no preceding row and is never counted)
 * df.count { prev()?.let { it.length >= 50.0 } ?: false }
 * ```
 *
 * @param T The schema marker type of the [DataFrame].
 * @param predicate A [RowFilter] evaluated for every row; a row is counted when it returns `true`.
 * @return How many rows passed the [predicate].
 */
public inline fun <T> DataFrame<T>.count(predicate: RowFilter<T>): Int {
    // A RowFilter takes the row both as receiver and as argument, hence the row is passed twice.
    return rows().count { row -> predicate(row, row) }
}
37
98
38
99
// endregion
39
100
40
101
// region GroupBy
41
102
103
/**
 * Aggregates this [GroupBy] into per-group row counts.
 *
 * The result is a new [DataFrame] with one row per group, consisting of:
 * - the original group key columns,
 * - one extra column (named [resultName], `"count"` by default) holding the number of rows of each group.
 *
 * The count is produced with `0` supplied as the default value.
 *
 * This is equivalent to applying `.aggregate { count() }`, but more efficient.
 *
 * See also: common [aggregate][Grouped.aggregate].
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * ### Example
 * ```kotlin
 * // Counts number of rows for each city, returning
 * // a new DataFrame with columns "city" and "count"
 * df.groupBy { city }.count()
 * ```
 *
 * @param resultName The name of the column that receives the group sizes. Defaults to `"count"`.
 * @return A new [DataFrame] with the group keys and the size of each group.
 */
// NOTE(review): the interpreter name literal below starts with a space — confirm this is intended.
@Refine
@Interpretable(" GroupByCount0")
public fun <T> Grouped<T>.count(resultName: String = "count"): DataFrame<T> {
    return aggregateValue(resultName) { count() default 0 }
}
46
131
132
+ /**
133
+ * Aggregates this [GroupBy] by counting the number of rows in each group
134
+ * that satisfy the given [predicate].
135
+ *
136
+ * {@include [RowFilterDescription]}
137
+ *
138
+ * Returns a new [DataFrame] where each row corresponds to a group.
139
+ * The resulting frame contains:
140
+ * - the original group key columns,
141
+ * - a new column (named [resultName], defaults to `"count"`)
142
+ * that stores the number of rows in each group matching the [predicate].
143
+ *
144
+ * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
145
+ *
146
+ * See also: common [aggregate][Grouped.aggregate].
147
+ *
148
+ * For more information: {@include [DocumentationUrls.Count]}
149
+ *
150
+ * ### Example
151
+ * ```kotlin
152
+ * // Count rows for each city where the "income" value is at least 30.0.
153
+ * // Returns a new DataFrame with columns "city" and "pointsCount".
154
+ * df.groupBy { city }.count("pointsCount") { income >= 30.0 }
155
+ * ```
156
+ *
157
+ * @param resultName The name of the result column containing the per-group counts. Defaults to `"count"`.
+ * @param predicate The row condition; only rows for which it returns `true` are counted.
158
+ * @return A new [DataFrame] with group keys and filtered row counts per group.
159
+ */
47
160
@Refine
48
161
@Interpretable(" GroupByCount0" )
49
162
public inline fun <T > Grouped<T>.count (
@@ -55,16 +168,150 @@ public inline fun <T> Grouped<T>.count(
55
168
56
169
// region Pivot
57
170
171
/**
 * Aggregates this [Pivot] into per-group row counts.
 *
 * The result is a single [DataRow] in which:
 * - every column stands for one [pivot] group — when several pivot keys were used,
 * the row contains a column group per pivot key, whose inner columns
 * correspond to the values of that key;
 * - every value is the number of rows in that group.
 *
 * The original [Pivot] column structure is preserved.
 * If the [Pivot] was built from multiple or nested keys
 * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
 * that structure stays as it is — only the contents of each group
 * are replaced with the number of rows in that group.
 *
 * This is equivalent to calling `.aggregate { count() }`, but more efficient.
 *
 * See also:
 * - common [aggregate][Pivot.aggregate];
 * - [pivotCounts][DataFrame.pivotCounts] shortcut.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * ### Example
 * ```kotlin
 * // Count the number of rows for each city.
 * // Returns a single DataRow with one column per city and the count of rows in each.
 * df.pivot { city }.count()
 * ```
 *
 * @return A single [DataRow] with one column per group and the size of that group as its value.
 */
public fun <T> Pivot<T>.count(): DataRow<T> {
    return delegate { count() }
}
59
204
205
/**
 * Aggregates this [Pivot] into per-group counts of the rows
 * for which the given [predicate] holds.
 *
 * {@include [RowFilterDescription]}
 *
 * The result is a single [DataRow] in which:
 * - every column stands for one [pivot] group — when several pivot keys were used,
 * the row contains a column group per pivot key, whose inner columns
 * correspond to the values of that key;
 * - every value is the number of rows in that group matching the [predicate].
 *
 * The original [Pivot] column structure is preserved.
 * If the [Pivot] was built from multiple or nested keys
 * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
 * that structure stays as it is — only the contents of each group
 * are replaced with the number of rows (matching the [predicate]) in that group.
 *
 * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
 *
 * See also:
 * - common [aggregate][Pivot.aggregate];
 * - [pivotCounts][DataFrame.pivotCounts] shortcut.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * ### Example
 * ```kotlin
 * // Count rows for each city where the "income" value is greater than 30.0.
 * // Returns a single DataRow with one column per city and the count of matching rows.
 * df.pivot { city }.count { income > 30.0 }
 * ```
 *
 * @param predicate A [RowFilter] evaluated for every row; a row is counted when it returns `true`.
 * @return A single [DataRow] with the original [Pivot] columns and the filtered row count of each group.
 */
public inline fun <T> Pivot<T>.count(crossinline predicate: RowFilter<T>): DataRow<T> {
    return delegate { count(predicate) }
}
61
241
62
242
// endregion
63
243
64
244
// region PivotGroupBy
65
245
246
/**
 * Aggregates this [PivotGroupBy] into row counts for every
 * combined [pivot] + [groupBy] group.
 *
 * The result is a new [DataFrame] containing the following matrix:
 * - one row per [groupBy] key (or set of keys);
 * - one column group per [pivot] key, where each inner column corresponds to a value of that key;
 * - each cell holds the number of rows in the corresponding pivot–group pair.
 *
 * The count of each cell is produced with `0` supplied as the default value.
 *
 * The original [Pivot] column structure is preserved.
 * If the [Pivot] was built from multiple or nested keys
 * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
 * the result contains nested column groups reflecting that key structure,
 * with each group containing columns for the values of the corresponding key.
 *
 * This is equivalent to calling `.aggregate { count() }`, but more efficient.
 *
 * See also:
 * - common [aggregate][PivotGroupBy.aggregate];
 * - [GroupBy.pivotCounts] shortcut.
 *
 * For more information: {@include [DocumentationUrls.Count]}
 *
 * ### Example
 * ```kotlin
 * // Compute a matrix with "city" values horizontally and
 * // "age" values vertically, where each cell contains
 * // the number of rows with the corresponding age–city pair.
 * df.pivot { city }.groupBy { age }.count()
 * ```
 *
 * @return A [DataFrame] with [groupBy] rows and pivoted counts as columns.
 */
public fun <T> PivotGroupBy<T>.count(): DataFrame<T> {
    return aggregate { count() default 0 }
}
67
280
281
+ /**
282
+ * Aggregates this [PivotGroupBy] by counting the number of rows in each
283
+ * combined [pivot] + [groupBy] group, that satisfy the given [predicate].
284
+ *
285
+ * Returns a new [DataFrame] containing the following matrix:
286
+ * - one row per [groupBy] key (or keys set);
287
+ * - one column group per [pivot] key, where each inner column corresponds to a value of that key;
288
+ * - each cell contains the number of rows in the corresponding pivot–group pair that satisfy the [predicate].
289
+ *
290
+ * The original [Pivot] column structure is preserved.
291
+ * If the [Pivot] was created using multiple or nested keys
292
+ * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]),
293
+ * the result will contain nested column groups reflecting that key structure,
294
+ * with each group containing columns for the values
295
+ * (matching the [predicate]) of the corresponding key.
296
+ *
297
+ * This is equivalent to calling `.aggregate { count(predicate) }`, but more efficient.
298
+ *
299
+ * See also:
300
+ * - common [aggregate][PivotGroupBy.aggregate];
301
+ * - [GroupBy.pivotCounts] shortcut.
302
+ *
303
+ * For more information: {@include [DocumentationUrls.Count]}
304
+ *
305
+ * ### Example
306
+ * ```kotlin
307
+ * // Compute a matrix with "city" values horizontally and
308
+ * // "age" values vertically, where each cell contains
309
+ * // the number of rows with the corresponding age–city pair and "income" above 30.0.
309
+ * df.pivot { city }.groupBy { age }.count { income > 30.0 }
311
+ * ```
312
+ *
313
+ * @param predicate The row condition; only rows for which it returns `true` are counted.
+ * @return A [DataFrame] with [groupBy] rows and pivoted counts of rows matching the [predicate] as columns.
314
+ */
68
315
public inline fun <T > PivotGroupBy<T>.count (crossinline predicate : RowFilter <T >): DataFrame <T > =
69
316
aggregate {
70
317
count(predicate) default
0 commit comments