@@ -7,6 +7,8 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
7
7
import org.jetbrains.kotlinx.dataframe.DataFrame
8
8
import org.jetbrains.kotlinx.dataframe.DataRow
9
9
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
10
+ import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
11
+ import org.jetbrains.kotlinx.dataframe.annotations.Refine
10
12
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
11
13
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
12
14
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
@@ -18,10 +20,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
18
20
import org.jetbrains.kotlinx.dataframe.impl.asList
19
21
import org.jetbrains.kotlinx.dataframe.impl.columnName
20
22
import org.jetbrains.kotlinx.dataframe.impl.getListType
23
+ import org.jetbrains.kotlinx.dataframe.util.SPLIT_STR
21
24
import kotlin.reflect.KProperty
22
25
import kotlin.reflect.KType
23
26
import kotlin.reflect.typeOf
24
27
28
+ @Interpretable(" Split0" )
25
29
public fun <T, C> DataFrame<T>.split(columns: ColumnsSelector<T, C?>): Split<T, C> {
    // Wrap this frame together with the column selector in a Split; the returned
    // object is the receiver for the follow-up split functions declared in this file.
    return Split(this, columns)
}
26
30
27
31
/**
 * Starts a split operation on the columns with the given names.
 *
 * The names are turned into a column set inside the selector that is passed on
 * to the selector-based [split] overload.
 */
public fun <T> DataFrame<T>.split(vararg columns: String): Split<T, Any> {
    return split { columns.toColumnSet() }
}
@@ -62,22 +66,27 @@ public typealias ColumnNamesGenerator<C> = ColumnWithPath<C>.(extraColumnIndex:
62
66
63
67
// region default
64
68
69
+ @Interpretable(" SplitDefault" )
65
70
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.default(value: R?): SplitWithTransform<T, C, R> {
    // The cell value is already an Iterable<R>, so the identity lambda is used as the splitter;
    // the default is then set through SplitWithTransform.default.
    val identitySplit = by { it }
    return identitySplit.default(value)
}
67
72
73
+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").default(value)""" ))
68
74
public fun <T> Split<T, String>.default(value: String?): SplitWithTransform<T, String, String> {
    // Each string cell is split with splitDefault(); the default value is attached afterwards.
    val splitStrings = by { it.splitDefault() }
    return splitStrings.default(value)
}
70
76
77
+ @Interpretable(" SplitWithTransformDefault" )
71
78
public fun <T, C, R> SplitWithTransform<T, C, R>.default(value: R?): SplitWithTransform<T, C, R> {
    // Returns a copy of this configuration whose `default` is replaced by `value`;
    // the receiver itself is not modified.
    return copy(default = value)
}
72
79
73
80
// endregion
74
81
75
82
// region by
76
83
84
+ @Interpretable(" ByIterable" )
77
85
public inline fun <T, C, reified R> Split<T, C>.by(
    noinline splitter: DataRow<T>.(C) -> Iterable<R>,
): SplitWithTransform<T, C, R> {
    // Materialize the reified element type and pass it, with the splitter,
    // to the two-argument `by` overload.
    val elementType = typeOf<R>()
    return by(elementType, splitter)
}
80
88
89
+ @Interpretable(" ByCharDelimiters" )
81
90
public fun <T , C > Split <T , C >.by (
82
91
vararg delimiters : Char ,
83
92
trim : Boolean = true,
@@ -90,6 +99,22 @@ public fun <T, C> Split<T, C>.by(
90
99
}
91
100
}
92
101
102
+ /* *
103
+ * Example:
104
+ * ```
105
+ * dataFrameOf("str" to listOf("1 2 3 4"))
106
+ * .split("str").by("\\s+".toRegex())
107
+ * // when the list of explicitly specified columnNames is not long enough (or none at all),
108
+ * // names for additional columns are generates
109
+ * .into()
110
+ * ```
111
+ * Result:
112
+ * ```
113
+ * split1 split2 split3 split4
114
+ * 1 2 3 4
115
+ * ```
116
+ */
117
+ @Interpretable(" ByRegex" )
93
118
public fun <T , C > Split <T , C >.by (
94
119
regex : Regex ,
95
120
trim : Boolean = true,
@@ -101,6 +126,7 @@ public fun <T, C> Split<T, C>.by(
101
126
}
102
127
}
103
128
129
+ @Interpretable(" ByStringDelimiters" )
104
130
public fun <T , C > Split <T , C >.by (
105
131
vararg delimiters : String ,
106
132
trim : Boolean = true,
@@ -126,10 +152,34 @@ internal inline fun <T, C, R> Split<T, C>.by(
126
152
127
153
// region match
128
154
155
+ /* *
156
+ * Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
157
+ * excluding the first group, which is the entire matched String.
158
+ * Example:
159
+ * ```
160
+ * dataFrameOf("str" to listOf("100 ml", "1 L"))
161
+ * .split { "str"<String>() }.match("(\\d+)\\s*(ml|l|L)").into("volume", "unit")
162
+ * ```
163
+ * Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
164
+ * Check [Split.by] overload with regex parameter if you're looking to split String value by [Regex] delimiter
165
+ */
166
+ @Interpretable(" MatchStringRegex" )
129
167
public fun <T, C : String?> Split<T, C>.match(
    @Language(" RegExp") regex: String,
): SplitWithTransform<T, C, String?> {
    // Convert the pattern text to a Regex and delegate to the Regex-based overload.
    val compiled = regex.toRegex()
    return match(compiled)
}
132
170
171
+ /* *
172
+ * Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
173
+ * excluding the first group, which is the entire matched String.
174
+ * Example:
175
+ * ```
176
+ * dataFrameOf("str" to listOf("100 ml", "1 L"))
177
+ * .split { "str"<String>() }.match("(\\d+)\\s*(ml|l|L)").into("volume", "unit")
178
+ * ```
179
+ * Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
180
+ * Check [Split.by][org.jetbrains.kotlinx.dataframe.api.Split.by] overload with regex parameter if you're looking to split String value by [Regex] delimiter
181
+ */
182
+ @Interpretable(" MatchRegex" )
133
183
public fun <T , C : String ?> Split <T , C >.match (regex : Regex ): SplitWithTransform <T , C , String ?> =
134
184
by {
135
185
it?.let {
@@ -171,6 +221,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
171
221
vararg otherNames : KProperty <* >,
172
222
): DataFrame <T > = into(listOf (firstName.columnName) + otherNames.map { it.columnName })
173
223
224
+ @Refine
225
+ @Interpretable(" SplitWithTransformInto0" )
174
226
public fun <T , C , R > SplitWithTransform <T , C , R >.into (
175
227
vararg names : String ,
176
228
extraNamesGenerator : (ColumnWithPath <C >.(extraColumnIndex: Int ) -> String )? = null,
@@ -188,6 +240,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
188
240
}
189
241
}
190
242
243
+ @Refine
244
+ @Interpretable(" SplitIterableInto" )
191
245
public fun <T , C : Iterable <* >> Split <T , C >.into (
192
246
vararg names : String ,
193
247
extraNamesGenerator : ColumnNamesGenerator <C >? = null,
@@ -199,6 +253,8 @@ public fun <T, C> Split<T, DataFrame<C>>.into(
199
253
extraNamesGenerator : ColumnNamesGenerator <DataFrame <C >>? = null,
200
254
): DataFrame <T > = by { it.rows() }.into(names.toList(), extraNamesGenerator)
201
255
256
+ @Refine
257
+ @Interpretable(" SplitPair" )
202
258
public fun <T, A, B> Split<T, Pair<A, B>>.into(firstCol: String, secondCol: String): DataFrame<T> {
    // Turn every pair into a two-element list (first, then second) so that the
    // generic name-based `into` can be used with the two supplied names.
    val pairAsList = by { (left, right) -> listOf(left, right) }
    return pairAsList.into(firstCol, secondCol)
}
204
260
@@ -211,6 +267,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.into(
211
267
secondCol : ColumnAccessor <B >,
212
268
): DataFrame <T > = by { listOf (it.first, it.second) }.into(firstCol, secondCol)
213
269
270
+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").into(*names, extraNamesGenerator = extraNamesGenerator)""" ))
214
271
@JvmName(" intoTC" )
215
272
public fun <T > Split <T , String >.into (
216
273
vararg names : String ,
@@ -226,6 +283,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.inward(
226
283
extraNamesGenerator : ColumnNamesGenerator <C >? = null,
227
284
): DataFrame <T > = copy(inward = true ).into(names.toList(), extraNamesGenerator)
228
285
286
+ @Refine
287
+ @Interpretable(" SplitWithTransformInward0" )
229
288
public fun <T , C , R > SplitWithTransform <T , C , R >.inward (
230
289
vararg names : String ,
231
290
extraNamesGenerator : ColumnNamesGenerator <C >? = null,
@@ -272,6 +331,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.inward(
272
331
secondCol : ColumnAccessor <B >,
273
332
): DataFrame <T > = by { listOf (it.first, it.second) }.inward(firstCol, secondCol)
274
333
334
+ @Deprecated(SPLIT_STR , ReplaceWith (""" by(",").inward(*names, extraNamesGenerator = extraNamesGenerator)""" ))
275
335
@JvmName(" inwardTC" )
276
336
public fun <T > Split <T , String >.inward (
277
337
vararg names : String ,
@@ -282,6 +342,8 @@ public fun <T> Split<T, String>.inward(
282
342
283
343
// region intoColumns
284
344
345
+ @Refine
346
+ @Interpretable(" SplitAnyFrameIntoColumns" )
285
347
public fun <T , C : AnyFrame > Split <T , C >.intoColumns (): DataFrame <T > =
286
348
df.convert(columns).with {
287
349
when {
@@ -296,11 +358,15 @@ public fun <T, C : AnyFrame> Split<T, C>.intoColumns(): DataFrame<T> =
296
358
// region intoRows
297
359
298
360
@JvmName(" intoRowsTC" )
361
+ @Refine
362
+ @Interpretable(" SplitIntoRows" )
299
363
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.intoRows(dropEmpty: Boolean = true): DataFrame<T> {
    // Cells are already Iterable<R>: use the identity splitter and delegate to
    // SplitWithTransform.intoRows, forwarding `dropEmpty` unchanged.
    val identitySplit = by { it }
    return identitySplit.intoRows(dropEmpty)
}
302
366
303
367
@JvmName(" intoRowsFrame" )
368
+ @Refine
369
+ @Interpretable(" SplitAnyFrameRows" )
304
370
public fun <T, C : AnyFrame> Split<T, C>.intoRows(dropEmpty: Boolean = true): DataFrame<T> {
    // Each frame-valued cell is split into its rows(); the result is then handed to
    // SplitWithTransform.intoRows with the same `dropEmpty` flag.
    val splitByFrameRows = by { it.rows() }
    return splitByFrameRows.intoRows(dropEmpty)
}
306
372
@@ -309,6 +375,8 @@ internal inline fun <T, C, R> Convert<T, C?>.splitInplace(
309
375
crossinline transform : DataRow <T >.(C ) -> Iterable <R >,
310
376
) = withRowCellImpl(getListType(type), Infer .None ) { if (it == null ) emptyList() else transform(it).asList() }
311
377
378
+ @Refine
379
+ @Interpretable(" SplitWithTransformIntoRows" )
312
380
public fun <T , C , R > SplitWithTransform <T , C , R >.intoRows (dropEmpty : Boolean = true): DataFrame <T > {
313
381
val paths = df.getColumnPaths(columns).toColumnSet()
314
382
return df.convert { paths as ColumnSet <C ?> }.splitInplace(tartypeOf, transform).explode(dropEmpty) { paths }
@@ -319,8 +387,12 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.intoRows(dropEmpty: Boolean = t
319
387
// region inplace
320
388
321
389
@JvmName(" inplaceTC" )
390
+ @Refine
391
+ @Interpretable(" SplitInplace" )
322
392
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.inplace(): DataFrame<T> {
    // Identity splitter for already-iterable cells, then delegate to SplitWithTransform.inplace.
    val identitySplit = by { it }
    return identitySplit.inplace()
}
323
393
394
+ @Refine
395
+ @Interpretable(" SplitWithTransformInplace" )
324
396
public fun <T, C, R> SplitWithTransform<T, C, R>.inplace(): DataFrame<T> {
    // Start a convert operation over the selected columns, then apply the stored
    // transform through splitInplace, passing the `tartypeOf` type along.
    val conversion = df.convert(columns)
    return conversion.splitInplace(tartypeOf, transform)
}
326
398
0 commit comments