Skip to content

Commit f469c84

Browse files
Automated commit of generated code
1 parent 8b00384 commit f469c84

File tree

3 files changed

+83
-0
lines changed

3 files changed

+83
-0
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
77
import org.jetbrains.kotlinx.dataframe.DataFrame
88
import org.jetbrains.kotlinx.dataframe.DataRow
99
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
10+
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
11+
import org.jetbrains.kotlinx.dataframe.annotations.Refine
1012
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1113
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1214
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
@@ -18,10 +20,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
1820
import org.jetbrains.kotlinx.dataframe.impl.asList
1921
import org.jetbrains.kotlinx.dataframe.impl.columnName
2022
import org.jetbrains.kotlinx.dataframe.impl.getListType
23+
import org.jetbrains.kotlinx.dataframe.util.SPLIT_STR
2124
import kotlin.reflect.KProperty
2225
import kotlin.reflect.KType
2326
import kotlin.reflect.typeOf
2427

28+
@Interpretable("Split0")
2529
/** Entry point of the split operation: wraps this [DataFrame] and the [columns] selector into a [Split]. */
public fun <T, C> DataFrame<T>.split(columns: ColumnsSelector<T, C?>): Split<T, C> = Split(this, columns)
2630

2731
/** Column-name overload: turns [columns] into a column set and delegates to the selector-based `split`. */
public fun <T> DataFrame<T>.split(vararg columns: String): Split<T, Any> = split { columns.toColumnSet() }
@@ -62,22 +66,27 @@ public typealias ColumnNamesGenerator<C> = ColumnWithPath<C>.(extraColumnIndex:
6266

6367
// region default
6468

69+
@Interpretable("SplitDefault")
6570
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.default(value: R?): SplitWithTransform<T, C, R> =
6671
by { it }.default(value)
6772

73+
@Deprecated(SPLIT_STR, ReplaceWith("""by(",").default(value)"""))
6874
public fun <T> Split<T, String>.default(value: String?): SplitWithTransform<T, String, String> =
6975
by { it.splitDefault() }.default(value)
7076

77+
@Interpretable("SplitWithTransformDefault")
7178
/**
 * Returns a copy of this [SplitWithTransform] whose `default` is set to [value].
 * NOTE(review): presumably `default` fills cells when a row yields fewer parts than there are target columns — confirm.
 */
public fun <T, C, R> SplitWithTransform<T, C, R>.default(value: R?): SplitWithTransform<T, C, R> = copy(default = value)
7279

7380
// endregion
7481

7582
// region by
7683

84+
@Interpretable("ByIterable")
7785
public inline fun <T, C, reified R> Split<T, C>.by(
7886
noinline splitter: DataRow<T>.(C) -> Iterable<R>,
7987
): SplitWithTransform<T, C, R> = by(typeOf<R>(), splitter)
8088

89+
@Interpretable("ByCharDelimiters")
8190
public fun <T, C> Split<T, C>.by(
8291
vararg delimiters: Char,
8392
trim: Boolean = true,
@@ -90,6 +99,22 @@ public fun <T, C> Split<T, C>.by(
9099
}
91100
}
92101

102+
/**
103+
* Example:
104+
* ```
105+
* dataFrameOf("str" to listOf("1 2 3 4"))
106+
* .split("str").by("\\s+".toRegex())
107+
* // when the list of explicitly specified columnNames is not long enough (or none at all),
108+
* // names for additional columns are generated
109+
* .into()
110+
* ```
111+
* Result:
112+
* ```
113+
* split1 split2 split3 split4
114+
* 1 2 3 4
115+
* ```
116+
*/
117+
@Interpretable("ByRegex")
93118
public fun <T, C> Split<T, C>.by(
94119
regex: Regex,
95120
trim: Boolean = true,
@@ -101,6 +126,7 @@ public fun <T, C> Split<T, C>.by(
101126
}
102127
}
103128

129+
@Interpretable("ByStringDelimiters")
104130
public fun <T, C> Split<T, C>.by(
105131
vararg delimiters: String,
106132
trim: Boolean = true,
@@ -126,10 +152,34 @@ internal inline fun <T, C, R> Split<T, C>.by(
126152

127153
// region match
128154

155+
/**
156+
* Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
157+
* excluding the first group, which is the entire matched String.
158+
* Example:
159+
* ```
160+
* dataFrameOf("str" to listOf("100 ml", "1 L"))
161+
* .split { "str"<String>() }.match("(\\d+)\\s*(ml|l|L)").into("volume", "unit")
162+
* ```
163+
* Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
164+
* See the [Split.by] overload with a regex parameter if you want to split a String value by a [Regex] delimiter.
165+
*/
166+
@Interpretable("MatchStringRegex")
129167
public fun <T, C : String?> Split<T, C>.match(
130168
@Language("RegExp") regex: String,
131169
): SplitWithTransform<T, C, String?> = match(regex.toRegex())
132170

171+
/**
172+
* Creates new String columns according to MatchResult [capturing groups](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.text/-match-result/group-values.html),
173+
* excluding the first group, which is the entire matched String.
174+
* Example:
175+
* ```
176+
* dataFrameOf("str" to listOf("100 ml", "1 L"))
177+
* .split { "str"<String>() }.match("(\\d+)\\s*(ml|l|L)").into("volume", "unit")
178+
* ```
179+
* Created columns will be nullable if [regex] doesn't match some rows or there are nulls in the original column.
180+
* See the [Split.by][org.jetbrains.kotlinx.dataframe.api.Split.by] overload with a regex parameter if you want to split a String value by a [Regex] delimiter.
181+
*/
182+
@Interpretable("MatchRegex")
133183
public fun <T, C : String?> Split<T, C>.match(regex: Regex): SplitWithTransform<T, C, String?> =
134184
by {
135185
it?.let {
@@ -171,6 +221,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
171221
vararg otherNames: KProperty<*>,
172222
): DataFrame<T> = into(listOf(firstName.columnName) + otherNames.map { it.columnName })
173223

224+
@Refine
225+
@Interpretable("SplitWithTransformInto0")
174226
public fun <T, C, R> SplitWithTransform<T, C, R>.into(
175227
vararg names: String,
176228
extraNamesGenerator: (ColumnWithPath<C>.(extraColumnIndex: Int) -> String)? = null,
@@ -188,6 +240,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.into(
188240
}
189241
}
190242

243+
@Refine
244+
@Interpretable("SplitIterableInto")
191245
public fun <T, C : Iterable<*>> Split<T, C>.into(
192246
vararg names: String,
193247
extraNamesGenerator: ColumnNamesGenerator<C>? = null,
@@ -199,6 +253,8 @@ public fun <T, C> Split<T, DataFrame<C>>.into(
199253
extraNamesGenerator: ColumnNamesGenerator<DataFrame<C>>? = null,
200254
): DataFrame<T> = by { it.rows() }.into(names.toList(), extraNamesGenerator)
201255

256+
@Refine
257+
@Interpretable("SplitPair")
202258
public fun <T, A, B> Split<T, Pair<A, B>>.into(firstCol: String, secondCol: String): DataFrame<T> =
203259
by { listOf(it.first, it.second) }.into(firstCol, secondCol)
204260

@@ -211,6 +267,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.into(
211267
secondCol: ColumnAccessor<B>,
212268
): DataFrame<T> = by { listOf(it.first, it.second) }.into(firstCol, secondCol)
213269

270+
@Deprecated(SPLIT_STR, ReplaceWith("""by(",").into(*names, extraNamesGenerator = extraNamesGenerator)"""))
214271
@JvmName("intoTC")
215272
public fun <T> Split<T, String>.into(
216273
vararg names: String,
@@ -226,6 +283,8 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.inward(
226283
extraNamesGenerator: ColumnNamesGenerator<C>? = null,
227284
): DataFrame<T> = copy(inward = true).into(names.toList(), extraNamesGenerator)
228285

286+
@Refine
287+
@Interpretable("SplitWithTransformInward0")
229288
public fun <T, C, R> SplitWithTransform<T, C, R>.inward(
230289
vararg names: String,
231290
extraNamesGenerator: ColumnNamesGenerator<C>? = null,
@@ -272,6 +331,7 @@ public inline fun <T, reified A, reified B> Split<T, Pair<A, B>>.inward(
272331
secondCol: ColumnAccessor<B>,
273332
): DataFrame<T> = by { listOf(it.first, it.second) }.inward(firstCol, secondCol)
274333

334+
@Deprecated(SPLIT_STR, ReplaceWith("""by(",").inward(*names, extraNamesGenerator = extraNamesGenerator)"""))
275335
@JvmName("inwardTC")
276336
public fun <T> Split<T, String>.inward(
277337
vararg names: String,
@@ -282,6 +342,8 @@ public fun <T> Split<T, String>.inward(
282342

283343
// region intoColumns
284344

345+
@Refine
346+
@Interpretable("SplitAnyFrameIntoColumns")
285347
public fun <T, C : AnyFrame> Split<T, C>.intoColumns(): DataFrame<T> =
286348
df.convert(columns).with {
287349
when {
@@ -296,11 +358,15 @@ public fun <T, C : AnyFrame> Split<T, C>.intoColumns(): DataFrame<T> =
296358
// region intoRows
297359

298360
@JvmName("intoRowsTC")
361+
@Refine
362+
@Interpretable("SplitIntoRows")
299363
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.intoRows(dropEmpty: Boolean = true): DataFrame<T> =
300364
by { it }
301365
.intoRows(dropEmpty)
302366

303367
@JvmName("intoRowsFrame")
368+
@Refine
369+
@Interpretable("SplitAnyFrameRows")
304370
public fun <T, C : AnyFrame> Split<T, C>.intoRows(dropEmpty: Boolean = true): DataFrame<T> =
305371
by { it.rows() }.intoRows(dropEmpty)
306372

@@ -309,6 +375,8 @@ internal inline fun <T, C, R> Convert<T, C?>.splitInplace(
309375
crossinline transform: DataRow<T>.(C) -> Iterable<R>,
310376
) = withRowCellImpl(getListType(type), Infer.None) { if (it == null) emptyList() else transform(it).asList() }
311377

378+
@Refine
379+
@Interpretable("SplitWithTransformIntoRows")
312380
public fun <T, C, R> SplitWithTransform<T, C, R>.intoRows(dropEmpty: Boolean = true): DataFrame<T> {
313381
val paths = df.getColumnPaths(columns).toColumnSet()
314382
return df.convert { paths as ColumnSet<C?> }.splitInplace(tartypeOf, transform).explode(dropEmpty) { paths }
@@ -319,8 +387,12 @@ public fun <T, C, R> SplitWithTransform<T, C, R>.intoRows(dropEmpty: Boolean = t
319387
// region inplace
320388

321389
@JvmName("inplaceTC")
390+
@Refine
391+
@Interpretable("SplitInplace")
322392
/** Shortcut for iterable columns: uses each cell as its own list of parts (`by { it }`) and delegates to the `SplitWithTransform` version of `inplace`. */
public inline fun <T, C : Iterable<R>, reified R> Split<T, C>.inplace(): DataFrame<T> = by { it }.inplace()
323393

394+
@Refine
395+
@Interpretable("SplitWithTransformInplace")
324396
public fun <T, C, R> SplitWithTransform<T, C, R>.inplace(): DataFrame<T> =
325397
df.convert(columns).splitInplace(tartypeOf, transform)
326398

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ internal const val TO_CSV = "toCsv() is deprecated in favor of toCsvStr() in dat
160160
internal const val TO_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.toCsvStr"
161161
internal const val TO_CSV_REPLACE = "this.toCsvStr()"
162162

163+
internal const val SPLIT_STR =
164+
"Please explicitly specify how the String should be split. This shortcut will be removed in version 1.1.0"
165+
163166
// endregion
164167

165168
// region keep across releases

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,6 +2184,14 @@ class DataFrameTests : BaseTest() {
21842184
split["name"] shouldBe typed.name.map { it.toCharArray().toList() }
21852185
}
21862186

2187+
@Test
2188+
fun `split iterable inplace`() {
2189+
val df = dataFrameOf("a" to listOf(listOf(1), null)).split { "a"<List<Int>?>() }.inplace()
2190+
2191+
df["a"].type() shouldBe typeOf<List<Int>>()
2192+
df["a"].values() shouldBe listOf(listOf(1), emptyList())
2193+
}
2194+
21872195
@Test
21882196
fun `split into rows with transform`() {
21892197
val split = typed.split { city }.by { it.toCharArray().toList() }.intoRows()

0 commit comments

Comments
 (0)