Skip to content

Commit 161c4fd

Browse files
committed
Rename DataFrame.map overloads to DataFrame.mapToColumn and DataFrame.mapToFrame. Add DataFrame.map that returns List. Add name argument to DataColumn.map.
1 parent a744649 commit 161c4fd

File tree

17 files changed

+158
-70
lines changed

17 files changed

+158
-70
lines changed

docs/StardustDocs/topics/map.md

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,23 @@
22

33
<!---IMPORT org.jetbrains.kotlinx.dataframe.samples.api.Modify-->
44

5-
Creates `DataFrame` or `DataColumn` with values computed from rows of original `DataFrame`.
5+
Creates a `List`, [DataFrame](DataFrame.md) or [DataColumn](DataColumn.md) with values computed from the rows of the original `DataFrame`.
6+
7+
**Map into `List`:**
8+
9+
```text
10+
map { transform }: List<T>
11+
12+
transform: (DataRow) -> T
13+
```
14+
15+
<!---FUN map-->
16+
17+
```kotlin
18+
df.map { 2021 - it.age }
19+
```
20+
21+
<!---END-->
622

723
**Map into `DataColumn`:**
824

@@ -12,6 +28,34 @@ map(columnName) { rowExpression }: DataColumn
1228
rowExpression: DataRow.(DataRow) -> Value
1329
```
1430

31+
<!---FUN mapToColumn-->
32+
<tabs>
33+
<tab title="Properties">
34+
35+
```kotlin
36+
df.mapToColumn("year of birth") { 2021 - age }
37+
```
38+
39+
</tab>
40+
<tab title="Accessors">
41+
42+
```kotlin
43+
val age by column<Int>()
44+
val yearOfBirth by column<Int>("year of birth")
45+
46+
df.mapToColumn(yearOfBirth) { 2021 - age }
47+
```
48+
49+
</tab>
50+
<tab title="Strings">
51+
52+
```kotlin
53+
df.mapToColumn("year of birth") { 2021 - "age"<Int>() }
54+
```
55+
56+
</tab></tabs>
57+
<!---END-->
58+
1559
See [row expressions](DataRow.md#row-expressions)
1660

1761
**Map into `DataFrame`:**
@@ -31,7 +75,7 @@ columnMapping = column into columnName | columnName from column | columnName fro
3175
<tab title="Properties">
3276

3377
```kotlin
34-
df.map {
78+
df.mapToFrame {
3579
"year of birth" from 2021 - age
3680
age gt 18 into "is adult"
3781
name.lastName.length() into "last name length"
@@ -54,7 +98,7 @@ val firstName by name.column<String>()
5498
val lastName by name.column<String>()
5599
val city by column<String?>()
56100

57-
df.map {
101+
df.mapToFrame {
58102
yob from 2021 - age
59103
age gt 18 into isAdult
60104
lastName.length() into lastNameLength
@@ -67,7 +111,7 @@ df.map {
67111
<tab title="Strings">
68112

69113
```kotlin
70-
df.map {
114+
df.mapToFrame {
71115
"year of birth" from 2021 - "age"<Int>()
72116
"age"<Int>() gt 18 into "is adult"
73117
"name"["lastName"]<String>().length() into "last name length"

examples/jupyter-notebooks/github/github.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@
12921292
}
12931293
],
12941294
"source": [
1295-
"val df = jbRepos.map { \n",
1295+
"val df = jbRepos.mapToFrame { \n",
12961296
" \"id\" from { id }\n",
12971297
" +name\n",
12981298
" \"total\" from { contributors.count() }\n",
@@ -1584,7 +1584,7 @@
15841584
}
15851585
],
15861586
"source": [
1587-
"organizations.map(\"most_starred_repo\") {\n",
1587+
"organizations.mapToColumn(\"most_starred_repo\") {\n",
15881588
" it.repos.maxBy { stargazers_count }\n",
15891589
"}"
15901590
]
@@ -1693,7 +1693,7 @@
16931693
}
16941694
],
16951695
"source": [
1696-
"organizations.map(\"most_starred_repo\") {\n",
1696+
"organizations.mapToColumn(\"most_starred_repo\") {\n",
16971697
" // Alternative syntax\n",
16981698
" // this.repos.maxBy { stargazers_count }\n",
16991699
" // it.repos.maxBy { stargazers_count }\n",
@@ -1800,7 +1800,7 @@
18001800
}
18011801
],
18021802
"source": [
1803-
"organizations.map(\"most_starred_repo\") {\n",
1803+
"organizations.mapToColumn(\"most_starred_repo\") {\n",
18041804
" repos.select { name and html_url and stargazers_count }.maxBy { stargazers_count }\n",
18051805
"}"
18061806
]
@@ -1858,7 +1858,7 @@
18581858
}
18591859
],
18601860
"source": [
1861-
"organizations.map { \n",
1861+
"organizations.mapToFrame { \n",
18621862
" \"most_starred_repo\" from { \n",
18631863
" repos\n",
18641864
" .select { name and html_url and stargazers_count }\n",
@@ -1979,7 +1979,7 @@
19791979
}
19801980
],
19811981
"source": [
1982-
"organizationsWithMinifiedRepos.map { \n",
1982+
"organizationsWithMinifiedRepos.mapToFrame { \n",
19831983
" \"most_starred_repo\" from { \n",
19841984
" repos.maxBy { stargazers_count } \n",
19851985
" }\n",
@@ -2042,7 +2042,7 @@
20422042
}
20432043
],
20442044
"source": [
2045-
"organizationsWithMinifiedRepos.map(\"interesting_repos\") {\n",
2045+
"organizationsWithMinifiedRepos.mapToColumn(\"interesting_repos\") {\n",
20462046
" val sorted = repos.sortByDesc { stargazers_count }\n",
20472047
" sorted.take(5).concat(sorted.takeLast(5))\n",
20482048
"}"

examples/jupyter-notebooks/puzzles/40 puzzles.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,7 @@
16731673
}
16741674
],
16751675
"source": [
1676-
"df.map(\"res\") { \n",
1676+
"df.mapToColumn(\"res\") { \n",
16771677
" namedValuesOf<Double>()\n",
16781678
" .filter { it.value.isNaN() }.drop(2)\n",
16791679
" .firstOrNull()?.name \n",
@@ -2021,7 +2021,7 @@
20212021
}
20222022
],
20232023
"source": [
2024-
"df.map(\"Y\") {\n",
2024+
"df.mapToColumn(\"Y\") {\n",
20252025
" if(it.X == 0) 0 else (prev()?.new() ?: 0) + 1\n",
20262026
"}"
20272027
]
@@ -3118,4 +3118,4 @@
31183118
},
31193119
"nbformat": 4,
31203120
"nbformat_minor": 1
3121-
}
3121+
}

src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.api.asDataColumn
55
import org.jetbrains.kotlinx.dataframe.api.cast
66
import org.jetbrains.kotlinx.dataframe.api.concat
77
import org.jetbrains.kotlinx.dataframe.api.filter
8-
import org.jetbrains.kotlinx.dataframe.api.map
8+
import org.jetbrains.kotlinx.dataframe.api.mapToFrame
99
import org.jetbrains.kotlinx.dataframe.api.schema
1010
import org.jetbrains.kotlinx.dataframe.api.take
1111
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ public inline fun <T, reified R> ColumnsSelectionDsl<T>.expr(
341341
name: String = "",
342342
infer: Infer = Infer.Nulls,
343343
noinline expression: AddExpression<T, R>
344-
): DataColumn<R> = map(name, infer, expression)
344+
): DataColumn<R> = mapToColumn(name, infer, expression)
345345

346346
internal fun <T, C> ColumnsSelector<T, C>.filter(predicate: (ColumnWithPath<C>) -> Boolean): ColumnsSelector<T, C> =
347347
{ this@filter(it, it).filter(predicate) }

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.ColumnsContainer
99
import org.jetbrains.kotlinx.dataframe.DataFrame
1010
import org.jetbrains.kotlinx.dataframe.DataRow
1111
import org.jetbrains.kotlinx.dataframe.RowExpression
12+
import org.jetbrains.kotlinx.dataframe.Selector
1213
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1314
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1415
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
@@ -32,21 +33,21 @@ internal class AddDataRowImpl<T>(index: Int, owner: DataFrame<T>, private val co
3233
override fun <C> AnyRow.new() = container[index] as C
3334
}
3435

35-
public typealias AddExpression<T, C> = AddDataRow<T>.(AddDataRow<T>) -> C
36+
public typealias AddExpression<T, C> = Selector<AddDataRow<T>, C>
3637

3738
public inline fun <reified R, T> DataFrame<T>.add(
3839
name: String,
3940
infer: Infer = Infer.Nulls,
4041
noinline expression: AddExpression<T, R>
4142
): DataFrame<T> =
42-
(this + map(name, infer, expression))
43+
(this + mapToColumn(name, infer, expression))
4344

4445
public inline fun <reified R, T> DataFrame<T>.add(
4546
property: KProperty<R>,
4647
infer: Infer = Infer.Nulls,
4748
noinline expression: AddExpression<T, R>
4849
): DataFrame<T> =
49-
(this + map(property, infer, expression))
50+
(this + mapToColumn(property, infer, expression))
5051

5152
public inline fun <reified R, T> DataFrame<T>.add(
5253
column: ColumnAccessor<R>,
@@ -60,7 +61,7 @@ public inline fun <reified R, T> DataFrame<T>.add(
6061
infer: Infer = Infer.Nulls,
6162
noinline expression: AddExpression<T, R>
6263
): DataFrame<T> {
63-
val col = map(path.name(), infer, expression)
64+
val col = mapToColumn(path.name(), infer, expression)
6465
if (path.size == 1) return this + col
6566
return insertImpl(path, col)
6667
}
@@ -103,7 +104,7 @@ public class AddDsl<T>(@PublishedApi internal val df: DataFrame<T>) : ColumnsCon
103104
name: String,
104105
infer: Infer = Infer.Nulls,
105106
noinline expression: RowExpression<T, R>
106-
): Boolean = add(df.map(name, infer, expression))
107+
): Boolean = add(df.mapToColumn(name, infer, expression))
107108

108109
public inline infix fun <reified R> String.from(noinline expression: RowExpression<T, R>): Boolean = add(this, Infer.Nulls, expression)
109110

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,11 @@ public fun DataColumn<Long>.convertToLocalDate(zone: TimeZone = defaultTimeZone)
180180
public fun DataColumn<Long?>.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn<LocalDate?> = map { it?.toLocalDate(zone) }
181181

182182
@JvmName("convertToLocalDateFromInt")
183-
public fun DataColumn<Int>.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn<LocalDate> = map { it.toLong().toLocalDate(zone) }
183+
public fun DataColumn<Int>.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn<LocalDate> =
184+
map { it.toLong().toLocalDate(zone) }
184185
@JvmName("convertToLocalDateFromIntNullable")
185-
public fun DataColumn<Int?>.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn<LocalDate?> = map { it?.toLong()?.toLocalDate(zone) }
186+
public fun DataColumn<Int?>.convertToLocalDate(zone: TimeZone = defaultTimeZone): DataColumn<LocalDate?> =
187+
map { it?.toLong()?.toLocalDate(zone) }
186188

187189
@JvmName("convertToLocalDateFromString")
188190
public fun DataColumn<String>.convertToLocalDate(pattern: String? = null, locale: Locale? = null): DataColumn<LocalDate> {
@@ -213,9 +215,11 @@ public fun DataColumn<Long>.convertToLocalTime(zone: TimeZone = defaultTimeZone)
213215
public fun DataColumn<Long?>.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalTime?> = map { it?.toLocalTime(zone) }
214216

215217
@JvmName("convertToLocalTimeFromInt")
216-
public fun DataColumn<Int>.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalTime> = map { it.toLong().toLocalTime(zone) }
218+
public fun DataColumn<Int>.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalTime> =
219+
map { it.toLong().toLocalTime(zone) }
217220
@JvmName("convertToLocalTimeIntNullable")
218-
public fun DataColumn<Int?>.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalTime?> = map { it?.toLong()?.toLocalTime(zone) }
221+
public fun DataColumn<Int?>.convertToLocalTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalTime?> =
222+
map { it?.toLong()?.toLocalTime(zone) }
219223

220224
@JvmName("convertToLocalTimeFromString")
221225
public fun DataColumn<String>.convertToLocalTime(pattern: String? = null, locale: Locale? = null): DataColumn<LocalTime> {
@@ -246,14 +250,18 @@ public fun DataColumn<Long>.convertToLocalDateTime(zone: TimeZone = defaultTimeZ
246250
public fun DataColumn<Long?>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime?> = map { it?.toLocalDateTime(zone) }
247251

248252
@JvmName("convertToLocalDateTimeFromInstant")
249-
public fun DataColumn<Instant>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime> = map { it.toLocalDateTime(zone) }
253+
public fun DataColumn<Instant>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime> =
254+
map { it.toLocalDateTime(zone) }
250255
@JvmName("convertToLocalDateTimeFromInstantNullable")
251-
public fun DataColumn<Instant?>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime?> = map { it?.toLocalDateTime(zone) }
256+
public fun DataColumn<Instant?>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime?> =
257+
map { it?.toLocalDateTime(zone) }
252258

253259
@JvmName("convertToLocalDateTimeFromInt")
254-
public fun DataColumn<Int>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime> = map { it.toLong().toLocalDateTime(zone) }
260+
public fun DataColumn<Int>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime> =
261+
map { it.toLong().toLocalDateTime(zone) }
255262
@JvmName("convertToLocalDateTimeFromIntNullable")
256-
public fun DataColumn<Int?>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime?> = map { it?.toLong()?.toLocalDateTime(zone) }
263+
public fun DataColumn<Int?>.convertToLocalDateTime(zone: TimeZone = defaultTimeZone): DataColumn<LocalDateTime?> =
264+
map { it?.toLong()?.toLocalDateTime(zone) }
257265

258266
@JvmName("convertToLocalDateTimeFromString")
259267
public fun DataColumn<String>.convertToLocalDateTime(pattern: String? = null, locale: Locale? = null): DataColumn<LocalDateTime> {

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/insert.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public inline fun <T, reified R> DataFrame<T>.insert(
1717
name: String,
1818
infer: Infer = Infer.Nulls,
1919
noinline expression: RowExpression<T, R>
20-
): InsertClause<T> = insert(map(name, infer, expression))
20+
): InsertClause<T> = insert(mapToColumn(name, infer, expression))
2121

2222
public inline fun <T, reified R> DataFrame<T>.insert(
2323
column: ColumnAccessor<R>,

src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,17 @@ public inline fun <C, reified R> ColumnReference<C>.map(infer: Infer = Infer.Nul
2626
// region DataColumn
2727

2828
public inline fun <T, reified R> DataColumn<T>.map(
29+
name: String = name(),
2930
infer: Infer = Infer.Nulls,
3031
crossinline transform: (T) -> R
3132
): DataColumn<R> {
3233
val newValues = Array(size()) { transform(get(it)) }.asList()
33-
return DataColumn.create(name(), newValues, typeOf<R>(), infer)
34+
return DataColumn.create(name, newValues, typeOf<R>(), infer)
3435
}
3536

36-
public fun <T, R> DataColumn<T>.mapTo(
37+
public fun <T, R> DataColumn<T>.map(
3738
type: KType,
39+
name: String = name(),
3840
infer: Infer = Infer.Nulls,
3941
transform: (T) -> R
4042
): DataColumn<R> {
@@ -46,29 +48,52 @@ public fun <T, R> DataColumn<T>.mapTo(
4648

4749
// region DataFrame
4850

49-
public fun <T> DataFrame<T>.map(body: AddDsl<T>.() -> Unit): AnyFrame {
50-
val dsl = AddDsl(this)
51-
body(dsl)
52-
return dataFrameOf(dsl.columns)
53-
}
51+
public fun <T, R> DataFrame<T>.map(transform: (DataRow<T>) -> R): List<R> = rows().map(transform)
5452

55-
public inline fun <T, reified R> ColumnsContainer<T>.map(
53+
public inline fun <T, reified R> ColumnsContainer<T>.mapToColumn(
5654
name: String,
5755
infer: Infer = Infer.Nulls,
5856
noinline body: AddExpression<T, R>
59-
): DataColumn<R> = newColumn(typeOf<R>(), name, infer, body)
57+
): DataColumn<R> = mapToColumn(name, typeOf<R>(), infer, body)
6058

61-
public inline fun <T, reified R> ColumnsContainer<T>.map(
59+
public inline fun <T, reified R> ColumnsContainer<T>.mapToColumn(
6260
column: ColumnAccessor<R>,
6361
infer: Infer = Infer.Nulls,
6462
noinline body: AddExpression<T, R>
65-
): DataColumn<R> = map(column.name(), infer, body)
63+
): DataColumn<R> = mapToColumn(column, typeOf<R>(), infer, body)
6664

67-
public inline fun <T, reified R> ColumnsContainer<T>.map(
65+
public inline fun <T, reified R> ColumnsContainer<T>.mapToColumn(
6866
column: KProperty<R>,
6967
infer: Infer = Infer.Nulls,
7068
noinline body: AddExpression<T, R>
71-
): DataColumn<R> = map(column.columnName, infer, body)
69+
): DataColumn<R> = mapToColumn(column, typeOf<R>(), infer, body)
70+
71+
public fun <T, R> ColumnsContainer<T>.mapToColumn(
72+
name: String,
73+
type: KType,
74+
infer: Infer = Infer.Nulls,
75+
body: AddExpression<T, R>
76+
): DataColumn<R> = newColumn(type, name, infer, body)
77+
78+
public fun <T, R> ColumnsContainer<T>.mapToColumn(
79+
column: ColumnAccessor<R>,
80+
type: KType,
81+
infer: Infer = Infer.Nulls,
82+
body: AddExpression<T, R>
83+
): DataColumn<R> = mapToColumn(column.name(), type, infer, body)
84+
85+
public fun <T, R> ColumnsContainer<T>.mapToColumn(
86+
column: KProperty<R>,
87+
type: KType,
88+
infer: Infer = Infer.Nulls,
89+
body: AddExpression<T, R>
90+
): DataColumn<R> = mapToColumn(column.columnName, type, infer, body)
91+
92+
public fun <T> DataFrame<T>.mapToFrame(body: AddDsl<T>.() -> Unit): AnyFrame {
93+
val dsl = AddDsl(this)
94+
body(dsl)
95+
return dataFrameOf(dsl.columns)
96+
}
7297

7398
// endregion
7499

0 commit comments

Comments
 (0)