Skip to content

Commit 9ca36e8

Browse files
committed
Add castTo to help working with implicitly generated schemas in notebooks and plugin
1 parent 9d5f0a7 commit 9ca36e8

File tree

4 files changed

+73
-0
lines changed

4 files changed

+73
-0
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ public inline fun <reified T> AnyFrame.cast(verify: Boolean = true): DataFrame<T
3333
).cast()
3434
else cast()
3535

36+
/**
 * Casts this dataframe to the schema type [T], where [T] is taken from the type of [df].
 *
 * [df] is never read; it exists only so the compiler can infer [T] from an existing
 * `DataFrame<T>` value instead of requiring the type to be named explicitly.
 * The call is forwarded to `cast<T>(verify = verify)`.
 *
 * @param df dataframe whose static type supplies [T]; its value is unused.
 * @param verify passed through unchanged to `cast<T>`; defaults to `true`.
 * @return the result of `cast<T>(verify = verify)` on this dataframe.
 */
public inline fun <reified T> AnyFrame.castTo(
37+
@Suppress("UNUSED_PARAMETER") df: DataFrame<T>,
38+
verify: Boolean = true
39+
): DataFrame<T> {
40+
return cast<T>(verify = verify)
41+
}
42+
3643
/** Returns this row typed as `DataRow<T>` using a plain `as` cast; no other check is performed here. */
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>
3744

3845
/**
 * Casts the dataframe returned by `df()` with `cast<T>(verify)` and returns the element at index 0 of the result.
 *
 * @param verify passed through unchanged to the dataframe-level `cast<T>`; defaults to `true`.
 */
public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@ import org.jetbrains.kotlinx.dataframe.api.at
1414
import org.jetbrains.kotlinx.dataframe.api.by
1515
import org.jetbrains.kotlinx.dataframe.api.byName
1616
import org.jetbrains.kotlinx.dataframe.api.cast
17+
import org.jetbrains.kotlinx.dataframe.api.castTo
1718
import org.jetbrains.kotlinx.dataframe.api.colsOf
1819
import org.jetbrains.kotlinx.dataframe.api.column
1920
import org.jetbrains.kotlinx.dataframe.api.columnGroup
2021
import org.jetbrains.kotlinx.dataframe.api.columnOf
2122
import org.jetbrains.kotlinx.dataframe.api.concat
2223
import org.jetbrains.kotlinx.dataframe.api.convert
2324
import org.jetbrains.kotlinx.dataframe.api.convertTo
25+
import org.jetbrains.kotlinx.dataframe.api.count
2426
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
2527
import org.jetbrains.kotlinx.dataframe.api.default
2628
import org.jetbrains.kotlinx.dataframe.api.dropNulls
@@ -100,6 +102,7 @@ import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy
100102
import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
101103
import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues
102104
import org.jetbrains.kotlinx.dataframe.indices
105+
import org.jetbrains.kotlinx.dataframe.io.readJson
103106
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
104107
import org.jetbrains.kotlinx.dataframe.io.renderToString
105108
import org.jetbrains.kotlinx.dataframe.testResource
@@ -1421,4 +1424,33 @@ class Modify : TestBase() {
14211424
| 1 kotlin /kotlin 180
14221425
|""".trimMargin()
14231426
}
1427+
1428+
/** Schema with a single `perf` column of type [Double]; the `castTo` test in this class casts its sample dataframe to it. */
@DataSchema
1429+
interface ImplicitSchema {
1430+
val perf: Double
1431+
}
1432+
1433+
// Documentation sample: reads "sample.json" into `sample`.
// Annotated with @Ignore, and `sample` is intentionally unused (hence the UNUSED_VARIABLE suppression).
@Test
1434+
@Ignore
1435+
@Suppress("UNUSED_VARIABLE")
1436+
fun castToGenerateSchema() {
1437+
// SampleStart
1438+
val sample = DataFrame.readJson("sample.json")
1439+
// SampleEnd
1440+
}
1441+
1442+
// Documentation sample for `castTo`: `sample` is a one-column ("perf") dataframe cast to ImplicitSchema,
// and each file read in the loop is cast to the same type via `castTo(sample)` so `perf` can be used in `count`.
// `files` is an empty list, so the loop body never executes when this test runs.
@Test
1443+
@Suppress("KotlinConstantConditions")
1444+
fun castTo() {
1445+
val sample = dataFrameOf("perf")(10.0, 20.0, 12.0).cast<ImplicitSchema>()
1446+
val files = listOf<String>() // not intended to run
1447+
// SampleStart
1448+
for (file in files) {
1449+
// df here is expected to have the same structure as sample
1450+
val df = DataFrame.readJson(file).castTo(sample)
1451+
val count = df.count { perf > 10.0 }
1452+
println("$file: $count")
1453+
}
1454+
// SampleEnd
1455+
}
14241456
}
28.7 KB
Loading

docs/StardustDocs/topics/cast.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,37 @@ df.cast<Person>()
2626
```
2727

2828
To convert [`DataFrame`](DataFrame.md) columns to match given schema, use [`convertTo`](convertTo.md) operation.
29+
30+
**Reusing implicitly generated schema**
31+
32+
```kotlin
33+
castTo<T>(df: DataFrame<T>)
34+
```
35+
36+
In notebooks, dataframe types are implicitly generated.
37+
38+
![Implicitly generated schema](implicitlyGeneratedSchema.png)
39+
40+
This type can be referred to, but its name will change whenever you re-execute cells.
41+
Here is how you can do it in a more robust way:
42+
43+
<!---FUN castToGenerateSchema-->
44+
45+
```kotlin
46+
val sample = DataFrame.readJson("sample.json")
47+
```
48+
49+
<!---END-->
50+
51+
<!---FUN castTo-->
52+
53+
```kotlin
54+
for (file in files) {
55+
// df here is expected to have the same structure as sample
56+
val df = DataFrame.readJson(file).castTo(sample)
57+
val count = df.count { perf > 10.0 }
58+
println("$file: $count")
59+
}
60+
```
61+
62+
<!---END-->

0 commit comments

Comments
 (0)