update example to make it easier to copy-paste and understand

koperagen · koperagen · commit 9007afe31029 · 2024-01-22T14:18:38.000+02:00
diff --git a/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithColumnAccessor.kt b/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithColumnAccessor.kt
@@ -3,16 +3,25 @@ package org.jetbrains.kotlinx.dataframe.examples.movies
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.api.*
 import org.jetbrains.kotlinx.dataframe.api.column
-import org.jetbrains.kotlinx.dataframe.io.read
+import org.jetbrains.kotlinx.dataframe.io.*
 
 private const val pathToCsv = "examples/idea-examples/movies/src/main/resources/movies.csv"
+// To copy-paste and run this code in your own project without downloading the file, use this line instead of the one above
+//private const val pathToCsv = "https://raw.githubusercontent.com/Kotlin/dataframe/master/examples/idea-examples/movies/src/main/resources/movies.csv"
 
 fun main() {
+    // This example shows how to use the column accessor API to address columns in different operations
+    // https://kotlin.github.io/dataframe/apilevels.html
     val genres by column<String>()
     val title by column<String>()
     val year by column<Int>()
 
-    DataFrame
+    /**
+     *                              movieId                                    title                              genres
+     *   0 9b30aff7943f44579e92c261f3adc193                    Women in Black (1997)          Fantasy|Suspenseful|Comedy
+     *   1 2a1ba1fc5caf492a80188e032995843e                   Bumblebee Movie (2007)        Comedy|Jazz|Family|Animation
+     */
+    val step1 = DataFrame
         .read(pathToCsv)
         .split { genres }.by("|").inplace()
         .split { title }.by {
@@ -22,12 +31,30 @@ fun main() {
             )
         }.into(title, year)
         .explode { genres }
-        .filter { year() >= 0 && genres() != "(no genres listed)" }
+    step1.print()
+
+    /**
+     * Data is parsed and prepared for aggregation
+     *                             movieId                                    title year      genres
+     *   0 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997     Fantasy
+     *   1 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997 Suspenseful
+     *   2 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997      Comedy
+     *   3 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Comedy
+     *   4 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007        Jazz
+     *   5 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Family
+     *   6 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007   Animation
+     */
+
+    val step2 = step1.filter { year() >= 0 && genres() != "(no genres listed)" }
         .groupBy { year }
         .sortBy { year }
         .pivot(inward = false) { genres }
         .aggregate {
             count() into "count"
-            mean() into "mean"
-        }.print(10)
+            title().first() into "example"
+        }
+    step2.print(rowsLimit = 10)
+
+//    Discover the final reshaped data in an interactive HTML table
+//    step2.toStandaloneHTML().openInBrowser()
 }
diff --git a/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithDataClass.kt b/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithDataClass.kt
@@ -2,15 +2,23 @@ package org.jetbrains.kotlinx.dataframe.examples.movies
 
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.api.*
-import org.jetbrains.kotlinx.dataframe.io.read
+import org.jetbrains.kotlinx.dataframe.io.*
 
 private const val pathToCsv = "examples/idea-examples/movies/src/main/resources/movies.csv"
+// To copy-paste and run this code in your own project without downloading the file, use this line instead of the one above
+//private const val pathToCsv = "https://raw.githubusercontent.com/Kotlin/dataframe/master/examples/idea-examples/movies/src/main/resources/movies.csv"
 
 fun main() {
-
+    // This example shows how to use the KProperties API to address columns in different operations
+    // https://kotlin.github.io/dataframe/apilevels.html
     data class Movie(val movieId: String, val title: String, val genres: String, val year: Int)
 
-    DataFrame
+    /**
+     *                              movieId                                    title                              genres
+     *   0 9b30aff7943f44579e92c261f3adc193                    Women in Black (1997)          Fantasy|Suspenseful|Comedy
+     *   1 2a1ba1fc5caf492a80188e032995843e                   Bumblebee Movie (2007)        Comedy|Jazz|Family|Animation
+     */
+    val step1 = DataFrame
         .read(pathToCsv)
         .split(Movie::genres).by("|").inplace()
         .split(Movie::title).by {
@@ -20,6 +28,19 @@ fun main() {
             )
         }.into(Movie::title, Movie::year)
         .explode(Movie::genres)
+
+    /**
+     * Data is parsed and prepared for aggregation
+     *                             movieId                                    title year      genres
+     *   0 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997     Fantasy
+     *   1 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997 Suspenseful
+     *   2 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997      Comedy
+     *   3 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Comedy
+     *   4 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007        Jazz
+     *   5 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Family
+     *   6 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007   Animation
+     */
+    val step2 = step1
         .filter { it[Movie::year] >= 0 && it[Movie::genres] != "(no genres listed)" }
         .groupBy(Movie::year)
         .sortBy(Movie::year)
@@ -28,4 +49,7 @@ fun main() {
             count() into "count"
             mean() into "mean"
         }.print(10)
+
+//    Discover the final reshaped data in an interactive HTML table (first move `.print(10)` out of the chain above so that step2 holds the DataFrame)
+//    step2.toStandaloneHTML().openInBrowser()
 }
diff --git a/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithInterface.kt b/examples/idea-examples/movies/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/movies/moviesWithInterface.kt
@@ -2,21 +2,14 @@ package org.jetbrains.kotlinx.dataframe.examples.movies
 
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
-import org.jetbrains.kotlinx.dataframe.api.by
-import org.jetbrains.kotlinx.dataframe.api.convertTo
-import org.jetbrains.kotlinx.dataframe.api.count
-import org.jetbrains.kotlinx.dataframe.api.explode
-import org.jetbrains.kotlinx.dataframe.api.filter
-import org.jetbrains.kotlinx.dataframe.api.groupBy
-import org.jetbrains.kotlinx.dataframe.api.inplace
-import org.jetbrains.kotlinx.dataframe.api.into
-import org.jetbrains.kotlinx.dataframe.api.mean
-import org.jetbrains.kotlinx.dataframe.api.pivot
-import org.jetbrains.kotlinx.dataframe.api.print
-import org.jetbrains.kotlinx.dataframe.api.sortBy
-import org.jetbrains.kotlinx.dataframe.api.split
-import org.jetbrains.kotlinx.dataframe.io.read
+import org.jetbrains.kotlinx.dataframe.api.*
+import org.jetbrains.kotlinx.dataframe.io.*
 
+/**
+ *                              movieId                                    title                              genres
+ *   0 9b30aff7943f44579e92c261f3adc193                    Women in Black (1997)          Fantasy|Suspenseful|Comedy
+ *   1 2a1ba1fc5caf492a80188e032995843e                   Bumblebee Movie (2007)        Comedy|Jazz|Family|Animation
+ */
 @DataSchema
 interface Movie {
     val movieId: String
@@ -25,9 +18,16 @@ interface Movie {
 }
 
 private const val pathToCsv = "examples/idea-examples/movies/src/main/resources/movies.csv"
+// To copy-paste and run this code in your own project without downloading the file, use this line instead of the one above
+//private const val pathToCsv = "https://raw.githubusercontent.com/Kotlin/dataframe/master/examples/idea-examples/movies/src/main/resources/movies.csv"
 
 fun main() {
-    DataFrame
+    // This example shows how to use the extension properties API to address columns in different operations
+    // https://kotlin.github.io/dataframe/apilevels.html
+
+    // To run this example, add the Gradle plugin and run `assemble`;
+    // see the README for setup details: https://github.com/Kotlin/dataframe?tab=readme-ov-file#setup
+    val step1 = DataFrame
         .read(pathToCsv).convertTo<Movie>()
         .split { genres }.by("|").inplace()
         .split { title }.by {
@@ -37,12 +37,30 @@ fun main() {
             )
         }.into("title", "year")
         .explode("genres")
+    step1.print()
+
+    /**
+     * Data is parsed and prepared for aggregation
+     *                             movieId                                    title year      genres
+     *   0 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997     Fantasy
+     *   1 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997 Suspenseful
+     *   2 9b30aff7943f44579e92c261f3adc193                           Women in Black 1997      Comedy
+     *   3 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Comedy
+     *   4 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007        Jazz
+     *   5 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007      Family
+     *   6 2a1ba1fc5caf492a80188e032995843e                          Bumblebee Movie 2007   Animation
+     */
+    val step2 = step1
         .filter { "year"<Int>() >= 0 && genres != "(no genres listed)" }
         .groupBy("year")
         .sortBy("year")
         .pivot("genres", inward = false)
         .aggregate {
             count() into "count"
             mean() into "mean"
-        }.print(10)
+        }
+
+    step2.print(10)
+//    Discover the final reshaped data in an interactive HTML table
+//    step2.toStandaloneHTML().openInBrowser()
 }