|
| 1 | +package org.jetbrains.kotlinx.dataframe.examples.plugin |
| 2 | + |
| 3 | +import org.jetbrains.kotlinx.dataframe.DataFrame |
| 4 | +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema |
| 5 | +import org.jetbrains.kotlinx.dataframe.api.add |
| 6 | +import org.jetbrains.kotlinx.dataframe.api.convert |
| 7 | +import org.jetbrains.kotlinx.dataframe.api.convertTo |
| 8 | +import org.jetbrains.kotlinx.dataframe.api.filter |
| 9 | +import org.jetbrains.kotlinx.dataframe.api.into |
| 10 | +import org.jetbrains.kotlinx.dataframe.api.rename |
| 11 | +import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase |
| 12 | +import org.jetbrains.kotlinx.dataframe.api.with |
| 13 | +import org.jetbrains.kotlinx.dataframe.io.readCsv |
| 14 | +import org.jetbrains.kotlinx.dataframe.io.writeCsv |
| 15 | +import java.net.URL |
| 16 | + |
/**
 * Data schema for the DataFrame read from `jetbrains_repositories.csv`.
 *
 * Property names are kept in snake_case: `main` accesses the columns under exactly
 * these names (e.g. `full_name`) until it calls `renameToCamelCase()`.
 *
 * @property full_name full repository name; `main` takes the part after the last `/` as the short name.
 * @property html_url URL of the repository (presumably its web page — confirm against the CSV).
 * @property stargazers_count star count; `main` renames this column to `stars`.
 * @property topics all topics packed into one string; `main` parses it as a `[...]`-wrapped,
 *   comma-separated list.
 * @property watchers watcher count (presumably — confirm against the CSV).
 */
@DataSchema
data class Repositories(
    val full_name: String,
    val html_url: URL,
    val stargazers_count: Int,
    val topics: String,
    val watchers: Int,
)
| 26 | + |
/** Kinds of repositories, as assigned by [getKind]. */
enum class RepoKind {
    /** The keyword "kotlin" was found in the repository's topics or name. */
    Kotlin,

    /** The keyword "idea" or "intellij" was found (and "kotlin" was not). */
    IntelliJ,

    /** None of the known keywords were found. */
    Other,
}
| 33 | + |
/**
 * Determines the kind of a repository from its name and topics.
 *
 * A keyword matches when it is an element of [topics] (exact, case-sensitive comparison)
 * or a substring of the lowercased [fullName]. "kotlin" is checked first, so it takes
 * precedence over the IntelliJ keywords.
 *
 * @param fullName full repository name; matched case-insensitively by lowercasing it.
 * @param topics topics of the repository; each keyword is compared against whole entries.
 * @return [RepoKind.Kotlin] if "kotlin" matches, otherwise [RepoKind.IntelliJ] if "idea" or
 *   "intellij" matches, otherwise [RepoKind.Other].
 */
fun getKind(fullName: String, topics: List<String>): RepoKind {
    val normalizedName = fullName.lowercase()
    val matches: (String) -> Boolean = { keyword -> keyword in topics || keyword in normalizedName }

    if (matches("kotlin")) return RepoKind.Kotlin
    if (matches("idea") || matches("intellij")) return RepoKind.IntelliJ
    return RepoKind.Other
}
| 44 | + |
/**
 * Example entry point: reads the JetBrains repositories CSV, demonstrates how the
 * DataFrame Compiler Plugin keeps the schema in sync after each operation, and writes
 * the transformed DataFrame to `jetbrains_repositories_new.csv`.
 */
fun main() {
    // Load the CSV from the URL and convert it to the `Repositories` schema.
    val repos = DataFrame
        .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
        .convertTo<Repositories>()

    // With the Compiler Plugin, the DataFrame schema is updated right after each operation:
    // a column that was just added, renamed or converted can be used under its new
    // name/type in the very next call of the chain.
    // (Demonstration only — the result of this chain is intentionally not stored.)
    repos
        // Add a new "name" column...
        .add("name") { full_name.substringAfterLast("/") }
        // ...and use the freshly generated `name` extension right away, e.g. in `filter`.
        .filter { "kotlin" in name.lowercase() }

    // Now build the updated DataFrame using the same feature.
    val reposUpdated = repos
        // Rename columns to camelCase; from here on the extension properties
        // carry the new names corresponding to the column names.
        .renameToCamelCase()
        // Rename the "stargazersCount" column to "stars"...
        .rename { stargazersCount }.into("stars")
        // ...and immediately filter by the new name.
        .filter { stars > 50 }
        // Convert the "topics" column (a `String` so far) into a list of topics:
        // strip the surrounding brackets, then split by comma and trim each entry.
        // An empty bracket content yields an empty list.
        .convert { topics }.with { raw ->
            raw.removeSurrounding("[", "]")
                .takeIf { it.isNotEmpty() }
                ?.split(',')
                ?.map { topic -> topic.trim() }
                .orEmpty()
        }
        // "topics" is now a `List<String>` column, so its size is available directly.
        .add("topicCount") { topics.size }
        // Classify every repository by its name and topics.
        .add("kind") { getKind(fullName = fullName, topics = topics) }

    // Write the updated DataFrame to a CSV file.
    reposUpdated.writeCsv("jetbrains_repositories_new.csv")

    // TODO: Add Kandy Plot
    // Sketch (note: at this point the column is called `stars`, not `stargazersCount`):
    // reposUpdated.groupBy { kind }.max { stars }.plot {
    //     bars {
    //         x(kind)
    //         y(stars)
    //     }
    // }
}
0 commit comments