Skip to content

Commit fd2554c

Browse files
authored
Merge pull request #763 from Kotlin/generate-data-classes
Improve codegen for stdlib <-> df interop workflow
2 parents 4f55ebe + 9296544 commit fd2554c

File tree

17 files changed

+820
-556
lines changed

17 files changed

+820
-556
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import org.jetbrains.dataframe.impl.codeGen.CodeGenerator
4+
import org.jetbrains.kotlinx.dataframe.DataFrame
5+
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
6+
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
7+
import org.jetbrains.kotlinx.dataframe.impl.codeGen.from
8+
9+
/**
 * Generates code for this [DataFrame], taking the marker name from the type parameter [T].
 *
 * The name is obtained from `markerName<T>()`; everything else is delegated to the
 * `generateCode` overload that accepts an explicit marker name.
 *
 * @param fields forwarded unchanged to the delegate overload.
 * @param extensionProperties forwarded unchanged to the delegate overload.
 * @return the generated declarations wrapped in a [CodeString].
 */
public inline fun <reified T> DataFrame<T>.generateCode(
    fields: Boolean = true,
    extensionProperties: Boolean = true,
): CodeString = generateCode(
    markerName = markerName<T>(),
    fields = fields,
    extensionProperties = extensionProperties,
)
16+
17+
/**
 * Generates code for the schema of this [DataFrame] under the given [markerName].
 *
 * A fresh [CodeGenerator] is created for every call and is always invoked with `isOpen = true`.
 *
 * @param markerName name passed to the generator as the marker name.
 * @param fields passed through to [CodeGenerator.generate].
 * @param extensionProperties passed through to [CodeGenerator.generate].
 * @param visibility visibility passed through to [CodeGenerator.generate].
 * @return the `declarations` part of the generated code, wrapped in a [CodeString].
 */
public fun <T> DataFrame<T>.generateCode(
    markerName: String,
    fields: Boolean = true,
    extensionProperties: Boolean = true,
    visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC,
): CodeString {
    val generated = CodeGenerator.create().generate(
        schema = schema(),
        name = markerName,
        fields = fields,
        extensionProperties = extensionProperties,
        isOpen = true,
        visibility = visibility,
    )
    return generated.code.declarations.toCodeString()
}
33+
34+
/**
 * Shortcut for `generateCode` with `fields = true` and `extensionProperties = false`,
 * using the marker name derived from [T].
 *
 * @return the generated declarations wrapped in a [CodeString].
 */
public inline fun <reified T> DataFrame<T>.generateInterfaces(): CodeString {
    return generateCode(fields = true, extensionProperties = false)
}
38+
39+
/**
 * Generates data class declarations for the schema of this [DataFrame].
 *
 * The generator is always called with `fields = true`, `isOpen = false` and `asDataClass = true`.
 *
 * @param markerName name of the top-level declaration; when `null`, `markerName<T>()` is used instead.
 * @param extensionProperties passed through to [CodeGenerator.generate].
 * @param visibility visibility passed through to [CodeGenerator.generate].
 * @param useFqNames passed to [CodeGenerator.create].
 * @param nameNormalizer passed to the generator as `fieldNameNormalizer`.
 * @return the `declarations` part of the generated code, wrapped in a [CodeString].
 */
public inline fun <reified T> DataFrame<T>.generateDataClasses(
    markerName: String? = null,
    extensionProperties: Boolean = false,
    visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC,
    useFqNames: Boolean = false,
    nameNormalizer: NameNormalizer = NameNormalizer.default,
): CodeString {
    // Resolve the name first, exactly as before, so the generator is only created afterwards.
    val resolvedName = markerName ?: markerName<T>()
    val generated = CodeGenerator.create(useFqNames).generate(
        schema = schema(),
        name = resolvedName,
        fields = true,
        extensionProperties = extensionProperties,
        isOpen = false,
        visibility = visibility,
        asDataClass = true,
        fieldNameNormalizer = nameNormalizer,
    )
    return generated.code.declarations.toCodeString()
}
59+
60+
/**
 * Picks the marker name used for code generated from a `DataFrame<T>`.
 *
 * Returns the simple name of [T] when `T::class` is abstract, and `"DataEntry"` otherwise.
 * `"DataEntry"` is also the fallback if the abstract class reports no simple name, so this
 * function never throws on a missing name (the previous `!!` would have).
 */
@PublishedApi
internal inline fun <reified T> markerName(): String =
    T::class.takeIf { it.isAbstract }?.simpleName ?: "DataEntry"
64+
65+
/**
 * Shortcut for `generateCode` with an explicit [markerName], `fields = true`
 * and `extensionProperties = false`.
 *
 * @param markerName name passed to `generateCode` as the marker name.
 * @return the generated declarations wrapped in a [CodeString].
 */
public fun <T> DataFrame<T>.generateInterfaces(markerName: String): CodeString {
    return generateCode(markerName = markerName, fields = true, extensionProperties = false)
}
70+
71+
/**
 * Default [NameNormalizer], built with tab, space and underscore as delimiters.
 *
 * Converts delimited strings such as 'my_name' or 'my name' to camelCase 'myName'.
 * A new normalizer is built on every access.
 */
public val NameNormalizer.Companion.default: NameNormalizer
    get() = NameNormalizer.from(setOf('\t', ' ', '_'))
75+
76+
/**
 * Inline wrapper around a [String] holding generated source code.
 *
 * @property value the wrapped text.
 */
@JvmInline
public value class CodeString(public val value: String) {
    /** Returns the wrapped [value] unchanged. */
    override fun toString(): String {
        return value
    }
}
80+
81+
/** Wraps this string in a [CodeString] without modifying it. */
@PublishedApi
internal fun String.toCodeString(): CodeString {
    return CodeString(this)
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/print.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,5 @@ public fun <T, G> GroupBy<T, G>.print(): Unit = println(this)
4242
public fun DataFrameSchema.print(): Unit = println(this)
4343

4444
// endregion
45+
46+
/** Prints this [CodeString] to standard output using `println`. */
public fun CodeString.print() {
    println(this)
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public interface CodeGenerator : ExtensionsCodeGenerator {
3636
knownMarkers: Iterable<Marker> = emptyList(),
3737
readDfMethod: DefaultReadDfMethod? = null,
3838
fieldNameNormalizer: NameNormalizer = NameNormalizer.id(),
39+
asDataClass: Boolean = false
3940
): CodeGenResult
4041

4142
public fun generate(

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@ import org.jetbrains.kotlinx.dataframe.impl.codeGen.needsQuoting
55
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
66

77
public sealed interface FieldType {
8-
public class ValueFieldType(public val typeFqName: String) : FieldType
9-
public class FrameFieldType(public val markerName: String, public val nullable: Boolean) : FieldType
10-
public class GroupFieldType(public val markerName: String) : FieldType
8+
public data class ValueFieldType(public val typeFqName: String) : FieldType
9+
public data class FrameFieldType(
10+
public val markerName: String,
11+
public val nullable: Boolean,
12+
public val renderAsList: Boolean
13+
) : FieldType
14+
public data class GroupFieldType(public val markerName: String, public val renderAsObject: Boolean) : FieldType
1115
}
1216

1317
/**
@@ -36,8 +40,8 @@ private fun String.toNullable() = if (this.last() == '?' || this == "*") this el
3640
public fun FieldType.toNullable(): FieldType =
3741
if (isNotNullable()) {
3842
when (this) {
39-
is FieldType.FrameFieldType -> FieldType.FrameFieldType(markerName.toNullable(), nullable)
40-
is FieldType.GroupFieldType -> FieldType.GroupFieldType(markerName.toNullable())
43+
is FieldType.FrameFieldType -> FieldType.FrameFieldType(markerName.toNullable(), nullable, renderAsList)
44+
is FieldType.GroupFieldType -> FieldType.GroupFieldType(markerName.toNullable(), renderAsObject)
4145
is FieldType.ValueFieldType -> FieldType.ValueFieldType(typeFqName.toNullable())
4246
}
4347
} else this
@@ -55,13 +59,15 @@ public fun FieldType.toNotNullable(): FieldType =
5559
else it.removeSuffix("?")
5660
},
5761
nullable = nullable,
62+
renderAsList
5863
)
5964

6065
is FieldType.GroupFieldType -> FieldType.GroupFieldType(
6166
markerName = markerName.let {
6267
if (it == "*") "Any"
6368
else it.removeSuffix("?")
6469
},
70+
renderAsObject
6571
)
6672

6773
is FieldType.ValueFieldType -> FieldType.ValueFieldType(
@@ -88,6 +94,10 @@ public class ValidFieldName private constructor(private val identifier: String,
8894
return ValidFieldName(identifier = identifier + other.identifier, needsQuote = needsQuote || other.needsQuote)
8995
}
9096

97+
/** Returns the stored `identifier` as is; `needsQuote` is not consulted here. */
override fun toString(): String = identifier
100+
91101
public companion object {
92102
public fun of(name: String): ValidFieldName {
93103
val needsQuote = name.needsQuoting()

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MarkersExtractor.kt

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,33 @@ import kotlin.reflect.full.withNullability
1717
import kotlin.reflect.jvm.jvmErasure
1818
import kotlin.reflect.typeOf
1919

20-
internal fun KType.shouldBeConvertedToFrameColumn(): Boolean = when (jvmErasure) {
21-
DataFrame::class -> true
22-
List::class -> arguments[0].type?.jvmErasure?.hasAnnotation<DataSchema>() == true
23-
else -> false
20+
/**
 * Classifies this type into a [FieldKind] based on its erased class.
 *
 * Checks are made in order, first match wins:
 * 1. `DataFrame` gives [Frame].
 * 2. `List` whose first type argument's class is annotated with `DataSchema` gives [ListToFrame].
 * 3. `DataRow` gives [Group].
 * 4. Any class annotated with `DataSchema` gives [ObjectToGroup].
 * 5. Everything else gives [Default].
 */
internal fun KType.getFieldKind(): FieldKind {
    val erased = jvmErasure
    // A star-projected list element has a null type, which is treated as "not annotated".
    val elementIsDataSchema = { arguments[0].type?.jvmErasure?.hasAnnotation<DataSchema>() == true }
    return when {
        erased == DataFrame::class -> Frame
        erased == List::class && elementIsDataSchema() -> ListToFrame
        erased == DataRow::class -> Group
        erased.hasAnnotation<DataSchema>() -> ObjectToGroup
        else -> Default
    }
}
2527

26-
internal fun KType.shouldBeConvertedToColumnGroup(): Boolean = jvmErasure.let {
27-
it == DataRow::class || it.hasAnnotation<DataSchema>()
28+
/**
 * Result of classifying a property type, see `KType.getFieldKind()`.
 *
 * Both flags default to `false`; each kind overrides only the flag that applies to it.
 */
internal sealed interface FieldKind {
    /** `true` for kinds that are converted to a column group. */
    val shouldBeConvertedToColumnGroup: Boolean
        get() = false

    /** `true` for kinds that are converted to a frame column. */
    val shouldBeConvertedToFrameColumn: Boolean
        get() = false
}

/** Frame kind: converted to a frame column. */
internal data object Frame : FieldKind {
    override val shouldBeConvertedToFrameColumn: Boolean
        get() = true
}

/** List-to-frame kind: converted to a frame column. */
internal data object ListToFrame : FieldKind {
    override val shouldBeConvertedToFrameColumn: Boolean
        get() = true
}

/** Kind with neither conversion flag set. */
internal data object Default : FieldKind

/** Group kind: converted to a column group. */
internal data object Group : FieldKind {
    override val shouldBeConvertedToColumnGroup: Boolean
        get() = true
}

/** Object-to-group kind: converted to a column group. */
internal data object ObjectToGroup : FieldKind {
    override val shouldBeConvertedToColumnGroup: Boolean
        get() = true
}
2948

3049
private fun String.toNullable(): String = if (endsWith("?")) this else "$this?"
@@ -62,18 +81,26 @@ internal object MarkersExtractor {
6281
val type = it.returnType
6382
val fieldType: FieldType
6483
val clazz = type.jvmErasure
84+
val fieldKind = type.getFieldKind()
6585
val columnSchema = when {
66-
type.shouldBeConvertedToColumnGroup() -> {
86+
fieldKind.shouldBeConvertedToColumnGroup -> {
6787
val nestedType = if (clazz == DataRow::class) type.arguments[0].type ?: typeOf<Any?>() else type
6888
val marker = get(nestedType.jvmErasure, nullableProperties || type.isMarkedNullable)
69-
fieldType = FieldType.GroupFieldType(marker.name)
89+
fieldType = FieldType.GroupFieldType(
90+
marker.name,
91+
renderAsObject = fieldKind is ObjectToGroup
92+
)
7093
ColumnSchema.Group(marker.schema, nestedType)
7194
}
7295

73-
type.shouldBeConvertedToFrameColumn() -> {
96+
fieldKind.shouldBeConvertedToFrameColumn -> {
7497
val frameType = type.arguments[0].type ?: typeOf<Any?>()
7598
val marker = get(frameType.jvmErasure, nullableProperties || type.isMarkedNullable)
76-
fieldType = FieldType.FrameFieldType(marker.name, type.isMarkedNullable || nullableProperties)
99+
fieldType = FieldType.FrameFieldType(
100+
marker.name,
101+
type.isMarkedNullable || nullableProperties,
102+
renderAsList = fieldKind is ListToFrame
103+
)
77104
ColumnSchema.Frame(marker.schema, type.isMarkedNullable, frameType)
78105
}
79106

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/generateCode.kt

Lines changed: 0 additions & 43 deletions
This file was deleted.

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import org.jetbrains.kotlinx.dataframe.api.TraversePropertiesDsl
1111
import org.jetbrains.kotlinx.dataframe.api.concat
1212
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
1313
import org.jetbrains.kotlinx.dataframe.api.toDataFrameFromPairs
14-
import org.jetbrains.kotlinx.dataframe.codeGen.shouldBeConvertedToColumnGroup
15-
import org.jetbrains.kotlinx.dataframe.codeGen.shouldBeConvertedToFrameColumn
14+
import org.jetbrains.kotlinx.dataframe.codeGen.getFieldKind
1615
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1716
import org.jetbrains.kotlinx.dataframe.impl.asList
1817
import org.jetbrains.kotlinx.dataframe.impl.columnName
@@ -248,11 +247,12 @@ internal fun convertToDataFrame(
248247
}
249248
}
250249
val kClass = returnType.classifier as KClass<*>
250+
val fieldKind = returnType.getFieldKind()
251251

252252
val shouldCreateValueCol = (
253253
maxDepth <= 0 &&
254-
!returnType.shouldBeConvertedToFrameColumn() &&
255-
!returnType.shouldBeConvertedToColumnGroup()
254+
!fieldKind.shouldBeConvertedToFrameColumn &&
255+
!fieldKind.shouldBeConvertedToColumnGroup
256256
) ||
257257
kClass == Any::class ||
258258
kClass in preserveClasses ||

0 commit comments

Comments
 (0)