Skip to content

Commit 798edb0

Browse files
committed
renamed DataColumn.create to DataColumn.createUnsafe and added clarifying KDocs for that suite of functions to explain what they're for
1 parent 6f73354 commit 798edb0

File tree

12 files changed

+126
-36
lines changed

12 files changed

+126
-36
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 86 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
2525
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
2626
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
2727
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
28+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN
29+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_IMPORT
30+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE
2831
import kotlin.reflect.KClass
2932
import kotlin.reflect.KProperty
3033
import kotlin.reflect.KType
@@ -45,6 +48,9 @@ public interface DataColumn<out T> : BaseColumn<T> {
4548
/**
4649
* Creates [ValueColumn] using given [name], [values] and [type].
4750
*
51+
* Be careful; values are NOT checked to adhere to [type] for efficiency,
52+
* unless you specify [infer].
53+
*
4854
* @param name name of the column
4955
* @param values list of column values
5056
* @param type type of the column
@@ -56,11 +62,20 @@ public interface DataColumn<out T> : BaseColumn<T> {
5662
type: KType,
5763
infer: Infer = Infer.None,
5864
defaultValue: T? = null,
59-
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
65+
): ValueColumn<T> =
66+
ValueColumnImpl(
67+
values = values,
68+
name = name,
69+
type = getValuesType(values, type, infer),
70+
defaultValue = defaultValue,
71+
)
6072

6173
/**
6274
* Creates [ValueColumn] using given [name], [values] and reified column [type].
6375
*
76+
* Be careful; values are NOT checked to adhere to [type] for efficiency,
77+
* unless you specify [infer].
78+
*
6479
* Note that the column [type] is determined at compile time from the [T] type argument
6580
*
6681
* @param T type of the column
@@ -74,26 +89,56 @@ public interface DataColumn<out T> : BaseColumn<T> {
7489
infer: Infer = Infer.None,
7590
): ValueColumn<T> =
7691
createValueColumn(
77-
name,
78-
values,
79-
getValuesType(
80-
values,
81-
typeOf<T>(),
82-
infer,
83-
),
92+
name = name,
93+
values = values,
94+
type = typeOf<T>(),
95+
infer = infer,
8496
)
8597

98+
/**
99+
* Creates [ColumnGroup] using the given [name] and [df] representing the group of columns.
100+
*
101+
* @param name name of the column group
102+
* @param df the collection of columns representing the column group
103+
*/
86104
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> = ColumnGroupImpl(name, df)
87105

106+
// TODO this shouldn't be here
88107
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> =
89108
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
90109

110+
/**
111+
* Creates [FrameColumn] using the given [name] and list of dataframes [groups].
112+
*
113+
* Be careful; [groups] must be a non-null list of [DataFrames][DataFrame].
114+
* This is NOT checked at runtime for efficiency, nor is the validity of given [schema].
115+
*
116+
* @param name name of the frame column
117+
* @param groups the dataframes to be put in the column
118+
* @param schema an optional (lazily calculated) [DataFrameSchema] representing
119+
* the intersecting schema of [groups]
120+
*/
91121
public fun <T> createFrameColumn(
92122
name: String,
93123
groups: List<DataFrame<T>>,
94124
schema: Lazy<DataFrameSchema>? = null,
95125
): FrameColumn<T> = FrameColumnImpl(name, groups, schema)
96126

127+
/**
128+
* Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in
129+
* [values].
130+
* This is safer but less efficient than the other functions.
131+
*
132+
* Some conversions are done automatically to attempt to unify the values, like:
133+
* - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame] when there are other
134+
* [DataFrames][DataFrame] present in [values]
135+
* - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame]
136+
* etc.
137+
*
138+
* @param name name of the column
139+
* @param values the values to represent each row in the column
140+
* @param nullable optionally specifies whether [values] contains nulls; if `null`, this is inferred.
141+
*/
97142
public fun <T> createWithTypeInference(
98143
name: String,
99144
values: List<T>,
@@ -102,9 +147,21 @@ public interface DataColumn<out T> : BaseColumn<T> {
102147

103148
/**
104149
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
105-
* [type] without checking the actual values in [values].
150+
* [type].
151+
*
152+
* Be careful; values in [values] are NOT checked to adhere to the given [type], nor
153+
* do we check whether there are nulls among the values when the given type is [DataFrame]
154+
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
155+
* When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
156+
*
157+
* This may be unsafe but is more efficient than [createWithTypeInference].
158+
*
159+
* @param name the name of the column
160+
* @param values the values to represent each row in the column
161+
* @param type the (unchecked) common type of [values]
162+
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
106163
*/
107-
public fun <T> create(
164+
public fun <T> createUnsafe(
108165
name: String,
109166
values: List<T>,
110167
type: KType,
@@ -118,11 +175,27 @@ public interface DataColumn<out T> : BaseColumn<T> {
118175

119176
/**
120177
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
121-
* type [T] without checking the actual values in [values].
178+
* type [T].
179+
*
180+
* Be careful; values in [values] are NOT checked to adhere to the given type [T], nor
181+
* do we check whether there are nulls among the values when the given type is [DataFrame]
182+
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
183+
* When [T] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
184+
*
185+
* This may be unsafe but is more efficient than [createWithTypeInference].
186+
*
187+
* @param T the (unchecked) common type of [values]
188+
* @param name the name of the column
189+
* @param values the values to represent each row in the column
190+
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
122191
*/
123-
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> =
124-
create(name, values, typeOf<T>(), infer)
192+
public inline fun <reified T> createUnsafe(
193+
name: String,
194+
values: List<T>,
195+
infer: Infer = Infer.None,
196+
): DataColumn<T> = createUnsafe(name, values, typeOf<T>(), infer)
125197

198+
/** Creates an empty [DataColumn] with given [name]. */
126199
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
127200
}
128201

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -234,16 +234,22 @@ public enum class Infer {
234234

235235
/**
236236
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type].
237+
*
238+
* This is the most efficient but least safe option.
237239
*/
238240
None,
239241

240242
/**
241-
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of *null* values.
243+
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type],
244+
* but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of `null` values.
242245
*/
243246
Nulls,
244247

245248
/**
246-
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound.
249+
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using an optionally provided
250+
* base type as an upper bound.
251+
*
252+
* This is the least efficient but safest option.
247253
*/
248254
Type,
249255

@@ -306,17 +312,17 @@ public inline fun <reified T> Iterable<T>.toColumn(name: String = "", infer: Inf
306312
if (infer == Infer.Type) {
307313
DataColumn.createWithTypeInference(name, asList())
308314
} else {
309-
DataColumn.create(name, asList(), typeOf<T>(), infer)
315+
DataColumn.createUnsafe(name, asList(), typeOf<T>(), infer)
310316
}.forceResolve()
311317

312318
public inline fun <reified T> Iterable<*>.toColumnOf(name: String = ""): DataColumn<T> =
313-
DataColumn.create(name, asList() as List<T>, typeOf<T>()).forceResolve()
319+
DataColumn.createUnsafe(name, asList() as List<T>, typeOf<T>()).forceResolve()
314320

315321
public inline fun <reified T> Iterable<T>.toColumn(ref: ColumnReference<T>): DataColumn<T> =
316-
DataColumn.create(ref.name(), asList()).forceResolve()
322+
DataColumn.createUnsafe(ref.name(), asList()).forceResolve()
317323

318324
public inline fun <reified T> Iterable<T>.toColumn(property: KProperty<T>): DataColumn<T> =
319-
DataColumn.create(property.columnName, asList()).forceResolve()
325+
DataColumn.createUnsafe(property.columnName, asList()).forceResolve()
320326

321327
public fun Iterable<String>.toPath(): ColumnPath = ColumnPath(asList())
322328

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ public fun dataFrameOf(header: Iterable<String>, values: Iterable<Any?>): DataFr
290290
public inline fun <T, reified C> dataFrameOf(header: Iterable<T>, fill: (T) -> Iterable<C>): DataFrame<*> =
291291
header.map { value ->
292292
fill(value).asList().let {
293-
DataColumn.create(value.toString(), it)
293+
DataColumn.createUnsafe(value.toString(), it)
294294
}
295295
}.toDataFrame()
296296

@@ -325,7 +325,7 @@ public class DataFrameBuilder(private val header: List<String>) {
325325
public inline operator fun <reified T> invoke(crossinline valuesBuilder: (String) -> Iterable<T>): DataFrame<*> =
326326
withColumns { name ->
327327
valuesBuilder(name).let {
328-
DataColumn.create(
328+
DataColumn.createUnsafe(
329329
name = name,
330330
values = it.asList(),
331331
)
@@ -345,15 +345,15 @@ public class DataFrameBuilder(private val header: List<String>) {
345345

346346
public inline fun <reified C> fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> =
347347
withColumns { name ->
348-
DataColumn.create(
348+
DataColumn.createUnsafe(
349349
name,
350350
List(nrow) { init(it, name) },
351351
)
352352
}
353353

354354
public inline fun <reified C> fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> =
355355
withColumns { name ->
356-
DataColumn.create(
356+
DataColumn.createUnsafe(
357357
name = name,
358358
values = List(nrow, init),
359359
)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,20 @@ public inline fun <T, reified R> DataColumn<T>.map(
3434
crossinline transform: (T) -> R,
3535
): DataColumn<R> {
3636
val newValues = Array(size()) { transform(get(it)) }.asList()
37-
return DataColumn.create(name(), newValues, typeOf<R>(), infer)
37+
return DataColumn.createUnsafe(name(), newValues, typeOf<R>(), infer)
3838
}
3939

4040
public fun <T, R> DataColumn<T>.map(type: KType, infer: Infer = Infer.Nulls, transform: (T) -> R): DataColumn<R> {
4141
val values = Array<Any?>(size()) { transform(get(it)) }.asList()
42-
return DataColumn.create(name(), values, type, infer).cast()
42+
return DataColumn.createUnsafe(name(), values, type, infer).cast()
4343
}
4444

4545
public inline fun <T, reified R> DataColumn<T>.mapIndexed(
4646
infer: Infer = Infer.Nulls,
4747
crossinline transform: (Int, T) -> R,
4848
): DataColumn<R> {
4949
val newValues = Array(size()) { transform(it, get(it)) }.asList()
50-
return DataColumn.create(name(), newValues, typeOf<R>(), infer)
50+
return DataColumn.createUnsafe(name(), newValues, typeOf<R>(), infer)
5151
}
5252

5353
public fun <T, R> DataColumn<T>.mapIndexed(
@@ -56,7 +56,7 @@ public fun <T, R> DataColumn<T>.mapIndexed(
5656
transform: (Int, T) -> R,
5757
): DataColumn<R> {
5858
val values = Array<Any?>(size()) { transform(it, get(it)) }.asList()
59-
return DataColumn.create(name(), values, type, infer).cast()
59+
return DataColumn.createUnsafe(name(), values, type, infer).cast()
6060
}
6161

6262
// endregion

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ private interface CommonDataColumnSortWithDocs
9494

9595
/** @include [CommonDataColumnSortWithDocs] */
9696
public fun <T, C : DataColumn<T>> C.sortWith(comparator: Comparator<T>): C =
97-
DataColumn.create(name, values().sortedWith(comparator), type) as C
97+
DataColumn.createUnsafe(name, values().sortedWith(comparator), type) as C
9898

9999
/** @include [CommonDataColumnSortWithDocs] */
100100
public fun <T, C : DataColumn<T>> C.sortWith(comparator: (T, T) -> Int): C = sortWith(Comparator(comparator))

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ public fun <T> DataColumn<T>.valueCounts(
4040
}
4141
if (dropNA) grouped = grouped.filter { !it.first.isNA }
4242
val nulls = if (dropNA) false else hasNulls()
43-
val values = DataColumn.create(name(), grouped.map { it.first }, type().withNullability(nulls))
43+
val values = DataColumn.createUnsafe(name(), grouped.map { it.first }, type().withNullability(nulls))
4444
val countName = if (resultColumn == name()) resultColumn + "1" else resultColumn
45-
val counts = DataColumn.create(countName, grouped.map { it.second }, typeOf<Int>())
45+
val counts = DataColumn.createUnsafe(countName, grouped.map { it.second }, typeOf<Int>())
4646
return dataFrameOf(values, counts).cast()
4747
}
4848

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ internal fun DataColumn<String?>.tryParseImpl(options: ParserOptions?): DataColu
525525
if (type.jvmErasure == String::class && !nullStringParsed) {
526526
return this // nothing parsed
527527
}
528-
return DataColumn.create(name(), parsedValues, type)
528+
return DataColumn.createUnsafe(name(), parsedValues, type)
529529
}
530530

531531
internal fun <T> DataColumn<String?>.parse(parser: StringParser<T>, options: ParserOptions?): DataColumn<T?> {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,17 @@ internal open class FrameColumnImpl<T> constructor(
3333
// This only runs with `kotlin.dataframe.debug=true` in gradle.properties.
3434
if (BuildConfig.DEBUG) {
3535
require(!values.anyNull()) { "FrameColumn cannot null values." }
36+
37+
// val schema = columnSchema?.value
38+
// ?: values.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas()
39+
//
40+
// for (df in values) {
41+
// val dfSchema = df.schema()
42+
// if (dfSchema.columns.isEmpty()) continue
43+
// require(dfSchema.compare(schema).isDerivedOrEqual()) {
44+
// "DataFrames in FrameColumn don't adhere to the given schema:\nGiven:\n$schema\n\nActual:\n$dfSchema"
45+
// }
46+
// }
3647
}
3748
}
3849

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ internal fun <T, R> ColumnsContainer<T>.newColumn(
5959
val df = this as? DataFrame<T> ?: dataFrameOf(columns()).cast()
6060
val (nullable, values) = computeValues(df, expression)
6161
return when (infer) {
62-
Infer.Nulls -> DataColumn.create(
62+
Infer.Nulls -> DataColumn.createUnsafe(
6363
name = name,
6464
values = values,
6565
type = type.withNullability(nullable).replaceGenericTypeParametersWithUpperbound(),
@@ -72,7 +72,7 @@ internal fun <T, R> ColumnsContainer<T>.newColumn(
7272
nullable = nullable,
7373
)
7474

75-
Infer.None -> DataColumn.create(
75+
Infer.None -> DataColumn.createUnsafe(
7676
name = name,
7777
values = values,
7878
type = type.replaceGenericTypeParametersWithUpperbound(),

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class DataColumns {
4444
}
4545

4646
shouldThrow<IllegalArgumentException> {
47-
DataColumn.create(
47+
DataColumn.createUnsafe(
4848
"",
4949
listOf(dataFrameOf("a")(1), null),
5050
)

0 commit comments

Comments
 (0)