Skip to content

Commit baaa014

Browse files
committed
clarifying some docs based on feedback
1 parent 222086a commit baaa014

File tree

3 files changed

+32
-20
lines changed

3 files changed

+32
-20
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,18 @@ public interface DataColumn<out T> : BaseColumn<T> {
132132
/**
133133
* Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in
134134
* [values].
135+
*
135136
* This is safer but less efficient than the other functions.
136137
*
137-
* Some conversions are done automatically to attempt to unify the values, like:
138-
* - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame] when there are other
139-
* [DataFrames][DataFrame] present in [values]
140-
* - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame]
141-
* etc.
138+
* Some conversions are done automatically to attempt to unify the values.
139+
*
140+
* For instance, when there are other [DataFrames][DataFrame] present in [values], we'll convert:
141+
* - `null` -> [DataFrame.empty]`()`
142+
* - [DataRow] -> single-row [DataFrame]
143+
* - [List][List]`<`[DataRow][DataRow]`<*>>` -> multi-row [DataFrame]
144+
*
145+
* to be able to create a [FrameColumn].
146+
* There are more conversions for other types as well.
142147
*
143148
* @param name name of the column
144149
* @param values the values to represent each row in the column
@@ -163,12 +168,12 @@ public interface DataColumn<out T> : BaseColumn<T> {
163168
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
164169
* [type].
165170
*
171+
* This may be unsafe but is more efficient than [createWithTypeInference].
172+
*
166173
* Be careful: values in [values] are NOT checked to adhere to the given [type], nor
167-
* do we check whether there are nulls among the values when the given type is [DataFrame]
168-
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
169-
* When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
174+
* do we check whether there are unexpected nulls among the values.
170175
*
171-
* This may be unsafe but is more efficient than [createWithTypeInference].
176+
* It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead.
172177
*
173178
* @param name the name of the column
174179
* @param values the values to represent each row in the column
@@ -181,22 +186,24 @@ public interface DataColumn<out T> : BaseColumn<T> {
181186
type: KType,
182187
infer: Infer = Infer.None,
183188
): DataColumn<T> =
184-
when (type.toColumnKind()) {
189+
when (type.toColumnKind()) { // AnyFrame -> Frame, AnyRow? -> Group, else -> Value
185190
ColumnKind.Value -> createValueColumn(name, values, type, infer)
191+
186192
ColumnKind.Group -> createColumnGroup(name, (values as List<AnyRow?>).concat()).asDataColumn().cast()
193+
187194
ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast()
188195
}
189196

190197
/**
191198
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
192199
* type [T].
193200
*
194-
* Be careful; Values in [values] are NOT checked to adhere to the given [type], nor
195-
* do we check whether there are nulls among the values when the given type is [DataFrame]
196-
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
197-
* When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
201+
* This is generally safe, as [T] can be inferred, and more efficient than [createWithTypeInference].
198202
*
199-
* This may be unsafe but is more efficient than [createWithTypeInference].
203+
* Be careful when casting occurs: values in [values] are NOT checked to adhere to the given/inferred type [T],
204+
* nor do we check whether there are unexpected nulls among the values.
205+
*
206+
* It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead.
200207
*
201208
* @param T the (unchecked) common type of [values]
202209
* @param name the name of the column

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
99
import org.jetbrains.kotlinx.dataframe.DataFrame
1010
import org.jetbrains.kotlinx.dataframe.DataRow
1111
import org.jetbrains.kotlinx.dataframe.api.Infer
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
1213
import kotlin.reflect.KClass
1314
import kotlin.reflect.KType
1415
import kotlin.reflect.KTypeParameter
@@ -385,7 +386,12 @@ internal fun <T> getValuesType(values: List<T>, type: KType, infer: Infer): KTyp
385386
}
386387

387388
/**
388-
* Returns the value type of the given [values] sequence.
389+
* Returns the guessed value type of the given [values] sequence.
390+
*
391+
* This function analyzes all [values] once and returns the expected column type.
392+
*
393+
* The resulting column type may need [values] to be converted to the expected type.
394+
* See [createColumnGuessingType] for how to create a column with the guessed type.
389395
*
390396
* @param values the values to guess the type from
391397
* @param upperBound the upper bound of the type to guess

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,8 @@ internal fun <T> createColumnGuessingType(
239239

240240
return when (type.classifier!! as KClass<*>) {
241241
// guessValueType can only return DataRow if all values are `AnyRow?`
242-
// or allColsMakesColGroup == true, all values are `AnyCol`
243-
DataRow::class -> {
242+
// or allColsMakesColGroup == true, and all values are `AnyCol`
243+
DataRow::class ->
244244
if (allColsMakesColGroup && values.firstOrNull() is AnyCol) {
245245
val df = dataFrameOf(values as Iterable<AnyCol>)
246246
DataColumn.createColumnGroup(name, df)
@@ -250,7 +250,6 @@ internal fun <T> createColumnGuessingType(
250250
}.concat()
251251
DataColumn.createColumnGroup(name, df)
252252
}.asDataColumn().cast()
253-
}
254253

255254
DataFrame::class -> {
256255
val frames = values.map {
@@ -312,7 +311,7 @@ internal fun <T> createColumnGuessingType(
312311
// nullable is not given, so we still infer nullability
313312
nullable == null && suggestedType is TypeSuggestion.Use -> Infer.Nulls
314313

315-
// nullability already inferred by guessValueType
314+
// nullability already handled; inferred by guessValueType or explicitly given
316315
else -> Infer.None
317316
},
318317
defaultValue = defaultValue,

0 commit comments

Comments
 (0)