Skip to content

Commit 1633e2c

Browse files
committed
added tests and some small fixes for dataFrameOf constructors
1 parent 88ee08d commit 1633e2c

File tree

2 files changed

+206
-18
lines changed

2 files changed

+206
-18
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 35 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -285,8 +285,10 @@ public fun dataFrameOf(vararg columns: AnyBaseCol): DataFrame<*> = dataFrameOf(c
285285
@Interpretable("DataFrameOf0")
286286
public fun dataFrameOf(vararg header: String): DataFrameBuilder = dataFrameOf(header.toList())
287287

288-
public inline fun <reified C> dataFrameOf(vararg header: String, fill: (String) -> Iterable<C>): DataFrame<*> =
289-
dataFrameOf(header.asIterable(), fill)
288+
public inline fun <reified C> dataFrameOf(
289+
vararg header: String,
290+
crossinline fill: (String) -> Iterable<C>,
291+
): DataFrame<*> = dataFrameOf(header.asIterable()).invoke(fill)
290292

291293
public fun dataFrameOf(header: Iterable<String>): DataFrameBuilder = DataFrameBuilder(header.asList())
292294

@@ -300,9 +302,12 @@ public fun dataFrameOf(header: Iterable<String>, values: Iterable<Any?>): DataFr
300302

301303
public inline fun <T, reified C> dataFrameOf(header: Iterable<T>, fill: (T) -> Iterable<C>): DataFrame<*> =
302304
header.map { value ->
303-
fill(value).asList().let {
304-
DataColumn.createUnsafe(value.toString(), it)
305-
}
305+
createColumnGuessingType(
306+
name = value.toString(),
307+
values = fill(value).asList(),
308+
suggestedType = typeOf<C>(),
309+
guessTypeWithSuggestedAsUpperbound = true,
310+
)
306311
}.toDataFrame()
307312

308313
public fun dataFrameOf(header: CharProgression): DataFrameBuilder = dataFrameOf(header.map { it.toString() })
@@ -331,16 +336,19 @@ public class DataFrameBuilder(private val header: List<String>) {
331336

332337
public operator fun invoke(args: Sequence<Any?>): DataFrame<*> = invoke(*args.toList().toTypedArray())
333338

334-
public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> = header.map(columnBuilder).toDataFrame()
339+
public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> =
340+
header
341+
.map { columnBuilder(it) named it } // create the columns and make sure to rename them to the given header
342+
.toDataFrame()
335343

336344
public inline operator fun <reified T> invoke(crossinline valuesBuilder: (String) -> Iterable<T>): DataFrame<*> =
337345
withColumns { name ->
338-
valuesBuilder(name).let {
339-
DataColumn.createUnsafe(
340-
name = name,
341-
values = it.asList(),
342-
)
343-
}
346+
createColumnGuessingType(
347+
name = name,
348+
values = valuesBuilder(name).asList(),
349+
suggestedType = typeOf<T>(),
350+
guessTypeWithSuggestedAsUpperbound = true,
351+
)
344352
}
345353

346354
public inline fun <reified C> fill(nrow: Int, value: C): DataFrame<*> =
@@ -352,30 +360,39 @@ public class DataFrameBuilder(private val header: List<String>) {
352360
)
353361
}
354362

363+
public fun fill(nrow: Int, dataFrame: AnyFrame): DataFrame<*> =
364+
withColumns { name ->
365+
DataColumn.createFrameColumn(
366+
name = name,
367+
groups = List(nrow) { dataFrame },
368+
schema = lazy { dataFrame.schema() },
369+
)
370+
}
371+
355372
public inline fun <reified C> nulls(nrow: Int): DataFrame<*> = fill<C?>(nrow, null)
356373

357374
public inline fun <reified C> fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> =
358375
withColumns { name ->
359-
DataColumn.createUnsafe(
360-
name,
361-
List(nrow) { init(it, name) },
376+
DataColumn.createWithTypeInference(
377+
name = name,
378+
values = List(nrow) { init(it, name) },
362379
)
363380
}
364381

365382
public inline fun <reified C> fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> =
366383
withColumns { name ->
367-
DataColumn.createUnsafe(
384+
DataColumn.createWithTypeInference(
368385
name = name,
369386
values = List(nrow, init),
370387
)
371388
}
372389

373-
private inline fun <reified C> fillNotNull(nrow: Int, crossinline init: (Int) -> C) =
390+
private inline fun <reified C> fillNotNull(nrow: Int, crossinline init: (Int) -> C & Any) =
374391
withColumns { name ->
375392
DataColumn.createValueColumn(
376393
name = name,
377394
values = List(nrow, init),
378-
type = typeOf<C>(),
395+
type = typeOf<C>().withNullability(false),
379396
)
380397
}
381398

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 171 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,4 +149,175 @@ class ConstructorsTests {
149149
}
150150

151151
// endregion
152+
153+
// region dataFrameOf
154+
@Test
155+
fun `dataFrameOf withColumns`() {
156+
val df = dataFrameOf("value", "value2", "frameCol").withColumns {
157+
when (it) {
158+
"value" -> columnOf(1, 2, 3, null)
159+
160+
"value2" -> columnOf(
161+
columnOf(1, 2),
162+
columnOf(3, 4),
163+
columnOf(5, null),
164+
null,
165+
)
166+
167+
"frameCol" -> columnOf(
168+
dataFrameOf("a", "b")(1, 2),
169+
dataFrameOf("a", "b")(3, 4),
170+
dataFrameOf("a", "b")(5, null),
171+
null,
172+
)
173+
174+
else -> error("Unexpected column name: $it")
175+
}
176+
}
177+
178+
df["value"].type shouldBe typeOf<Int?>()
179+
df["value"].kind() shouldBe ColumnKind.Value
180+
181+
df["value2"].type shouldBe typeOf<DataColumn<Int?>?>()
182+
df["value2"].kind() shouldBe ColumnKind.Value
183+
184+
df["frameCol"].type shouldBe typeOf<DataFrame<*>>()
185+
df["frameCol"].kind() shouldBe ColumnKind.Frame
186+
df["frameCol"].last() shouldBe DataFrame.empty()
187+
}
188+
189+
@Test
190+
fun `dataFrameOf invoke`() {
191+
val df1 = dataFrameOf("value", "value2", "frameCol") {
192+
when (it) {
193+
"value" -> listOf(1, 2, 3, null)
194+
195+
"value2" -> listOf(
196+
columnOf(1, 2),
197+
columnOf(3, 4),
198+
columnOf(5, null),
199+
null,
200+
)
201+
202+
"frameCol" -> listOf(
203+
dataFrameOf("a", "b")(1, 2),
204+
dataFrameOf("a", "b")(3, 4),
205+
dataFrameOf("a", "b")(5, null),
206+
null,
207+
)
208+
209+
else -> error("Unexpected column name: $it")
210+
}
211+
}
212+
213+
val df2 = dataFrameOf("value", "value2", "frameCol").invoke {
214+
when (it) {
215+
"value" -> listOf(1, 2, 3, null)
216+
217+
"value2" -> listOf(columnOf(1, 2), columnOf(3, 4), columnOf(5, null), null)
218+
219+
"frameCol" -> listOf(
220+
dataFrameOf("a", "b")(1, 2),
221+
dataFrameOf("a", "b")(3, 4),
222+
dataFrameOf("a", "b")(5, null),
223+
null,
224+
)
225+
226+
else -> error("Unexpected column name: $it")
227+
}
228+
}
229+
230+
val names = listOf("value", "value2", "frameCol")
231+
val df3 = dataFrameOf(listOf(1, 2, 3)) {
232+
when (it) {
233+
1 -> listOf(1, 2, 3, null)
234+
235+
2 -> listOf(columnOf(1, 2), columnOf(3, 4), columnOf(5, null), null)
236+
237+
3 -> listOf(
238+
dataFrameOf("a", "b")(1, 2),
239+
dataFrameOf("a", "b")(3, 4),
240+
dataFrameOf("a", "b")(5, null),
241+
null,
242+
)
243+
244+
else -> error("Unexpected column name: $it")
245+
}
246+
}.rename { all() }.into { names[it.name.toInt() - 1] }
247+
248+
val df4 = dataFrameOf(names).invoke {
249+
when (it) {
250+
"value" -> listOf(1, 2, 3, null)
251+
252+
"value2" -> listOf(columnOf(1, 2), columnOf(3, 4), columnOf(5, null), null)
253+
254+
"frameCol" -> listOf(
255+
dataFrameOf("a", "b")(1, 2),
256+
dataFrameOf("a", "b")(3, 4),
257+
dataFrameOf("a", "b")(5, null),
258+
null,
259+
)
260+
261+
else -> error("Unexpected column name: $it")
262+
}
263+
}
264+
265+
df1 shouldBe df2
266+
df2 shouldBe df3
267+
df3 shouldBe df4
268+
269+
df1["value"].type shouldBe typeOf<Int?>()
270+
df1["value"].kind() shouldBe ColumnKind.Value
271+
272+
df1["value2"].type shouldBe typeOf<DataColumn<*>?>()
273+
df1["value2"].kind() shouldBe ColumnKind.Value
274+
275+
df1["frameCol"].type shouldBe typeOf<DataFrame<*>>()
276+
df1["frameCol"].kind() shouldBe ColumnKind.Frame
277+
df1["frameCol"].last() shouldBe DataFrame.empty()
278+
}
279+
280+
@Test
281+
fun `dataFrameOf fill`() {
282+
val df1 = dataFrameOf("a", "b").fill(2, "lol")
283+
284+
df1["a"].values shouldBe listOf("lol", "lol")
285+
df1["a"].kind() shouldBe ColumnKind.Value
286+
df1["b"].values shouldBe listOf("lol", "lol")
287+
df1["b"].kind() shouldBe ColumnKind.Value
288+
289+
val df2 = dataFrameOf("a", "b").fill(2, dataFrameOf("a", "b")(1, 2))
290+
df2["a"].type() shouldBe typeOf<DataFrame<*>>()
291+
df2["a"].kind() shouldBe ColumnKind.Frame
292+
df2["b"].type() shouldBe typeOf<DataFrame<*>>()
293+
df2["b"].kind() shouldBe ColumnKind.Frame
294+
295+
val df3 = dataFrameOf("a", "b").fill(2) { it }
296+
df3["a"].values shouldBe listOf(0, 1)
297+
df3["a"].kind() shouldBe ColumnKind.Value
298+
df3["b"].values shouldBe listOf(0, 1)
299+
df3["b"].kind() shouldBe ColumnKind.Value
300+
301+
val df4 = dataFrameOf("a", "b").fill(2) { dataFrameOf("a", "b")(1, 2) }
302+
df4["a"].type() shouldBe typeOf<DataFrame<*>>()
303+
df4["a"].kind() shouldBe ColumnKind.Frame
304+
df4["b"].type() shouldBe typeOf<DataFrame<*>>()
305+
df4["b"].kind() shouldBe ColumnKind.Frame
306+
307+
val a = listOf(1, 2)
308+
val b = listOf(dataFrameOf("a", "b")(1, 2), null)
309+
val df5 = dataFrameOf("a", "b").fillIndexed(2) { it, colName ->
310+
when (colName) {
311+
"a" -> a[it]
312+
"b" -> b[it]
313+
else -> error("Unexpected column name: $colName")
314+
}
315+
}
316+
df5["a"].values shouldBe a
317+
df5["a"].kind() shouldBe ColumnKind.Value
318+
df5["b"].values shouldBe listOf(b[0], DataFrame.empty())
319+
df5["b"].kind() shouldBe ColumnKind.Frame
320+
}
321+
322+
// endregion
152323
}

0 commit comments

Comments
 (0)