Skip to content

Commit 9ce56e4

Browse files
committed
cleaned colsOf overloads, fixed first {} and last {} in column selection dsl with docs (still need to add tests)
1 parent aaa9dde commit 9ce56e4

File tree

10 files changed

+363
-253
lines changed

10 files changed

+363
-253
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 174 additions & 168 deletions
Large diffs are not rendered by default.

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,13 @@ public enum class Infer {
187187
/**
188188
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound.
189189
*/
190-
Type
190+
Type;
191+
192+
/**
193+
* @param [infer\] [An enum][Infer] that indicates how [DataColumn.type] should be calculated.
194+
* Either [None], [Nulls], or [Type].
195+
*/
196+
internal interface Param
191197
}
192198

193199
/**

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ public fun <T> DataFrame<T>.addAll(dataFrames: Iterable<AnyFrame>): DataFrame<T>
8484
// region Create and add a single column
8585

8686
/**
87-
* Receiver that is used in [add] and [update] operations to access new (added or updated) column value in preceding row.
87+
* Receiver that is used by the [AddExpression] (for instance in the [add] and [update] operations)
88+
* to access new (added or updated) column value in preceding row.
8889
*/
8990
public interface AddDataRow<out T> : DataRow<T> {
9091

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import org.jetbrains.kotlinx.dataframe.*
4+
import org.jetbrains.kotlinx.dataframe.ColumnExpression
45
import org.jetbrains.kotlinx.dataframe.api.Update.Usage
56
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
67
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
@@ -331,14 +332,14 @@ public fun <T, C> Update<T, C>.at(rowRange: IntRange): Update<T, C> = where { in
331332
*
332333
* For example:
333334
*
334-
* `df.`[update][update]` { age ` { row, col ->`
335+
* `df.`[update][update]` { age }.`[perRowCol][perRowCol]` { row, col ->`
335336
*
336337
* `row.age / col.`[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true)`
337338
*
338339
* `}`
339340
*
340341
*
341-
* .`[perRowCol][perRowCol]}
342+
*
342343
*
343344
* ## See Also
344345
* - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package org.jetbrains.kotlinx.dataframe.documentation
2+
3+
import org.jetbrains.kotlinx.dataframe.api.*
4+
5+
/**
6+
* ## Column Expression
7+
* In many DSLs, the lambda `expr {}` can be used to
8+
* create a temporary new column by defining an expression to fill up each row.
9+
*
10+
* These DSLs include (but are not limited to):
11+
* [The Add DSL][AddDsl.expr], [The Columns Selection DSL][ColumnsSelectionDsl.expr], and
12+
* [The Create DataFrame DSL][CreateDataFrameDsl.expr].
13+
*
14+
* The `expr {}` call functions like a mapping statement iterating over the object it's called on.
15+
*/
16+
internal interface ColumnExpression {
17+
18+
/**
19+
* ## Column Expression
20+
* Create a temporary new column by defining an expression to fill up each row.
21+
*
22+
* See [Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ColumnExpression] for more information.
23+
*/
24+
interface CommonDocs
25+
}
26+
27+
/** [Column Expression][ColumnExpression] */
28+
internal interface ColumnExpressionLink

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 125 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
1111
import org.jetbrains.kotlinx.dataframe.DataFrame
1212
import org.jetbrains.kotlinx.dataframe.DataRow
1313
import org.jetbrains.kotlinx.dataframe.Predicate
14+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
1415
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
1516
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
1617
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
@@ -23,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
2324
import org.jetbrains.kotlinx.dataframe.columns.renamedReference
2425
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
2526
import org.jetbrains.kotlinx.dataframe.documentation.AccessApi
27+
import org.jetbrains.kotlinx.dataframe.documentation.ColumnExpression
2628
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
2729
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
2830
import org.jetbrains.kotlinx.dataframe.hasNulls
@@ -41,7 +43,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.top
4143
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
4244
import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle
4345
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs
44-
import org.jetbrains.kotlinx.dataframe.documentation.*
46+
import org.jetbrains.kotlinx.dataframe.io.read
4547
import kotlin.reflect.KProperty
4648
import kotlin.reflect.KType
4749
import kotlin.reflect.typeOf
@@ -196,8 +198,97 @@ internal interface ColumnsSelectionDslLink
196198
/** @include [CommonColumnSelectionDocs] */
197199
public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColumn<DataRow<T>> {
198200

199-
public fun <C> ColumnSet<C>.first(condition: ColumnFilter<C>): SingleColumn<C> =
200-
transform { listOf(it.first(condition)) }.single()
201+
/**
202+
* ## First
203+
* Returns the first column in this [ColumnSet] that adheres to the given [condition\].
204+
*
205+
* For example:
206+
*
207+
* {@includeArg [Examples]}
208+
*
209+
* @param [condition\] The [ColumnFilter] condition that the column must adhere to.
210+
* @return A [SingleColumn] containing the first column that adheres to the given [condition\].
211+
* @see [last\]
212+
*/
213+
private interface CommonFirstDocs {
214+
215+
/** Examples key */
216+
interface Examples
217+
}
218+
219+
/**
220+
* @include [CommonFirstDocs]
221+
* @arg [CommonFirstDocs.Examples]
222+
* `df.`[select][select]` { `[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
223+
*
224+
* `df.`[select][select]` { myColumnGroup.`[first][first]`() }`
225+
*/
226+
public fun <C> ColumnSet<C>.first(condition: ColumnFilter<C> = { true }): SingleColumn<C> =
227+
children { condition(it.cast()) }[0].cast()
228+
229+
/**
230+
* @include [CommonFirstDocs]
231+
* @arg [CommonFirstDocs.Examples]
232+
* `df.`[select][select]` { "myColumnGroup".`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
233+
*/
234+
public fun String.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> =
235+
toColumnAccessor().first(condition)
236+
237+
/**
238+
* @include [CommonFirstDocs]
239+
* @arg [CommonFirstDocs.Examples]
240+
* `df.`[select][select]` { Type::myColumnGroup.`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
241+
*/
242+
public fun <C> KProperty<C>.first(condition: ColumnFilter<C>): SingleColumn<*> =
243+
toColumnAccessor().first(condition)
244+
245+
246+
/**
247+
* ## Last
248+
* Returns the last column in this [ColumnSet] that adheres to the given [condition\].
249+
*
250+
* For example:
251+
*
252+
* {@includeArg [Examples]}
253+
*
254+
* @param [condition\] The [ColumnFilter] condition that the column must adhere to.
255+
* @return A [SingleColumn] containing the last column that adheres to the given [condition\].
256+
* @see [first\]
257+
*/
258+
private interface CommonLastDocs {
259+
260+
/** Examples key */
261+
interface Examples
262+
}
263+
264+
/**
265+
* @include [CommonLastDocs]
266+
* @arg [CommonLastDocs.Examples]
267+
* `df.`[select][select]` { `[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
268+
*
269+
* `df.`[select][select]` { myColumnGroup.`[last][last]`() }`
270+
*/
271+
public fun <C> ColumnSet<C>.last(condition: ColumnFilter<C> = { true }): SingleColumn<C> =
272+
children { condition(it.cast()) }
273+
.transform { listOf(it.last()) }
274+
.single()
275+
.cast()
276+
277+
/**
278+
* @include [CommonLastDocs]
279+
* @arg [CommonLastDocs.Examples]
280+
* `df.`[select][select]` { "myColumnGroup".`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
281+
*/
282+
public fun String.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> =
283+
toColumnAccessor().last(condition)
284+
285+
/**
286+
* @include [CommonLastDocs]
287+
* @arg [CommonLastDocs.Examples]
288+
* `df.`[select][select]` { Type::myColumnGroup.`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }`
289+
*/
290+
public fun <C> KProperty<C>.last(condition: ColumnFilter<C>): SingleColumn<*> =
291+
toColumnAccessor().last(condition)
201292

202293
public fun <C> ColumnSet<C>.single(condition: ColumnFilter<C>): SingleColumn<C> =
203294
transform { listOf(it.single(condition)) }.single()
@@ -592,57 +683,47 @@ public interface ColumnsSelectionDsl<out T> : ColumnSelectionDsl<T>, SingleColum
592683

593684
/**
594685
* @include [CommonColsOfDocs]
595-
* Get sub-columns of the column with this name by [type] without a filter.
686+
* Get sub-columns of the column with this name by [type] with or without [filter].
596687
* For example:
597688
*
598689
* `df.`[select][DataFrame.select]` { "myColumnGroup".`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }`
599690
*
600-
* @include [CommonColsOfDocs.Return]
601-
*/
602-
public fun String.colsOf(type: KType): ColumnSet<Any?> = toColumnAccessor().colsOf(type)
603-
604-
/**
605-
* @include [CommonColsOfDocs]
606-
* Get sub-columns of the column with this name by [type] with a [filter].
607-
* For example:
608-
*
609691
* `df.`[select][DataFrame.select]` { "myColumnGroup".`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }`
610692
*
611693
* @include [CommonColsOfDocs.FilterParam]
612-
* @include [CommonColsOfDocs.ReturnFiltered]
694+
* @include [CommonColsOfDocs.Return]
613695
*/
614-
public fun <C> String.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean): ColumnSet<Any?> =
696+
public fun <C> String.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<Any?> =
615697
toColumnAccessor().colsOf(type, filter)
616698

617699
/**
618700
* @include [CommonColsOfDocs]
619-
* Get sub-columns of the column this [KProperty Accessor][KProperty] points to by [type] without a filter.
701+
* Get sub-columns of the column this [KProperty Accessor][KProperty] points to by [type] with or without [filter].
620702
* For example:
621703
*
622704
* `df.`[select][DataFrame.select]` { Type::myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }`
623705
*
624-
* @include [CommonColsOfDocs.Return]
625-
*/
626-
public fun KProperty<*>.colsOf(type: KType): ColumnSet<Any?> = toColumnAccessor().colsOf(type)
627-
628-
/**
629-
* @include [CommonColsOfDocs]
630-
* Get sub-columns of the column this [KProperty Accessor][KProperty] points to by [type] with a [filter].
631-
* For example:
632-
*
633706
* `df.`[select][DataFrame.select]` { Type::myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }`
634707
*
635708
* @include [CommonColsOfDocs.FilterParam]
636-
* @include [CommonColsOfDocs.ReturnFiltered]
709+
* @include [CommonColsOfDocs.Return]
637710
*/
638-
public fun <C> KProperty<*>.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean): ColumnSet<Any?> =
711+
public fun <C> KProperty<*>.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<Any?> =
639712
toColumnAccessor().colsOf(type, filter)
640713
}
641714

642715
/**
643-
* @include [ColumnExpression]
716+
* @include [ColumnExpression.CommonDocs]
644717
*
645-
* TODO
718+
* For example:
719+
*
720+
* `df.`[groupBy][DataFrame.groupBy]` { `[expr][expr]` { firstName.`[length][String.length]` + lastName.`[length][String.length]` } `[named][named]` "nameLength" }`
721+
*
722+
* `df.`[sortBy][DataFrame.sortBy]` { `[expr][expr]` { name.`[length][String.length]` }.`[desc][SortDsl.desc]`() }`
723+
*
724+
* @param [name] The name of the temporary column. Will be empty by default.
725+
* @include [Infer.Param] By default: [Nulls][Infer.Nulls].
726+
* @param [expression] An [AddExpression] to define what each new row of the temporary column should contain.
646727
*/
647728
public inline fun <T, reified R> ColumnsSelectionDsl<T>.expr(
648729
name: String = "",
@@ -699,85 +780,54 @@ internal interface ColsOf
699780
*/
700781
private interface CommonColsOfDocs {
701782

702-
/** @return A [ColumnSet] containing the columns of given type. */
703-
interface Return
704-
705783
/** @return A [ColumnSet] containing the columns of given type that were included by [filter\]. */
706-
interface ReturnFiltered
784+
interface Return
707785

708-
/** @param [filter\] a filter function that takes a column of type [C\] and returns `true` if the column should be included. */
786+
/** @param [filter\] an optional filter function that takes a column of type [C\] and returns `true` if the column should be included. */
709787
interface FilterParam
710788
}
711789

712790
/**
713791
* @include [CommonColsOfDocs]
714-
* Get (sub-)columns by [type] without a filter.
792+
* Get (sub-)columns by [type] with or without [filter].
715793
* For example:
716794
*
717795
* `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }`
718796
*
719-
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }`
720-
*
721-
* @include [CommonColsOfDocs.Return]
722-
*/
723-
public fun ColumnSet<*>.colsOf(type: KType): ColumnSet<Any?> = colsOf(type) { true }
724-
725-
/**
726-
* @include [CommonColsOfDocs]
727-
* Get (sub-)columns by a given type without a filter.
728-
* For example:
729-
*
730-
* `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`<`[Int][Int]`>() }`
731-
*
732-
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`>() }`
733-
*
734-
* @include [CommonColsOfDocs.Return]
735-
*/
736-
public inline fun <reified C> ColumnSet<*>.colsOf(): ColumnSet<C> = colsOf(typeOf<C>()) as ColumnSet<C>
737-
738-
/**
739-
* @include [CommonColsOfDocs]
740-
* Get (sub-)columns by [type] with [filter].
741-
* For example:
742-
*
743-
* `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }`
744-
*
745797
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }`
746798
*
799+
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }`
800+
*
747801
* @include [CommonColsOfDocs.FilterParam]
748-
* @include [CommonColsOfDocs.ReturnFiltered]
802+
* @include [CommonColsOfDocs.Return]
749803
*/
750-
public fun <C> ColumnSet<*>.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean): ColumnSet<C> =
804+
public fun <C> ColumnSet<*>.colsOf(type: KType, filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<C> =
751805
colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as ColumnSet<C>
752806

753807
/**
754808
* @include [CommonColsOfDocs]
755-
* Get (sub-)columns by a given type with filter.
809+
* Get (sub-)columns by a given type with or without [filter].
756810
* For example:
757811
*
758-
* `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`<`[Int][Int]`> { it.`[size][DataColumn.size]` > 10 } }`
812+
* `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`<`[Int][Int]`>() }`
759813
*
760814
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`> { it.`[size][DataColumn.size]` > 10 } }`
761815
*
816+
* `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`>() }`
817+
*
762818
* @include [CommonColsOfDocs.FilterParam]
763-
* @include [CommonColsOfDocs.ReturnFiltered]
819+
* @include [CommonColsOfDocs.Return]
764820
*/
765821
public inline fun <reified C> ColumnSet<*>.colsOf(noinline filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<C> =
766822
colsOf(typeOf<C>(), filter)
767823

768824
/* TODO: [Issue: #325, context receiver support](https://github.com/Kotlin/dataframe/issues/325)
769825
context(ColumnsSelectionDsl)
770-
public inline fun <reified C> KProperty<*>.colsOf(noinline filter: (DataColumn<C>) -> Boolean): ColumnSet<Any?> =
826+
public inline fun <reified C> KProperty<*>.colsOf(noinline filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<Any?> =
771827
colsOf(typeOf<C>(), filter)
772828
773829
context(ColumnsSelectionDsl)
774-
public inline fun <reified C> KProperty<*>.colsOf(): ColumnSet<Any?> =
775-
colsOf(typeOf<C>())
776-
777-
context(ColumnsSelectionDsl)
778-
public inline fun <reified C> String.colsOf(noinline filter: (DataColumn<C>) -> Boolean): ColumnSet<Any?> =
830+
public inline fun <reified C> String.colsOf(noinline filter: (DataColumn<C>) -> Boolean = { true }): ColumnSet<Any?> =
779831
colsOf(typeOf<C>(), filter)
780832
781-
context(ColumnsSelectionDsl)
782-
public inline fun <reified C> String.colsOf(): ColumnSet<Any?> =
783-
colsOf(typeOf<C>()) */
833+
*/

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,13 @@ public enum class Infer {
187187
/**
188188
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound.
189189
*/
190-
Type
190+
Type;
191+
192+
/**
193+
* @param [infer\] [An enum][Infer] that indicates how [DataColumn.type] should be calculated.
194+
* Either [None], [Nulls], or [Type].
195+
*/
196+
internal interface Param
191197
}
192198

193199
/**

0 commit comments

Comments
 (0)