Skip to content

Commit 5690583

Browse files
committed
update groupBy documentation
add docs for moveToTop + emphasize important groupBy use case
1 parent 54d2c74 commit 5690583

File tree

9 files changed

+1863
-99
lines changed

9 files changed

+1863
-99
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,16 @@ import kotlin.reflect.KProperty
1717

1818
// region DataFrame
1919

20+
/**
21+
*
22+
* @param cols key columns; a column for grouping can be created in place:
23+
*
24+
* `df.groupBy { expr("columnName") { "someColumn"<Int>() + 15 } }`
25+
*
26+
* is equivalent to
27+
*
28+
* `df.add("columnName") { "someColumn"<Int>() + 15 }.groupBy("columnName")`
29+
*/
2030
public fun <T> DataFrame<T>.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector<T, *>): GroupBy<T, T> =
2131
groupByImpl(moveToTop, cols)
2232

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,6 @@ class Analyze : TestBase() {
577577
df.groupBy { name }
578578
df.groupBy { city and name.lastName }
579579
df.groupBy { age / 10 named "ageDecade" }
580-
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
581580
// SampleEnd
582581
}
583582

@@ -601,7 +600,6 @@ class Analyze : TestBase() {
601600

602601
df.groupBy { age / 10 named "ageDecade" }
603602

604-
df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
605603
// SampleEnd
606604
}
607605

@@ -612,10 +610,53 @@ class Analyze : TestBase() {
612610
df.groupBy("name")
613611
df.groupBy { "city" and "name"["lastName"] }
614612
df.groupBy { "age"<Int>() / 10 named "ageDecade" }
613+
// SampleEnd
614+
}
615+
616+
@Test
617+
@TransformDataFrameExpressions
618+
fun groupByExpr_properties() {
619+
// SampleStart
620+
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
621+
// SampleEnd
622+
}
623+
624+
@Test
625+
@TransformDataFrameExpressions
626+
fun groupByExpr_accessors() {
627+
// SampleStart
628+
val name by columnGroup()
629+
val lastName by name.column<String>()
630+
val firstName by name.column<String>()
631+
632+
df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
633+
// SampleEnd
634+
}
635+
636+
@Test
637+
@TransformDataFrameExpressions
638+
fun groupByExpr_strings() {
639+
// SampleStart
615640
df.groupBy { expr { "name"["firstName"]<String>().length + "name"["lastName"]<String>().length } named "nameLength" }
616641
// SampleEnd
617642
}
618643

644+
@Test
645+
@TransformDataFrameExpressions
646+
fun groupByMoveToTop() {
647+
// SampleStart
648+
df.groupBy(moveToTop = true) { name.lastName }
649+
// SampleEnd
650+
}
651+
652+
@Test
653+
@TransformDataFrameExpressions
654+
fun groupByMoveToTopFalse() {
655+
// SampleStart
656+
df.groupBy(moveToTop = false) { name.lastName }
657+
// SampleEnd
658+
}
659+
619660
@Test
620661
@TransformDataFrameExpressions
621662
fun dataFrameToGroupBy() {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,16 @@ import kotlin.reflect.KProperty
1717

1818
// region DataFrame
1919

20+
/**
21+
*
22+
* @param cols key columns; a column for grouping can be created in place:
23+
*
24+
* `df.groupBy { expr("columnName") { "someColumn"<Int>() + 15 } }`
25+
*
26+
* is equivalent to
27+
*
28+
* `df.add("columnName") { "someColumn"<Int>() + 15 }.groupBy("columnName")`
29+
*/
2030
public fun <T> DataFrame<T>.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector<T, *>): GroupBy<T, T> =
2131
groupByImpl(moveToTop, cols)
2232

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,6 @@ class Analyze : TestBase() {
577577
df.groupBy { name }
578578
df.groupBy { city and name.lastName }
579579
df.groupBy { age / 10 named "ageDecade" }
580-
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
581580
// SampleEnd
582581
}
583582

@@ -601,7 +600,6 @@ class Analyze : TestBase() {
601600

602601
df.groupBy { age / 10 named "ageDecade" }
603602

604-
df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
605603
// SampleEnd
606604
}
607605

@@ -612,10 +610,53 @@ class Analyze : TestBase() {
612610
df.groupBy("name")
613611
df.groupBy { "city" and "name"["lastName"] }
614612
df.groupBy { "age"<Int>() / 10 named "ageDecade" }
613+
// SampleEnd
614+
}
615+
616+
@Test
617+
@TransformDataFrameExpressions
618+
fun groupByExpr_properties() {
619+
// SampleStart
620+
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
621+
// SampleEnd
622+
}
623+
624+
@Test
625+
@TransformDataFrameExpressions
626+
fun groupByExpr_accessors() {
627+
// SampleStart
628+
val name by columnGroup()
629+
val lastName by name.column<String>()
630+
val firstName by name.column<String>()
631+
632+
df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
633+
// SampleEnd
634+
}
635+
636+
@Test
637+
@TransformDataFrameExpressions
638+
fun groupByExpr_strings() {
639+
// SampleStart
615640
df.groupBy { expr { "name"["firstName"]<String>().length + "name"["lastName"]<String>().length } named "nameLength" }
616641
// SampleEnd
617642
}
618643

644+
@Test
645+
@TransformDataFrameExpressions
646+
fun groupByMoveToTop() {
647+
// SampleStart
648+
df.groupBy(moveToTop = true) { name.lastName }
649+
// SampleEnd
650+
}
651+
652+
@Test
653+
@TransformDataFrameExpressions
654+
fun groupByMoveToTopFalse() {
655+
// SampleStart
656+
df.groupBy(moveToTop = false) { name.lastName }
657+
// SampleEnd
658+
}
659+
619660
@Test
620661
@TransformDataFrameExpressions
621662
fun dataFrameToGroupBy() {

docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Analyze.groupBy.html

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,6 @@
188188

189189

190190

191-
192-
193-
194-
195-
196191
</style>
197192
</head>
198193
<body>
@@ -245,23 +240,6 @@
245240
<summary>Output GroupBy</summary>
246241
<table class="dataframe" id="df_19"></table>
247242

248-
<p class="dataframe_description"></p>
249-
</details>
250-
</details>
251-
<br>
252-
<details>
253-
<summary>df.groupBy { expr { name.firstName.length + name.lastName.length } named &quot;nameLength&quot; }</summary>
254-
255-
<details>
256-
<summary>Input DataFrame: rowsCount = 7, columnsCount = 5</summary>
257-
<table class="dataframe" id="df_24"></table>
258-
259-
<p class="dataframe_description"></p>
260-
</details>
261-
<details>
262-
<summary>Output GroupBy</summary>
263-
<table class="dataframe" id="df_25"></table>
264-
265243
<p class="dataframe_description"></p>
266244
</details>
267245
</details>
@@ -816,73 +794,5 @@
816794
call_DataFrame(function() { DataFrame.renderTable(19) });
817795

818796

819-
820-
/*<!--*/
821-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"firstName: String\">firstName</span>", children: [], rightAlign: false, values: ["Alice","Bob","Charlie","Charlie","Bob","Alice","Charlie"] },
822-
{ name: "<span title=\"lastName: String\">lastName</span>", children: [], rightAlign: false, values: ["Cooper","Dylan","Daniels","Chaplin","Marley","Wolf","Byrd"] },
823-
{ name: "<span title=\"name: DataRow<*>\">name</span>", children: [0, 1], rightAlign: false, values: ["<span class=\"formatted\" title=\"firstName: Alice\nlastName: Cooper\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Alice<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Cooper<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Bob\nlastName: Dylan\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Bob<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Dylan<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Daniels\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Dan<span class=\"structural\">...</span><span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Chaplin\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Cha<span class=\"structural\">...</span><span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Bob\nlastName: Marley\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Bob<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Marley<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Alice\nlastName: Wolf\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Alice<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Wolf<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Byrd\"><span 
class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Byrd<span class=\"structural\"> }</span></span>"] },
824-
{ name: "<span title=\"age: Int\">age</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">15</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">45</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">20</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">40</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">30</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">20</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">30</span></span>"] },
825-
{ name: "<span title=\"city: String?\">city</span>", children: [], rightAlign: false, values: ["London","Dubai","Moscow","Milan","Tokyo","<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>","Moscow"] },
826-
{ name: "<span title=\"weight: Int?\">weight</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">54</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">87</span></span>","<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>","<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">68</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">55</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">90</span></span>"] },
827-
{ name: "<span title=\"isHappy: Boolean\">isHappy</span>", children: [], rightAlign: false, values: ["true","true","false","true","true","false","true"] },
828-
], id: 24, rootId: 24, totalRows: 7 } ) });
829-
/*-->*/
830-
831-
call_DataFrame(function() { DataFrame.renderTable(24) });
832-
833-
834-
/*<!--*/
835-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"nameLength: Int\">nameLength</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">11</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">8</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">14</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">9</span></span>"] },
836-
{ name: "<span title=\"group: DataFrame<*>\">group</span>", children: [], rightAlign: false, values: [{ frameId: 26, value: "<b>DataFrame 2 x 5</b>" },{ frameId: 27, value: "<b>DataFrame 1 x 5</b>" },{ frameId: 28, value: "<b>DataFrame 2 x 5</b>" },{ frameId: 29, value: "<b>DataFrame 2 x 5</b>" }] },
837-
], id: 25, rootId: 25, totalRows: 4 } ) });
838-
/*-->*/
839-
840-
/*<!--*/
841-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"firstName: String\">firstName</span>", children: [], rightAlign: false, values: ["Alice","Charlie"] },
842-
{ name: "<span title=\"lastName: String\">lastName</span>", children: [], rightAlign: false, values: ["Cooper","Byrd"] },
843-
{ name: "<span title=\"name: DataRow<*>\">name</span>", children: [0, 1], rightAlign: false, values: ["<span class=\"formatted\" title=\"firstName: Alice\nlastName: Cooper\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Alice<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Cooper<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Byrd\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Byrd<span class=\"structural\"> }</span></span>"] },
844-
{ name: "<span title=\"age: Int\">age</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">15</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">30</span></span>"] },
845-
{ name: "<span title=\"city: String\">city</span>", children: [], rightAlign: false, values: ["London","Moscow"] },
846-
{ name: "<span title=\"weight: Int\">weight</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">54</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">90</span></span>"] },
847-
{ name: "<span title=\"isHappy: Boolean\">isHappy</span>", children: [], rightAlign: false, values: ["true","true"] },
848-
], id: 26, rootId: 25, totalRows: 2 } ) });
849-
/*-->*/
850-
851-
/*<!--*/
852-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"firstName: String\">firstName</span>", children: [], rightAlign: false, values: ["Bob"] },
853-
{ name: "<span title=\"lastName: String\">lastName</span>", children: [], rightAlign: false, values: ["Dylan"] },
854-
{ name: "<span title=\"name: DataRow<*>\">name</span>", children: [0, 1], rightAlign: false, values: ["<span class=\"formatted\" title=\"firstName: Bob\nlastName: Dylan\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Bob<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Dylan<span class=\"structural\"> }</span></span>"] },
855-
{ name: "<span title=\"age: Int\">age</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">45</span></span>"] },
856-
{ name: "<span title=\"city: String\">city</span>", children: [], rightAlign: false, values: ["Dubai"] },
857-
{ name: "<span title=\"weight: Int\">weight</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">87</span></span>"] },
858-
{ name: "<span title=\"isHappy: Boolean\">isHappy</span>", children: [], rightAlign: false, values: ["true"] },
859-
], id: 27, rootId: 25, totalRows: 1 } ) });
860-
/*-->*/
861-
862-
/*<!--*/
863-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"firstName: String\">firstName</span>", children: [], rightAlign: false, values: ["Charlie","Charlie"] },
864-
{ name: "<span title=\"lastName: String\">lastName</span>", children: [], rightAlign: false, values: ["Daniels","Chaplin"] },
865-
{ name: "<span title=\"name: DataRow<*>\">name</span>", children: [0, 1], rightAlign: false, values: ["<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Daniels\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Dan<span class=\"structural\">...</span><span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Charlie\nlastName: Chaplin\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Charlie<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Cha<span class=\"structural\">...</span><span class=\"structural\"> }</span></span>"] },
866-
{ name: "<span title=\"age: Int\">age</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">20</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">40</span></span>"] },
867-
{ name: "<span title=\"city: String\">city</span>", children: [], rightAlign: false, values: ["Moscow","Milan"] },
868-
{ name: "<span title=\"weight: Int?\">weight</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>","<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>"] },
869-
{ name: "<span title=\"isHappy: Boolean\">isHappy</span>", children: [], rightAlign: false, values: ["false","true"] },
870-
], id: 28, rootId: 25, totalRows: 2 } ) });
871-
/*-->*/
872-
873-
/*<!--*/
874-
call_DataFrame(function() { DataFrame.addTable({ cols: [{ name: "<span title=\"firstName: String\">firstName</span>", children: [], rightAlign: false, values: ["Bob","Alice"] },
875-
{ name: "<span title=\"lastName: String\">lastName</span>", children: [], rightAlign: false, values: ["Marley","Wolf"] },
876-
{ name: "<span title=\"name: DataRow<*>\">name</span>", children: [0, 1], rightAlign: false, values: ["<span class=\"formatted\" title=\"firstName: Bob\nlastName: Marley\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Bob<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Marley<span class=\"structural\"> }</span></span>","<span class=\"formatted\" title=\"firstName: Alice\nlastName: Wolf\"><span class=\"structural\">{ </span><span class=\"structural\">firstName: </span>Alice<span class=\"structural\">, </span><span class=\"structural\">lastName: </span>Wolf<span class=\"structural\"> }</span></span>"] },
877-
{ name: "<span title=\"age: Int\">age</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">30</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">20</span></span>"] },
878-
{ name: "<span title=\"city: String?\">city</span>", children: [], rightAlign: false, values: ["Tokyo","<span class=\"formatted\" title=\"\"><span class=\"null\">null</span></span>"] },
879-
{ name: "<span title=\"weight: Int\">weight</span>", children: [], rightAlign: true, values: ["<span class=\"formatted\" title=\"\"><span class=\"numbers\">68</span></span>","<span class=\"formatted\" title=\"\"><span class=\"numbers\">55</span></span>"] },
880-
{ name: "<span title=\"isHappy: Boolean\">isHappy</span>", children: [], rightAlign: false, values: ["true","false"] },
881-
], id: 29, rootId: 25, totalRows: 2 } ) });
882-
/*-->*/
883-
884-
call_DataFrame(function() { DataFrame.renderTable(25) });
885-
886-
887797
</script>
888798
</html>

0 commit comments

Comments
 (0)