Skip to content

Commit 320e54d

Browse files
committed
Closed #80. Completely reimplemented aggregation and grouping
1 parent 4f1b49e commit 320e54d

10 files changed

+343
-348
lines changed

src/DataFrame-Tests/DataFrameAggrGroupTest.class.st

Lines changed: 224 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -10,89 +10,259 @@ Class {
1010
{ #category : #initialization }
1111
DataFrameAggrGroupTest >> setUp [
1212

13-
df := DataFrame withRows:
14-
#((16.99 1.01 'Female' 'No' 'Sun' 'Dinner' 2)
15-
(10.34 1.66 'Male' 'No' 'Sun' 'Dinner' 3)
16-
(21.01 3.5 'Male' 'No' 'Sun' 'Dinner' 3)
17-
(23.68 3.31 'Male' 'No' 'Sun' 'Dinner' 2)
18-
(24.59 3.61 'Female' 'No' 'Sun' 'Dinner' 4)).
13+
df := DataFrame withRows: #(
14+
(16.99 1.01 Female No Sun Dinner 2)
15+
(10.34 1.66 Male No Sun Dinner 3)
16+
(21.01 3.5 Male No Sun Dinner 3)
17+
(23.68 3.31 Male No Sun Dinner 2)
18+
(24.59 3.61 Female No Sun Dinner 4)).
1919

20-
df columnNames: #('total_bill' 'tip' 'sex' 'smoker' 'day' 'time' 'size').
20+
df columnNames: #(total_bill tip sex smoker day time size).
2121
]
2222

2323
{ #category : #initialization }
24-
DataFrameAggrGroupTest >> testAggregateAverage [
25-
24+
DataFrameAggrGroupTest >> testGroupByAggregateArrayMultipleUsingAsSelector [
2625
| expected actual |
2726

28-
expected := DataSeries
29-
withKeys: #(Male Female)
30-
values: #(18.3433 20.79)
31-
name: #total_bill.
32-
33-
actual := (df group: #total_bill by: #sex) average.
34-
self assert: actual closeTo: expected.
27+
expected := DataFrame
28+
withColumns: #(
29+
(41.58 55.03)
30+
(20.79 18.343333333333334)
31+
(2.31 2.8233333333333337))
32+
rowNames: #(Female Male)
33+
columnNames: #(total average averageTip).
34+
35+
actual := df
36+
groupBy: #sex
37+
aggregate: {
38+
#total_bill using: #sum as: #total .
39+
#total_bill using: #average as: #average .
40+
#tip using: #average as: #averageTip
41+
}.
42+
43+
self assert: actual equals: expected
3544
]
3645

3746
{ #category : #initialization }
38-
DataFrameAggrGroupTest >> testAggregateMax [
47+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingAsBlock [
3948
| expected actual |
4049

41-
expected := DataSeries
42-
withKeys: #(Male Female)
43-
values: #(23.68 24.59)
44-
name: #total_bill.
45-
46-
actual := (df group: #total_bill by: #sex) max.
47-
self assert: actual equals: expected.
50+
expected := DataFrame
51+
withColumns: #((20.79 18.343333333333334))
52+
rowNames: #(Female Male)
53+
columnNames: #(total).
54+
55+
actual := df
56+
groupBy: #sex
57+
aggregate: { #total_bill using: [ :column | column sum / column size ] as: #total }.
58+
59+
self assert: actual equals: expected
4860
]
4961

5062
{ #category : #initialization }
51-
DataFrameAggrGroupTest >> testAggregateMin [
63+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingAsNoSuchAggregateColumnError [
64+
self
65+
should: [
66+
df groupBy: #sex
67+
aggregate: { #NoSuchColumn using: #sum as: #total } ]
68+
raise: NotFoundError.
69+
]
5270

71+
{ #category : #initialization }
72+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingAsNoSuchGroupColumnError [
73+
self
74+
should: [
75+
df groupBy: #NoSuchColumn
76+
aggregate: { #total_bill using: #sum as: #total } ]
77+
raise: NotFoundError.
78+
]
79+
80+
{ #category : #initialization }
81+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingAsSameColumnError [
82+
self
83+
should: [
84+
df groupBy: #sex
85+
aggregate: { #sex using: #sum as: #total } ]
86+
raise: Error.
87+
]
88+
89+
{ #category : #initialization }
90+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingAsSelector [
5391
| expected actual |
5492

55-
expected := DataSeries
56-
withKeys: #(Male Female)
57-
values: #(10.34 16.99)
58-
name: #total_bill.
59-
60-
actual := (df group: #total_bill by: #sex) min.
61-
self assert: actual equals: expected.
93+
expected := DataFrame
94+
withColumns: #((41.58 55.03))
95+
rowNames: #(Female Male)
96+
columnNames: #(total).
97+
98+
actual := df
99+
groupBy: #sex
100+
aggregate: { #total_bill using: #sum as: #total }.
101+
102+
self assert: actual equals: expected
62103
]
63104

64105
{ #category : #initialization }
65-
DataFrameAggrGroupTest >> testDataFrameGroupedPrintOn [
66-
106+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingBlock [
67107
| expected actual |
68108

69-
expected := String new writeStream.
70-
expected
71-
nextPutAll: 'a DataFrameGrouped'; cr;
72-
nextPutAll: 'Male: a DataFrame (3@6)'; cr;
73-
nextPutAll: 'Female: a DataFrame (2@6)'.
74-
expected := expected contents.
109+
expected := DataFrame
110+
withColumns: #((20.79 18.343333333333334))
111+
rowNames: #(Female Male)
112+
columnNames: #(total_bill).
113+
114+
actual := df
115+
groupBy: #sex
116+
aggregate: { #total_bill using: [ :column | column sum / column size ] }.
117+
118+
self assert: actual equals: expected
119+
]
120+
121+
{ #category : #initialization }
122+
DataFrameAggrGroupTest >> testGroupByAggregateArrayUsingSelector [
123+
| expected actual |
75124

76-
actual := String new writeStream.
77-
(df groupBy: #sex) printOn: actual.
78-
actual := actual contents.
125+
expected := DataFrame
126+
withColumns: #((41.58 55.03))
127+
rowNames: #(Female Male)
128+
columnNames: #(total_bill).
129+
130+
actual := df
131+
groupBy: #sex
132+
aggregate: { #total_bill using: #sum }.
133+
134+
self assert: actual equals: expected
135+
]
136+
137+
{ #category : #initialization }
138+
DataFrameAggrGroupTest >> testGroupByAggregateUsingAsBlock [
139+
| expected actual |
79140

80-
self assert: actual equals: expected.
141+
expected := DataSeries
142+
withKeys: #(Female Male)
143+
values: #(20.79 18.343333333333334)
144+
name: #total.
145+
146+
actual := df
147+
group: #total_bill
148+
by: #sex
149+
aggregateUsing: [ :column | column sum / column size ]
150+
as: #total.
151+
152+
self assert: actual equals: expected
81153
]
82154

83-
{ #category : #tests }
84-
DataFrameAggrGroupTest >> testGroupSeriesBySeries [
155+
{ #category : #initialization }
156+
DataFrameAggrGroupTest >> testGroupByAggregateUsingAsNoSuchAggregateColumnError [
157+
self
158+
should: [
159+
df group: #NoSuchColumn
160+
by: #sex
161+
aggregateUsing: #sum
162+
as: #total ]
163+
raise: NotFoundError.
164+
]
165+
166+
{ #category : #initialization }
167+
DataFrameAggrGroupTest >> testGroupByAggregateUsingAsNoSuchGroupColumnError [
168+
self
169+
should: [
170+
df group: #sex
171+
by: #NoSuchColumn
172+
aggregateUsing: #sum
173+
as: #total ]
174+
raise: NotFoundError.
175+
]
176+
177+
{ #category : #initialization }
178+
DataFrameAggrGroupTest >> testGroupByAggregateUsingAsSameColumnError [
179+
self
180+
should: [
181+
df group: #sex
182+
by: #sex
183+
aggregateUsing: #sum
184+
as: #total ]
185+
raise: Error.
186+
]
85187

86-
| femaleGroup maleGroup expected actual |
188+
{ #category : #initialization }
189+
DataFrameAggrGroupTest >> testGroupByAggregateUsingAsSelector [
190+
| expected actual |
87191

88-
femaleGroup := #(16.99 24.59) asDataSeries.
89-
maleGroup := #(10.34 21.01 23.68) asDataSeries.
192+
expected := DataSeries
193+
withKeys: #(Female Male)
194+
values: #(41.58 55.03)
195+
name: #total.
196+
197+
actual := df
198+
group: #total_bill
199+
by: #sex
200+
aggregateUsing: #sum
201+
as: #total.
202+
203+
self assert: actual equals: expected
204+
]
205+
206+
{ #category : #initialization }
207+
DataFrameAggrGroupTest >> testGroupByAggregateUsingBlock [
208+
| expected actual |
90209

91-
expected := DataSeries
92-
withKeys: #(Male Female)
93-
values: { maleGroup . femaleGroup }
210+
expected := DataSeries
211+
withKeys: #(Female Male)
212+
values: #(20.79 18.343333333333334)
94213
name: #total_bill.
214+
215+
actual := df
216+
group: #total_bill
217+
by: #sex
218+
aggregateUsing: [ :column | column sum / column size ].
219+
220+
self assert: actual equals: expected
221+
]
222+
223+
{ #category : #initialization }
224+
DataFrameAggrGroupTest >> testGroupByAggregateUsingNoSuchAggregateColumnError [
225+
self
226+
should: [
227+
df group: #NoSuchColumn
228+
by: #sex
229+
aggregateUsing: #sum ]
230+
raise: NotFoundError.
231+
]
232+
233+
{ #category : #initialization }
234+
DataFrameAggrGroupTest >> testGroupByAggregateUsingNoSuchGroupColumnError [
235+
self
236+
should: [
237+
df group: #sex
238+
by: #NoSuchColumn
239+
aggregateUsing: #sum ]
240+
raise: NotFoundError.
241+
]
242+
243+
{ #category : #initialization }
244+
DataFrameAggrGroupTest >> testGroupByAggregateUsingSameColumnError [
245+
self
246+
should: [
247+
df group: #sex
248+
by: #sex
249+
aggregateUsing: #sum ]
250+
raise: Error.
251+
]
252+
253+
{ #category : #initialization }
254+
DataFrameAggrGroupTest >> testGroupByAggregateUsingSelector [
255+
| expected actual |
95256

96-
actual := (df group: #total_bill by: #sex) groups.
97-
self assert: actual equals: expected.
257+
expected := DataSeries
258+
withKeys: #(Female Male)
259+
values: #(41.58 55.03)
260+
name: #total_bill.
261+
262+
actual := df
263+
group: #total_bill
264+
by: #sex
265+
aggregateUsing: #sum.
266+
267+
self assert: actual equals: expected
98268
]

src/DataFrame-Tests/DataFrameQueriesTest.class.st

Lines changed: 0 additions & 56 deletions
This file was deleted.

src/DataFrame-Tests/DataFrameTest.class.st

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1125,10 +1125,10 @@ DataFrameTest >> testCrossTabulation [
11251125
dataFrame columnNames: #(Gender Age).
11261126

11271127
expected := DataFrame withRows:
1128-
#((2 1)(1 1)).
1128+
#((1 2)(1 1)).
11291129

11301130
expected rowNames: #(Female Male).
1131-
expected columnNames: #(Young Old).
1131+
expected columnNames: #(Old Young).
11321132

11331133
self assert: (dataFrame crossTabulate: #Gender with: #Age) equals: expected.
11341134
]

0 commit comments

Comments
 (0)