Skip to content

Commit cc574b7

Browse files
committed
New aggregation and handle empty aggregation selection
1 parent b532270 commit cc574b7

File tree

3 files changed

+50
-15
lines changed

3 files changed

+50
-15
lines changed

Orange/data/aggregate.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,12 @@ def _compute_aggregation(
9999

100100
def _aggregations_to_table(self, aggregations: List[pd.Series]) -> Table:
101101
"""Concatenate aggregation series and convert back to Table"""
102-
df = pd.concat(aggregations, axis=1)
102+
if aggregations:
103+
df = pd.concat(aggregations, axis=1)
104+
else:
105+
# when no aggregation is computed, return a table with only the group-by columns
106+
df = self.group_by.first()
107+
df = df.drop(columns=df.columns)
103108
gb_attributes = df.index.names
104109
df = df.reset_index() # move group by var that are in index to columns
105110
table = table_from_frame(df)

Orange/widgets/data/owgroupby.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,10 @@ def concatenate(x):
7575
{ContinuousVariable, TimeVariable},
7676
),
7777
"First value": Aggregation(
78-
"first",
79-
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
78+
"first", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
8079
),
8180
"Last value": Aggregation(
82-
"last",
83-
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
81+
"last", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
8482
),
8583
"Random value": Aggregation(
8684
lambda x: x.sample(1, random_state=0),
@@ -92,16 +90,19 @@ def concatenate(x):
9290
"Count": Aggregation(
9391
"size", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
9492
),
93+
"Proportion defined": Aggregation(
94+
lambda x: x.count() / x.size,
95+
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable},
96+
),
9597
}
9698
# list of ordered aggregation names is required in several locations so we
9799
# prepare it in advance
98100
AGGREGATIONS_ORD = list(AGGREGATIONS)
99101

102+
# use first aggregation suitable for each type as default
100103
DEFAULT_AGGREGATIONS = {
101-
ContinuousVariable: {next(iter(AGGREGATIONS.keys()))},
102-
TimeVariable: {next(iter(AGGREGATIONS.keys()))},
103-
DiscreteVariable: set(),
104-
StringVariable: set(),
104+
var: {next(name for name, agg in AGGREGATIONS.items() if var in agg.types)}
105+
for var in (ContinuousVariable, TimeVariable, DiscreteVariable, StringVariable)
105106
}
106107

107108

@@ -114,7 +115,7 @@ class Result:
114115
def _run(
115116
data: Table,
116117
group_by_attrs: List[Variable],
117-
aggregations: Dict[Variable, List[str]],
118+
aggregations: Dict[Variable, Set[str]],
118119
result: Result,
119120
state: TaskState,
120121
) -> Result:
@@ -383,7 +384,9 @@ def __rows_selected(self) -> None:
383384
# check if aggregation active for all selected attributes,
384385
# partially check if active for only some of them, otherwise uncheck
385386
cb.setCheckState(
386-
Qt.Checked if activated == {True} else (Qt.Unchecked if activated == {False} else Qt.PartiallyChecked)
387+
Qt.Checked
388+
if activated == {True}
389+
else (Qt.Unchecked if activated == {False} else Qt.PartiallyChecked)
387390
)
388391

389392
def __gb_changed(self) -> None:

Orange/widgets/data/tests/test_owgroupby.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,15 +159,36 @@ def test_attr_table_row_selection(self):
159159
)
160160
self.select_table_rows(table, [3]) # discrete variable
161161
self.assert_enabled_cbs(
162-
{"Count defined", "Count", "Concatenate", "First value", "Last value", "Random value"}
162+
{
163+
"Count defined",
164+
"Count",
165+
"Concatenate",
166+
"First value",
167+
"Last value",
168+
"Random value",
169+
}
163170
)
164171
self.select_table_rows(table, [4]) # string variable
165172
self.assert_enabled_cbs(
166-
{"Count defined", "Count", "Concatenate", "First value", "Last value", "Random value"}
173+
{
174+
"Count defined",
175+
"Count",
176+
"Concatenate",
177+
"First value",
178+
"Last value",
179+
"Random value",
180+
}
167181
)
168182
self.select_table_rows(table, [3, 4]) # string variable
169183
self.assert_enabled_cbs(
170-
{"Count defined", "Count", "Concatenate", "First value", "Last value", "Random value"}
184+
{
185+
"Count defined",
186+
"Count",
187+
"Concatenate",
188+
"First value",
189+
"Last value",
190+
"Random value",
191+
}
171192
)
172193
self.select_table_rows(table, [2, 3, 4]) # string variable
173194
self.assert_enabled_cbs(
@@ -399,7 +420,13 @@ def test_aggregations_change(self):
399420
Qt.PartiallyChecked, self.widget.agg_checkboxes["Mode"].checkState()
400421
)
401422
self.assert_aggregations_equal(
402-
["Mean, Mode, Count defined and 1 more", "Mean, Mode, Count defined", "Mean", "Count defined, Count", ""]
423+
[
424+
"Mean, Mode, Count defined and 1 more",
425+
"Mean, Mode, Count defined",
426+
"Mean",
427+
"Count defined, Count",
428+
"",
429+
]
403430
)
404431
self.assertDictEqual(
405432
{

0 commit comments

Comments
 (0)