Skip to content

Commit edcc500

Browse files
committed
New aggregation and handle empty aggregation selection
1 parent b532270 commit edcc500

File tree

2 files changed

+19
-11
lines changed

2 files changed

+19
-11
lines changed

Orange/data/aggregate.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,12 @@ def _compute_aggregation(
9999

100100
def _aggregations_to_table(self, aggregations: List[pd.Series]) -> Table:
101101
"""Concatenate aggregation series and convert back to Table"""
102-
df = pd.concat(aggregations, axis=1)
102+
if aggregations:
103+
df = pd.concat(aggregations, axis=1)
104+
else:
105+
# when no aggregation is computed, return a table with only the group-by columns
106+
df = self.group_by.first()
107+
df = df.drop(columns=df.columns)
103108
gb_attributes = df.index.names
104109
df = df.reset_index() # move group by var that are in index to columns
105110
table = table_from_frame(df)

Orange/widgets/data/owgroupby.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,10 @@ def concatenate(x):
7575
{ContinuousVariable, TimeVariable},
7676
),
7777
"First value": Aggregation(
78-
"first",
79-
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
78+
"first", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
8079
),
8180
"Last value": Aggregation(
82-
"last",
83-
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
81+
"last", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
8482
),
8583
"Random value": Aggregation(
8684
lambda x: x.sample(1, random_state=0),
@@ -92,16 +90,19 @@ def concatenate(x):
9290
"Count": Aggregation(
9391
"size", {ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable}
9492
),
93+
"Proportion defined": Aggregation(
94+
lambda x: x.count() / x.size,
95+
{ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable},
96+
),
9597
}
9698
# list of ordered aggregation names is required in several locations so we
9799
# prepare it in advance
98100
AGGREGATIONS_ORD = list(AGGREGATIONS)
99101

102+
# use first aggregation suitable for each type as default
100103
DEFAULT_AGGREGATIONS = {
101-
ContinuousVariable: {next(iter(AGGREGATIONS.keys()))},
102-
TimeVariable: {next(iter(AGGREGATIONS.keys()))},
103-
DiscreteVariable: set(),
104-
StringVariable: set(),
104+
var: {next(name for name, agg in AGGREGATIONS.items() if var in agg.types)}
105+
for var in (ContinuousVariable, TimeVariable, DiscreteVariable, StringVariable)
105106
}
106107

107108

@@ -114,7 +115,7 @@ class Result:
114115
def _run(
115116
data: Table,
116117
group_by_attrs: List[Variable],
117-
aggregations: Dict[Variable, List[str]],
118+
aggregations: Dict[Variable, Set[str]],
118119
result: Result,
119120
state: TaskState,
120121
) -> Result:
@@ -383,7 +384,9 @@ def __rows_selected(self) -> None:
383384
# check if aggregation active for all selected attributes,
384385
# partially check if active for only some of them, otherwise uncheck
385386
cb.setCheckState(
386-
Qt.Checked if activated == {True} else (Qt.Unchecked if activated == {False} else Qt.PartiallyChecked)
387+
Qt.Checked
388+
if activated == {True}
389+
else (Qt.Unchecked if activated == {False} else Qt.PartiallyChecked)
387390
)
388391

389392
def __gb_changed(self) -> None:

0 commit comments

Comments
 (0)