Skip to content

Commit 3be2d2c

Browse files
authored
Merge pull request #4569 from aturanjanin/statesummary
state_summary: Add multiple inputs function and missing values info
2 parents be3a9ff + 2d19ae1 commit 3be2d2c

File tree

2 files changed

+104
-11
lines changed

2 files changed

+104
-11
lines changed

Orange/widgets/utils/state_summary.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,49 @@ def _plural(number):
5555
targets = format_variables_string(data.domain.class_vars)
5656
metas = format_variables_string(data.domain.metas)
5757

58+
features_missing = missing_values(data.has_missing_attribute()
59+
and data.get_nan_frequency_attribute())
5860
n_features = len(data.domain.variables) + len(data.domain.metas)
5961
details = \
6062
f'{len(data)} instance{_plural(len(data))}, ' \
6163
f'{n_features} variable{_plural(n_features)}\n' \
62-
f'Features: {features}\nTarget: {targets}\nMetas: {metas}'
63-
64+
f'Features: {features} {features_missing}\n' \
65+
f'Target: {targets}\nMetas: {metas}'
6466
return details
67+
68+
69+
def missing_values(value):
    """
    Describe the share of missing values as a parenthesised note.

    :param value: the fraction of missing values (it is multiplied by 100
        for display); any falsy value (e.g. ``0``, ``False``, ``None``)
        is reported as no missing values
    :type value: float or bool

    :return: the note, e.g. ``(10.0% missing values)`` or
        ``(No missing values)``
    :rtype: str
    """
    if not value:
        return '(No missing values)'
    percentage = value * 100
    return f'({percentage:.1f}% missing values)'
74+
75+
76+
def format_multiple_summaries(data_list, type_io='input'):
    """
    Build the entire descriptive part of the input/output summary for
    widgets that have more than one input/output.

    Every dataset contributes one section. Newlines inside a section are
    rendered as ``<br>`` and the sections are joined with ``<hr>``.

    :param data_list: A list of ``(name, data)`` tuples, one for each
        input/output dataset. ``name`` may be an empty string, in which
        case the section is emitted without a heading
    :type data_list: list(tuple(str, Orange.data.Table))
    :param type_io: A string that indicates whether the input or output data
        is being formatted; it is inserted into the "No data on ..."
        placeholder
    :type type_io: str

    :return: A formatted summary
    :rtype: str
    """
    def describe(name, data):
        # Falsy data (e.g. None) gets a placeholder instead of details.
        if data:
            body = format_summary_details(data).replace('\n', '<br>')
        else:
            body = f'No data on {type_io}.'
        # Only named datasets get a heading line.
        return f'{name}:<br>{body}' if name else body

    return '<hr>'.join(describe(name, data) for name, data in data_list)

Orange/widgets/utils/tests/test_state_summary.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \
88
DiscreteVariable, TimeVariable
9-
from Orange.widgets.utils.state_summary import format_summary_details
9+
from Orange.widgets.utils.state_summary import format_summary_details, \
10+
format_multiple_summaries
1011

1112
VarDataPair = namedtuple('VarDataPair', ['variable', 'data'])
1213

@@ -106,7 +107,8 @@ def test_details(self):
106107
n_features = len(data.domain.variables) + len(data.domain.metas)
107108
details = f'{len(data)} instances, ' \
108109
f'{n_features} variables\n' \
109-
f'Features: {len(data.domain.attributes)} categorical\n' \
110+
f'Features: {len(data.domain.attributes)} categorical ' \
111+
f'(No missing values)\n' \
110112
f'Target: categorical\n' \
111113
f'Metas: string'
112114
self.assertEqual(details, format_summary_details(data))
@@ -115,7 +117,8 @@ def test_details(self):
115117
n_features = len(data.domain.variables) + len(data.domain.metas)
116118
details = f'{len(data)} instances, ' \
117119
f'{n_features} variables\n' \
118-
f'Features: {len(data.domain.attributes)} numeric\n' \
120+
f'Features: {len(data.domain.attributes)} numeric ' \
121+
f'(No missing values)\n' \
119122
f'Target: numeric\n' \
120123
f'Metas: —'
121124
self.assertEqual(details, format_summary_details(data))
@@ -125,7 +128,7 @@ def test_details(self):
125128
details = f'{len(data)} instances, ' \
126129
f'{n_features} variables\n' \
127130
f'Features: {len(data.domain.attributes)} ' \
128-
f'(7 categorical, 6 numeric)\n' \
131+
f'(7 categorical, 6 numeric) (0.2% missing values)\n' \
129132
f'Target: categorical\n' \
130133
f'Metas: —'
131134
self.assertEqual(details, format_summary_details(data))
@@ -137,7 +140,8 @@ def test_details(self):
137140
n_features = len(data.domain.variables) + len(data.domain.metas)
138141
details = f'{len(data)} instances, ' \
139142
f'{n_features} variables\n' \
140-
f'Features: {len(data.domain.attributes)} numeric\n' \
143+
f'Features: {len(data.domain.attributes)} numeric ' \
144+
f'(10.0% missing values)\n' \
141145
f'Target: {len(data.domain.class_vars)} categorical\n' \
142146
f'Metas: {len(data.domain.metas)} categorical'
143147
self.assertEqual(details, format_summary_details(data))
@@ -151,7 +155,7 @@ def test_details(self):
151155
details = f'{len(data)} instances, ' \
152156
f'{n_features} variables\n' \
153157
f'Features: {len(data.domain.attributes)} ' \
154-
f'(2 categorical, 1 numeric, 1 time)\n' \
158+
f'(2 categorical, 1 numeric, 1 time) (5.0% missing values)\n' \
155159
f'Target: {len(data.domain.class_vars)} ' \
156160
f'(1 categorical, 1 numeric)\n' \
157161
f'Metas: {len(data.domain.metas)} string'
@@ -161,30 +165,80 @@ def test_details(self):
161165
metas=None)
162166
details = f'{len(data)} instances, ' \
163167
f'{len(data.domain.variables)} variables\n' \
164-
f'Features: {len(data.domain.attributes)} time\n' \
168+
f'Features: {len(data.domain.attributes)} time ' \
169+
f'(10.0% missing values)\n' \
165170
f'Target: categorical\n' \
166171
f'Metas: —'
167172
self.assertEqual(details, format_summary_details(data))
168173

169174
data = make_table([rgb_full, ints_full], target=None, metas=None)
170175
details = f'{len(data)} instances, ' \
171176
f'{len(data.domain.variables)} variables\n' \
172-
f'Features: {len(data.domain.variables)} categorical\n' \
177+
f'Features: {len(data.domain.variables)} categorical ' \
178+
f'(No missing values)\n' \
173179
f'Target: —\n' \
174180
f'Metas: —'
175181
self.assertEqual(details, format_summary_details(data))
176182

177183
data = make_table([rgb_full], target=None, metas=None)
178184
details = f'{len(data)} instances, ' \
179185
f'{len(data.domain.variables)} variable\n' \
180-
f'Features: categorical\n' \
186+
f'Features: categorical (No missing values)\n' \
181187
f'Target: —\n' \
182188
f'Metas: —'
183189
self.assertEqual(details, format_summary_details(data))
184190

185191
data = None
186192
self.assertEqual('', format_summary_details(data))
187193

194+
def test_multiple_summaries(self):
    """Check named, unnamed and missing datasets in a combined summary."""
    def expected_section(table):
        # Expected details of one dataset, with <br> as the line separator.
        n_features = len(table.domain.variables) + len(table.domain.metas)
        return '<br>'.join((
            f'{len(table)} instances, {n_features} variables',
            f'Features: {len(table.domain.attributes)} categorical '
            f'(No missing values)',
            'Target: categorical',
            'Metas: string',
        ))

    data = Table('zoo')
    extra_data = Table('zoo')[20:]
    data_section = expected_section(data)
    extra_section = expected_section(extra_data)

    # Named inputs: every section is preceded by its name.
    inputs = [('Data', data), ('Extra Data', extra_data)]
    details = f'Data:<br>{data_section}<hr>Extra Data:<br>{extra_section}'
    self.assertEqual(details, format_multiple_summaries(inputs))

    # Unnamed inputs: sections are emitted without a heading.
    inputs = [('', data), ('', extra_data)]
    details = f'{data_section}<hr>{extra_section}'
    self.assertEqual(details, format_multiple_summaries(inputs))

    # Missing outputs are reported with a placeholder.
    outputs = [('', None), ('Extra data', extra_data), ('', None)]
    details = f'No data on output.<hr>' \
              f'Extra data:<br>{extra_section}<hr>' \
              f'No data on output.'
    self.assertEqual(details,
                     format_multiple_summaries(outputs, type_io='output'))
241+
188242

189243
if __name__ == "__main__":
190244
unittest.main()

0 commit comments

Comments
 (0)