Skip to content

Commit e3440f9

Browse files
authored
Merge pull request #6194 from janezd/datainfo-as-richtext
Datainfo: Reimplement to show rich text
2 parents c371880 + 4d7fe7a commit e3440f9

File tree

2 files changed

+194
-169
lines changed

2 files changed

+194
-169
lines changed

Orange/widgets/data/owdatainfo.py

Lines changed: 117 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
1-
from collections import OrderedDict
21
import threading
32
import textwrap
43

5-
from Orange.widgets import widget, gui
6-
from Orange.widgets.utils.widgetpreview import WidgetPreview
7-
from Orange.widgets.widget import Input
8-
from Orange.data.table import Table
9-
from Orange.data import StringVariable, DiscreteVariable, ContinuousVariable
10-
from Orange.widgets import report
4+
from Orange.data import \
5+
Table, StringVariable, DiscreteVariable, ContinuousVariable
116
try:
127
from Orange.data.sql.table import SqlTable
138
except ImportError:
149
SqlTable = None
1510

11+
from Orange.widgets import widget, gui
12+
from Orange.widgets.utils.localization import pl
13+
from Orange.widgets.utils.widgetpreview import WidgetPreview
14+
from Orange.widgets.widget import Input
15+
1616

1717
class OWDataInfo(widget.OWWidget):
1818
name = "Data Info"
1919
id = "orange.widgets.data.info"
20-
description = """Display basic information about the dataset, such
21-
as the number and type of variables in the columns and the number of rows."""
20+
description = "Display basic information about the data set"
2221
icon = "icons/DataInfo.svg"
2322
priority = 80
2423
category = "Data"
@@ -34,174 +33,138 @@ class Inputs:
3433
def __init__(self):
    """Set up empty state and the two labels that display the information.

    ``data_desc`` holds the labelled properties of the current table and
    ``data_attrs`` its additional attributes; both start out empty. Each
    label is placed in its own titled box in the control area.
    """
    super().__init__()

    self.data_desc = {}
    self.data_attrs = {}

    # Boxes and labels are created in display order: properties first,
    # additional attributes second.
    properties_box = gui.vBox(self.controlArea, box="Data table properties")
    self.description = gui.widgetLabel(properties_box)
    attributes_box = gui.vBox(self.controlArea, box="Additional attributes")
    self.attributes = gui.widgetLabel(attributes_box)
5442

5543
@Inputs.data
def data(self, data):
    """Handle a new input table and refresh the displayed information.

    With ``data`` set to None both the description and the attributes are
    cleared. Otherwise the description is rebuilt from the ``_p_*`` helpers,
    keeping only the properties for which a helper returned a truthy value.

    For SQL tables the first description is built with ``_p_size`` in its
    default (non-exact) mode; the exact size is then computed on a
    background thread and patched in when it becomes available.
    """
    if data is None:
        # Two distinct dicts: a chained assignment would make both
        # attributes refer to the same object, so a later in-place change
        # of one would silently show up in the other.
        self.data_desc = {}
        self.data_attrs = {}
        self.update_info()
        return

    properties = (("Name", self._p_name),
                  ("Location", self._p_location),
                  ("Size", self._p_size),
                  ("Features", self._p_features),
                  ("Targets", self._p_targets),
                  ("Metas", self._p_metas))
    desc = {}
    for label, func in properties:
        value = func(data)
        if value:
            desc[label] = value
    self.data_desc = desc
    self.data_attrs = data.attributes
    self.update_info()

    if SqlTable is not None and isinstance(data, SqlTable):
        def set_exact_length():
            size = self._p_size(data, exact=True)
            # The widget may have received another table while the rows
            # were being counted; only patch the description that was
            # built for *this* table.
            if self.data_desc is desc:
                desc["Size"] = size
                # NOTE(review): this runs on the worker thread and ends up
                # calling setText on the labels - confirm that updating
                # the GUI from a non-GUI thread is acceptable here.
                self.update_info()

        threading.Thread(target=set_exact_length).start()
67+
68+
def update_info(self):
    """Render ``data_desc`` and ``data_attrs`` into the two labels.

    The description label shows "No data." when ``data_desc`` is empty,
    otherwise an HTML table with one row per property. The attributes label
    is hidden when ``data_attrs`` is empty, otherwise it shows a table of
    the stringified attributes.

    Values of ``data_desc`` are produced by this widget and may contain
    markup (e.g. ``<br/>``), so they are inserted as they are. Keys and
    values of ``data_attrs`` are arbitrary, so they are HTML-escaped;
    otherwise a value such as ``a < b`` would be parsed as markup.
    """
    # Imported locally so that the block does not depend on a new
    # file-level import.
    from html import escape

    style = """<style>
        th { text-align: right; vertical-align: top; }
        th, td { padding-top: 4px; line-height: 125%}
    </style>"""

    def dict_as_table(d, escape_html=False):
        rows = []
        for label, value in d.items():
            # Wrap the raw text first and escape each line afterwards, so
            # that wrapping can never split an HTML entity.
            lines = textwrap.wrap(value, width=60)
            if escape_html:
                label = escape(str(label))
                lines = [escape(line) for line in lines]
            rows.append(f"<tr><th>{label}: </th><td>"
                        f"{'<br/>'.join(lines)}</td></tr>")
        return "<table>" + "".join(rows) + "</table>"

    if not self.data_desc:
        self.description.setText("No data.")
    else:
        self.description.setText(style + dict_as_table(self.data_desc))

    self.attributes.setHidden(not self.data_attrs)
    if self.data_attrs:
        attrs = {k: str(v) for k, v in self.data_attrs.items()}
        self.attributes.setText(
            style + dict_as_table(attrs, escape_html=True))
6291

63-
def _clear_fields(self):
64-
self.data_set_name = ""
65-
self.data_set_size = ""
66-
self.features = self.targets = self.meta_attributes = ""
67-
self.location = ""
68-
self.data_desc = None
69-
self.data_attributes = ""
92+
def send_report(self):
    """Add the non-empty information sections to the report.

    The table properties are reported first, the additional attributes
    second; a section whose dictionary is empty is left out.
    """
    sections = (("Data table properties", "data_desc"),
                ("Additional attributes", "data_attrs"))
    for title, attr_name in sections:
        items = getattr(self, attr_name)
        if items:
            self.report_items(title, items)
7097

7198
@staticmethod
def _p_name(data):
    """Return the ``name`` attribute of `data`, or "-" if it has none."""
    try:
        return data.name
    except AttributeError:
        return "-"
74101

75-
def _set_fields(self, data):
76-
# Attributes are defined in a function called from __init__
77-
# pylint: disable=attribute-defined-outside-init
78-
def n_or_none(n):
79-
return n or "-"
80-
81-
def pack_table(info):
82-
return '<table>\n' + "\n".join(
83-
'<tr><td align="right" width="90">{}:</td>\n'
84-
'<td width="40">{}</td></tr>\n'.format(
85-
d,
86-
textwrap.shorten(str(v), width=30, placeholder="..."))
87-
for d, v in info
88-
) + "</table>\n"
89-
90-
def pack_counts(s, include_non_primitive=False):
91-
if not s:
92-
return "None"
93-
return pack_table(
94-
(name, n_or_none(self._count(s, type_)))
95-
for name, type_ in (
96-
("Categorical", DiscreteVariable),
97-
("Numeric", ContinuousVariable),
98-
("Text", StringVariable))[:2 + include_non_primitive]
99-
)
100-
101-
domain = data.domain
102-
class_var = domain.class_var
102+
@staticmethod
def _p_location(data):
    """Describe the SQL connection behind `data`.

    Returns None when SQL support is unavailable or `data` is not an
    ``SqlTable``. Otherwise returns a rich-text string listing the
    connection parameters as ``key=value`` pairs; parameters whose value is
    None and the ``password`` parameter are left out.
    """
    if SqlTable is None or not isinstance(data, SqlTable):
        return None

    shown = [f'{key}={value}'
             for key, value in data.connection_params.items()
             if value is not None and key != 'password']
    connection_string = ' '.join(shown)
    return f"SQL Table using connection:<br/>{connection_string}"
110+
111+
@staticmethod
def _p_size(data, exact=False):
    """Describe the size of `data` as "<rows>, <columns>[; sparse <parts>]".

    For SQL tables the row count is taken from ``data.approx_len()`` and
    prefixed with "~", unless `exact` is true. For any other table, or when
    SQL support is unavailable, ``len(data)`` is always used.

    The column count covers the domain's variables and metas. The parts
    (features, meta attributes, targets) whose density getter returns a
    value greater than 1 are listed as sparse.
    """
    # Anything that is not an SqlTable is always counted exactly.
    exact = exact or SqlTable is None or not isinstance(data, SqlTable)
    if exact:
        n = len(data)
        desc = f"{n} {pl(n, 'row')}"
    else:
        n = data.approx_len()
        desc = f"~{n} {pl(n, 'row')}"
    ncols = len(data.domain.variables) + len(data.domain.metas)
    desc += f", {ncols} {pl(ncols, 'column')}"

    # NOTE(review): density values above 1 are treated as "sparse" here -
    # confirm against the density constants of the storage class.
    sparseness = [s for s, m in (("features", data.X_density),
                                 ("meta attributes", data.metas_density),
                                 ("targets", data.Y_density)) if m() > 1]
    if sparseness:
        # Must be an f-string; a plain literal would show the placeholder
        # text instead of the names of the sparse parts.
        desc += f"; sparse {', '.join(sparseness)}"
    return desc
120129

121-
threading.Thread(target=update_size).start()
130+
@classmethod
def _p_features(cls, data):
    """Return the per-type counts of the attributes in the domain of `data`."""
    features = data.domain.attributes
    return cls._pack_var_counts(features)
122133

123-
self.data_set_name = getattr(data, "name", "N/A")
124-
125-
self.features = pack_counts(domain.attributes)
126-
self.meta_attributes = pack_counts(domain.metas, True)
127-
if class_var:
134+
def _p_targets(self, data):
    """Describe the target variable(s) of `data`.

    - With a single class variable: "numeric target variable" or
      "categorical outcome with N class(es)".
    - With several class variables of one kind: their count and kind.
    - With several class variables of mixed kinds: "multi-target data"
      followed by the per-type counts.
    - Without any class variable: None.
    """
    if class_var := data.domain.class_var:
        if class_var.is_continuous:
            return "numeric target variable"
        nclasses = len(class_var.values)
        return "categorical outcome with " \
            f"{nclasses} {pl(nclasses, 'class|classes')}"

    if class_vars := data.domain.class_vars:
        disc_class = self._count(class_vars, DiscreteVariable)
        cont_class = self._count(class_vars, ContinuousVariable)
        if not cont_class:
            return f"{disc_class} categorical {pl(disc_class, 'target')}"
        if not disc_class:
            # `pl` is given the singular form, as in every other call in
            # this class.
            return f"{cont_class} numeric {pl(cont_class, 'target')}"
        return "multi-target data,<br/>" + self._pack_var_counts(class_vars)

    return None
152150

153-
def _set_report(self, data):
154-
# Attributes are defined in a function called from __init__
155-
# pylint: disable=attribute-defined-outside-init
156-
domain = data.domain
157-
count = self._count
151+
@classmethod
def _p_metas(cls, data):
    """Return the per-type counts of the metas in the domain of `data`."""
    metas = data.domain.metas
    return cls._pack_var_counts(metas)
158154

159-
self.data_desc = dd = OrderedDict()
160-
dd["Name"] = self.data_set_name
161-
162-
if SqlTable is not None and isinstance(data, SqlTable):
163-
connection_string = ' '.join(
164-
'{}={}'.format(key, value)
165-
for key, value in data.connection_params.items()
166-
if value is not None and key != 'password')
167-
self.location = "Table '{}', using connection:\n{}"\
168-
.format(data.table_name, connection_string)
169-
dd["Rows"] = data.approx_len()
170-
else:
171-
self.location = "Data is stored in memory"
172-
dd["Rows"] = len(data)
173-
174-
def join_if(items):
175-
return ", ".join(s.format(n) for s, n in items if n)
176-
177-
dd["Features"] = len(domain.attributes) > 0 and join_if((
178-
("{} categorical", count(domain.attributes, DiscreteVariable)),
179-
("{} numeric", count(domain.attributes, ContinuousVariable))
180-
))
181-
if domain.class_var:
182-
name = domain.class_var.name
183-
if domain.class_var.is_discrete:
184-
dd["Target"] = "categorical outcome '{}'".format(name)
185-
else:
186-
dd["Target"] = "numeric target '{}'".format(name)
187-
elif domain.class_vars:
188-
disc_class = count(domain.class_vars, DiscreteVariable)
189-
cont_class = count(domain.class_vars, ContinuousVariable)
190-
tt = ""
191-
if disc_class:
192-
tt += report.plural("{number} categorical outcome{s}", disc_class)
193-
if cont_class:
194-
tt += report.plural("{number} numeric target{s}", cont_class)
195-
dd["Meta attributes"] = len(domain.metas) > 0 and join_if((
196-
("{} categorical", count(domain.metas, DiscreteVariable)),
197-
("{} numeric", count(domain.metas, ContinuousVariable)),
198-
("{} text", count(domain.metas, StringVariable))
199-
))
155+
@staticmethod
def _count(s, tpe):
    """Return the number of items in `s` that are instances of `tpe`."""
    return sum(1 for item in s if isinstance(item, tpe))
200158

201-
def send_report(self):
202-
if self.data_desc:
203-
self.report_items(self.data_desc)
159+
@classmethod
def _pack_var_counts(cls, s):
    """Summarize the variables in `s` by type.

    Returns a comma-separated string such as "3 categorical, 2 numeric";
    types with no variables are omitted, so the result is an empty string
    when `s` contains none of the three types.
    """
    kinds = (("categorical", DiscreteVariable),
             ("numeric", ContinuousVariable),
             ("text", StringVariable))
    parts = []
    for kind, var_type in kinds:
        n_vars = cls._count(s, var_type)
        if n_vars:
            parts.append(f"{n_vars} {kind}")
    return ", ".join(parts)
204167

205168

206169
if __name__ == "__main__":  # pragma: no cover
    # Manual preview of the widget on a bundled data set.
    preview = WidgetPreview(OWDataInfo)
    preview.run(Table("heart_disease"))

0 commit comments

Comments
 (0)