1- from collections import OrderedDict
21import threading
32import textwrap
43
5- from Orange .widgets import widget , gui
6- from Orange .widgets .utils .widgetpreview import WidgetPreview
7- from Orange .widgets .widget import Input
8- from Orange .data .table import Table
9- from Orange .data import StringVariable , DiscreteVariable , ContinuousVariable
10- from Orange .widgets import report
4+ from Orange .data import \
5+ Table , StringVariable , DiscreteVariable , ContinuousVariable
116try :
127 from Orange .data .sql .table import SqlTable
138except ImportError :
149 SqlTable = None
1510
11+ from Orange .widgets import widget , gui
12+ from Orange .widgets .utils .localization import pl
13+ from Orange .widgets .utils .widgetpreview import WidgetPreview
14+ from Orange .widgets .widget import Input
15+
1616
1717class OWDataInfo (widget .OWWidget ):
1818 name = "Data Info"
1919 id = "orange.widgets.data.info"
20- description = """Display basic information about the dataset, such
21- as the number and type of variables in the columns and the number of rows."""
20+ description = "Display basic information about the data set"
2221 icon = "icons/DataInfo.svg"
2322 priority = 80
2423 category = "Data"
@@ -34,174 +33,138 @@ class Inputs:
def __init__(self):
    """Initialise empty state and build the two labels in the control area."""
    super().__init__()

    # Nothing to describe until a table arrives on the input.
    self.data_desc = {}
    self.data_attrs = {}

    # Each label sits in its own titled vertical box.
    properties_box = gui.vBox(self.controlArea, box="Data table properties")
    self.description = gui.widgetLabel(properties_box)
    attributes_box = gui.vBox(self.controlArea, box="Additional attributes")
    self.attributes = gui.widgetLabel(attributes_box)
5442
5543 @Inputs .data
def data(self, data):
    """Handle a new table on the input and refresh the displayed info.

    Args:
        data: the incoming table, or ``None`` when the input is removed.

    For SQL tables the approximate row count is shown first; the exact
    count is computed on a background thread and patched in afterwards.
    """
    if data is None:
        # Two distinct dicts: a chained assignment would make both
        # attributes share one object, so a write to one would leak
        # into the other.
        self.data_desc = {}
        self.data_attrs = {}
        self.update_info()
        return

    properties = (("Name", self._p_name),
                  ("Location", self._p_location),
                  ("Size", self._p_size),
                  ("Features", self._p_features),
                  ("Targets", self._p_targets),
                  ("Metas", self._p_metas))
    # Properties whose description is empty/None are left out entirely.
    self.data_desc = desc = {
        label: value
        for label, func in properties
        if bool(value := func(data))}
    self.data_attrs = data.attributes
    self.update_info()

    if SqlTable is not None and isinstance(data, SqlTable):
        def set_exact_length():
            size = self._p_size(data, exact=True)
            # The input may have changed while the exact length was being
            # computed; only update the description this thread was
            # started for, never the one belonging to newer data.
            if self.data_desc is desc:
                desc["Size"] = size
                self.update_info()

        # NOTE(review): update_info() touches the labels from this worker
        # thread - confirm that is safe for the GUI toolkit in use.
        threading.Thread(target=set_exact_length).start()
67+
def update_info(self):
    """Render the stored description and attributes into the two labels.

    Shows "No data." when there is no description, and hides the
    attributes label when there are no additional attributes.
    """
    style = """<style>
        th { text-align: right; vertical-align: top; }
        th, td { padding-top: 4px; line-height: 125%}
    </style>"""

    def dict_as_table(d):
        # One table row per item; long values are wrapped at 60 characters
        # and the pieces are joined with explicit line breaks.
        rows = []
        for label, value in d.items():
            wrapped = '<br/>'.join(textwrap.wrap(value, width=60))
            rows.append(f"<tr><th>{label}: </th><td>" + wrapped + "</td></tr>")
        return "<table>" + "".join(rows) + "</table>"

    if self.data_desc:
        self.description.setText(style + dict_as_table(self.data_desc))
    else:
        self.description.setText("No data.")

    self.attributes.setHidden(not self.data_attrs)
    if self.data_attrs:
        # Attribute values can be of any type; the table needs strings.
        as_text = {k: str(v) for k, v in self.data_attrs.items()}
        self.attributes.setText(style + dict_as_table(as_text))
6291
63- def _clear_fields (self ):
64- self .data_set_name = ""
65- self .data_set_size = ""
66- self .features = self .targets = self .meta_attributes = ""
67- self .location = ""
68- self .data_desc = None
69- self .data_attributes = ""
def send_report(self):
    """Add the non-empty info sections to the report."""
    sections = (("Data table properties", self.data_desc),
                ("Additional attributes", self.data_attrs))
    for title, items in sections:
        # Empty sections are skipped rather than reported as blank tables.
        if items:
            self.report_items(title, items)
7097
7198 @staticmethod
72- def _count ( s , tpe ):
73- return sum ( isinstance ( x , tpe ) for x in s )
99+ def _p_name ( data ):
100+ return getattr ( data , "name" , "-" )
74101
75- def _set_fields (self , data ):
76- # Attributes are defined in a function called from __init__
77- # pylint: disable=attribute-defined-outside-init
78- def n_or_none (n ):
79- return n or "-"
80-
81- def pack_table (info ):
82- return '<table>\n ' + "\n " .join (
83- '<tr><td align="right" width="90">{}:</td>\n '
84- '<td width="40">{}</td></tr>\n ' .format (
85- d ,
86- textwrap .shorten (str (v ), width = 30 , placeholder = "..." ))
87- for d , v in info
88- ) + "</table>\n "
89-
90- def pack_counts (s , include_non_primitive = False ):
91- if not s :
92- return "None"
93- return pack_table (
94- (name , n_or_none (self ._count (s , type_ )))
95- for name , type_ in (
96- ("Categorical" , DiscreteVariable ),
97- ("Numeric" , ContinuousVariable ),
98- ("Text" , StringVariable ))[:2 + include_non_primitive ]
99- )
100-
101- domain = data .domain
102- class_var = domain .class_var
102+ @staticmethod
def _p_location(data):
    """Describe the SQL connection behind *data*.

    Returns ``None`` when SQL support is unavailable or *data* is not an
    ``SqlTable``. The password and unset parameters are never included.
    """
    if SqlTable is None or not isinstance(data, SqlTable):
        return None

    shown_params = [
        f'{key}={value}'
        for key, value in data.connection_params.items()
        if value is not None and key != 'password']
    connection_string = ' '.join(shown_params)
    return f"SQL Table using connection:<br/>{connection_string}"
110+
111+ @staticmethod
def _p_size(data, exact=False):
    """Describe the number of rows and columns of *data*.

    Args:
        data: the table to describe.
        exact: force an exact row count. Tables that are not ``SqlTable``
            are always counted exactly; for SQL tables the default is an
            approximate count, prefixed with "~".

    Returns:
        A string such as "150 rows, 5 columns", followed by
        "; sparse <parts>" when any part of the table is sparse.
    """
    exact = exact or SqlTable is None or not isinstance(data, SqlTable)
    if exact:
        n = len(data)
        desc = f"{n} {pl(n, 'row')}"
    else:
        n = data.approx_len()
        desc = f"~{n} {pl(n, 'row')}"
    ncols = len(data.domain.variables) + len(data.domain.metas)
    desc += f", {ncols} {pl(ncols, 'column')}"

    # NOTE(review): presumably density codes above 1 denote sparse
    # storage - confirm against the table's *_density methods.
    sparseness = [s for s, m in (("features", data.X_density),
                                 ("meta attributes", data.metas_density),
                                 ("targets", data.Y_density)) if m() > 1]
    if sparseness:
        # Must be an f-string: without the prefix the braces were shown
        # literally instead of the list of sparse parts.
        desc += f"; sparse {', '.join(sparseness)}"
    return desc
120129
121- threading .Thread (target = update_size ).start ()
130+ @classmethod
131+ def _p_features (cls , data ):
132+ return cls ._pack_var_counts (data .domain .attributes )
122133
123- self .data_set_name = getattr (data , "name" , "N/A" )
124-
125- self .features = pack_counts (domain .attributes )
126- self .meta_attributes = pack_counts (domain .metas , True )
127- if class_var :
def _p_targets(self, data):
    """Describe the target variable(s) of *data*.

    Returns:
        A description of the single class variable, a count of targets
        for multi-target data, or ``None`` when the domain has no target.
    """
    if class_var := data.domain.class_var:
        if class_var.is_continuous:
            return "numeric target variable"
        nclasses = len(class_var.values)
        return "categorical outcome with " \
               f"{nclasses} {pl(nclasses, 'class|classes')}"

    if class_vars := data.domain.class_vars:
        disc_class = self._count(class_vars, DiscreteVariable)
        cont_class = self._count(class_vars, ContinuousVariable)
        if not cont_class:
            return f"{disc_class} categorical {pl(disc_class, 'target')}"
        if not disc_class:
            # Pass the singular form, as every other pl() call here does;
            # passing the already-plural 'targets' produced a wrong form.
            return f"{cont_class} numeric {pl(cont_class, 'target')}"
        # Mixed targets: list the counts per variable type.
        return "multi-target data,<br/>" + self._pack_var_counts(class_vars)

    return None
152150
153- def _set_report (self , data ):
154- # Attributes are defined in a function called from __init__
155- # pylint: disable=attribute-defined-outside-init
156- domain = data .domain
157- count = self ._count
151+ @classmethod
152+ def _p_metas (cls , data ):
153+ return cls ._pack_var_counts (data .domain .metas )
158154
159- self .data_desc = dd = OrderedDict ()
160- dd ["Name" ] = self .data_set_name
161-
162- if SqlTable is not None and isinstance (data , SqlTable ):
163- connection_string = ' ' .join (
164- '{}={}' .format (key , value )
165- for key , value in data .connection_params .items ()
166- if value is not None and key != 'password' )
167- self .location = "Table '{}', using connection:\n {}" \
168- .format (data .table_name , connection_string )
169- dd ["Rows" ] = data .approx_len ()
170- else :
171- self .location = "Data is stored in memory"
172- dd ["Rows" ] = len (data )
173-
174- def join_if (items ):
175- return ", " .join (s .format (n ) for s , n in items if n )
176-
177- dd ["Features" ] = len (domain .attributes ) > 0 and join_if ((
178- ("{} categorical" , count (domain .attributes , DiscreteVariable )),
179- ("{} numeric" , count (domain .attributes , ContinuousVariable ))
180- ))
181- if domain .class_var :
182- name = domain .class_var .name
183- if domain .class_var .is_discrete :
184- dd ["Target" ] = "categorical outcome '{}'" .format (name )
185- else :
186- dd ["Target" ] = "numeric target '{}'" .format (name )
187- elif domain .class_vars :
188- disc_class = count (domain .class_vars , DiscreteVariable )
189- cont_class = count (domain .class_vars , ContinuousVariable )
190- tt = ""
191- if disc_class :
192- tt += report .plural ("{number} categorical outcome{s}" , disc_class )
193- if cont_class :
194- tt += report .plural ("{number} numeric target{s}" , cont_class )
195- dd ["Meta attributes" ] = len (domain .metas ) > 0 and join_if ((
196- ("{} categorical" , count (domain .metas , DiscreteVariable )),
197- ("{} numeric" , count (domain .metas , ContinuousVariable )),
198- ("{} text" , count (domain .metas , StringVariable ))
199- ))
155+ @staticmethod
156+ def _count (s , tpe ):
157+ return sum (isinstance (x , tpe ) for x in s )
200158
201- def send_report (self ):
202- if self .data_desc :
203- self .report_items (self .data_desc )
159+ @classmethod
def _pack_var_counts(cls, s):
    """Summarise the variables in *s* as counts per variable type.

    Types with no variables are omitted, so the result is an empty string
    when *s* contains none of the listed types.
    """
    kinds = (("categorical", DiscreteVariable),
             ("numeric", ContinuousVariable),
             ("text", StringVariable))
    parts = []
    for name, type_ in kinds:
        count = cls._count(s, type_)
        if count:
            parts.append(f"{count} {name}")
    return ", ".join(parts)
204167
205168
if __name__ == "__main__":  # pragma: no cover
    # Run the widget standalone, fed with the "heart_disease" table.
    preview = WidgetPreview(OWDataInfo)
    preview.run(Table("heart_disease"))
0 commit comments