1414from Orange .data .sql .table import SqlTable , LARGE_TABLE , DEFAULT_SAMPLE_TIME
1515from Orange .preprocess import Discretize
1616from Orange .preprocess .discretize import EqualFreq
17- from Orange .preprocess .score import ReliefF , RReliefF
17+ from Orange .preprocess .score import ReliefF
1818from Orange .statistics .distribution import get_distribution , get_distributions
1919from Orange .widgets import gui , widget
2020from Orange .widgets .gui import OWComponent
3131class MosaicVizRank (VizRankDialog , OWComponent ):
3232 """VizRank dialog for Mosaic"""
3333 captionTitle = "Mosaic Ranking"
34- K = 10 # for ReliefF
3534 max_attrs = Setting (3 )
3635
3736 pairSelected = Signal (Variable , Variable , Variable , Variable )
@@ -42,35 +41,45 @@ def __init__(self, master):
4241 VizRankDialog .__init__ (self , master )
4342 OWComponent .__init__ (self , master )
4443
45- box = gui .vBox (self )
44+ box = gui .hBox (self )
4645 self .max_attr_spin = gui .spin (
4746 box , self , "max_attrs" , 2 , 4 ,
4847 label = "Limit the number of attributes to: " ,
4948 controlWidth = 50 , alignment = Qt .AlignRight ,
5049 callback = self .max_attr_changed )
50+ gui .rubber (box )
5151 self .layout ().addWidget (self .button )
52- self .resize (320 , 512 )
53- self .attrs = []
52+ self .attr_ordering = None
5453 self .marginal = {}
5554 self .last_run_max_attr = None
5655
5756 def sizeHint (self ):
58- return QSize (320 , 512 )
57+ return QSize (400 , 512 )
58+
59+ def initialize (self ):
60+ """Clear the ordering to trigger recomputation when needed"""
61+ super ().initialize ()
62+ self .attr_ordering = None
63+
64+ def initialize_keep_ordering (self ):
65+ """Initialize triggered by change of coloring"""
66+ super ().initialize ()
5967
6068 def run (self ):
6169 """
6270 Add handling of the spin box for maximal number of attributes.
6371
64- Disable the box before running and enable afterwards.
65- Also, if the number of attributes is different than in the last run,
66- reset the saved state (if it was paused).
72+ Disable the spin for maximal number of attributes before running and
73+ enable afterwards. Also, if the number of attributes is different than
74+ in the last run, reset the saved state (if it was paused).
6775 """
6876 if self .max_attrs != self .last_run_max_attr :
6977 self .saved_state = None
7078 self .saved_progress = 0
7179 if self .saved_state is None :
7280 self .scores = []
7381 self .rank_model .clear ()
82+ self .compute_attr_order ()
7483 self .last_run_max_attr = self .max_attrs
7584 self .max_attr_spin .setDisabled (True )
7685 try :
@@ -85,45 +94,65 @@ def max_attr_changed(self):
8594 The method does not reset anything so the user can still see the
8695 results until actually restarting the search.
8796 """
88- self .button .setText ("Start" )
97+ if self .max_attrs != self .last_run_max_attr or self .saved_state is None :
98+ self .button .setText ("Start" )
99+ else :
100+ self .button .setText ("Continue" )
89101 self .button .setEnabled (self .check_preconditions ())
90102
103+ def coloring_changed (self ):
104+ self .stop_and_reset (self .initialize_keep_ordering )
105+
91106 def check_preconditions (self ):
92107 """Require at least one variable to allow ranking."""
93108 self .Information .add_message ("no_attributes" , "No variables to rank." )
94109 self .Information .no_attributes .clear ()
95- if not super ().check_preconditions ():
110+ data = self .master .discrete_data
111+ if not super ().check_preconditions () or data is None :
96112 return False
97- if not self . master . discrete_data .domain .attributes :
113+ if not data .domain .attributes :
98114 self .Information .no_attributes ()
99115 return False
100116 return True
101117
102- def score_heuristic (self ):
118+ def compute_attr_order (self ):
103119 """
104- Order attributes by ReliefF or RReliefF if there is a target
105- variable. In case of ties or without target, other by name.
120+ Order attributes by Relief if there is a target variable. In case of
121+ ties or without target, order by name.
122+
123+ Add the class variable at the beginning when not coloring by class
124+ distribution.
125+
126+ If `self.attr_ordering` is not `None`, keep the ordering and just add or remove
127+ the class as needed.
106128 """
107129 data = self .master .discrete_data
108- if data .domain .class_var is None :
109- return data .domain .attributes
110- relief = ReliefF if data .domain .has_discrete_class else RReliefF
111- weights = relief (n_iterations = 100 , k_nearest = self .K )(data )
112- attrs = sorted (zip (weights , data .domain .attributes ),
113- key = lambda x : (- x [0 ], x [1 ].name ))
114- return [a for _ , a in attrs ]
130+ class_var = data .domain .class_var
131+ if not self .attr_ordering :
132+ if class_var is None :
133+ self .attr_ordering = sorted (data .domain , key = attrgetter ("name" ))
134+ else :
135+ weights = ReliefF (n_iterations = 100 , k_nearest = 10 )(data )
136+ attrs = sorted (zip (weights , data .domain .attributes ),
137+ key = lambda x : (- x [0 ], x [1 ].name ))
138+ self .attr_ordering = [a for _ , a in attrs ]
139+ if class_var is not None :
140+ if self ._compute_class_dists ():
141+ if self .attr_ordering [0 ] is class_var :
142+ del self .attr_ordering [0 ]
143+ elif self .attr_ordering [0 ] is not class_var :
144+ self .attr_ordering .insert (0 , class_var )
115145
116146 def _compute_class_dists (self ):
117- master = self .master
118- return master .interior_coloring == master .CLASS_DISTRIBUTION and \
119- master .data .domain .has_discrete_class
147+ return self .master .interior_coloring == self .master .CLASS_DISTRIBUTION
120148
121149 def state_count (self ):
122150 """
123151 Return the number of combinations, starting with a single attribute
124152 if Mosaic is colored by class distributions, and two if by Pearson
125153 """
126- n_attrs = len (self .master .discrete_data .domain .attributes )
154+ self .compute_attr_order ()
155+ n_attrs = len (self .attr_ordering )
127156 min_attrs = 1 if self ._compute_class_dists () else 2
128157 max_attrs = min (n_attrs , self .max_attrs )
129158 return sum (comb (n_attrs , k , exact = True )
@@ -136,11 +165,10 @@ def iterate_states(self, state):
136165 distributions, and two if by Pearson.
137166 """
138167 # If we put initialization of `self.attr_ordering` to `initialize`,
139- # `score_heuristic` would be run on every call to `set_data`.
168+ # `compute_attr_order` would be run on every call to master's `set_data`.
140169 master = self .master
141170 data = master .discrete_data
142171 if state is None : # on the first call, compute order
143- self .attrs = self .score_heuristic ()
144172 if self ._compute_class_dists ():
145173 self .marginal = get_distribution (data , data .domain .class_var )
146174 self .marginal .normalize ()
@@ -153,12 +181,15 @@ def iterate_states(self, state):
153181 n_attrs = len (data .domain .attributes )
154182 while True :
155183 yield state
184+ # Reset while running; just abort
185+ if self .attr_ordering is None :
186+ break
156187 for up in range (len (state )):
157188 state [up ] += 1
158189 if up + 1 == len (state ) or state [up ] < state [up + 1 ]:
159190 break
160191 state [up ] = up
161- if state [- 1 ] == len (self .attrs ):
192+ if state [- 1 ] == len (self .attr_ordering ):
162193 if len (state ) < min (self .max_attrs , n_attrs ):
163194 state = list (range (len (state ) + 1 ))
164195 else :
@@ -175,7 +206,7 @@ def compute_score(self, state):
175206 master = self .master
176207 data = master .discrete_data
177208 domain = data .domain
178- attrlist = [self .attrs [i ] for i in state ]
209+ attrlist = [self .attr_ordering [i ] for i in state ]
179210 cond_dist = get_conditional_distribution (data , attrlist )[0 ]
180211 n = cond_dist ["" ]
181212 ss = 0
@@ -200,7 +231,8 @@ def compute_score(self, state):
200231 mul ,
201232 (self .marginal [attr_idx ][ind ]
202233 for attr_idx , ind in zip (attr_indices , indices )))
203- ss += (expected - observed ) ** 2 / expected
234+ if expected > 1e-6 :
235+ ss += (expected - observed ) ** 2 / expected
204236 if class_values :
205237 dof = (len (class_values ) - 1 ) * \
206238 reduce (mul , (len (attr .values ) for attr in attrlist ))
@@ -213,8 +245,12 @@ def on_selection_changed(self, selected, deselected):
213245 self .selectionChanged .emit (attrs + (None , ) * (4 - len (attrs )))
214246
215247 def row_for_state (self , score , state ):
216- attrs = tuple (sorted ((self .attrs [x ] for x in state ),
217- key = attrgetter ("name" )))
248+ """The row consists of attributes sorted by name; class is at the
249+ beginning, if present, so it's on the x-axis and not lost somewhere."""
250+ class_var = self .master .data .domain .class_var
251+ attrs = tuple (
252+ sorted ((self .attr_ordering [x ] for x in state ),
253+ key = lambda attr : (1 - (attr is class_var ), attr .name )))
218254 item = QStandardItem (", " .join (a .name for a in attrs ))
219255 item .setData (attrs , self ._AttrRole )
220256 return [item ]
@@ -237,14 +273,12 @@ class OWMosaicDisplay(OWWidget):
237273
238274 settingsHandler = DomainContextHandler ()
239275 use_boxes = Setting (True )
276+ interior_coloring = Setting (CLASS_DISTRIBUTION )
240277 variable1 = ContextSetting ("" , exclude_metas = False )
241278 variable2 = ContextSetting ("" , exclude_metas = False )
242279 variable3 = ContextSetting ("" , exclude_metas = False )
243280 variable4 = ContextSetting ("" , exclude_metas = False )
244281 selection = ContextSetting (set ())
245- # interior_coloring is context setting to properly reset it
246- # if the widget switches to regression and back (set setData)
247- interior_coloring = ContextSetting (CLASS_DISTRIBUTION )
248282
249283 BAR_WIDTH = 5
250284 SPACING = 4
@@ -305,12 +339,16 @@ def __init__(self):
305339 gui .rubber (self .controlArea )
306340
307341 def sizeHint (self ):
308- return QSize (530 , 720 )
342+ return QSize (720 , 530 )
309343
310344 def _compare_with_total (self ):
311- if self .data and self .data .domain .has_discrete_class :
345+ if self .data is not None and \
346+ self .data .domain .class_var is not None and \
347+ self .interior_coloring != self .CLASS_DISTRIBUTION :
312348 self .interior_coloring = self .CLASS_DISTRIBUTION
313- self .coloring_changed ()
349+ self .coloring_changed () # This also calls self.update_graph
350+ else :
351+ self .update_graph ()
314352
315353 def init_combos (self , data ):
316354 for combo in self .attr_combos :
@@ -359,28 +397,26 @@ def set_data(self, data):
359397 self .closeContext ()
360398 self .data = data
361399 self .init_combos (self .data )
362- if not self .data :
400+ if self .data is None :
363401 self .discrete_data = None
364- return
365- if any ( attr . is_continuous for attr in data . domain ):
366- self . discrete_data = Discretize ( method = EqualFreq (n = 4 ))(data )
402+ elif any ( attr . is_continuous for attr in data . domain ):
403+ self . discrete_data = Discretize (
404+ method = EqualFreq (n = 4 ), discretize_classes = True )(data )
367405 else :
368406 self .discrete_data = self .data
369407
370- if self .data .domain .class_var is None :
371- self .rb_colors .setDisabled (True )
372- disc_class = False
373- else :
374- self .rb_colors .setDisabled (False )
375- disc_class = self .data .domain .has_discrete_class
376- self .rb_colors .group .button (2 ).setDisabled (not disc_class )
377- self .bar_button .setDisabled (not disc_class )
378- self .interior_coloring = \
379- self .CLASS_DISTRIBUTION if disc_class else self .PEARSON
380408 self .vizrank .stop_and_reset ()
381409 self .vizrank_button .setEnabled (
382- self .data is not None and self .data .domain .class_var is not None
383- and len (self .data .domain .attributes ) >= 1 and len (self .data ) > 1 )
410+ self .data is not None and len (self .data ) > 1 \
411+ and len (self .data .domain .attributes ) >= 1 )
412+
413+ if self .data is None :
414+ return
415+
416+ has_class = self .data .domain .class_var is not None
417+ self .rb_colors .setDisabled (not has_class )
418+ self .interior_coloring = \
419+ self .CLASS_DISTRIBUTION if has_class else self .PEARSON
384420
385421 self .openContext (self .data )
386422
@@ -411,7 +447,7 @@ def clear_selection(self):
411447 self .send_selection ()
412448
413449 def coloring_changed (self ):
414- self .vizrank .stop_and_reset ()
450+ self .vizrank .coloring_changed ()
415451 self .update_graph ()
416452
417453 def reset_graph (self ):
@@ -671,7 +707,7 @@ def add_rect(x0, x1, y0, y1, condition="",
671707 if x1 - x0 + y1 - y0 == 2 :
672708 y1 += 1
673709
674- if class_var and class_var . is_discrete :
710+ if class_var :
675711 colors = [QColor (* col ) for col in class_var .colors ]
676712 else :
677713 colors = None
@@ -922,7 +958,7 @@ def get_conditional_distribution(data, attrs):
922958 dist = defaultdict (int )
923959 cond_dist ["" ] = dist ["" ] = len (data )
924960 all_attrs = [data .domain [a ] for a in attrs ]
925- if data .domain .has_discrete_class :
961+ if data .domain .class_var is not None :
926962 all_attrs .append (data .domain .class_var )
927963
928964 for i in range (1 , len (all_attrs ) + 1 ):
0 commit comments