11from functools import reduce
2+ from types import SimpleNamespace
23
34from AnyQt .QtCore import Qt
45
56import Orange .data
67from Orange .util import Reprable
78from Orange .statistics import distribution
8- from Orange .preprocess import Continuize , Normalize
9- from Orange .preprocess .transformation import \
10- Identity , Indicator , Indicator1 , Normalizer
9+ from Orange .preprocess import Continuize
10+ from Orange .preprocess .transformation import Identity , Indicator , Normalizer
1111from Orange .data .table import Table
1212from Orange .widgets import gui , widget
1313from Orange .widgets .settings import Setting
@@ -34,16 +34,13 @@ class Outputs:
3434 buttons_area_orientation = Qt .Vertical
3535 resizing_enabled = False
3636
37- # continuous treats
38- Leave , NormalizeBySpan , NormalizeBySD = range ( 3 )
37+ Normalize = SimpleNamespace ( Leave = 0 , Standardize = 1 , Center = 2 , Scale = 3 ,
38+ Normalize11 = 4 , Normalize01 = 5 )
3939
40+ settings_version = 2
4041 multinomial_treatment = Setting (0 )
41- zero_based = Setting (1 )
42- continuous_treatment = Setting (Leave )
42+ continuous_treatment = Setting (Normalize .Leave )
4343 class_treatment = Setting (0 )
44-
45- transform_class = Setting (False )
46-
4744 autosend = Setting (True )
4845
4946 multinomial_treats = (
@@ -56,9 +53,13 @@ class Outputs:
5653 ("Divide by number of values" , Continuize .AsNormalizedOrdinal ))
5754
5855 continuous_treats = (
59- ("Leave them as they are" , Continuize .Leave ),
60- ("Normalize by span" , Normalize .NormalizeBySpan ),
61- ("Normalize by standard deviation" , Normalize .NormalizeBySD ))
56+ ("Leave them as they are" , True ),
57+ ("Standardize to μ=0, σ²=1" , False ),
58+ ("Center to μ=0" , False ),
59+ ("Scale to σ²=1" , True ),
60+ ("Normalize to interval [-1, 1]" , False ),
61+ ("Normalize to interval [0, 1]" , False )
62+ )
6263
6364 class_treats = (
6465 ("Leave it as it is" , Continuize .Leave ),
@@ -67,8 +68,6 @@ class Outputs:
6768 ("One class per value" , Continuize .Indicators ),
6869 )
6970
70- value_ranges = ["From -1 to 1" , "From 0 to 1" ]
71-
7271 def __init__ (self ):
7372 super ().__init__ ()
7473
@@ -84,19 +83,12 @@ def __init__(self):
8483 btnLabels = [x [0 ] for x in self .continuous_treats ],
8584 callback = self .settings_changed )
8685
87- box = gui .vBox (self .controlArea , "Categorical Outcomes " )
86+ box = gui .vBox (self .controlArea , "Categorical Outcome(s) " )
8887 gui .radioButtonsInBox (
8988 box , self , "class_treatment" ,
9089 btnLabels = [t [0 ] for t in self .class_treats ],
9190 callback = self .settings_changed )
9291
93- zbbox = gui .vBox (self .controlArea , "Value Range" )
94-
95- gui .radioButtonsInBox (
96- zbbox , self , "zero_based" ,
97- btnLabels = self .value_ranges ,
98- callback = self .settings_changed )
99-
10092 gui .auto_apply (self .buttonsArea , self , "autosend" , box = False )
10193
10294 self .data = None
@@ -120,31 +112,27 @@ def setData(self, data):
120112 self .unconditional_commit ()
121113
122114 def enable_normalization (self ):
123- enable = not (self .data and self .data .is_sparse ())
124- if not enable and self .continuous_treatment in (self .NormalizeBySpan ,
125- self .NormalizeBySD ):
126- self .continuous_treatment = self .Leave
127115 buttons = self .controls .continuous_treatment .buttons
128- buttons [self .NormalizeBySpan ].setEnabled (enable )
129- buttons [self .NormalizeBySD ].setEnabled (enable )
116+ if self .data is not None and self .data .is_sparse ():
117+ if self .continuous_treatment == self .Normalize .Standardize :
118+ self .continuous_treatment = self .Normalize .Scale
119+ else :
120+ self .continuous_treatment = self .Normalize .Leave
121+ for button , (_ , supports_sparse ) \
122+ in zip (buttons , self .continuous_treats ):
123+ button .setEnabled (supports_sparse )
124+ else :
125+ for button in buttons :
126+ button .setEnabled (True )
130127
131128 def constructContinuizer (self ):
132129 conzer = DomainContinuizer (
133- zero_based = self .zero_based ,
134130 multinomial_treatment = self .multinomial_treats [self .multinomial_treatment ][1 ],
135- continuous_treatment = self .continuous_treats [ self . continuous_treatment ][ 1 ] ,
131+ continuous_treatment = self .continuous_treatment ,
136132 class_treatment = self .class_treats [self .class_treatment ][1 ]
137133 )
138134 return conzer
139135
140- # def sendPreprocessor(self):
141- # continuizer = self.constructContinuizer()
142- # self.send("Preprocessor", PreprocessedLearner(
143- # lambda data, weightId=0, tc=(self.targetValue if self.classTreatment else -1):
144- # Table(continuizer(data, weightId, tc)
145- # if data.domain.has_discrete_class
146- # else continuizer(data, weightId), data)))
147-
148136 def commit (self ):
149137 continuizer = self .constructContinuizer ()
150138 if self .data :
@@ -155,16 +143,28 @@ def commit(self):
155143 else :
156144 self .Outputs .data .send (self .data ) # None or empty data
157145
158-
159146 def send_report (self ):
160147 self .report_items (
161148 "Settings" ,
162149 [("Categorical features" ,
163150 self .multinomial_treats [self .multinomial_treatment ][0 ]),
164151 ("Numeric features" ,
165152 self .continuous_treats [self .continuous_treatment ][0 ]),
166- ("Class" , self .class_treats [self .class_treatment ][0 ]),
167- ("Value range" , self .value_ranges [self .zero_based ])])
153+ ("Class" , self .class_treats [self .class_treatment ][0 ])])
154+
155+ @classmethod
156+ def migrate_settings (cls , settings , version ):
157+ if version < 2 :
158+ Normalize = cls .Normalize
159+ cont_treat = settings .pop ("continuous_treatment" , 0 )
160+ zero_based = settings .pop ("zero_based" , True )
161+ if cont_treat == 1 :
162+ if zero_based :
163+ settings ["continuous_treatment" ] = Normalize .Normalize01
164+ else :
165+ settings ["continuous_treatment" ] = Normalize .Normalize11
166+ elif cont_treat == 2 :
167+ settings ["continuous_treatment" ] = Normalize .Standardize
168168
169169
170170class WeightedIndicator (Indicator ):
@@ -179,56 +179,33 @@ def transform(self, c):
179179 return t
180180
181181
182- class WeightedIndicator1 (Indicator1 ):
183- def __init__ (self , variable , value , weight = 1.0 ):
184- super ().__init__ (variable , value )
185- self .weight = weight
186-
187- def transform (self , c ):
188- t = super ().transform (c ) * self .weight
189- if self .weight != 1.0 :
190- t *= self .weight
191- return t
192-
193-
194- def make_indicator_var (source , value_ind , weight = None , zero_based = True ):
195- if zero_based and weight is None :
182+ def make_indicator_var (source , value_ind , weight = None ):
183+ if weight is None :
196184 indicator = Indicator (source , value = value_ind )
197- elif zero_based :
198- indicator = WeightedIndicator (source , value = value_ind , weight = weight )
199- elif weight is None :
200- indicator = Indicator1 (source , value = value_ind )
201185 else :
202- indicator = WeightedIndicator1 (source , value = value_ind , weight = weight )
186+ indicator = WeightedIndicator (source , value = value_ind , weight = weight )
203187 return Orange .data .ContinuousVariable (
204188 "{}={}" .format (source .name , source .values [value_ind ]),
205189 compute_value = indicator
206190 )
207191
208192
209- def dummy_coding (var , base_value = 0 , zero_based = True ):
193+ def dummy_coding (var , base_value = 0 ):
210194 N = len (var .values )
211- return [make_indicator_var (var , i , zero_based = zero_based )
195+ return [make_indicator_var (var , i )
212196 for i in range (N ) if i != base_value ]
213197
214198
215- def one_hot_coding (var , zero_based = True ):
199+ def one_hot_coding (var ):
216200 N = len (var .values )
217- return [make_indicator_var (var , i , zero_based = zero_based )
218- for i in range (N )]
201+ return [make_indicator_var (var , i ) for i in range (N )]
219202
220203
221- def continuize_domain (data_or_domain ,
204+ def continuize_domain (data ,
222205 multinomial_treatment = Continuize .Indicators ,
223206 continuous_treatment = Continuize .Leave ,
224- class_treatment = Continuize .Leave ,
225- zero_based = True ):
226-
227- if isinstance (data_or_domain , Orange .data .Domain ):
228- data , domain = None , data_or_domain
229- else :
230- data , domain = data_or_domain , data_or_domain .domain
231-
207+ class_treatment = Continuize .Leave ):
208+ domain = data .domain
232209 def needs_dist (var , mtreat , ctreat ):
233210 "Does the `var` need a distribution given specified flags"
234211 if var .is_discrete :
@@ -258,14 +235,11 @@ def needs_dist(var, mtreat, ctreat):
258235 dist_iter = iter (dist )
259236
260237 newattrs = [continuize_var (var , next (dist_iter ) if needs_dist else None ,
261- multinomial_treatment , continuous_treatment ,
262- zero_based )
238+ multinomial_treatment , continuous_treatment )
263239 for var , needs_dist in zip (domain .attributes , attr_needs_dist )]
264-
265240 newclass = [continuize_var (var ,
266241 next (dist_iter ) if needs_dist else None ,
267- class_treatment , Continuize .Remove ,
268- zero_based )
242+ class_treatment , Continuize .Remove )
269243 for var , needs_dist in zip (domain .class_vars , cls_needs_dist )]
270244
271245 newattrs = reduce (list .__iadd__ , newattrs , [])
@@ -276,16 +250,16 @@ def needs_dist(var, mtreat, ctreat):
276250def continuize_var (var ,
277251 data_or_dist = None ,
278252 multinomial_treatment = Continuize .Indicators ,
279- continuous_treatment = Continuize .Leave ,
280- zero_based = True ):
281-
253+ continuous_treatment = Continuize .Leave ):
282254 def continuize_continuous ():
283- if continuous_treatment == Normalize . NormalizeBySpan :
284- return [ normalize_by_span ( var , data_or_dist , zero_based )]
285- elif continuous_treatment == Normalize . NormalizeBySD :
286- return [ normalize_by_sd ( var , data_or_dist ) ]
287- else :
255+ dist = _ensure_dist ( var , data_or_dist )
256+ treatments = [ lambda var , _ : var ,
257+ normalize_by_sd , center_to_mean , divide_by_sd ,
258+ normalize_to_11 , normalize_to_01 ]
259+ if dist . shape [ 1 ] == 0 :
288260 return [var ]
261+ new_var = treatments [continuous_treatment ](var , dist )
262+ return [new_var ]
289263
290264 def continuize_discrete ():
291265 if len (var .values ) > 2 and \
@@ -299,16 +273,16 @@ def continuize_discrete():
299273 elif multinomial_treatment == Continuize .AsOrdinal :
300274 return [ordinal_to_continuous (var )]
301275 elif multinomial_treatment == Continuize .AsNormalizedOrdinal :
302- return [ordinal_to_norm_continuous (var , zero_based )]
276+ return [ordinal_to_norm_continuous (var )]
303277 elif multinomial_treatment == Continuize .Indicators :
304- return one_hot_coding (var , zero_based )
278+ return one_hot_coding (var )
305279 elif multinomial_treatment in (
306280 Continuize .FirstAsBase , Continuize .RemoveMultinomial ):
307- return dummy_coding (var , zero_based = zero_based )
281+ return dummy_coding (var )
308282 elif multinomial_treatment == Continuize .FrequentAsBase :
309283 dist = _ensure_dist (var , data_or_dist )
310284 modus = dist .modus ()
311- return dummy_coding (var , base_value = modus , zero_based = zero_based )
285+ return dummy_coding (var , base_value = modus )
312286 elif multinomial_treatment == Continuize .Leave :
313287 return [var ]
314288 raise ValueError ("Invalid value of `multinomial_treatment`" )
@@ -345,68 +319,67 @@ def ordinal_to_continuous(var):
345319 compute_value = Identity (var ))
346320
347321
348- def ordinal_to_norm_continuous (var , zero_based = True ):
322+ def ordinal_to_norm_continuous (var ):
349323 n_values = len (var .values )
350- if zero_based :
351- return normalized_var (var , 0 , 1 / (n_values - 1 ))
352- else :
353- return normalized_var (var , (n_values - 1 ) / 2 , 2 / (n_values - 1 ))
324+ return normalized_var (var , 0 , 1 / (n_values - 1 ))
354325
355326
356- def normalize_by_span (var , data_or_dist , zero_based = True ):
357- dist = _ensure_dist (var , data_or_dist )
358- if dist .shape [1 ] > 0 :
359- v_max , v_min = dist .max (), dist .min ()
360- else :
361- v_max , v_min = 0 , 0
327+ def normalize_by_sd (var , dist ):
328+ mean , sd = dist .mean (), dist .standard_deviation ()
329+ sd = sd if sd > 1e-10 else 1
330+ return normalized_var (var , mean , 1 / sd )
331+
332+
333+ def center_to_mean (var , dist ):
334+ return normalized_var (var , dist .mean (), 1 )
335+
336+
337+ def divide_by_sd (var , dist ):
338+ sd = dist .standard_deviation ()
339+ sd = sd if sd > 1e-10 else 1
340+ return normalized_var (var , 0 , 1 / sd )
341+
342+
343+ def normalize_to_11 (var , dist ):
344+ return normalize_by_span (var , dist , False )
345+
346+
347+ def normalize_to_01 (var , dist ):
348+ return normalize_by_span (var , dist , True )
349+
350+
351+ def normalize_by_span (var , dist , zero_based = True ):
352+ v_max , v_min = dist .max (), dist .min ()
362353 span = (v_max - v_min )
363354 if span < 1e-15 :
364355 span = 1
365-
366356 if zero_based :
367357 return normalized_var (var , v_min , 1 / span )
368358 else :
369359 return normalized_var (var , (v_min + v_max ) / 2 , 2 / span )
370360
371361
372- def normalize_by_sd (var , data_or_dist ):
373- dist = _ensure_dist (var , data_or_dist )
374- if dist .shape [1 ] > 0 :
375- mean , sd = dist .mean (), dist .standard_deviation ()
376- else :
377- mean , sd = 0 , 1
378- sd = sd if sd > 1e-10 else 1
379- return normalized_var (var , mean , 1 / sd )
380-
381-
382362class DomainContinuizer (Reprable ):
383- def __init__ (self , zero_based = True ,
363+ def __init__ (self ,
384364 multinomial_treatment = Continuize .Indicators ,
385365 continuous_treatment = Continuize .Leave ,
386366 class_treatment = Continuize .Leave ):
387- self .zero_based = zero_based
388367 self .multinomial_treatment = multinomial_treatment
389368 self .continuous_treatment = continuous_treatment
390369 self .class_treatment = class_treatment
391370
392371 def __call__ (self , data ):
393372 treat = self .multinomial_treatment
394- if isinstance (data , Orange .data .Domain ):
395- domain , data = data , None
396- else :
397- domain = data .domain
398-
373+ domain = data .domain
399374 if (treat == Continuize .ReportError and
400375 any (var .is_discrete and len (var .values ) > 2 for var in domain )):
401376 raise ValueError ("Domain has multinomial attributes" )
402377
403378 newdomain = continuize_domain (
404- data or domain ,
379+ data ,
405380 self .multinomial_treatment ,
406381 self .continuous_treatment ,
407- self .class_treatment ,
408- self .zero_based
409- )
382+ self .class_treatment )
410383 return newdomain
411384
412385
0 commit comments