1+ """Widget for creating classes from non-numeric attribute by substrings"""
12import numpy as np
23
34from AnyQt .QtWidgets import QGridLayout , QLabel , QLineEdit , QSizePolicy
1314from Orange .widgets .widget import Msg
1415
1516
def map_by_substring(a, patterns, case_sensitive, match_beginning):
    """
    Map values in `a` using a list of patterns. The patterns are considered in
    order of appearance: each value is assigned the index of the first
    pattern that matches it.

    Args:
        a (np.array): input array of `dtype` `str`; other array-likes (lists,
            object arrays, empty sequences) are converted to `str` first
        patterns (list of str): list of strings
        case_sensitive (bool): case sensitive match
        match_beginning (bool): match only at the beginning of the string

    Returns:
        np.array of floats representing indices of matched patterns; values
        that match no pattern are `np.nan`
    """
    # np.char functions reject non-string arrays, including the float array
    # numpy builds from an empty list, so coerce to a string dtype up front.
    a = np.asarray(a, dtype=str)
    res = np.full(len(a), np.nan)
    if not case_sensitive:
        a = np.char.lower(a)
        patterns = [pattern.lower() for pattern in patterns]
    # Walk the patterns backwards so that earlier patterns overwrite the
    # indices written by later ones, i.e. the first matching pattern wins.
    for val_idx, pattern in reversed(list(enumerate(patterns))):
        indices = np.char.find(a, pattern)
        matches = indices == 0 if match_beginning else indices != -1
        res[matches] = val_idx
    return res
2640
2741
2842class ValueFromStringSubstring (Transformation ):
43+ """
44+ Transformation that computes a discrete variable from a string variable by
45+ pattern matching.
46+
47+ Given patterns `["abc", "a", "bc", ""]`, string data
48+ `["abcd", "aa", "bcd", "rabc", "x"]` is transformed to values of the new
49+ attribute with indices `[0, 1, 2, 0, 3]`.
50+
51+ Args:
52+ variable (:obj:`~Orange.data.StringVariable`): the original variable
53+ patterns (list of str): list of string patterns
54+ case_sensitive (bool, optional): if set to `True`, the match is case
55+ sensitive
56+ match_beginning (bool, optional): if set to `True`, the pattern must
57+ appear at the beginning of the string
58+ """
2959 def __init__ (self , variable , patterns ,
3060 case_sensitive = False , match_beginning = False ):
3161 super ().__init__ (variable )
@@ -34,6 +64,15 @@ def __init__(self, variable, patterns,
3464 self .match_beginning = match_beginning
3565
3666 def transform (self , c ):
67+ """
68+ Transform the given data.
69+
70+ Args:
71+ c (np.array): an array of type that can be cast to dtype `str`
72+
73+ Returns:
74+ np.array of floats representing indices of matched patterns
75+ """
3776 nans = np .equal (c , None )
3877 c = c .astype (str )
3978 c [nans ] = ""
@@ -44,6 +83,23 @@ def transform(self, c):
4483
4584
4685class ValueFromDiscreteSubstring (Lookup ):
86+ """
87+ Transformation that computes a discrete variable from discrete variable by
88+ pattern matching.
89+
90+ Say that the original attribute has values
91+ `["abcd", "aa", "bcd", "rabc", "x"]`. Given patterns
92+ `["abc", "a", "bc", ""]`, the values are mapped to the values of the new
93+ attribute with indices `[0, 1, 2, 0, 3]`.
94+
95+ Args:
96+ variable (:obj:`~Orange.data.DiscreteVariable`): the original variable
97+ patterns (list of str): list of string patterns
98+ case_sensitive (bool, optional): if set to `True`, the match is case
99+ sensitive
100+ match_beginning (bool, optional): if set to `True`, the pattern must
101+ appear at the beginning of the string
102+ """
47103 def __init__ (self , variable , patterns ,
48104 case_sensitive = False , match_beginning = False ):
49105 super ().__init__ (variable , [])
@@ -52,6 +108,8 @@ def __init__(self, variable, patterns,
52108 self .patterns = patterns # Finally triggers computation of the lookup
53109
54110 def __setattr__ (self , key , value ):
111+ """__setattr__ is overloaded to recompute the lookup table when the
112+ patterns, the original attribute or the flags change."""
55113 super ().__setattr__ (key , value )
56114 if hasattr (self , "patterns" ) and \
57115 key in ("case_sensitive" , "match_beginning" , "patterns" ,
@@ -88,11 +146,20 @@ class Warning(widget.OWWidget.Warning):
88146 def __init__ (self ):
89147 super ().__init__ ()
90148 self .data = None
149+
150+ # The following lists are of the same length as self.active_rules
151+
152+ #: list of pairs with counts of matches for each pattern when the
153+ # patterns are applied in order and when applied on the entire set,
154+ # disregarding the preceding patterns
155+ self .match_counts = []
156+
157+ #: list of list of QLineEdit: line edit pairs for each pattern
91158 self .line_edits = []
159+ #: list of QPushButton: list of remove buttons
92160 self .remove_buttons = []
161+ #: list of list of QLabel: pairs of labels with counts
93162 self .counts = []
94- self .match_counts = []
95- self .setSizePolicy (QSizePolicy .Preferred , QSizePolicy .Maximum )
96163
97164 patternbox = gui .vBox (self .controlArea , box = "Patterns" )
98165 box = gui .hBox (patternbox )
@@ -102,6 +169,8 @@ def __init__(self):
102169 model = DomainModel (valid_types = (StringVariable , DiscreteVariable )),
103170 sizePolicy = (QSizePolicy .MinimumExpanding , QSizePolicy .Fixed ))
104171
172+ #: QGridLayout: the layout that contains the remove buttons, line edits
173+ # and count labels. The lines are added and removed dynamically.
105174 self .rules_box = rules_box = QGridLayout ()
106175 patternbox .layout ().addLayout (self .rules_box )
107176 self .add_button = gui .button (None , self , "+" , flat = True ,
@@ -129,17 +198,27 @@ def __init__(self):
129198 gui .rubber (box )
130199 gui .button (box , self , "Apply" , autoDefault = False , callback = self .apply )
131200
201+ # TODO: Resizing upon changing the number of rules does not work
202+ self .setSizePolicy (QSizePolicy .Preferred , QSizePolicy .Maximum )
203+
@property
def active_rules(self):
    """
    Return the rules stored for the currently selected attribute.

    The rules are kept in `self.rules` under the attribute's name (or under
    the falsy `self.attribute` itself when no attribute is selected). If
    there is no entry yet, it is created with two default rules, "C1" and
    "C2", both with empty patterns.
    """
    attr = self.attribute
    key = attr and attr.name
    return self.rules.setdefault(key, [["C1", ""], ["C2", ""]])
136213
def rules_to_edits(self):
    """Fill the line edits with the texts of the currently active rules."""
    rows = zip(self.line_edits, self.active_rules)
    for row_edits, row_texts in rows:
        # Each row pairs its line edits with the texts of one rule.
        for line_edit, value in zip(row_edits, row_texts):
            line_edit.setText(value)
141219
142220 def set_data (self , data ):
221+ """Input data signal handler."""
143222 self .closeContext ()
144223 self .rules = {}
145224 self .data = data
@@ -156,14 +235,19 @@ def set_data(self, data):
156235 self .apply ()
157236
def update_rules(self):
    """
    React to a change of the rules.

    Adjusts the number of lines in the form, fills the line edits from the
    rules and refreshes the match counts, in that order. The widget does not
    have auto-apply.
    """
    steps = (self.adjust_n_rule_rows, self.rules_to_edits, self.update_counts)
    for step in steps:
        step()
    # TODO: Indicator that changes need to be applied
162245
def options_changed(self):
    """Recompute the match counts.

    NOTE(review): presumably the callback of the matching-option controls;
    the connection is not visible in this part of the file.
    """
    self.update_counts()
165248
166249 def adjust_n_rule_rows (self ):
250+ """Add or remove lines if needed and fix the tab order."""
167251 def _add_line ():
168252 self .line_edits .append ([])
169253 n_lines = len (self .line_edits )
@@ -213,21 +297,29 @@ def _fix_tab_order():
213297 _fix_tab_order ()
214298
def add_row(self):
    """Append an empty rule at the end and adjust the form rows to match."""
    blank_rule = ["", ""]
    self.active_rules.append(blank_rule)
    self.adjust_n_rule_rows()
218303
def remove_row(self):
    """Remove the rule whose remove button sent the signal."""
    # The position of the sending button identifies the rule to drop.
    clicked = self.sender()
    position = self.remove_buttons.index(clicked)
    self.active_rules.pop(position)
    self.update_rules()
223309
def sync_edit(self, text):
    """
    Handle a change in a line edit.

    Stores the new text into the sending edit's `row` at its `col_idx` and
    refreshes the match counts.
    """
    source = self.sender()
    rule, column = source.row, source.col_idx
    rule[column] = text
    self.update_counts()
228315
229316 def update_counts (self ):
317+ """Recompute and update the counts of matches."""
230318 def _matcher (strings , pattern ):
319+ """Return indices of strings into patterns; consider case
320+ sensitivity and matching at the beginning. The given strings are
321+ assumed to be in lower case if match is case insensitive. Patterns
322+ are fixed on the fly."""
231323 if not self .case_sensitive :
232324 pattern = pattern .lower ()
233325 indices = np .char .find (strings , pattern )
@@ -237,6 +329,15 @@ def _lower_if_needed(strings):
237329 return strings if self .case_sensitive else np .char .lower (strings )
238330
239331 def _string_counts ():
332+ """
333+ Generate pairs of arrays for each rule until running out of data
334+ instances. np.sum over the two arrays in each pair gives the
335+ number of matches of the remaining instances (considering the
336+ order of patterns) and of the original data.
337+
338+ For _string_counts, the arrays contain bool masks referring to the
339+ original data
340+ """
240341 nonlocal data
241342 data = data .astype (str )
242343 data = data [~ np .char .equal (data , "" )]
@@ -251,6 +352,10 @@ def _string_counts():
251352 break
252353
253354 def _discrete_counts ():
355+ """
356+ Generate pairs similar to _string_counts, except that the arrays
357+ contain bin counts for the attribute's values matching the pattern.
358+ """
254359 attr_vals = np .array (attr .values )
255360 attr_vals = _lower_if_needed (attr_vals )
256361 bins = bincount (data , max_val = len (attr .values ) - 1 )[0 ]
@@ -263,11 +368,13 @@ def _discrete_counts():
263368 break
264369
265370 def _clear_labels ():
371+ """Clear all labels"""
266372 for lab_matched , lab_total in self .counts :
267373 lab_matched .setText ("" )
268374 lab_total .setText ("" )
269375
270376 def _set_labels ():
377+ """Set the labels to show the counts"""
271378 for (n_matched , n_total ), (lab_matched , lab_total ) in \
272379 zip (self .match_counts , self .counts ):
273380 n_before = n_total - n_matched
@@ -287,6 +394,7 @@ def _set_labels():
287394 _set_labels ()
288395
289396 def apply (self ):
397+ """Output the transformed data."""
290398 if not self .attribute or not self .active_rules :
291399 self .send ("Data" , None )
292400 return
@@ -307,6 +415,7 @@ def apply(self):
307415
308416
309417def main (): # pragma: no cover
418+ """Simple test for manual inspection of the widget"""
310419 import sys
311420 from AnyQt .QtWidgets import QApplication
312421
0 commit comments