Skip to content

Commit 1bd15e9

Browse files
committed
Create Class: Add docstrings
1 parent cccfb61 commit 1bd15e9

File tree

2 files changed

+117
-8
lines changed

2 files changed

+117
-8
lines changed

Orange/widgets/data/owcreateclass.py

Lines changed: 113 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
"""Widget for creating classes from non-numeric attribute by substrings"""
12
import numpy as np
23

34
from AnyQt.QtWidgets import QGridLayout, QLabel, QLineEdit, QSizePolicy
@@ -13,19 +14,48 @@
1314
from Orange.widgets.widget import Msg
1415

1516

16-
def map_by_substring(a, patterns, case_sensitive, at_beginning):
17+
def map_by_substring(a, patterns, case_sensitive, match_beginning):
18+
"""
19+
Map values in `a` using a list of patterns. The patterns are considered in
20+
order of appearance.
21+
22+
Args:
23+
a (np.array): input array of `dtype` `str`
24+
patterns (list of str): list of strings
25+
case_sensitive (bool): case sensitive match
26+
match_beginning (bool): match only at the beginning of the string
27+
28+
Returns:
29+
np.array of floats representing indices of matched patterns
30+
"""
1731
res = np.full(len(a), np.nan)
1832
if not case_sensitive:
1933
a = np.char.lower(a)
2034
patterns = (pattern.lower() for pattern in patterns)
2135
for val_idx, pattern in reversed(list(enumerate(patterns))):
2236
indices = np.char.find(a, pattern)
23-
matches = indices == 0 if at_beginning else indices != -1
37+
matches = indices == 0 if match_beginning else indices != -1
2438
res[matches] = val_idx
2539
return res
2640

2741

2842
class ValueFromStringSubstring(Transformation):
43+
"""
44+
Transformation that computes a discrete variable from a string variable by
45+
pattern matching.
46+
47+
Given patterns `["abc", "a", "bc", ""]`, string data
48+
`["abcd", "aa", "bcd", "rabc", "x"]` is transformed to values of the new
49+
attribute with indices `[0, 1, 2, 0, 3]`.
50+
51+
Args:
52+
variable (:obj:`~Orange.data.StringVariable`): the original variable
53+
patterns (list of str): list of string patterns
54+
case_sensitive (bool, optional): if set to `True`, the match is case
55+
sensitive
56+
match_beginning (bool, optional): if set to `True`, the pattern must
57+
appear at the beginning of the string
58+
"""
2959
def __init__(self, variable, patterns,
3060
case_sensitive=False, match_beginning=False):
3161
super().__init__(variable)
@@ -34,6 +64,15 @@ def __init__(self, variable, patterns,
3464
self.match_beginning = match_beginning
3565

3666
def transform(self, c):
67+
"""
68+
Transform the given data.
69+
70+
Args:
71+
c (np.array): an array of type that can be cast to dtype `str`
72+
73+
Returns:
74+
np.array of floats representing indices of matched patterns
75+
"""
3776
nans = np.equal(c, None)
3877
c = c.astype(str)
3978
c[nans] = ""
@@ -44,6 +83,23 @@ def transform(self, c):
4483

4584

4685
class ValueFromDiscreteSubstring(Lookup):
86+
"""
87+
Transformation that computes a discrete variable from a discrete variable by
88+
pattern matching.
89+
90+
Say that the original attribute has values
91+
`["abcd", "aa", "bcd", "rabc", "x"]`. Given patterns
92+
`["abc", "a", "bc", ""]`, the values are mapped to the values of the new
93+
attribute with indices `[0, 1, 2, 0, 3]`.
94+
95+
Args:
96+
variable (:obj:`~Orange.data.DiscreteVariable`): the original variable
97+
patterns (list of str): list of string patterns
98+
case_sensitive (bool, optional): if set to `True`, the match is case
99+
sensitive
100+
match_beginning (bool, optional): if set to `True`, the pattern must
101+
appear at the beginning of the string
102+
"""
47103
def __init__(self, variable, patterns,
48104
case_sensitive=False, match_beginning=False):
49105
super().__init__(variable, [])
@@ -52,6 +108,8 @@ def __init__(self, variable, patterns,
52108
self.patterns = patterns # Finally triggers computation of the lookup
53109

54110
def __setattr__(self, key, value):
111+
"""__setattr__ is overloaded to recompute the lookup table when the
112+
patterns, the original attribute or the flags change."""
55113
super().__setattr__(key, value)
56114
if hasattr(self, "patterns") and \
57115
key in ("case_sensitive", "match_beginning", "patterns",
@@ -88,11 +146,20 @@ class Warning(widget.OWWidget.Warning):
88146
def __init__(self):
89147
super().__init__()
90148
self.data = None
149+
150+
# The following lists are of the same length as self.active_rules
151+
152+
#: list of pairs with counts of matches for each pattern when the
153+
# patterns are applied in order and when applied on the entire set,
154+
# disregarding the preceding patterns
155+
self.match_counts = []
156+
157+
#: list of list of QLineEdit: line edit pairs for each pattern
91158
self.line_edits = []
159+
#: list of QPushButton: list of remove buttons
92160
self.remove_buttons = []
161+
#: list of list of QLabel: pairs of labels with counts
93162
self.counts = []
94-
self.match_counts = []
95-
self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)
96163

97164
patternbox = gui.vBox(self.controlArea, box="Patterns")
98165
box = gui.hBox(patternbox)
@@ -102,6 +169,8 @@ def __init__(self):
102169
model=DomainModel(valid_types=(StringVariable, DiscreteVariable)),
103170
sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
104171

172+
#: QGridLayout: the layout that contains the remove buttons, line edits and
173+
# count labels. The lines are added and removed dynamically.
105174
self.rules_box = rules_box = QGridLayout()
106175
patternbox.layout().addLayout(self.rules_box)
107176
self.add_button = gui.button(None, self, "+", flat=True,
@@ -129,17 +198,27 @@ def __init__(self):
129198
gui.rubber(box)
130199
gui.button(box, self, "Apply", autoDefault=False, callback=self.apply)
131200

201+
# TODO: Resizing upon changing the number of rules does not work
202+
self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)
203+
132204
@property
133205
def active_rules(self):
206+
"""
207+
Returns the class names and patterns corresponding to the currently
208+
selected attribute. If the attribute is not yet in the dictionary,
209+
set the default.
210+
"""
134211
return self.rules.setdefault(self.attribute and self.attribute.name,
135212
[["C1", ""], ["C2", ""]])
136213

137214
def rules_to_edits(self):
215+
"""Fill the line edits with the rules from the current settings."""
138216
for editr, textr in zip(self.line_edits, self.active_rules):
139217
for edit, text in zip(editr, textr):
140218
edit.setText(text)
141219

142220
def set_data(self, data):
221+
"""Input data signal handler."""
143222
self.closeContext()
144223
self.rules = {}
145224
self.data = data
@@ -156,14 +235,19 @@ def set_data(self, data):
156235
self.apply()
157236

158237
def update_rules(self):
238+
"""Called when the rules are changed: adjust the number of lines in
239+
the form and fill them, update the counts. The widget does not have
240+
auto-apply."""
159241
self.adjust_n_rule_rows()
160242
self.rules_to_edits()
161243
self.update_counts()
244+
# TODO: Indicator that changes need to be applied
162245

163246
def options_changed(self):
164247
self.update_counts()
165248

166249
def adjust_n_rule_rows(self):
250+
"""Add or remove lines if needed and fix the tab order."""
167251
def _add_line():
168252
self.line_edits.append([])
169253
n_lines = len(self.line_edits)
@@ -213,21 +297,29 @@ def _fix_tab_order():
213297
_fix_tab_order()
214298

215299
def add_row(self):
300+
"""Append a new row at the end."""
216301
self.active_rules.append(["", ""])
217302
self.adjust_n_rule_rows()
218303

219304
def remove_row(self):
305+
"""Remove a row."""
220306
remove_idx = self.remove_buttons.index(self.sender())
221307
del self.active_rules[remove_idx]
222308
self.update_rules()
223309

224310
def sync_edit(self, text):
311+
"""Handle changes in line edits: update the active rules and counts"""
225312
edit = self.sender()
226313
edit.row[edit.col_idx] = text
227314
self.update_counts()
228315

229316
def update_counts(self):
317+
"""Recompute and update the counts of matches."""
230318
def _matcher(strings, pattern):
319+
"""Return indices of strings into patterns; consider case
320+
sensitivity and matching at the beginning. The given strings are
321+
assumed to be in lower case if match is case insensitive. Patterns
322+
are fixed on the fly."""
231323
if not self.case_sensitive:
232324
pattern = pattern.lower()
233325
indices = np.char.find(strings, pattern)
@@ -237,6 +329,15 @@ def _lower_if_needed(strings):
237329
return strings if self.case_sensitive else np.char.lower(strings)
238330

239331
def _string_counts():
332+
"""
333+
Generate pairs of arrays for each rule until running out of data
334+
instances. np.sum over the two arrays in each pair gives the
335+
number of matches of the remaining instances (considering the
336+
order of patterns) and of the original data.
337+
338+
For _string_counts, the arrays contain bool masks referring to the
339+
original data.
340+
"""
240341
nonlocal data
241342
data = data.astype(str)
242343
data = data[~np.char.equal(data, "")]
@@ -251,6 +352,10 @@ def _string_counts():
251352
break
252353

253354
def _discrete_counts():
355+
"""
356+
Generate pairs similar to _string_counts, except that the arrays
357+
contain bin counts for the attribute's values matching the pattern.
358+
"""
254359
attr_vals = np.array(attr.values)
255360
attr_vals = _lower_if_needed(attr_vals)
256361
bins = bincount(data, max_val=len(attr.values) - 1)[0]
@@ -263,11 +368,13 @@ def _discrete_counts():
263368
break
264369

265370
def _clear_labels():
371+
"""Clear all labels"""
266372
for lab_matched, lab_total in self.counts:
267373
lab_matched.setText("")
268374
lab_total.setText("")
269375

270376
def _set_labels():
377+
"""Set the labels to show the counts"""
271378
for (n_matched, n_total), (lab_matched, lab_total) in \
272379
zip(self.match_counts, self.counts):
273380
n_before = n_total - n_matched
@@ -287,6 +394,7 @@ def _set_labels():
287394
_set_labels()
288395

289396
def apply(self):
397+
"""Output the transformed data."""
290398
if not self.attribute or not self.active_rules:
291399
self.send("Data", None)
292400
return
@@ -307,6 +415,7 @@ def apply(self):
307415

308416

309417
def main(): # pragma: no cover
418+
"""Simple test for manual inspection of the widget"""
310419
import sys
311420
from AnyQt.QtWidgets import QApplication
312421

Orange/widgets/data/tests/test_owcreateclass.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,22 @@ def test_map_by_substring(self):
2222
np.testing.assert_equal(
2323
map_by_substring(self.arr,
2424
["abc", "a", "bc", ""],
25-
case_sensitive=True, at_beginning=False),
25+
case_sensitive=True, match_beginning=False),
2626
[0, 1, 2, 0, 3])
2727
np.testing.assert_equal(
2828
map_by_substring(self.arr,
2929
["abc", "a", "Bc", ""],
30-
case_sensitive=True, at_beginning=False),
30+
case_sensitive=True, match_beginning=False),
3131
[0, 1, 3, 0, 3])
3232
np.testing.assert_equal(
3333
map_by_substring(self.arr,
3434
["abc", "a", "Bc", ""],
35-
case_sensitive=False, at_beginning=False),
35+
case_sensitive=False, match_beginning=False),
3636
[0, 1, 2, 0, 3])
3737
np.testing.assert_equal(
3838
map_by_substring(self.arr,
3939
["abc", "a", "bc", ""],
40-
case_sensitive=False, at_beginning=True),
40+
case_sensitive=False, match_beginning=True),
4141
[0, 1, 2, 3, 3])
4242
np.testing.assert_equal(
4343
map_by_substring(self.arr, ["", ""], False, False),

0 commit comments

Comments
 (0)