Skip to content

Commit 3629531

Browse files
committed
Edit Domain: Add option to remove compute_value
1 parent 1ad65b9 commit 3629531

File tree

2 files changed

+187
-62
lines changed

2 files changed

+187
-62
lines changed

Orange/widgets/data/oweditdomain.py

Lines changed: 96 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ class Categorical(
9595
("name", str),
9696
("categories", Tuple[str, ...]),
9797
("annotations", AnnotationsType),
98+
("linked", bool)
9899
])): pass
99100

100101

@@ -104,20 +105,23 @@ class Real(
104105
# a precision (int), and a format specifier ('f', 'g', or '')
105106
("format", Tuple[int, str]),
106107
("annotations", AnnotationsType),
108+
("linked", bool)
107109
])): pass
108110

109111

110112
class String(
111113
_DataType, NamedTuple("String", [
112114
("name", str),
113115
("annotations", AnnotationsType),
116+
("linked", bool)
114117
])): pass
115118

116119

117120
class Time(
118121
_DataType, NamedTuple("Time", [
119122
("name", str),
120123
("annotations", AnnotationsType),
124+
("linked", bool)
121125
])): pass
122126

123127

@@ -175,10 +179,14 @@ def __call__(self, var):
175179
return var._replace(annotations=self.annotations)
176180

177181

178-
Transform = Union[Rename, CategoriesMapping, Annotate]
179-
TransformTypes = (Rename, CategoriesMapping, Annotate)
182+
class Unlink(_DataType, namedtuple("Unlink", [])):
183+
"""Unlink variable from its source, that is, remove compute_value"""
180184

181-
CategoricalTransformTypes = (CategoriesMapping, )
185+
186+
Transform = Union[Rename, CategoriesMapping, Annotate, Unlink]
187+
TransformTypes = (Rename, CategoriesMapping, Annotate, Unlink)
188+
189+
CategoricalTransformTypes = (CategoriesMapping, Unlink)
182190

183191

184192
# Reinterpret vector transformations.
@@ -221,7 +229,7 @@ def __call__(self, vector: DataVector) -> StringVector:
221229
if isinstance(var, String):
222230
return vector
223231
return StringVector(
224-
String(var.name, var.annotations),
232+
String(var.name, var.annotations, False),
225233
lambda: as_string(vector.data()),
226234
)
227235

@@ -241,19 +249,19 @@ def data() -> MArray:
241249
a = categorical_to_string_vector(d, var.values)
242250
return MArray(as_float_or_nan(a, where=a.mask), mask=a.mask)
243251
return RealVector(
244-
Real(var.name, (6, 'g'), var.annotations), data
252+
Real(var.name, (6, 'g'), var.annotations, var.linked), data
245253
)
246254
elif isinstance(var, Time):
247255
return RealVector(
248-
Real(var.name, (6, 'g'), var.annotations),
256+
Real(var.name, (6, 'g'), var.annotations, var.linked),
249257
lambda: vector.data().astype(float)
250258
)
251259
elif isinstance(var, String):
252260
def data():
253261
s = vector.data()
254262
return MArray(as_float_or_nan(s, where=s.mask), mask=s.mask)
255263
return RealVector(
256-
Real(var.name, (6, "g"), var.annotations), data
264+
Real(var.name, (6, "g"), var.annotations, var.linked), data
257265
)
258266
raise AssertionError
259267

@@ -266,22 +274,10 @@ def __call__(self, vector: DataVector) -> CategoricalVector:
266274
var, _ = vector
267275
if isinstance(var, Categorical):
268276
return vector
269-
if isinstance(var, Real):
270-
data, values = categorical_from_vector(vector.data())
271-
return CategoricalVector(
272-
Categorical(var.name, values, var.annotations),
273-
lambda: data
274-
)
275-
elif isinstance(var, Time):
277+
if isinstance(var, (Real, Time, String)):
276278
data, values = categorical_from_vector(vector.data())
277279
return CategoricalVector(
278-
Categorical(var.name, values, var.annotations),
279-
lambda: data
280-
)
281-
elif isinstance(var, String):
282-
data, values = categorical_from_vector(vector.data())
283-
return CategoricalVector(
284-
Categorical(var.name, values, var.annotations),
280+
Categorical(var.name, values, var.annotations, var.linked),
285281
lambda: data
286282
)
287283
raise AssertionError
@@ -295,7 +291,7 @@ def __call__(self, vector: DataVector) -> TimeVector:
295291
return vector
296292
elif isinstance(var, Real):
297293
return TimeVector(
298-
Time(var.name, var.annotations),
294+
Time(var.name, var.annotations, var.linked),
299295
lambda: vector.data().astype("M8[us]")
300296
)
301297
elif isinstance(var, Categorical):
@@ -305,15 +301,15 @@ def data():
305301
dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]")
306302
return MArray(dt, mask=d.mask)
307303
return TimeVector(
308-
Time(var.name, var.annotations), data
304+
Time(var.name, var.annotations, var.linked), data
309305
)
310306
elif isinstance(var, String):
311307
def data():
312308
s = vector.data()
313309
dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]")
314310
return MArray(dt, mask=s.mask)
315311
return TimeVector(
316-
Time(var.name, var.annotations), data
312+
Time(var.name, var.annotations, var.linked), data
317313
)
318314
raise AssertionError
319315

@@ -532,6 +528,17 @@ def __init__(self, parent=None, **kwargs):
532528
)
533529
form.addRow("Name:", self.name_edit)
534530

531+
self.unlink_var_cb = QCheckBox(
532+
"Unlink variable from its source variable", self,
533+
toolTip="Make Orange forget that the variable is derived from "
534+
"another.\n"
535+
"Use this for instance when you want to consider variables "
536+
"with the same name but from different sources as the same "
537+
"variable."
538+
)
539+
self.unlink_var_cb.toggled.connect(self._set_unlink)
540+
form.addRow("", self.unlink_var_cb)
541+
535542
vlayout = QVBoxLayout(margin=0, spacing=1)
536543
self.labels_edit = view = QTreeView(
537544
objectName="annotation-pairs-edit",
@@ -616,17 +623,23 @@ def set_data(self, var, transform=()):
616623
if var is not None:
617624
name = var.name
618625
annotations = var.annotations
626+
unlink = False
619627
for tr in transform:
620628
if isinstance(tr, Rename):
621629
name = tr.name
622630
elif isinstance(tr, Annotate):
623631
annotations = tr.annotations
632+
elif isinstance(tr, Unlink):
633+
unlink = True
624634
self.name_edit.setText(name)
625635
self.labels_model.set_dict(dict(annotations))
626636
self.add_label_action.actionGroup().setEnabled(True)
637+
self.unlink_var_cb.setChecked(unlink)
627638
else:
628639
self.add_label_action.actionGroup().setEnabled(False)
629640

641+
self.unlink_var_cb.setDisabled(var is None or not var.linked)
642+
630643
def get_data(self):
631644
"""Retrieve the modified variable.
632645
"""
@@ -639,6 +652,8 @@ def get_data(self):
639652
tr.append(Rename(name))
640653
if self.var.annotations != labels:
641654
tr.append(Annotate(labels))
655+
if self.var.linked and self.unlink_var_cb.isChecked():
656+
tr.append(Unlink())
642657
return self.var, tr
643658

644659
def clear(self):
@@ -647,6 +662,7 @@ def clear(self):
647662
self.var = None
648663
self.name_edit.setText("")
649664
self.labels_model.setRowCount(0)
665+
self.unlink_var_cb.setChecked(False)
650666

651667
@Slot()
652668
def on_name_changed(self):
@@ -661,6 +677,10 @@ def on_label_selection_changed(self):
661677
selected = self.labels_edit.selectionModel().selectedRows()
662678
self.remove_label_action.setEnabled(bool(len(selected)))
663679

680+
def _set_unlink(self, unlink):
681+
self.unlink_var_cb.setChecked(unlink)
682+
self.variable_changed.emit()
683+
664684

665685
class GroupItemsDialog(QDialog):
666686
"""
@@ -1157,7 +1177,7 @@ def __init__(self, *args, **kwargs):
11571177
hlayout.addStretch(10)
11581178
vlayout.addLayout(hlayout)
11591179

1160-
form.insertRow(1, "Values:", vlayout)
1180+
form.insertRow(2, "Values:", vlayout)
11611181

11621182
QWidget.setTabOrder(self.name_edit, self.values_edit)
11631183
QWidget.setTabOrder(self.values_edit, button1)
@@ -2030,23 +2050,32 @@ def state(i):
20302050
model.data(midx, TransformRole))
20312051

20322052
state = [state(i) for i in range(model.rowCount())]
2033-
if all(tr is None or not tr for _, tr in state) \
2034-
and self.output_table_name in ("", data.name):
2053+
input_vars = data.domain.variables + data.domain.metas
2054+
if self.output_table_name in ("", data.name) \
2055+
and not any(requires_transform(var, trs)
2056+
for var, (_, trs) in zip(input_vars, state)):
20352057
self.Outputs.data.send(data)
20362058
self.info.set_output_summary(len(data),
20372059
format_summary_details(data))
20382060
return
20392061

2040-
output_vars = []
2041-
input_vars = data.domain.variables + data.domain.metas
20422062
assert all(v_.vtype.name == v.name
20432063
for v, (v_, _) in zip(input_vars, state))
2064+
output_vars = []
2065+
unlinked_vars = []
2066+
unlink_domain = False
20442067
for (_, tr), v in zip(state, input_vars):
20452068
if tr:
20462069
var = apply_transform(v, data, tr)
2070+
if requires_unlink(v, tr):
2071+
unlinked_var = var.copy(compute_value=None)
2072+
unlink_domain = True
2073+
else:
2074+
unlinked_var = var
20472075
else:
2048-
var = v
2076+
unlinked_var = var = v
20492077
output_vars.append(var)
2078+
unlinked_vars.append(unlinked_var)
20502079

20512080
if len(output_vars) != len({v.name for v in output_vars}):
20522081
self.Error.duplicate_var_name()
@@ -2058,15 +2087,23 @@ def state(i):
20582087
nx = len(domain.attributes)
20592088
ny = len(domain.class_vars)
20602089

2061-
Xs = output_vars[:nx]
2062-
Ys = output_vars[nx: nx + ny]
2063-
Ms = output_vars[nx + ny:]
2064-
# Move non primitive Xs, Ys to metas (if they were changed)
2065-
Ms += [v for v in Xs + Ys if not v.is_primitive()]
2066-
Xs = [v for v in Xs if v.is_primitive()]
2067-
Ys = [v for v in Ys if v.is_primitive()]
2068-
domain = Orange.data.Domain(Xs, Ys, Ms)
2090+
def construct_domain(vars_list):
2091+
# Move non primitive Xs, Ys to metas (if they were changed)
2092+
Xs = [v for v in vars_list[:nx] if v.is_primitive()]
2093+
Ys = [v for v in vars_list[nx: nx + ny] if v.is_primitive()]
2094+
Ms = vars_list[nx + ny:] + \
2095+
[v for v in vars_list[:nx + ny] if not v.is_primitive()]
2096+
return Orange.data.Domain(Xs, Ys, Ms)
2097+
2098+
domain = construct_domain(output_vars)
20692099
new_data = data.transform(domain)
2100+
if unlink_domain:
2101+
unlinked_domain = construct_domain(unlinked_vars)
2102+
new_data = new_data.from_numpy(
2103+
unlinked_domain,
2104+
new_data.X, new_data.Y, new_data.metas, new_data.W,
2105+
new_data.attributes, new_data.ids
2106+
)
20702107
if self.output_table_name:
20712108
new_data.name = self.output_table_name
20722109
self.Outputs.data.send(new_data)
@@ -2236,7 +2273,7 @@ def i(text):
22362273
def text(text):
22372274
return "<span>{}</span>".format(escape(text))
22382275
assert trs
2239-
rename = annotate = catmap = None
2276+
rename = annotate = catmap = unlink = None
22402277
reinterpret = None
22412278

22422279
for tr in trs:
@@ -2246,6 +2283,8 @@ def text(text):
22462283
annotate = tr
22472284
elif isinstance(tr, CategoriesMapping):
22482285
catmap = tr
2286+
elif isinstance(tr, Unlink):
2287+
unlink = tr
22492288
elif isinstance(tr, ReinterpretTransformTypes):
22502289
reinterpret = tr
22512290

@@ -2258,6 +2297,8 @@ def text(text):
22582297
header = "{} → {}".format(var.name, rename.name)
22592298
else:
22602299
header = var.name
2300+
if unlink is not None:
2301+
header += "(unlinked from source)"
22612302

22622303
values_section = None
22632304
if catmap is not None:
@@ -2323,14 +2364,15 @@ def abstract(var):
23232364
(key, str(value))
23242365
for key, value in var.attributes.items()
23252366
))
2367+
linked = var.compute_value is not None
23262368
if isinstance(var, Orange.data.DiscreteVariable):
2327-
return Categorical(var.name, tuple(var.values), annotations)
2369+
return Categorical(var.name, tuple(var.values), annotations, linked)
23282370
elif isinstance(var, Orange.data.TimeVariable):
2329-
return Time(var.name, annotations)
2371+
return Time(var.name, annotations, linked)
23302372
elif isinstance(var, Orange.data.ContinuousVariable):
2331-
return Real(var.name, (var.number_of_decimals, 'f'), annotations)
2373+
return Real(var.name, (var.number_of_decimals, 'f'), annotations, linked)
23322374
elif isinstance(var, Orange.data.StringVariable):
2333-
return String(var.name, annotations)
2375+
return String(var.name, annotations, linked)
23342376
else:
23352377
raise TypeError
23362378

@@ -2359,6 +2401,17 @@ def apply_transform(var, table, trs):
23592401
return var
23602402

23612403

2404+
def requires_unlink(var: Orange.data.Variable, trs: List[Transform]) -> bool:
2405+
return trs is not None \
2406+
and any(isinstance(tr, Unlink) for tr in trs) \
2407+
and (var.compute_value is not None or len(trs) > 1)
2408+
2409+
2410+
def requires_transform(var: Orange.data.Variable, trs: List[Transform]) -> bool:
2411+
return trs and not all (isinstance(tr, Unlink) for tr in trs) \
2412+
or requires_unlink(var, trs)
2413+
2414+
23622415
@singledispatch
23632416
def apply_transform_var(var, trs):
23642417
# type: (Orange.data.Variable, List[Transform]) -> Orange.data.Variable

0 commit comments

Comments
 (0)