Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions Orange/data/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,13 @@ def __reduce__(self):
# Use make to unpickle variables.
return make_variable, (self.__class__, self._compute_value, self.name), self.__dict__

def copy(self, compute_value=None, *, name=None, **kwargs):
_CopyComputeValue = object()

def copy(self, compute_value=_CopyComputeValue, *, name=None, **kwargs):
if compute_value is self._CopyComputeValue:
compute_value = self.compute_value
var = type(self)(name=name or self.name,
compute_value=compute_value or self.compute_value,
compute_value=compute_value,
sparse=self.sparse, **kwargs)
var.attributes = dict(self.attributes)
return var
Expand Down Expand Up @@ -590,7 +594,8 @@ def repr_val(self, val):

str_val = repr_val

def copy(self, compute_value=None, *, name=None, **kwargs):
def copy(self, compute_value=Variable._CopyComputeValue,
*, name=None, **kwargs):
# pylint does not understand that `var` is `DiscreteVariable`:
# pylint: disable=protected-access
number_of_decimals = kwargs.pop("number_of_decimals", None)
Expand Down Expand Up @@ -826,7 +831,8 @@ def __reduce__(self):
self.values, self.ordered), \
__dict__

def copy(self, compute_value=None, *, name=None, values=None, **_):
def copy(self, compute_value=Variable._CopyComputeValue,
*, name=None, values=None, **_):
# pylint: disable=arguments-differ
if values is not None and len(values) != len(self.values):
raise ValueError(
Expand Down Expand Up @@ -956,7 +962,7 @@ def __init__(self, *args, have_date=0, have_time=0, **kwargs):
self.have_date = have_date
self.have_time = have_time

def copy(self, compute_value=None, *, name=None, **_):
def copy(self, compute_value=Variable._CopyComputeValue, *, name=None, **_):
return super().copy(compute_value=compute_value, name=name,
have_date=self.have_date, have_time=self.have_time)

Expand Down
52 changes: 46 additions & 6 deletions Orange/widgets/data/owconcatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

"""

from collections import OrderedDict, namedtuple
from collections import OrderedDict, namedtuple, defaultdict
from functools import reduce
from itertools import chain, count
from typing import List
Expand Down Expand Up @@ -72,11 +72,13 @@ class Warning(widget.OWWidget.Warning):
#: User specified name for the "Source ID" attr
source_attr_name = settings.Setting("Source ID")

ignore_compute_value = settings.Setting(False)

want_main_area = False
resizing_enabled = False

domain_opts = ("Union of attributes appearing in all tables",
"Intersection of attributes in all tables")
domain_opts = ("all variables that appear in input tables",
"only variables that appear in all tables")

id_roles = ("Class attribute", "Attribute", "Meta attribute")

Expand All @@ -88,14 +90,14 @@ def __init__(self):
self.primary_data = None
self.more_data = OrderedDict()

self.mergebox = gui.vBox(self.controlArea, "Domain Merging")
self.mergebox = gui.vBox(self.controlArea, "Variable Merging")
box = gui.radioButtons(
self.mergebox, self, "merge_type",
callback=self._merge_type_changed)

gui.widgetLabel(
box, self.tr("When there is no primary table, " +
"the domain should be:"))
"the output should contain:"))

for opts in self.domain_opts:
gui.appendRadioButton(box, self.tr(opts))
Expand All @@ -108,6 +110,12 @@ def __init__(self):
"is no conflict between input classes."))
label.setWordWrap(True)

gui.separator(box)
gui.checkBox(
box, self, "ignore_compute_value",
"Treat variables with the same name as the same variable,\n"
"even if they are computed using different formulae.",
callback=self.apply, stateWhenDisabled=False)
###
box = gui.vBox(
self.controlArea, self.tr("Source Identification"),
Expand Down Expand Up @@ -207,7 +215,10 @@ def apply(self):
tables = [self.primary_data] + list(self.more_data.values())
domain = self.primary_data.domain
elif self.more_data:
tables = self.more_data.values()
if self.ignore_compute_value:
tables = self._dumb_tables()
else:
tables = self.more_data.values()
domains = [table.domain for table in tables]
domain = self.merge_domains(domains)

Expand Down Expand Up @@ -240,6 +251,35 @@ def apply(self):

self.Outputs.data.send(data)

def _dumb_tables(self):
    """
    Return copies of the additional tables with conflicting compute values
    removed.

    Variables are grouped by (name, type, domain part), where the part is
    the index of attributes, class variables or metas. If the variables in
    a group have more than one distinct `compute_value` across the tables
    in `self.more_data`, each of them is replaced by a copy with
    `compute_value=None`. All other variables are reused as they are.

    Returns:
        list: one new table per table in `self.more_data`, built with
        `from_numpy` from the original table's `X`, `Y`, `metas`, `W`,
        `attributes` and `ids`.
    """
    def domain_parts(domain):
        # Pair each part of the domain with its index (0, 1, 2)
        return enumerate(
            (domain.attributes, domain.class_vars, domain.metas))

    def describe(var, part):
        return var.name, type(var), part

    tables = list(self.more_data.values())

    # Collect the distinct compute values seen for every description
    seen_compute_values = defaultdict(set)
    for table in tables:
        for part, part_vars in domain_parts(table.domain):
            for var in part_vars:
                seen_compute_values[describe(var, part)].add(
                    var.compute_value)

    conflicting = {key
                   for key, values in seen_compute_values.items()
                   if len(values) > 1}

    def without_conflict(var, part):
        # Only variables from conflicting groups are copied
        if describe(var, part) in conflicting:
            return var.copy(compute_value=None)
        return var

    result = []
    for table in tables:
        parts = [[without_conflict(var, part) for var in part_vars]
                 for part, part_vars in domain_parts(table.domain)]
        new_domain = Orange.data.Domain(*parts)
        result.append(
            type(table).from_numpy(
                new_domain,
                table.X, table.Y, table.metas, table.W,
                table.attributes, table.ids))
    return result

def _merge_type_changed(self, ):
if self.incompatible_types():
self.Error.bow_concatenation()
Expand Down
119 changes: 118 additions & 1 deletion Orange/widgets/data/tests/test_owconcatenate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring, abstract-method, protected-access
import unittest
from unittest.mock import patch
from unittest.mock import patch, Mock

import numpy as np

Expand Down Expand Up @@ -421,6 +421,123 @@ def test_summary(self):
self.assertEqual(info._StateInfo__output_summary.brief, "")
self.assertEqual(info._StateInfo__output_summary.details, no_output)

def _create_compute_values(self):
    """
    Build two tables from `self.iris` whose variables are copies of the
    iris variables with various compute values.

    The first table has four attributes and a class variable; the second
    has three attributes and the copy of the fourth iris variable as a
    meta. Copies of the first variable get no explicit compute value,
    copies of the second share the same function, and the remaining
    copies each get their own lambda.

    Returns:
        tuple: `(table_n, table_m)`
    """
    a1, a2, a3, a4, c1 = self.iris.domain.variables

    def times2(*_):
        return 2

    # Variables of the first table
    n_attributes = [
        a1.copy(),
        a2.copy(compute_value=times2),
        a3.copy(compute_value=lambda *_: 3),
        a4.copy(compute_value=lambda *_: 4),
    ]
    n_class = c1.copy(compute_value=lambda *_: 5)

    # Variables of the second table; the copy of `a4` goes to metas
    m_attributes = [
        a1.copy(),
        a2.copy(compute_value=times2),
        a3.copy(compute_value=lambda x: 6),
    ]
    m_meta = a4.copy(compute_value=lambda x: 7)

    table_n = self.iris.transform(Domain(n_attributes, n_class))
    table_m = self.iris.transform(Domain(m_attributes, None, [m_meta]))
    return table_n, table_m

def test_dumb_tables(self):
    """
    Check which variables `_dumb_tables` copies and which it reuses.
    """
    # `apply` is replaced by a mock; `_dumb_tables` is called directly
    self.widget.apply = Mock()
    table_n, table_m = self._create_compute_values()
    na1, na2, na3, na4, nc1 = table_n.domain.variables
    ma1, ma2, ma3 = table_m.domain.attributes
    ma4 = table_m.domain.metas[0]

    additional_data = self.widget.Inputs.additional_data
    self.send_signal(additional_data, table_n, 1)
    self.send_signal(additional_data, table_m, 2)

    # pylint: disable=unbalanced-tuple-unpacking
    dtable_n, dtable_m = self.widget._dumb_tables()
    dna1, dna2, dna3, dna4, dnc1 = dtable_n.domain.variables
    dma1, dma2, dma3 = dtable_m.domain.attributes
    dma4 = dtable_m.domain.metas[0]

    # No copying: same name and no compute value
    for original, dumb in ((na1, dna1), (ma1, dma1)):
        self.assertIs(original, dumb)

    # No copying: same name and same compute value
    for original, dumb in ((na2, dna2), (ma2, dma2)):
        self.assertIs(original, dumb)

    # Copy: same name and different compute value
    for original, dumb in ((na3, dna3), (ma3, dma3)):
        self.assertIsNot(original, dumb)
    for dumb in (dna3, dma3):
        self.assertIsNone(dumb.compute_value)

    # No copying: same name and different compute value, but different part
    for original, dumb in ((na4, dna4), (ma4, dma4)):
        self.assertIs(original, dumb)

    # No copying: does not appear in the other table
    self.assertIs(nc1, dnc1)

    # The data itself is carried over unchanged
    np.testing.assert_equal(table_m.X, dtable_m.X)
    np.testing.assert_equal(table_m.Y, dtable_m.Y)
    np.testing.assert_equal(table_n.X, dtable_n.X)
    np.testing.assert_equal(table_n.metas, dtable_n.metas)

def test_dont_ignore_compute_value(self):
    """
    With `ignore_compute_value` off, the output has five attributes: the
    first two are the original variables and the rest refer to their
    sources through `compute_value.variable`.
    """
    table_n, table_m = self._create_compute_values()
    na1, na2, na3, na4, nc1 = table_n.domain.variables
    ma3 = table_m.domain.attributes[2]
    ma4 = table_m.domain.metas[0]

    additional_data = self.widget.Inputs.additional_data
    self.send_signal(additional_data, table_n, 1)
    self.send_signal(additional_data, table_m, 2)

    self.widget.ignore_compute_value = False
    self.widget.apply()

    domain = self.get_output(self.widget.Outputs.data).domain
    attributes = domain.attributes
    self.assertEqual(len(attributes), 5)

    # Kept as they are
    self.assertIs(attributes[0], na1)
    self.assertIs(attributes[1], na2)

    # Renamed: each output attribute refers back to its source variable
    for attr, source in zip(attributes[2:], (na3, na4, ma3)):
        self.assertIs(attr.compute_value.variable, source)

    self.assertIs(domain.class_var, nc1)

    metas = domain.metas
    self.assertEqual(len(metas), 1)
    self.assertIs(metas[0].compute_value.variable, ma4)

def test_ignore_compute_value(self):
    """
    With `ignore_compute_value` on, the third attributes of both tables
    (same name, different compute values) end up as a single output
    attribute that keeps the name and has no compute value.
    """
    table_n, table_m = self._create_compute_values()
    na1, na2, na3, na4, nc1 = table_n.domain.variables
    ma3 = table_m.domain.attributes[2]
    ma4 = table_m.domain.metas[0]

    self.send_signal(self.widget.Inputs.additional_data, table_n, 1)
    self.send_signal(self.widget.Inputs.additional_data, table_m, 2)

    self.widget.ignore_compute_value = True
    self.widget.apply()

    output = self.get_output(self.widget.Outputs.data)
    attributes = output.domain.attributes
    self.assertEqual(len(attributes), 4)
    self.assertIs(attributes[0], na1)
    self.assertIs(attributes[1], na2)

    # Merged: a new variable with the original name and no compute value.
    # `assertIsNone` takes the failure message as its second argument, so
    # only the checked value is passed.
    self.assertIsNot(attributes[2], na3)
    self.assertIsNot(attributes[2], ma3)
    self.assertIsNone(attributes[2].compute_value)
    self.assertEqual(attributes[2].name, na3.name)

    self.assertIs(attributes[3].compute_value.variable, na4)  # renamed

    self.assertIs(output.domain.class_var, nc1)

    self.assertEqual(len(output.domain.metas), 1)
    self.assertIs(output.domain.metas[0].compute_value.variable, ma4)  # renamed


if __name__ == "__main__":
unittest.main()