Skip to content

Commit 2bc5b69

Browse files
committed
OWMergeData: Fix ids and duplicated columns in outer join
1 parent 30c9e67 commit 2bc5b69

File tree

2 files changed

+57
-20
lines changed

2 files changed

+57
-20
lines changed

Orange/widgets/data/owmergedata.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,9 @@ def best_match(model, extra_model):
305305
if len(state) == 1 \
306306
and not any(isinstance(v, Variable) for v in state[0]):
307307
l_var, r_var = best_match(box.model_left, box.model_right)
308-
self._try_set_combo(box.rows[0].left_combo, l_var)
309-
self._try_set_combo(box.rows[0].right_combo, r_var)
308+
if l_var is not None:
309+
self._try_set_combo(box.rows[0].left_combo, l_var)
310+
self._try_set_combo(box.rows[0].right_combo, r_var)
310311

311312
@Inputs.data
312313
@check_sql_input
@@ -417,9 +418,10 @@ def merge(self):
417418
if not self._check_uniqueness(left, left_mask, right, right_mask):
418419
return None
419420
method = self._merge_methods[self.merging]
420-
lefti, righti = method(self, left, left_mask, right, right_mask)
421+
lefti, righti, rightu = method(self, left, left_mask, right, right_mask)
421422
reduced_extra_data = self._compute_reduced_extra_data(right_vars)
422-
return self._join_table_by_indices(reduced_extra_data, lefti, righti)
423+
return self._join_table_by_indices(
424+
reduced_extra_data, lefti, righti, rightu)
423425

424426
def _check_pair_types(self, pairs):
425427
for left, right in pairs:
@@ -510,34 +512,32 @@ def _left_join_indices(self, left, left_mask, right, right_mask):
510512
righti = np.fromiter(righti, dtype=np.int64, count=len(data))
511513
lefti = np.arange(len(data), dtype=np.int64)
512514
righti[lefti[~left_mask]] = -1
513-
return lefti, righti
515+
return lefti, righti, None
514516

515517
def _inner_join_indices(self, left, left_mask, right, right_mask):
516518
"""Use _augment_indices to compute the array of indices,
517519
then remove those with no match in the second table"""
518-
lefti, righti = \
520+
lefti, righti, _ = \
519521
self._left_join_indices(left, left_mask, right, right_mask)
520522
mask = righti != [-1]
521-
return lefti[mask], righti[mask]
523+
return lefti[mask], righti[mask], None
522524

523525
def _outer_join_indices(self, left, left_mask, right, right_mask):
524526
"""Use _augment_indices to compute the array of indices,
525527
then add rows in the second table without a match in the first"""
526-
lefti, righti = \
528+
lefti, righti, _ = \
527529
self._left_join_indices(left, left_mask, right, right_mask)
528530
unused = np.full(len(right), True)
529531
unused[righti] = False
530532
if len(right) - 1 not in righti:
531533
# righti can include -1, which sets the last element as used
532534
unused[-1] = True
533-
right_over = np.arange(len(right), dtype=np.int64)[unused]
534-
left_over = np.full(len(right_over), -1, np.int64)
535-
return np.hstack((lefti, left_over)), np.hstack((righti, right_over))
535+
return lefti, righti, np.nonzero(unused)[0]
536536

537537
_merge_methods = [
538538
_left_join_indices, _inner_join_indices, _outer_join_indices]
539539

540-
def _join_table_by_indices(self, reduced_extra, lefti, righti):
540+
def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
541541
"""Join (horizontally) self.data and reduced_extra, taking the pairs
542542
of rows given in indices"""
543543
if not lefti.size:
@@ -551,10 +551,20 @@ def _join_table_by_indices(self, reduced_extra, lefti, righti):
551551
string_cols = [i for i, var in enumerate(domain.metas) if var.is_string]
552552
metas = self._join_array_by_indices(
553553
self.data.metas, reduced_extra.metas, lefti, righti, string_cols)
554+
if rightu is not None:
555+
extras = self.extra_data[rightu].transform(domain)
556+
X = np.vstack((X, extras.X))
557+
Y = np.vstack((Y, extras.Y))
558+
metas = np.vstack((metas, extras.metas))
554559
table = Orange.data.Table.from_numpy(domain, X, Y, metas)
555560
table.name = getattr(self.data, 'name', '')
556561
table.attributes = getattr(self.data, 'attributes', {})
557-
table.ids = self.data.ids
562+
if rightu is not None:
563+
table.ids = np.hstack(
564+
(self.data.ids, self.extra_data.ids[rightu]))
565+
else:
566+
table.ids = self.data.ids[lefti]
567+
558568
return table
559569

560570
@staticmethod

Orange/widgets/data/tests/test_owmergedata.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -449,15 +449,19 @@ def test_output_merge_by_ids_outer(self):
449449
Source position (index)"""
450450
domain = self.dataA.domain
451451
result = Table(domain,
452-
np.array([[0, np.nan], [1, 1], [2, 0], [np.nan, 1]]),
453-
np.array([0, 1, 2, np.nan]),
454-
np.array([[0.0, ""], [1.0, "m2"], [np.nan, "m3"],
455-
[np.nan, "m4"]]).astype(object))
456-
self.send_signal(self.widget.Inputs.data, self.dataA[:3, [0, "cls", -1]])
457-
self.send_signal(self.widget.Inputs.extra_data, self.dataA[1:, [1, "cls", -2]])
452+
np.array([[1, 1], [2, 0], [3, np.nan], [np.nan, 0]]),
453+
np.array([1, 2, np.nan, 0]),
454+
np.array([[1.0, "m2"], [np.nan, "m3"],
455+
[0.0, ""], [np.nan, "m1"]]).astype(object))
458456
self.widget.attr_boxes.set_state([(INSTANCEID, INSTANCEID)])
457+
self.widget.merging = 2
459458
self.widget.controls.merging.buttons[self.widget.OuterJoin].click()
460-
self.assertTablesEqual(self.get_output(self.widget.Outputs.data), result)
459+
self.send_signal(self.widget.Inputs.data, self.dataA[1:, [0, "cls", -1]])
460+
self.send_signal(self.widget.Inputs.extra_data, self.dataA[:3, [1, "cls", -2]])
461+
out = self.get_output(self.widget.Outputs.data)
462+
self.assertTablesEqual(out, result)
463+
np.testing.assert_equal(
464+
out.ids, np.hstack((self.dataA.ids[1:], self.dataA.ids[:1])))
461465

462466
def test_output_merge_by_index_left(self):
463467
"""Check output for merging option 'Append columns from Extra Data' by
@@ -580,6 +584,29 @@ def test_output_merge_by_attribute_outer(self):
580584
self.widget.controls.merging.buttons[self.widget.OuterJoin].click()
581585
self.assertTablesEqual(self.get_output(self.widget.Outputs.data), result)
582586

587+
def test_output_merge_by_attribute_outer_same_attr(self):
588+
"""Values of columns from extra data are copied to left part if they
589+
match"""
590+
name = StringVariable("name")
591+
domainA = Domain([ContinuousVariable("x")], None, [name])
592+
domainB = Domain([ContinuousVariable("y")], None, [name])
593+
xA = np.array([[0], [1], [2]])
594+
mA = np.array([["a"], ["b"], ["c"]])
595+
xB = np.array([[4], [5], [6], [7]])
596+
mB = np.array([["b"], ["d"], ["a"], ["c"]])
597+
dataA = Table(domainA, xA, None, mA)
598+
dataB = Table(domainB, xB, None, mB)
599+
600+
self.send_signal(self.widget.Inputs.data, dataA)
601+
self.send_signal(self.widget.Inputs.extra_data, dataB)
602+
self.widget.attr_boxes.set_state([(name, name)])
603+
self.widget.controls.merging.buttons[self.widget.OuterJoin].click()
604+
out = self.get_output(self.widget.Outputs.data)
605+
np.testing.assert_equal(
606+
out.X,
607+
np.array([[0, 6], [1, 4], [2, 7], [np.nan, 5]]))
608+
self.assertEqual(" ".join(out.metas.flatten()), "a b c d")
609+
583610
def test_output_merge_by_class_left(self):
584611
"""Check output for merging option 'Append columns from Extra Data' by
585612
class variable"""

0 commit comments

Comments
 (0)