Skip to content

Commit f74aa11

Browse files
authored
Merge pull request #2084 from VesnaT/janez-merge-data
OWMergeData: New GUI, heuristic to suggest matching attributes
2 parents 319b91d + f5d5833 commit f74aa11

File tree

11 files changed

+729
-559
lines changed

11 files changed

+729
-559
lines changed

Orange/widgets/data/owmergedata.py

Lines changed: 333 additions & 251 deletions
Large diffs are not rendered by default.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
gender region name
2+
d d s
3+
4+
f 1 Ann
5+
m 2 Peter
6+
m 1 John
7+
m 3 Jose
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
id state continent
2+
d s d
3+
1 UK Europe
4+
2 Russia Europe
5+
3 Mexico America

Orange/widgets/data/tests/test_owmergedata.py

Lines changed: 324 additions & 283 deletions
Large diffs are not rendered by default.

Orange/widgets/visualize/owvenndiagram.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -867,8 +867,6 @@ def unique(seq):
867867
yield item
868868
seen.add(item)
869869

870-
from Orange.widgets.data.owmergedata import group_table_indices
871-
872870

873871
def unique_non_nan(ar):
874872
# metas have sometimes object dtype, but values are numpy floats
@@ -1651,6 +1649,19 @@ def filter_vars(vars):
16511649
return Orange.data.Table.from_table(domain, data)
16521650

16531651

1652+
def group_table_indices(table, key_var):
1653+
"""
1654+
Group table indices based on the values of the selected column (`key_var`).
1655+
1656+
Return a dictionary mapping each unique value of that column (as a string key)
1657+
to the list of indices of the table rows in which it occurs.
1658+
"""
1659+
groups = defaultdict(list)
1660+
for i, inst in enumerate(table):
1661+
groups[str(inst[key_var])].append(i)
1662+
return groups
1663+
1664+
16541665
def test():
16551666
from Orange.evaluation import ShuffleSplit
16561667

Orange/widgets/visualize/tests/test_owvenndiagram.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import unittest
55
import numpy as np
6+
from collections import defaultdict
67

78
from Orange.data import (Table, Domain, StringVariable,
89
DiscreteVariable, ContinuousVariable)
@@ -13,7 +14,8 @@
1314
table_concat,
1415
varying_between,
1516
drop_columns,
16-
OWVennDiagram)
17+
OWVennDiagram,
18+
group_table_indices)
1719
from Orange.tests import test_filename
1820

1921

@@ -174,3 +176,17 @@ def test_no_data(self):
174176
self.send_signal(self.signal_name, self.data[:100], 1)
175177
self.send_signal(self.signal_name, self.data[50:], 2)
176178
self.send_signal(self.signal_name, self.data[:0], 3)
179+
180+
181+
class GroupTableIndicesTest(unittest.TestCase):
182+
def test_group_table_indices(self):
183+
table = Table(test_filename("test9.tab"))
184+
dd = defaultdict(list)
185+
dd["1"] = [0, 1]
186+
dd["huh"] = [2]
187+
dd["hoy"] = [3]
188+
dd["?"] = [4]
189+
dd["2"] = [5]
190+
dd["oh yeah"] = [6]
191+
dd["3"] = [7]
192+
self.assertEqual(dd, group_table_indices(table, "g"))
1.22 KB
Loading
1.9 KB
Loading
53.9 KB
Loading
1.23 KB
Loading

0 commit comments

Comments
 (0)